/home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:16: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  / math.sqrt(query.size(-1))
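
The warning points at the scaled dot-product in bert_pytorch (single.py line 16): under torch.jit.trace, math.sqrt(query.size(-1)) is evaluated eagerly and frozen into the graph as a float constant, so the trace is only valid for the traced head size. A minimal sketch of the same pattern, with a hypothetical ScaledScores module standing in for the library's Attention class; scripting it instead of tracing keeps the size() call inside the graph:

    import math
    import torch

    class ScaledScores(torch.nn.Module):
        # Hypothetical stand-in for bert_pytorch's Attention, not the real class.
        def forward(self, query: torch.Tensor, key: torch.Tensor) -> torch.Tensor:
            # Traced: math.sqrt(query.size(-1)) is baked in as a constant,
            # which is exactly what the TracerWarning above complains about.
            # Scripted: size(-1) is computed in-graph and generalizes.
            return torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(query.size(-1))

    scripted = torch.jit.script(ScaledScores())
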
/home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/jit/_trace.py:983: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error:
Tensor-likes are not close!
Mismatched elements: 97297 / 98304 (99.0%)
Greatest absolute difference: 24.81045150756836 at index (0, 71, 4) (up to 1e-05 allowed)
Greatest relative difference: inf at index (0, 0, 5) (up to 1e-05 allowed)
  _check_trace(
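
A 99% element mismatch with an infinite relative difference is consistent with dropout still being active during tracing: the model is in training mode, each forward pass samples a fresh mask, and a zero in one run against a nonzero value in the other makes the relative difference blow up. A self-contained toy that reproduces the same warning, plus the usual fix (the names toy and x are illustrative, not SHARK's):

    import torch

    toy = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.Dropout(0.5))
    x = torch.randn(2, 8)

    # Train mode (the default): the trace check re-runs the function and
    # emits the same "Tensor-likes are not close!" TracerWarning as above.
    traced_noisy = torch.jit.trace(toy, x)

    # Eval mode disables dropout, so the forward pass is deterministic and
    # the check passes; passing check_trace=False would only hide the issue.
    toy.eval()
    traced_clean = torch.jit.trace(toy, x)
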
Traceback (most recent call last):
  File "/home/prashant/dSHARK/shark/examples/bert_torch.py", line 21, in <module>
    shark_module = SharkInference(
  File "/home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/shark/shark_runner.py", line 105, in __init__
    self.shark_runner = SharkRunner(self.model, self.input, dynamic, device,
  File "/home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/shark/shark_runner.py", line 62, in __init__
    self.torch_mlir_module = get_torch_mlir_module(model, input, dynamic,
  File "/home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/shark/torch_mlir_utils.py", line 114, in get_torch_mlir_module
    module = shark_jit_trace(module, input, dynamic, tracing_required)
  File "/home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/shark/torch_mlir_utils.py", line 77, in shark_jit_trace
    traced_module = torch.jit.trace_module(module, {"forward": input})
  File "/home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/jit/_trace.py", line 983, in trace_module
    _check_trace(
  File "/home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "/home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/jit/_trace.py", line 526, in _check_trace
    raise TracingCheckError(*diag_info)
torch.jit._trace.TracingCheckError: Tracing failed sanity checks!
ERROR: Graphs differed across invocations!
Graph diff:
graph(%self.1 : __torch__.BERT_torch,
%tokens : Tensor):
%model : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model)
%_11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks)
%dropout : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_11)
+ %model.505 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
+ %transformer_blocks.503 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.505)
+ %_11.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.503)
+ %feed_forward : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_11.39)
+ %w_2 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward)
%model.503 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.501 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.503)
- %_11.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.501)
?                                                                                                    ^
+ %_11.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.501)
?                                                                                                    ^
- %feed_forward : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_11.39)
?                                                                                                                                    ^
+ %feed_forward.93 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_11.37)
?              +++                                                                                                                   ^
- %w_2 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward)
? ^^^                             ^^^^^  ^^^^^                          ^^^
+ %dropout.143 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.93)
? ^^^^^^^^^^^                             ^ +++++  ^ +++++                          ^^^^^^^                +++
%model.501 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.499 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.501)
- %_11.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.499)
?       ^
+ %_11.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.499)
?       ^
- %feed_forward.93 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_11.37)
?               ^                                                                                                                       ^
+ %feed_forward.91 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_11.35)
?               ^                                                                                                                       ^
- %dropout.143 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.93)
+ %activation : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.91)
%model.499 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.497 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.499)
- %_11.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.497)
?       ^
+ %_11.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.497)
?       ^
- %feed_forward.91 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_11.35)
?                -                                                                                                                      ^
+ %feed_forward.89 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_11.33)
?                +                                                                                                                      ^
- %activation : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.91)
? ^^^^^^^^^^                             ^^         ^                          ^^^^^^^^^                ^^^^^^^^^^ -
+ %w_1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.89)
? ^^^                             ++++ ^^^        ^^ ^                          ^^^ +
%model.497 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.495 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.497)
- %_11.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.495)
?       ^
+ %_11.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.495)
?       ^
+ %output_sublayer : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_11.31)
- %feed_forward.89 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_11.33)
- %w_1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.89)
%model.495 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.493 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.495)
- %_11.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.493)
- %output_sublayer : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_11.31)
- %model.493 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
- %transformer_blocks.491 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.493)
- %_11.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.491)
?                                                                                                                    ^
+ %_11.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.493)
?                                                                                                                    ^
%lambda_module : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.29)
%attention : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module)
%output_linear : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention)
+ %model.493 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
+ %transformer_blocks.491 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.493)
+ %_11.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.491)
+ %lambda_module.143 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.27)
+ %attention.357 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.143)
+ %dropout.137 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.357)
+ %dropout.139 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.137)
%model.491 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.489 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.491)
- %_11.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.489)
?       ^
+ %_11.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.489)
?       ^
- %lambda_module.141 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.27)
?                                                                                                                       ^
+ %lambda_module.141 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.25)
?                                                                                                                       ^
- %attention.357 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.141)
?             ^
+ %attention.353 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.141)
?             ^
+ %attention.355 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.353)
- %dropout.137 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.357)
- %dropout.139 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.137)
%model.489 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.487 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.489)
- %_11.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.487)
?       ^
+ %_11.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.487)
?       ^
- %lambda_module.139 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.25)
?                                                                                                                       ^
+ %lambda_module.139 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.23)
?                                                                                                                       ^
- %attention.353 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.139)
?             ^
+ %attention.351 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.139)
?             ^
- %attention.355 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.353)
+ %linear_layers : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.351)
+ %_2 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers)
%model.487 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.485 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.487)
- %_11.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.485)
?       ^
+ %_11.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.485)
?       ^
- %lambda_module.137 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.23)
?                                                                                                                       ^
+ %lambda_module.137 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.21)
?                                                                                                                       ^
- %attention.351 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.137)
?             ^^
+ %attention.349 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.137)
?             ^^
- %linear_layers : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.351)
?                                                                                                                    ^^
+ %linear_layers.213 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.349)
?               ++++                                                                                                     ^^
- %_2 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers)
?   ^                                                                  ^
+ %_1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.213)
?   ^                                                                  ^                 ++++
%model.485 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.483 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.485)
- %_11.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.483)
?        -
+ %_11.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.483)
?        +
- %lambda_module.135 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.21)
?                                                                                                                        -
+ %lambda_module.135 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.19)
?                                                                                                                        +
- %attention.349 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.135)
?             ^
+ %attention.347 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.135)
?             ^
- %linear_layers.213 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.349)
?                 ^                                                                                                      ^
+ %linear_layers.211 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.347)
?                 ^                                                                                                      ^
- %_1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.213)
?   ^                                                                  ^                   ^
+ %_0 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.211)
?   ^                                                                  ^                   ^
%model.483 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.481 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.483)
- %_11.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.481)
?        ^
+ %_11.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.481)
?        ^
+ %input_sublayer : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_11.17)
- %lambda_module.133 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.19)
- %attention.347 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.133)
- %linear_layers.211 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.347)
- %_0 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.211)
%model.481 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.479 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.481)
- %_11.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.479)
?    ^^^^^                                                                                ^^
+ %_10 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.479)
?    ^                                                                                ^
- %input_sublayer.21 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_11.17)
+ %dropout.133 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_10)
%model.479 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.477 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.479)
- %_10 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.477)
+ %_10.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.477)
?     +++
- %dropout.133 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_10)
+ %feed_forward.87 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_10.39)
+ %w_2.21 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.87)
%model.477 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.475 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.477)
- %_10.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.475)
?       ^
+ %_10.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.475)
?       ^
- %feed_forward.87 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_10.39)
?               ^                                                                                                                      ^
+ %feed_forward.85 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_10.37)
?               ^                                                                                                                      ^
- %w_2.21 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.87)
? ^^^ -                             ^^^^^  ^^^^^                          ^^^                  ^
+ %dropout.131 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.85)
? ^^^^^^^ ++                             ^ +++++  ^ +++++                          ^^^^^^^                 ^
%model.475 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.473 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.475)
- %_10.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.473)
?       ^
+ %_10.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.473)
?       ^
- %feed_forward.85 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_10.37)
?               ^                                                                                                                      ^
+ %feed_forward.83 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_10.35)
?               ^                                                                                                                      ^
- %dropout.131 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.85)
+ %activation.21 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.83)
%model.473 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.471 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.473)
- %_10.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.471)
?       ^
+ %_10.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.471)
?       ^
- %feed_forward.83 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_10.35)
?               ^                                                                                                                      ^
+ %feed_forward.81 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_10.33)
?               ^                                                                                                                      ^
- %activation.21 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.83)
? ^^^^^^^^^^                             ^^         ^                          ^^^^^^^^^                ^^^^^^^^^^ ^
+ %w_1.21 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.81)
? ^^^                             ++++ ^^^        ^^ ^                          ^^^                ^
%model.471 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.469 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.471)
- %_10.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.469)
?       ^
+ %_10.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.469)
?       ^
+ %output_sublayer.21 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_10.31)
- %feed_forward.81 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_10.33)
- %w_1.21 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.81)
%model.469 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.467 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.469)
- %_10.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.467)
?      ^^
+ %_10.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.467)
?      ^^
- %output_sublayer.21 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_10.31)
+ %lambda_module.133 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.29)
+ %attention.345 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.133)
+ %output_linear.69 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.345)
%model.467 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.465 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.467)
- %_10.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.465)
?       ^
+ %_10.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.465)
?       ^
- %lambda_module.131 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.29)
?                                                                                                                       ^
+ %lambda_module.131 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.27)
?                                                                                                                       ^
- %attention.345 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.131)
?             ^
+ %attention.343 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.131)
?             ^
- %output_linear.69 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.345)
+ %dropout.125 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.343)
+ %dropout.127 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.125)
%model.465 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.463 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.465)
- %_10.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.463)
?       ^
+ %_10.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.463)
?       ^
- %lambda_module.129 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.27)
?                                                                                                                       ^
+ %lambda_module.129 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.25)
?                                                                                                                       ^
- %attention.343 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.129)
?              -
+ %attention.339 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.129)
?              +
+ %attention.341 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.339)
- %dropout.125 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.343)
- %dropout.127 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.125)
%model.463 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.461 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.463)
- %_10.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.461)
?       ^
+ %_10.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.461)
?       ^
- %lambda_module.127 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.25)
?                                                                                                                       ^
+ %lambda_module.127 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.23)
?                                                                                                                       ^
- %attention.339 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.127)
?             ^
+ %attention.337 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.127)
?             ^
- %attention.341 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.339)
+ %linear_layers.209 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.337)
+ %_2.111 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.209)
%model.461 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.459 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.461)
- %_10.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.459)
?       ^
+ %_10.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.459)
?       ^
- %lambda_module.125 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.23)
?                                                                                                                       ^
+ %lambda_module.125 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.21)
?                                                                                                                       ^
- %attention.337 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.125)
?             ^
+ %attention.335 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.125)
?             ^
- %linear_layers.209 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.337)
?                 ^                                                                                                      ^
+ %linear_layers.207 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.335)
?                 ^                                                                                                      ^
- %_2.109 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.209)
?   ^                                                                      ^                    ^
+ %_1.109 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.207)
?   ^                                                                      ^                    ^
%model.459 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.457 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.459)
- %_10.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.457)
?        -
+ %_10.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.457)
?        +
- %lambda_module.123 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.21)
?                                                                                                                        -
+ %lambda_module.123 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.19)
?                                                                                                                        +
- %attention.335 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.123)
?             ^
+ %attention.333 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.123)
?             ^
- %linear_layers.207 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.335)
?                 ^                                                                                                      ^
+ %linear_layers.205 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.333)
?                 ^                                                                                                      ^
- %_1.111 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.207)
?   ^   ^                                                                  ^                    ^
+ %_0.113 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.205)
?   ^   ^                                                                  ^                    ^
%model.457 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.455 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.457)
- %_10.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.455)
?        ^
+ %_10.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.455)
?        ^
+ %input_sublayer.21 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_10.17)
- %lambda_module.121 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.19)
- %attention.333 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.121)
- %linear_layers.205 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.333)
- %_0.111 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.205)
%model.455 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.453 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.455)
- %_10.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.453)
?    ^^^^^                                                                                ^^
+ %_9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.453)
?    ^                                                                                ^
- %input_sublayer.21 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_10.17)
+ %dropout.121 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_9)
%model.453 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.451 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.453)
- %_9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.451)
+ %_9.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.451)
?    +++
- %dropout.121 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_9)
+ %feed_forward.79 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_9.39)
+ %w_2.19 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.79)
%model.451 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.449 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.451)
- %_9.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.449)
?      ^
+ %_9.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.449)
?      ^
- %feed_forward.79 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_9.39)
?               ^                                                                                                                     ^
+ %feed_forward.77 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_9.37)
?               ^                                                                                                                     ^
- %w_2.19 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.79)
? ^^^                             ^^^^^  ^^^^^                          ^^^                    ^
+ %dropout.119 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.77)
? ^^^^^^^ +                             ^ +++++  ^ +++++                          ^^^^^^^                  ^
%model.449 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.447 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.449)
- %_9.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.447)
?      ^
+ %_9.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.447)
?      ^
- %feed_forward.77 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_9.37)
?               ^                                                                                                                     ^
+ %feed_forward.75 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_9.35)
?               ^                                                                                                                     ^
- %dropout.119 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.77)
+ %activation.19 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.75)
%model.447 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.445 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.447)
- %_9.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.445)
?      ^
+ %_9.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.445)
?      ^
- %feed_forward.75 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_9.35)
?               ^                                                                                                                     ^
+ %feed_forward.73 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_9.33)
?               ^                                                                                                                     ^
- %activation.19 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.75)
? ^^^^^^^^^^                             ^^         ^                          ^^^^^^^^^                ^^^^^^^^^^ ^
+ %w_1.19 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.73)
? ^^^                             ++++ ^^^        ^^ ^                          ^^^                ^
%model.445 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.443 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.445)
- %_9.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.443)
?      ^
+ %_9.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.443)
?      ^
+ %output_sublayer.19 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_9.31)
- %feed_forward.73 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_9.33)
- %w_1.19 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.73)
%model.443 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.441 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.443)
- %_9.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.441)
?     ^^
+ %_9.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.441)
?     ^^
- %output_sublayer.19 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_9.31)
+ %lambda_module.121 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.29)
+ %attention.331 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.121)
+ %output_linear.67 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.331)
%model.441 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.439 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.441)
- %_9.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.439)
?      ^
+ %_9.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.439)
?      ^
- %lambda_module.119 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.29)
?                                                                                                                      ^
+ %lambda_module.119 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.27)
?                                                                                                                      ^
- %attention.331 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.119)
?            ^^
+ %attention.329 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.119)
?            ^^
- %output_linear.67 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.331)
+ %dropout.113 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.329)
+ %dropout.115 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.113)
%model.439 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.437 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.439)
- %_9.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.437)
?      ^
+ %_9.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.437)
?      ^
- %lambda_module.117 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.27)
?                                                                                                                      ^
+ %lambda_module.117 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.25)
?                                                                                                                      ^
- %attention.329 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.117)
?             ^
+ %attention.325 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.117)
?             ^
+ %attention.327 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.325)
- %dropout.113 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.329)
- %dropout.115 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.113)
%model.437 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.435 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.437)
- %_9.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.435)
?      ^
+ %_9.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.435)
?      ^
- %lambda_module.115 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.25)
?                                                                                                                      ^
+ %lambda_module.115 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.23)
?                                                                                                                      ^
- %attention.325 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.115)
?             ^
+ %attention.323 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.115)
?             ^
- %attention.327 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.325)
+ %linear_layers.203 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.323)
+ %_2.109 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.203)
%model.435 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.433 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.435)
- %_9.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.433)
?      ^
+ %_9.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.433)
?      ^
- %lambda_module.113 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.23)
?                                                                                                                      ^
+ %lambda_module.113 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.21)
?                                                                                                                      ^
- %attention.323 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.113)
?             ^
+ %attention.321 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.113)
?             ^
- %linear_layers.203 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.323)
?                 ^                                                                                                      ^
+ %linear_layers.201 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.321)
?                 ^                                                                                                      ^
- %_2.109 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.203)
?   ^                                                                      ^                    ^
+ %_1.109 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.201)
?   ^                                                                      ^                    ^
%model.433 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.431 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.433)
- %_9.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.431)
?       -
+ %_9.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.431)
?       +
- %lambda_module.111 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.21)
?                                                                                                                       -
+ %lambda_module.111 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.19)
?                                                                                                                       +
- %attention.321 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.111)
?              -
+ %attention.319 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.111)
?              +
- %linear_layers.201 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.321)
?                --                                                                                                       -
+ %linear_layers.199 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.319)
?                ++                                                                                                       +
- %_1.109 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.201)
?   ^  ^^                                                                  ^                  --
+ %_0.111 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.199)
?   ^  ^^                                                                  ^                  ++
%model.431 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.429 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.431)
- %_9.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.429)
?       ^
+ %_9.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.429)
?       ^
+ %input_sublayer.19 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_9.17)
- %lambda_module.109 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.19)
- %attention.319 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.109)
- %linear_layers.199 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.319)
- %_0.109 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.199)
%model.429 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.427 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.429)
- %_9.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.427)
?    ^^^^                                                                               ^
+ %_8 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.427)
?    ^                                                                               ^
- %input_sublayer.19 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_9.17)
+ %dropout.109 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_8)
%model.427 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.425 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.427)
- %_8 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.425)
+ %_8.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.425)
?    +++
- %dropout.109 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_8)
+ %feed_forward.71 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_8.39)
+ %w_2.17 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.71)
%model.425 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.423 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.425)
- %_8.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.423)
?      ^
+ %_8.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.423)
?      ^
- %feed_forward.71 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_8.39)
?              ^^                                                                                                                     ^
+ %feed_forward.69 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_8.37)
?              ^^                                                                                                                     ^
- %w_2.17 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.71)
? ^^^                             ^^^^^  ^^^^^                          ^^^                   ^^
+ %dropout.107 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.69)
? ^^^^^^^ +                             ^ +++++  ^ +++++                          ^^^^^^^                 ^^
%model.423 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.421 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.423)
- %_8.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.421)
?      ^
+ %_8.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.421)
?      ^
- %feed_forward.69 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_8.37)
?               ^                                                                                                                     ^
+ %feed_forward.67 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_8.35)
?               ^                                                                                                                     ^
- %dropout.107 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.69)
+ %activation.17 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.67)
%model.421 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.419 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.421)
- %_8.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.419)
?      ^
+ %_8.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.419)
?      ^
- %feed_forward.67 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_8.35)
?               ^                                                                                                                     ^
+ %feed_forward.65 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_8.33)
?               ^                                                                                                                     ^
- %activation.17 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.67)
? ^^^^^^^^^^                             ^^         ^                          ^^^^^^^^^                ^^^^^^^^^^ ^
+ %w_1.17 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.65)
? ^^^                             ++++ ^^^        ^^ ^                          ^^^                ^
%model.419 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.417 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.419)
- %_8.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.417)
?      ^
+ %_8.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.417)
?      ^
+ %output_sublayer.17 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_8.31)
- %feed_forward.65 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_8.33)
- %w_1.17 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.65)
%model.417 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.415 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.417)
- %_8.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.415)
?     ^^
+ %_8.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.415)
?     ^^
- %output_sublayer.17 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_8.31)
+ %lambda_module.109 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.29)
+ %attention.317 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.109)
+ %output_linear.65 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.317)
%model.415 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.413 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.415)
- %_8.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.413)
?      ^
+ %_8.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.413)
?      ^
- %lambda_module.107 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.29)
?                                                                                                                      ^
+ %lambda_module.107 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.27)
?                                                                                                                      ^
- %attention.317 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.107)
?             ^
+ %attention.315 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.107)
?             ^
- %output_linear.65 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.317)
+ %dropout.101 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.315)
+ %dropout.103 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.101)
%model.413 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.411 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.413)
- %_8.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.411)
?      ^
+ %_8.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.411)
?      ^
- %lambda_module.105 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.27)
?                                                                                                                      ^
+ %lambda_module.105 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.25)
?                                                                                                                      ^
- %attention.315 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.105)
?             ^
+ %attention.311 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.105)
?             ^
+ %attention.313 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.311)
- %dropout.101 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.315)
- %dropout.103 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.101)
%model.411 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.409 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.411)
- %_8.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.409)
?      ^
+ %_8.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.409)
?      ^
- %lambda_module.103 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.25)
?                                                                                                                      ^
+ %lambda_module.103 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.23)
?                                                                                                                      ^
- %attention.311 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.103)
?            ^^
+ %attention.309 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.103)
?            ^^
- %attention.313 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.311)
+ %linear_layers.197 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.309)
+ %_2.107 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.197)
%model.409 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.407 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.409)
- %_8.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.407)
?      ^
+ %_8.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.407)
?      ^
- %lambda_module.101 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.23)
?                                                                                                                      ^
+ %lambda_module.101 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.21)
?                                                                                                                      ^
- %attention.309 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.101)
?             ^
+ %attention.307 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.101)
?             ^
- %linear_layers.197 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.309)
?                 ^                                                                                                      ^
+ %linear_layers.195 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.307)
?                 ^                                                                                                      ^
- %_2.107 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.197)
? ^ ^ ^ | |
+ %_1.107 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.195) | |
? ^ ^ ^ | |
%model.407 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.405 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.407) | |
- %_8.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.405) | |
? - | |
+ %_8.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.405) | |
? + | |
- %lambda_module.99 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.21) | |
? - | |
+ %lambda_module.99 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.19) | |
? + | |
- %attention.307 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.99) | |
? ^ | |
+ %attention.305 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.99) | |
? ^ | |
- %linear_layers.195 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.307) | |
? ^ ^ | |
+ %linear_layers.193 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.305) | |
? ^ ^ | |
- %_1.107 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.195) | |
? ^ ^ ^ ^ | |
+ %_0.109 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.193) | |
? ^ ^ ^ ^ | |
%model.405 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.403 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.405) | |
- %_8.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.403) | |
? ^ | |
+ %_8.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.403) | |
? ^ | |
+ %input_sublayer.17 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_8.17) | |
- %lambda_module.97 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.19) | |
- %attention.305 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.97) | |
- %linear_layers.193 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.305) | |
- %_0.107 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.193) | |
%model.403 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.401 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.403) | |
- %_8.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.401) | |
? --- ^ | |
+ %_7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.401) | |
? ^ | |
- %input_sublayer.17 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_8.17) | |
+ %dropout.97 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_7) | |
%model.401 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.399 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.401) | |
- %_7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.399) | |
+ %_7.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.399) | |
? +++ | |
- %dropout.97 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_7) | |
+ %feed_forward.63 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_7.39) | |
+ %w_2.15 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.63) | |
%model.399 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.397 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.399) | |
- %_7.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.397) | |
? ^ | |
+ %_7.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.397) | |
? ^ | |
- %feed_forward.63 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_7.39) | |
? ^ ^ | |
+ %feed_forward.61 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_7.37) | |
? ^ ^ | |
- %w_2.15 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.63) | |
? ^^^ ^ ^^^^^ ^^^^^ ^^^ ^ | |
+ %dropout.95 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.61) | |
? ^^^^^^^ ^ ^ +++++ ^ +++++ ^^^^^^^ ^ | |
%model.397 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.395 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.397) | |
- %_7.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.395) | |
? ^ | |
+ %_7.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.395) | |
? ^ | |
- %feed_forward.61 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_7.37) | |
? ^^ ^ | |
+ %feed_forward.59 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_7.35) | |
? ^^ ^ | |
- %dropout.95 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.61) | |
+ %activation.15 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.59) | |
%model.395 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.393 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.395) | |
- %_7.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.393) | |
? ^ | |
+ %_7.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.393) | |
? ^ | |
- %feed_forward.59 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_7.35) | |
? ^ ^ | |
+ %feed_forward.57 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_7.33) | |
? ^ ^ | |
- %activation.15 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.59) | |
? ^^^^^^^^^^ ^^ ^ ^^^^^^^^^ ^^^^^^^^^^ ^ | |
+ %w_1.15 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.57) | |
? ^^^ ++++ ^^^ ^^ ^ ^^^ ^ | |
%model.393 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.391 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.393) | |
- %_7.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.391) | |
? ^ | |
+ %_7.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.391) | |
? ^ | |
+ %output_sublayer.15 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_7.31) | |
- %feed_forward.57 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_7.33) | |
- %w_1.15 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.57) | |
%model.391 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.389 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.391) | |
- %_7.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.389) | |
? ^^ | |
+ %_7.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.389) | |
? ^^ | |
- %output_sublayer.15 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_7.31) | |
+ %lambda_module.97 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.29) | |
+ %attention.303 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.97) | |
+ %output_linear.63 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.303) | |
%model.389 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.387 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.389) | |
- %_7.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.387) | |
? ^ | |
+ %_7.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.387) | |
? ^ | |
- %lambda_module.95 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.29) | |
? ^ | |
+ %lambda_module.95 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.27) | |
? ^ | |
- %attention.303 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.95) | |
? ^ | |
+ %attention.301 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.95) | |
? ^ | |
- %output_linear.63 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.303) | |
+ %dropout.89 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.301) | |
+ %dropout.91 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.89) | |
%model.387 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.385 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.387) | |
- %_7.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.385) | |
? ^ | |
+ %_7.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.385) | |
? ^ | |
- %lambda_module.93 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.27) | |
? ^ | |
+ %lambda_module.93 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.25) | |
? ^ | |
- %attention.301 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.93) | |
? ^^^ | |
+ %attention.297 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.93) | |
? ^^^ | |
+ %attention.299 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.297) | |
- %dropout.89 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.301) | |
- %dropout.91 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.89) | |
%model.385 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.383 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.385) | |
- %_7.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.383) | |
? ^ | |
+ %_7.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.383) | |
? ^ | |
- %lambda_module.91 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.25) | |
? ^ | |
+ %lambda_module.91 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.23) | |
? ^ | |
- %attention.297 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.91) | |
? ^ | |
+ %attention.295 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.91) | |
? ^ | |
- %attention.299 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.297) | |
+ %linear_layers.191 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.295) | |
+ %_2.105 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.191) | |
%model.383 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.381 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.383) | |
- %_7.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.381) | |
? ^ | |
+ %_7.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.381) | |
? ^ | |
- %lambda_module.89 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.23) | |
? ^ | |
+ %lambda_module.89 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.21) | |
? ^ | |
- %attention.295 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.89) | |
? ^ | |
+ %attention.293 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.89) | |
? ^ | |
- %linear_layers.191 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.295) | |
? - ^ | |
+ %linear_layers.189 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.293) | |
? + ^ | |
- %_2.105 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.191) | |
? ^ ^ - | |
+ %_1.105 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.189) | |
? ^ ^ + | |
%model.381 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.379 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.381) | |
- %_7.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.379) | |
? - | |
+ %_7.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.379) | |
? + | |
- %lambda_module.87 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.21) | |
? - | |
+ %lambda_module.87 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.19) | |
? + | |
- %attention.293 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.87) | |
? ^ | |
+ %attention.291 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.87) | |
? ^ | |
- %linear_layers.189 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.293) | |
? ^ ^ | |
+ %linear_layers.187 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.291) | |
? ^ ^ | |
- %_1.105 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.189) | |
? ^ ^ ^ ^ | |
+ %_0.107 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.187) | |
? ^ ^ ^ ^ | |
%model.379 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.377 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.379) | |
- %_7.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.377) | |
? ^ | |
+ %_7.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.377) | |
? ^ | |
+ %input_sublayer.15 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_7.17) | |
- %lambda_module.85 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.19) | |
- %attention.291 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.85) | |
- %linear_layers.187 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.291) | |
- %_0.105 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.187) | |
%model.377 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.375 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.377) | |
- %_7.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.375) | |
? ^^^^ ^ | |
+ %_6 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.375) | |
? ^ ^ | |
- %input_sublayer.15 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_7.17) | |
+ %dropout.85 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_6) | |
%model.375 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.373 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.375) | |
- %_6 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.373) | |
+ %_6.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.373) | |
? +++ | |
- %dropout.85 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_6) | |
+ %feed_forward.55 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_6.39) | |
+ %w_2.13 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.55) | |
%model.373 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.371 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.373) | |
- %_6.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.371) | |
? ^ | |
+ %_6.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.371) | |
? ^ | |
- %feed_forward.55 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_6.39) | |
? ^ ^ | |
+ %feed_forward.53 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_6.37) | |
? ^ ^ | |
- %w_2.13 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.55) | |
? ^^^ ^ ^^^^^ ^^^^^ ^^^ ^ | |
+ %dropout.83 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.53) | |
? ^^^^^^^ ^ ^ +++++ ^ +++++ ^^^^^^^ ^ | |
%model.371 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.369 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.371) | |
- %_6.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.369) | |
? ^ | |
+ %_6.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.369) | |
? ^ | |
- %feed_forward.53 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_6.37) | |
? ^ ^ | |
+ %feed_forward.51 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_6.35) | |
? ^ ^ | |
- %dropout.83 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.53) | |
+ %activation.13 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.51) | |
%model.369 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.367 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.369) | |
- %_6.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.367) | |
? ^ | |
+ %_6.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.367) | |
? ^ | |
- %feed_forward.51 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_6.35) | |
? ^^ ^ | |
+ %feed_forward.49 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_6.33) | |
? ^^ ^ | |
- %activation.13 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.51) | |
? ^^^^^^^^^^ ^^ ^ ^^^^^^^^^ ^^^^^^^^^^ ^^ | |
+ %w_1.13 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.49) | |
? ^^^ ++++ ^^^ ^^ ^ ^^^ ^^ | |
%model.367 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.365 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.367) | |
- %_6.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.365) | |
? ^ | |
+ %_6.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.365) | |
? ^ | |
+ %output_sublayer.13 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_6.31) | |
- %feed_forward.49 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_6.33) | |
- %w_1.13 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.49) | |
%model.365 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.363 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.365) | |
- %_6.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.363) | |
? ^^ | |
+ %_6.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.363) | |
? ^^ | |
- %output_sublayer.13 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_6.31) | |
+ %lambda_module.85 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.29) | |
+ %attention.289 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.85) | |
+ %output_linear.61 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.289) | |
%model.363 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.361 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.363) | |
- %_6.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.361) | |
? ^ | |
+ %_6.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.361) | |
? ^ | |
- %lambda_module.83 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.29) | |
? ^ | |
+ %lambda_module.83 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.27) | |
? ^ | |
- %attention.289 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.83) | |
? ^ | |
+ %attention.287 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.83) | |
? ^ | |
- %output_linear.61 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.289) | |
+ %dropout.77 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.287) | |
+ %dropout.79 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.77) | |
%model.361 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.359 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.361) | |
- %_6.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.359) | |
? ^ | |
+ %_6.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.359) | |
? ^ | |
- %lambda_module.81 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.27) | |
? ^ | |
+ %lambda_module.81 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.25) | |
? ^ | |
- %attention.287 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.81) | |
? ^ | |
+ %attention.283 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.81) | |
? ^ | |
+ %attention.285 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.283) | |
- %dropout.77 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.287) | |
- %dropout.79 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.77) | |
%model.359 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.357 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.359) | |
- %_6.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.357) | |
? ^ | |
+ %_6.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.357) | |
? ^ | |
- %lambda_module.79 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.25) | |
? ^ | |
+ %lambda_module.79 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.23) | |
? ^ | |
- %attention.283 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.79) | |
? ^ | |
+ %attention.281 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.79) | |
? ^ | |
- %attention.285 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.283) | |
+ %linear_layers.185 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.281) | |
+ %_2.103 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.185) | |
%model.357 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.355 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.357) | |
- %_6.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.355) | |
? ^ | |
+ %_6.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.355) | |
? ^ | |
- %lambda_module.77 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.23) | |
? ^ | |
+ %lambda_module.77 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.21) | |
? ^ | |
- %attention.281 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.77) | |
? ^^ | |
+ %attention.279 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.77) | |
? ^^ | |
- %linear_layers.185 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.281) | |
? ^ ^^ | |
+ %linear_layers.183 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.279) | |
? ^ ^^ | |
- %_2.103 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.185) | |
? ^ ^ ^ | |
+ %_1.103 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.183) | |
? ^ ^ ^ | |
%model.355 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.353 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.355) | |
- %_6.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.353) | |
? - | |
+ %_6.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.353) | |
? + | |
- %lambda_module.75 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.21) | |
? - | |
+ %lambda_module.75 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.19) | |
? + | |
- %attention.279 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.75) | |
? ^ | |
+ %attention.277 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.75) | |
? ^ | |
- %linear_layers.183 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.279) | |
? ^ ^ | |
+ %linear_layers.181 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.277) | |
? ^ ^ | |
- %_1.103 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.183) | |
? ^ ^ ^ ^ | |
+ %_0.105 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.181) | |
? ^ ^ ^ ^ | |
%model.353 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.351 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.353) | |
- %_6.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.351) | |
? ^ | |
+ %_6.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.351) | |
? ^ | |
+ %input_sublayer.13 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_6.17) | |
- %lambda_module.73 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.19) | |
- %attention.277 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.73) | |
- %linear_layers.181 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.277) | |
- %_0.103 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.181) | |
%model.351 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.349 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.351) | |
- %_6.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.349) | |
? ^^^^ ^ | |
+ %_5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.349) | |
? ^ ^ | |
- %input_sublayer.13 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_6.17) | |
+ %dropout.73 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_5) | |
%model.349 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.347 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.349) | |
- %_5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.347) | |
+ %_5.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.347) | |
? +++ | |
- %dropout.73 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_5) | |
+ %feed_forward.47 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_5.39) | |
+ %w_2.11 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.47) | |
%model.347 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.345 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.347) | |
- %_5.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.345) | |
? ^ | |
+ %_5.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.345) | |
? ^ | |
- %feed_forward.47 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_5.39) | |
? ^ ^ | |
+ %feed_forward.45 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_5.37) | |
? ^ ^ | |
- %w_2.11 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.47) | |
? ^^^ - ^^^^^ ^^^^^ ^^^ ^ | |
+ %dropout.71 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.45) | |
? ^^^^^^^ + ^ +++++ ^ +++++ ^^^^^^^ ^ | |
%model.345 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.343 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.345) | |
- %_5.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.343) | |
? ^ | |
+ %_5.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.343) | |
? ^ | |
- %feed_forward.45 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_5.37) | |
? ^ ^ | |
+ %feed_forward.43 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_5.35) | |
? ^ ^ | |
- %dropout.71 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.45) | |
+ %activation.11 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.43) | |
%model.343 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.341 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.343) | |
- %_5.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.341) | |
? ^ | |
+ %_5.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.341) | |
? ^ | |
- %feed_forward.43 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_5.35) | |
? ^ ^ | |
+ %feed_forward.41 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_5.33) | |
? ^ ^ | |
- %activation.11 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.43) | |
? ^^^^^^^^^^ ^^ ^ ^^^^^^^^^ ^^^^^^^^^^ ^ | |
+ %w_1.11 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.41) | |
? ^^^ ++++ ^^^ ^^ ^ ^^^ ^ | |
%model.341 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.339 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.341) | |
- %_5.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.339) | |
? ^ | |
+ %_5.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.339) | |
? ^ | |
+ %output_sublayer.11 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_5.31) | |
- %feed_forward.41 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_5.33) | |
- %w_1.11 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.41) | |
%model.339 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.337 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.339) | |
- %_5.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.337) | |
? ^^ | |
+ %_5.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.337) | |
? ^^ | |
- %output_sublayer.11 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_5.31) | |
+ %lambda_module.73 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.29) | |
+ %attention.275 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.73) | |
+ %output_linear.59 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.275) | |
%model.337 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.335 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.337) | |
- %_5.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.335) | |
? ^ | |
+ %_5.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.335) | |
? ^ | |
- %lambda_module.71 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.29) | |
? ^ | |
+ %lambda_module.71 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.27) | |
? ^ | |
- %attention.275 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.71) | |
? ^ | |
+ %attention.273 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.71) | |
? ^ | |
- %output_linear.59 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.275) | |
+ %dropout.65 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.273) | |
+ %dropout.67 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.65) | |
%model.335 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.333 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.335) | |
- %_5.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.333) | |
? ^ | |
+ %_5.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.333) | |
? ^ | |
- %lambda_module.69 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.27) | |
? ^ | |
+ %lambda_module.69 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.25) | |
? ^ | |
- %attention.273 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.69) | |
? ^^ | |
+ %attention.269 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.69) | |
? ^^ | |
+ %attention.271 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.269) | |
- %dropout.65 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.273) | |
- %dropout.67 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.65) | |
%model.333 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.331 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.333) | |
- %_5.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.331) | |
? ^ | |
+ %_5.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.331) | |
? ^ | |
- %lambda_module.67 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.25) | |
? ^ | |
+ %lambda_module.67 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.23) | |
? ^ | |
- %attention.269 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.67) | |
? ^ | |
+ %attention.267 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.67) | |
? ^ | |
- %attention.271 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.269) | |
+ %linear_layers.179 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.267) | |
+ %_2.101 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.179) | |
%model.331 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.329 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.331) | |
- %_5.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.329) | |
? ^ | |
+ %_5.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.329) | |
? ^ | |
- %lambda_module.65 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.23) | |
? ^ | |
+ %lambda_module.65 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.21) | |
? ^ | |
- %attention.267 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.65) | |
? ^ | |
+ %attention.265 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.65) | |
? ^ | |
- %linear_layers.179 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.267) | |
? ^ ^ | |
+ %linear_layers.177 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.265) | |
? ^ ^ | |
- %_2.101 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.179) | |
? ^ ^ ^ | |
+ %_1.101 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.177) | |
? ^ ^ ^ | |
%model.329 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.327 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.329) | |
- %_5.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.327) | |
? - | |
+ %_5.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.327) | |
? + | |
- %lambda_module.63 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.21) | |
? - | |
+ %lambda_module.63 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.19) | |
? + | |
- %attention.265 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.63) | |
? ^ | |
+ %attention.263 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.63) | |
? ^ | |
- %linear_layers.177 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.265) | |
? ^ ^ | |
+ %linear_layers.175 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.263) | |
? ^ ^ | |
- %_1.101 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.177) | |
? ^ ^ ^ ^ | |
+ %_0.103 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.175) | |
? ^ ^ ^ ^ | |
%model.327 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.325 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.327) | |
- %_5.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.325) | |
? ^ | |
+ %_5.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.325) | |
? ^ | |
+ %input_sublayer.11 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_5.17) | |
- %lambda_module.61 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.19) | |
- %attention.263 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.61) | |
- %linear_layers.175 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.263) | |
- %_0.101 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.175) | |
%model.325 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.323 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.325) | |
- %_5.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.323) | |
? ^^^^ ^ | |
+ %_4 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.323) | |
? ^ ^ | |
- %input_sublayer.11 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_5.17) | |
+ %dropout.61 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_4) | |
%model.323 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.321 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.323) | |
- %_4 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.321) | |
+ %_4.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.321) | |
? +++ | |
- %dropout.61 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_4) | |
+ %feed_forward.39 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_4.39) | |
+ %w_2.9 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.39) | |
%model.321 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.319 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.321) | |
- %_4.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.319) | |
? ^ | |
+ %_4.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.319) | |
? ^ | |
- %feed_forward.39 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_4.39) | |
? ^ ^ | |
+ %feed_forward.37 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_4.37) | |
? ^ ^ | |
- %w_2.9 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.39) | |
? ^^^ ^^^^^ ^^^^^ ^^^ ^ | |
+ %dropout.59 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.37) | |
? ^^^^^^^ + ^ +++++ ^ +++++ ^^^^^^^ ^ | |
%model.319 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.317 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.319) | |
- %_4.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.317) | |
? ^ | |
+ %_4.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.317) | |
? ^ | |
- %feed_forward.37 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_4.37) | |
? ^ ^ | |
+ %feed_forward.35 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_4.35) | |
? ^ ^ | |
- %dropout.59 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.37) | |
+ %activation.9 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.35) | |
%model.317 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.315 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.317) | |
- %_4.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.315) | |
? ^ | |
+ %_4.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.315) | |
? ^ | |
- %feed_forward.35 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_4.35) | |
? ^ ^ | |
+ %feed_forward.33 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_4.33) | |
? ^ ^ | |
- %activation.9 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.35) | |
? ^^^^^^^^^^ ^^ ^ ^^^^^^^^^ ^^^^^^^^^^ ^ | |
+ %w_1.9 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.33) | |
? ^^^ ++++ ^^^ ^^ ^ ^^^ ^ | |
%model.315 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.313 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.315) | |
- %_4.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.313) | |
? ^ | |
+ %_4.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.313) | |
? ^ | |
+ %output_sublayer.9 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_4.31) | |
- %feed_forward.33 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_4.33) | |
- %w_1.9 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.33) | |
%model.313 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.311 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.313) | |
- %_4.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.311) | |
? ^^ | |
+ %_4.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.311) | |
? ^^ | |
- %output_sublayer.9 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_4.31) | |
+ %lambda_module.61 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.29) | |
+ %attention.261 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.61) | |
+ %output_linear.57 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.261) | |
%model.311 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.309 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.311) | |
- %_4.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.309) | |
? ^ | |
+ %_4.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.309) | |
? ^ | |
- %lambda_module.59 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.29) | |
? ^ | |
+ %lambda_module.59 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.27) | |
? ^ | |
- %attention.261 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.59) | |
? ^^ | |
+ %attention.259 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.59) | |
? ^^ | |
- %output_linear.57 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.261) | |
+ %dropout.53 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.259) | |
+ %dropout.55 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.53) | |
%model.309 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.307 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.309) | |
- %_4.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.307) | |
? ^ | |
+ %_4.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.307) | |
? ^ | |
- %lambda_module.57 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.27) | |
? ^ | |
+ %lambda_module.57 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.25) | |
? ^ | |
- %attention.259 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.57) | |
? ^ | |
+ %attention.255 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.57) | |
? ^ | |
+ %attention.257 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.255) | |
- %dropout.53 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.259) | |
- %dropout.55 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.53) | |
%model.307 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.305 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.307) | |
- %_4.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.305) | |
? ^ | |
+ %_4.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.305) | |
? ^ | |
- %lambda_module.55 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.25) | |
? ^ | |
+ %lambda_module.55 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.23) | |
? ^ | |
- %attention.255 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.55) | |
? ^ | |
+ %attention.253 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.55) | |
? ^ | |
- %attention.257 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.255) | |
+ %linear_layers.173 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.253) | |
+ %_2.99 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.173) | |
%model.305 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.303 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.305) | |
- %_4.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.303) | |
? ^ | |
+ %_4.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.303) | |
? ^ | |
- %lambda_module.53 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.23) | |
? ^ | |
+ %lambda_module.53 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.21) | |
? ^ | |
- %attention.253 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.53) | |
? ^ | |
+ %attention.251 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.53) | |
? ^ | |
- %linear_layers.173 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.253) | |
? ^ ^ | |
+ %linear_layers.171 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.251) | |
? ^ ^ | |
- %_2.99 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.173) | |
? ^ ^ ^ | |
+ %_1.99 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.171) | |
? ^ ^ ^ | |
%model.303 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.301 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.303) | |
- %_4.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.301) | |
? - | |
+ %_4.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.301) | |
? + | |
- %lambda_module.51 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.21) | |
? - | |
+ %lambda_module.51 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.19) | |
? + | |
- %attention.251 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.51) | |
? ^^ | |
+ %attention.249 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.51) | |
? ^^ | |
- %linear_layers.171 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.251) | |
? ^^ ^^ | |
+ %linear_layers.169 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.249) | |
? ^^ ^^ | |
- %_1.99 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.171) | |
? ^^^ ^ ^^ | |
+ %_0.101 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.169) | |
? ++ ^^ ^ ^^ | |
%model.301 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.299 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.301) | |
- %_4.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.299) | |
? ^ | |
+ %_4.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.299) | |
? ^ | |
+ %input_sublayer.9 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_4.17) | |
- %lambda_module.49 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.19) | |
- %attention.249 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.49) | |
- %linear_layers.169 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.249) | |
- %_0.99 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.169) | |
%model.299 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.297 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.299) | |
- %_4.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.297) | |
? ^^^^ ^ | |
+ %_3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.297) | |
? ^ ^ | |
- %input_sublayer.9 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_4.17) | |
+ %dropout.49 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_3) | |
%model.297 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.295 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.297) | |
- %_3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.295) | |
+ %_3.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.295) | |
? +++ | |
- %dropout.49 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_3) | |
+ %feed_forward.31 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_3.39) | |
+ %w_2.7 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.31) | |
%model.295 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.293 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.295) | |
- %_3.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.293) | |
? ^ | |
+ %_3.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.293) | |
? ^ | |
- %feed_forward.31 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_3.39) | |
? ^^ ^ | |
+ %feed_forward.29 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_3.37) | |
? ^^ ^ | |
- %w_2.7 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.31) | |
? ^^^ ^^^^^ ^^^^^ ^^^ ^^ | |
+ %dropout.47 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.29) | |
? ^^^^^^^ + ^ +++++ ^ +++++ ^^^^^^^ ^^ | |
%model.293 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.291 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.293) | |
- %_3.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.291) | |
? ^ | |
+ %_3.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.291) | |
? ^ | |
- %feed_forward.29 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_3.37) | |
? ^ ^ | |
+ %feed_forward.27 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_3.35) | |
? ^ ^ | |
- %dropout.47 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.29) | |
+ %activation.7 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.27) | |
%model.291 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.289 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.291) | |
- %_3.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.289) | |
? ^ | |
+ %_3.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.289) | |
? ^ | |
- %feed_forward.27 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_3.35) | |
? ^ ^ | |
+ %feed_forward.25 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_3.33) | |
? ^ ^ | |
- %activation.7 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.27) | |
? ^^^^^^^^^^ ^^ ^ ^^^^^^^^^ ^^^^^^^^^^ ^ | |
+ %w_1.7 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.25) | |
? ^^^ ++++ ^^^ ^^ ^ ^^^ ^ | |
%model.289 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.287 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.289) | |
- %_3.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.287) | |
? ^ | |
+ %_3.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.287) | |
? ^ | |
+ %output_sublayer.7 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_3.31) | |
- %feed_forward.25 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_3.33) | |
- %w_1.7 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.25) | |
%model.287 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.285 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.287) | |
- %_3.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.285) | |
? ^^ | |
+ %_3.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.285) | |
? ^^ | |
- %output_sublayer.7 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_3.31) | |
+ %lambda_module.49 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.29) | |
+ %attention.247 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.49) | |
+ %output_linear.55 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.247) | |
%model.285 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.283 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.285) | |
- %_3.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.283) | |
? ^ | |
+ %_3.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.283) | |
? ^ | |
- %lambda_module.47 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.29) | |
? ^ | |
+ %lambda_module.47 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.27) | |
? ^ | |
- %attention.247 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.47) | |
? ^ | |
+ %attention.245 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.47) | |
? ^ | |
- %output_linear.55 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.247) | |
+ %dropout.41 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.245) | |
+ %dropout.43 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.41) | |
%model.283 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.281 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.283) | |
- %_3.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.281) | |
? ^ | |
+ %_3.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.281) | |
? ^ | |
- %lambda_module.45 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.27) | |
? ^ | |
+ %lambda_module.45 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.25) | |
? ^ | |
- %attention.245 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.45) | |
? ^ | |
+ %attention.241 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.45) | |
? ^ | |
+ %attention.243 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.241) | |
- %dropout.41 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.245) | |
- %dropout.43 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.41) | |
%model.281 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.279 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.281) | |
- %_3.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.279) | |
? ^ | |
+ %_3.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.279) | |
? ^ | |
- %lambda_module.43 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.25) | |
? ^ | |
+ %lambda_module.43 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.23) | |
? ^ | |
- %attention.241 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.43) | |
? ^^ | |
+ %attention.239 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.43) | |
? ^^ | |
- %attention.243 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.241) | |
+ %linear_layers.167 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.239) | |
+ %_2.97 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.167) | |
%model.279 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.277 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.279) | |
- %_3.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.277) | |
? ^ | |
+ %_3.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.277) | |
? ^ | |
- %lambda_module.41 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.23) | |
? ^ | |
+ %lambda_module.41 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.21) | |
? ^ | |
- %attention.239 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.41) | |
? ^ | |
+ %attention.237 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.41) | |
? ^ | |
- %linear_layers.167 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.239) | |
? ^ ^ | |
+ %linear_layers.165 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.237) | |
? ^ ^ | |
- %_2.97 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.167) | |
? ^ ^ ^ | |
+ %_1.97 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.165) | |
? ^ ^ ^ | |
%model.277 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.275 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.277) | |
- %_3.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.275) | |
? - | |
+ %_3.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.275) | |
? + | |
- %lambda_module.39 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.21) | |
? - | |
+ %lambda_module.39 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.19) | |
? + | |
- %attention.237 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.39) | |
? ^ | |
+ %attention.235 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.39) | |
? ^ | |
- %linear_layers.165 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.237) | |
? ^ ^ | |
+ %linear_layers.163 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.235) | |
? ^ ^ | |
- %_1.97 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.165) | |
? ^ ^ ^ ^ | |
+ %_0.99 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.163) | |
? ^ ^ ^ ^ | |
%model.275 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.273 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.275) | |
- %_3.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.273) | |
? ^ | |
+ %_3.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.273) | |
? ^ | |
+ %input_sublayer.7 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_3.17) | |
- %lambda_module.37 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.19) | |
- %attention.235 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.37) | |
- %linear_layers.163 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.235) | |
- %_0.97 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.163) | |
%model.273 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.271 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.273) | |
- %_3.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.271) | |
? ^ ^^ ^ | |
+ %_2.95 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.271) | |
? ^ ^^ ^ | |
- %input_sublayer.7 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_3.17) | |
+ %dropout.37 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_2.95) | |
%model.271 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.269 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.271) | |
- %_2.95 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.269) | |
? ^ | |
+ %_2.93 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.269) | |
? ^ | |
- %dropout.37 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_2.95) | |
+ %feed_forward.23 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_2.93) | |
+ %w_2.5 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.23) | |
%model.269 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.267 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.269) | |
- %_2.93 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.267) | |
? ^ | |
+ %_2.91 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.267) | |
? ^ | |
- %feed_forward.23 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_2.93) | |
? ^ ^ | |
+ %feed_forward.21 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_2.91) | |
? ^ ^ | |
- %w_2.5 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.23) | |
? ^^^ ^^^^^ ^^^^^ ^^^ ^ | |
+ %dropout.35 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.21) | |
? ^^^^^^^ + ^ +++++ ^ +++++ ^^^^^^^ ^ | |
%model.267 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.265 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.267) | |
- %_2.91 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.265) | |
? - | |
+ %_2.89 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.265) | |
? + | |
- %feed_forward.21 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_2.91) | |
? - - | |
+ %feed_forward.19 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_2.89) | |
? + + | |
- %dropout.35 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.21) | |
+ %activation.5 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.19) | |
%model.265 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.263 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.265) | |
- %_2.89 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.263) | |
? ^ | |
+ %_2.87 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.263) | |
? ^ | |
- %feed_forward.19 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_2.89) | |
? ^ ^ | |
+ %feed_forward.17 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_2.87) | |
? ^ ^ | |
- %activation.5 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.19) | |
? ^^^^^^^^^^ ^^ ^ ^^^^^^^^^ ^^^^^^^^^^ ^ | |
+ %w_1.5 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.17) | |
? ^^^ ++++ ^^^ ^^ ^ ^^^ ^ | |
%model.263 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.261 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.263) | |
- %_2.87 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.261) | |
? ^ | |
+ %_2.85 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.261) | |
? ^ | |
+ %output_sublayer.5 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_2.85) | |
- %feed_forward.17 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_2.87) | |
- %w_1.5 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.17) | |
%model.261 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.259 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.261) | |
- %_2.85 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.259) | |
? ^ | |
+ %_2.83 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.259) | |
? ^ | |
- %output_sublayer.5 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_2.85) | |
+ %lambda_module.37 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.83) | |
+ %attention.233 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.37) | |
+ %output_linear.53 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.233) | |
%model.259 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.257 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.259) | |
- %_2.83 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.257) | |
? ^ | |
+ %_2.81 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.257) | |
? ^ | |
- %lambda_module.35 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.83) | |
? ^ | |
+ %lambda_module.35 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.81) | |
? ^ | |
- %attention.233 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.35) | |
? ^ | |
+ %attention.231 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.35) | |
? ^ | |
- %output_linear.53 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.233) | |
+ %dropout.29 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.231) | |
+ %dropout.31 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.29) | |
%model.257 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.255 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.257) | |
- %_2.81 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.255) | |
? ^^ | |
+ %_2.79 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.255) | |
? ^^ | |
- %lambda_module.33 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.81) | |
? ^^ | |
+ %lambda_module.33 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.79) | |
? ^^ | |
- %attention.231 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.33) | |
? ^^ | |
+ %attention.227 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.33) | |
? ^^ | |
+ %attention.229 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.227) | |
- %dropout.29 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.231) | |
- %dropout.31 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.29) | |
%model.255 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.253 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.255) | |
- %_2.79 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.253) | |
? ^ | |
+ %_2.75 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.253) | |
? ^ | |
- %lambda_module.31 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.79) | |
? ^ | |
+ %lambda_module.31 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.75) | |
? ^ | |
- %attention.227 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.31) | |
? ^ | |
+ %attention.225 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.31) | |
? ^ | |
- %attention.229 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.227) | |
+ %linear_layers.161 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.225) | |
+ %_2.77 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.161) | |
%model.253 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.251 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.253) | |
- %_2.75 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.251) | |
? ^ | |
+ %_2.73 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.251) | |
? ^ | |
- %lambda_module.29 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.75) | |
? ^ | |
+ %lambda_module.29 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.73) | |
? ^ | |
- %attention.225 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.29) | |
? ^ | |
+ %attention.223 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.29) | |
? ^ | |
- %linear_layers.161 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.225) | |
? ^^ ^ | |
+ %linear_layers.159 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.223) | |
? ^^ ^ | |
- %_2.77 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.161) | |
? ^ ^^ ^ ^^ | |
+ %_1.95 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.159) | |
? ^ ^^ ^ ^^ | |
%model.251 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.249 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.251) | |
- %_2.73 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.249) | |
? ^ | |
+ %_2.71 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.249) | |
? ^ | |
- %lambda_module.27 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.73) | |
? ^ | |
+ %lambda_module.27 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.71) | |
? ^ | |
- %attention.223 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.27) | |
? ^ | |
+ %attention.221 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.27) | |
? ^ | |
- %linear_layers.159 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.223) | |
? ^ ^ | |
+ %linear_layers.157 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.221) | |
? ^ ^ | |
- %_1.95 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.159) | |
? ^ ^ ^ ^ | |
+ %_0.97 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.157) | |
? ^ ^ ^ ^ | |
%model.249 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.247 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.249) | |
- %_2.71 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.247) | |
? ^^ | |
+ %_2.69 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.247) | |
? ^^ | |
+ %input_sublayer.5 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_2.69) | |
- %lambda_module.25 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.71) | |
- %attention.221 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.25) | |
- %linear_layers.157 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.221) | |
- %_0.95 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.157) | |
%model.247 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.245 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.247) | |
- %_2.69 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.245) | |
? ^ - ^ | |
+ %_1.93 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.245) | |
? ^ + ^ | |
- %input_sublayer.5 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_2.69) | |
+ %dropout.25 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_1.93) | |
%model.245 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.243 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.245) | |
- %_1.93 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.243) | |
? ^ | |
+ %_1.91 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.243) | |
? ^ | |
- %dropout.25 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_1.93) | |
+ %feed_forward.15 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_1.91) | |
+ %w_2.3 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.15) | |
%model.243 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.241 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.243) | |
- %_1.91 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.241) | |
? - | |
+ %_1.89 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.241) | |
? + | |
- %feed_forward.15 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_1.91) | |
? ^ - | |
+ %feed_forward.13 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_1.89) | |
? ^ + | |
- %w_2.3 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.15) | |
? ^^ - ^^^^^ ^^^^^ ^^^ ^ | |
+ %dropout.23 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.13) | |
? ^^^^^^^^ ^ +++++ ^ +++++ ^^^^^^^ ^ | |
%model.241 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.239 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.241) | |
- %_1.89 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.239) | |
? ^ | |
+ %_1.87 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.239) | |
? ^ | |
- %feed_forward.13 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_1.89) | |
? ^ ^ | |
+ %feed_forward.11 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_1.87) | |
? ^ ^ | |
- %dropout.23 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.13) | |
+ %activation.3 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.11) | |
%model.239 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.237 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.239) | |
- %_1.87 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.237) | |
? ^ | |
+ %_1.85 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.237) | |
? ^ | |
- %feed_forward.11 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_1.87) | |
? ^^ ^ | |
+ %feed_forward.9 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_1.85) | |
? ^ ^ | |
- %activation.3 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.11) | |
? ^^^^^^^^^^ ^^ ^ ^^^^^^^^^ ^^^^^^^^^^ ^^ | |
+ %w_1.3 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.9) | |
? ^^^ ++++ ^^^ ^^ ^ ^^^ ^ | |
%model.237 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.235 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.237) | |
- %_1.85 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.235) | |
? ^ | |
+ %_1.83 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.235) | |
? ^ | |
+ %output_sublayer.3 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_1.83) | |
- %feed_forward.9 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_1.85) | |
- %w_1.3 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.9) | |
%model.235 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.233 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.235) | |
- %_1.83 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.233) | |
? ^ | |
+ %_1.81 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.233) | |
? ^ | |
- %output_sublayer.3 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_1.83) | |
+ %lambda_module.25 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.81) | |
+ %attention.219 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.25) | |
+ %output_linear.51 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.219) | |
%model.233 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.231 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.233) | |
- %_1.81 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.231) | |
? ^^ | |
+ %_1.79 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.231) | |
? ^^ | |
- %lambda_module.23 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.81) | |
? ^^ | |
+ %lambda_module.23 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.79) | |
? ^^ | |
- %attention.219 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.23) | |
? ^ | |
+ %attention.217 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.23) | |
? ^ | |
- %output_linear.51 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.219) | |
+ %dropout.17 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.217) | |
+ %dropout.19 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.17) | |
%model.231 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.229 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.231) | |
- %_1.79 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.229) | |
? ^ | |
+ %_1.77 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.229) | |
? ^ | |
- %lambda_module.21 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.79) | |
? ^ | |
+ %lambda_module.21 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.77) | |
? ^ | |
- %attention.217 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.21) | |
? ^ | |
+ %attention.213 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.21) | |
? ^ | |
+ %attention.215 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.213) | |
- %dropout.17 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.217) | |
- %dropout.19 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.17) | |
%model.229 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.227 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.229) | |
- %_1.77 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.227) | |
? ^ | |
+ %_1.75 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.227) | |
? ^ | |
- %lambda_module.19 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.77) | |
? ^ | |
+ %lambda_module.19 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.75) | |
? ^ | |
- %attention.213 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.19) | |
? ^ | |
+ %attention.211 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.19) | |
? ^ | |
- %attention.215 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.213) | |
+ %linear_layers.155 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.211) | |
+ %_2.67 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.155) | |
%model.227 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.225 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.227) | |
- %_1.75 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.225) | |
? ^ | |
+ %_1.71 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.225) | |
? ^ | |
- %lambda_module.17 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.75) | |
? ^ | |
+ %lambda_module.17 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.71) | |
? ^ | |
- %attention.211 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.17) | |
? ^^ | |
+ %attention.209 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.17) | |
? ^^ | |
- %linear_layers.155 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.211) | |
? ^ ^^ | |
+ %linear_layers.153 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.209) | |
? ^ ^^ | |
- %_2.67 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.155) | |
? ^ - ^ ^ | |
+ %_1.73 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.153) | |
? ^ + ^ ^ | |
%model.225 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.223 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.225) | |
- %_1.71 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.223) | |
? ^^ | |
+ %_1.69 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.223) | |
? ^^ | |
- %lambda_module.15 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.71) | |
? ^^ | |
+ %lambda_module.15 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.69) | |
? ^^ | |
- %attention.209 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.15) | |
? ^ | |
+ %attention.207 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.15) | |
? ^ | |
- %linear_layers.153 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.209) | |
? ^ ^ | |
+ %linear_layers.151 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.207) | |
? ^ ^ | |
- %_1.73 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.153) | |
? ^ ^^ ^ ^ | |
+ %_0.95 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.151) | |
? ^ ^^ ^ ^ | |
%model.223 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.221 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.223) | |
- %_1.69 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.221) | |
? ^ | |
+ %_1.67 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.221) | |
? ^ | |
+ %input_sublayer.3 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_1.67) | |
- %lambda_module.13 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.69) | |
- %attention.207 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.13) | |
- %linear_layers.151 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.207) | |
- %_0.93 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.151) | |
%model.221 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.219 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.221) | |
- %_1.67 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.219) | |
? ^ ^^ ^ | |
+ %_0.93 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.219) | |
? ^ ^^ ^ | |
- %input_sublayer.3 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_1.67) | |
+ %dropout.13 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_0.93) | |
%model.219 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.217 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.219) | |
%_0.91 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.217) | |
- %dropout.13 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_0.91) | |
+ %feed_forward.7 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_0.91) | |
+ %w_2.1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.7) | |
%model.217 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.215 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.217) | |
%_0.89 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.215) | |
- %feed_forward.7 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_0.89) | |
? ^ | |
+ %feed_forward.5 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_0.89) | |
? ^ | |
- %w_2.1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.7) | |
? ^^^ ^^^^^ ^^^^^ ^^^ ^ | |
+ %dropout.11 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.5) | |
? ^^^^^^^ + ^ +++++ ^ +++++ ^^^^^^^ ^ | |
%model.215 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.213 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.215) | |
%_0.87 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.213) | |
- %feed_forward.5 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_0.87) | |
? ^ | |
+ %feed_forward.3 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_0.87) | |
? ^ | |
- %dropout.11 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.5) | |
+ %activation.1 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.3) | |
%model.213 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.211 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.213) | |
%_0.85 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.211) | |
- %feed_forward.3 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_0.85) | |
? ^ | |
+ %feed_forward.1 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_0.85) | |
? ^ | |
- %activation.1 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.3) | |
? ^^^^^^^^^^ ^^ ^ ^^^^^^^^^ ^^^^^^^^^^ ^ | |
+ %w_1.1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.1) | |
? ^^^ ++++ ^^^ ^^ ^ ^^^ ^ | |
%model.211 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.209 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.211) | |
%_0.83 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.209) | |
+ %output_sublayer.1 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_0.83) | |
- %feed_forward.1 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_0.83) | |
- %w_1.1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.1) | |
%model.209 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.207 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.209) | |
%_0.81 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.207) | |
- %output_sublayer.1 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_0.81) | |
+ %lambda_module.13 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_0.81) | |
+ %attention.205 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.13) | |
+ %output_linear.49 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.205) | |
%model.207 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.205 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.207) | |
%_0.79 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.205) | |
%lambda_module.11 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_0.79) | |
- %attention.205 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.11) | |
? ^ | |
+ %attention.203 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.11) | |
? ^ | |
- %output_linear.49 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.205) | |
+ %dropout.5 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.203) | |
+ %dropout.7 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.5) | |
%model.205 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.203 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.205) | |
%_0.77 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.203) | |
%lambda_module.9 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_0.77) | |
- %attention.203 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.9) | |
? ^^^ | |
+ %attention.199 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.9) | |
? ^^^ | |
+ %attention.201 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.199) | |
- %dropout.5 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.203) | |
- %dropout.7 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.5) | |
%model.203 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.201 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.203) | |
%_0.75 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.201) | |
%lambda_module.7 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_0.75) | |
- %attention.199 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.7) | |
? ^ | |
+ %attention.197 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.7) | |
? ^ | |
- %attention.201 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.199) | |
+ %linear_layers.149 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.197) | |
+ %_2.65 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.149) | |
%model.201 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.199 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.201) | |
%_0.73 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.199) | |
%lambda_module.5 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_0.73) | |
- %attention.197 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.5) | |
? ^ | |
+ %attention.195 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.5) | |
? ^ | |
- %linear_layers.149 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.197) | |
? ^ ^ | |
+ %linear_layers.147 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.195) | |
? ^ ^ | |
- %_2.65 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.149) | |
? ^ ^ ^ | |
+ %_1.65 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.147) | |
? ^ ^ ^ | |
%model.199 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.197 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.199) | |
- %_0.71 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.197) | |
? ^^ | |
+ %_0.69 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.197) | |
? ^^ | |
- %lambda_module.3 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_0.71) | |
? ^^ | |
+ %lambda_module.3 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_0.69) | |
? ^^ | |
- %attention.195 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.3) | |
? ^ | |
+ %attention.193 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.3) | |
? ^ | |
- %linear_layers.147 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.195) | |
? ^ ^ | |
+ %linear_layers.145 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.193) | |
? ^ ^ | |
- %_1.65 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.147) | |
? --- ^ ^ | |
+ %_0.71 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.145) | |
? +++ ^ ^ | |
%model.197 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.195 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.197) | |
%_0.67 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.195) | |
+ %input_sublayer.1 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_0.67) | |
- %lambda_module.1 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_0.67) | |
- %attention.193 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.1) | |
- %linear_layers.145 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.193) | |
- %_0.69 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.145) | |
%model.195 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
+ %embedding : __torch__.bert_pytorch.model.embedding.bert.BERTEmbedding = prim::GetAttr[name="embedding"](%model.195) | |
- %transformer_blocks.193 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.195) | |
- %_0.65 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.193) | |
- %input_sublayer.1 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_0.65) | |
%model.193 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
- %embedding : __torch__.bert_pytorch.model.embedding.bert.BERTEmbedding = prim::GetAttr[name="embedding"](%model.193) | |
- %model.191 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
- %transformer_blocks.191 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.191) | |
? ^ ^ | |
+ %transformer_blocks.193 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.193) | |
? ^ ^ | |
- %_11.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.191) | |
? ^ | |
+ %_11.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.193) | |
? ^ | |
%attention.191 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_11.15) | |
%output_linear.47 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.191) | |
%bias.95 : Tensor = prim::GetAttr[name="bias"](%output_linear.47) | |
- %model.189 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? - | |
+ %model.191 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? + | |
- %transformer_blocks.189 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.189) | |
? - - | |
+ %transformer_blocks.191 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.191) | |
? + + | |
- %_11.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.189) | |
? - | |
+ %_11.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.191) | |
? + | |
%attention.189 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_11.13) | |
%output_linear.45 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.189) | |
%weight.99 : Tensor = prim::GetAttr[name="weight"](%output_linear.45) | |
- %model.187 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
+ %model.189 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
- %transformer_blocks.187 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.187) | |
? ^ ^ | |
+ %transformer_blocks.189 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.189) | |
? ^ ^ | |
- %_11.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.187) | |
? ^ | |
+ %_11.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.189) | |
? ^ | |
%attention.187 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_11.11) | |
%linear_layers.143 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.187) | |
%_2.63 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.143) | |
%bias.93 : Tensor = prim::GetAttr[name="bias"](%_2.63) | |
- %model.185 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
+ %model.187 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
- %transformer_blocks.185 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.185) | |
? ^ ^ | |
+ %transformer_blocks.187 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.187) | |
? ^ ^ | |
- %_11.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.185) | |
? ^ | |
+ %_11.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.187) | |
? ^ | |
%attention.185 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_11.9) | |
%linear_layers.141 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.185) | |
%_2.61 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.141) | |
%weight.97 : Tensor = prim::GetAttr[name="weight"](%_2.61) | |
- %model.183 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
+ %model.185 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
- %transformer_blocks.183 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.183) | |
? ^ ^ | |
+ %transformer_blocks.185 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.185) | |
? ^ ^ | |
- %_11.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.183) | |
? ^ | |
+ %_11.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.185) | |
? ^ | |
%attention.183 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_11.7) | |
%linear_layers.139 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.183) | |
%_1.63 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.139) | |
%bias.91 : Tensor = prim::GetAttr[name="bias"](%_1.63) | |
- %model.181 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
+ %model.183 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
- %transformer_blocks.181 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.181) | |
? ^ ^ | |
+ %transformer_blocks.183 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.183) | |
? ^ ^ | |
- %_11.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.181) | |
? ^ | |
+ %_11.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.183) | |
? ^ | |
%attention.181 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_11.5) | |
%linear_layers.137 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.181) | |
%_1.61 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.137) | |
%weight.95 : Tensor = prim::GetAttr[name="weight"](%_1.61) | |
+ %model.181 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
+ %transformer_blocks.181 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.181) | |
+ %_11.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.181) | |
+ %attention.179 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_11.3) | |
+ %linear_layers.135 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.179) | |
+ %_0.65 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.135) | |
+ %bias.89 : Tensor = prim::GetAttr[name="bias"](%_0.65) | |
%model.179 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.179 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.179) | |
- %_11.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.179) | |
? ^ | |
+ %_11.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.179) | |
? ^ | |
- %attention.179 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_11.3) | |
? ^ ^ | |
+ %attention.177 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_11.1) | |
? ^ ^ | |
- %linear_layers.135 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.179) | |
? ^ ^ | |
+ %linear_layers.133 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.177) | |
? ^ ^ | |
- %_0.63 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.135) | |
? ^ | |
+ %_0.63 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.133) | |
? ^ | |
- %bias.89 : Tensor = prim::GetAttr[name="bias"](%_0.63) | |
? ^ ^^ - ^ ^^ | |
+ %weight.93 : Tensor = prim::GetAttr[name="weight"](%_0.63) | |
? ^^ ^^^ + ^^ ^^^ | |
%model.177 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.177 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.177) | |
- %_11.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.177) | |
- %attention.177 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_11.1) | |
- %linear_layers.133 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.177) | |
- %_0.61 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.133) | |
- %weight.93 : Tensor = prim::GetAttr[name="weight"](%_0.61) | |
- %model.175 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
- %transformer_blocks.175 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.175) | |
- %_10.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.175) | |
? ^ | |
+ %_10.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.177) | |
? ^ | |
%attention.175 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_10.15) | |
%output_linear.43 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.175) | |
%bias.87 : Tensor = prim::GetAttr[name="bias"](%output_linear.43) | |
- %model.173 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
+ %model.175 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
- %transformer_blocks.173 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.173) | |
? ^ ^ | |
+ %transformer_blocks.175 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.175) | |
? ^ ^ | |
- %_10.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.173) | |
? ^ | |
+ %_10.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.175) | |
? ^ | |
%attention.173 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_10.13) | |
%output_linear.41 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.173) | |
%weight.91 : Tensor = prim::GetAttr[name="weight"](%output_linear.41) | |
- %model.171 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
+ %model.173 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
- %transformer_blocks.171 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.171) | |
? ^ ^ | |
+ %transformer_blocks.173 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.173) | |
? ^ ^ | |
- %_10.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.171) | |
? ^ | |
+ %_10.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.173) | |
? ^ | |
%attention.171 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_10.11) | |
%linear_layers.131 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.171) | |
%_2.59 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.131) | |
%bias.85 : Tensor = prim::GetAttr[name="bias"](%_2.59) | |
- %model.169 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^^ | |
+ %model.171 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^^ | |
- %transformer_blocks.169 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.169) | |
? ^^ ^^ | |
+ %transformer_blocks.171 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.171) | |
? ^^ ^^ | |
- %_10.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.169) | |
? ^^ | |
+ %_10.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.171) | |
? ^^ | |
%attention.169 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_10.9) | |
%linear_layers.129 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.169) | |
%_2.57 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.129) | |
%weight.89 : Tensor = prim::GetAttr[name="weight"](%_2.57) | |
- %model.167 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
+ %model.169 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
- %transformer_blocks.167 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.167) | |
? ^ ^ | |
+ %transformer_blocks.169 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.169) | |
? ^ ^ | |
- %_10.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.167) | |
? ^ | |
+ %_10.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.169) | |
? ^ | |
%attention.167 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_10.7) | |
%linear_layers.127 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.167) | |
%_1.59 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.127) | |
%bias.83 : Tensor = prim::GetAttr[name="bias"](%_1.59) | |
- %model.165 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
+ %model.167 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
- %transformer_blocks.165 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.165) | |
? ^ ^ | |
+ %transformer_blocks.167 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.167) | |
? ^ ^ | |
- %_10.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.165) | |
? ^ | |
+ %_10.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.167) | |
? ^ | |
%attention.165 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_10.5) | |
%linear_layers.125 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.165) | |
%_1.57 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.125) | |
%weight.87 : Tensor = prim::GetAttr[name="weight"](%_1.57) | |
+ %model.165 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
+ %transformer_blocks.165 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.165) | |
+ %_10.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.165) | |
+ %attention.163 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_10.3) | |
+ %linear_layers.123 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.163) | |
+ %_0.61 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.123) | |
+ %bias.81 : Tensor = prim::GetAttr[name="bias"](%_0.61) | |
%model.163 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.163 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.163) | |
- %_10.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.163) | |
? ^ | |
+ %_10.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.163) | |
? ^ | |
- %attention.163 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_10.3) | |
? ^ ^ | |
+ %attention.161 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_10.1) | |
? ^ ^ | |
- %linear_layers.123 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.163) | |
? ^ ^ | |
+ %linear_layers.121 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.161) | |
? ^ ^ | |
- %_0.59 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.123) | |
? ^ | |
+ %_0.59 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.121) | |
? ^ | |
- %bias.81 : Tensor = prim::GetAttr[name="bias"](%_0.59) | |
? ^ ^^ ^ ^ ^^ | |
+ %weight.85 : Tensor = prim::GetAttr[name="weight"](%_0.59) | |
? ^^ ^^^ ^ ^^ ^^^ | |
%model.161 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.161 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.161) | |
- %_10.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.161) | |
- %attention.161 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_10.1) | |
- %linear_layers.121 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.161) | |
- %_0.57 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.121) | |
- %weight.85 : Tensor = prim::GetAttr[name="weight"](%_0.57) | |
- %model.159 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
- %transformer_blocks.159 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.159) | |
- %_9.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.159) | |
? ^^ | |
+ %_9.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.161) | |
? ^^ | |
%attention.159 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_9.15) | |
%output_linear.39 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.159) | |
%bias.79 : Tensor = prim::GetAttr[name="bias"](%output_linear.39) | |
- %model.157 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
+ %model.159 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
- %transformer_blocks.157 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.157) | |
? ^ ^ | |
+ %transformer_blocks.159 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.159) | |
? ^ ^ | |
- %_9.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.157) | |
? ^ | |
+ %_9.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.159) | |
? ^ | |
%attention.157 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_9.13) | |
%output_linear.37 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.157) | |
%weight.83 : Tensor = prim::GetAttr[name="weight"](%output_linear.37) | |
- %model.155 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
+ %model.157 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
- %transformer_blocks.155 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.155) | |
? ^ ^ | |
+ %transformer_blocks.157 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.157) | |
? ^ ^ | |
- %_9.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.155) | |
? ^ | |
+ %_9.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.157) | |
? ^ | |
%attention.155 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_9.11) | |
%linear_layers.119 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.155) | |
%_2.55 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.119) | |
%bias.77 : Tensor = prim::GetAttr[name="bias"](%_2.55) | |
- %model.153 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
+ %model.155 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
- %transformer_blocks.153 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.153) | |
? ^ ^ | |
+ %transformer_blocks.155 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.155) | |
? ^ ^ | |
- %_9.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.153) | |
? ^ | |
+ %_9.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.155) | |
? ^ | |
%attention.153 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_9.9) | |
%linear_layers.117 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.153) | |
%_2.53 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.117) | |
%weight.81 : Tensor = prim::GetAttr[name="weight"](%_2.53) | |
- %model.151 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
+ %model.153 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
- %transformer_blocks.151 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.151) | |
? ^ ^ | |
+ %transformer_blocks.153 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.153) | |
? ^ ^ | |
- %_9.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.151) | |
? ^ | |
+ %_9.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.153) | |
? ^ | |
%attention.151 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_9.7) | |
%linear_layers.115 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.151) | |
%_1.55 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.115) | |
%bias.75 : Tensor = prim::GetAttr[name="bias"](%_1.55) | |
- %model.149 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^^ | |
+ %model.151 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^^ | |
- %transformer_blocks.149 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.149) | |
? ^^ ^^ | |
+ %transformer_blocks.151 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.151) | |
? ^^ ^^ | |
- %_9.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.149) | |
? ^^ | |
+ %_9.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.151) | |
? ^^ | |
%attention.149 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_9.5) | |
%linear_layers.113 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.149) | |
%_1.53 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.113) | |
%weight.79 : Tensor = prim::GetAttr[name="weight"](%_1.53) | |
+ %model.149 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
+ %transformer_blocks.149 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.149) | |
+ %_9.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.149) | |
+ %attention.147 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_9.3) | |
+ %linear_layers.111 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.147) | |
+ %_0.57 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.111) | |
+ %bias.73 : Tensor = prim::GetAttr[name="bias"](%_0.57) | |
%model.147 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.147 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.147) | |
- %_9.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.147) | |
? ^ | |
+ %_9.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.147) | |
? ^ | |
- %attention.147 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_9.3) | |
? ^ ^ | |
+ %attention.145 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_9.1) | |
? ^ ^ | |
- %linear_layers.111 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.147) | |
? ^^ ^ | |
+ %linear_layers.109 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.145) | |
? ^^ ^ | |
- %_0.55 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.111) | |
? ^^ | |
+ %_0.55 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.109) | |
? ^^ | |
- %bias.73 : Tensor = prim::GetAttr[name="bias"](%_0.55) | |
? ^ ^^ ^ ^ ^^ | |
+ %weight.77 : Tensor = prim::GetAttr[name="weight"](%_0.55) | |
? ^^ ^^^ ^ ^^ ^^^ | |
%model.145 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.145 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.145) | |
- %_9.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.145) | |
- %attention.145 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_9.1) | |
- %linear_layers.109 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.145) | |
- %_0.53 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.109) | |
- %weight.77 : Tensor = prim::GetAttr[name="weight"](%_0.53) | |
- %model.143 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
- %transformer_blocks.143 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.143) | |
- %_8.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.143) | |
? ^ | |
+ %_8.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.145) | |
? ^ | |
%attention.143 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_8.15) | |
%output_linear.35 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.143) | |
%bias.71 : Tensor = prim::GetAttr[name="bias"](%output_linear.35) | |
- %model.141 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
+ %model.143 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
- %transformer_blocks.141 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.141) | |
? ^ ^ | |
+ %transformer_blocks.143 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.143) | |
? ^ ^ | |
- %_8.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.141) | |
? ^ | |
+ %_8.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.143) | |
? ^ | |
%attention.141 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_8.13) | |
%output_linear.33 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.141) | |
%weight.75 : Tensor = prim::GetAttr[name="weight"](%output_linear.33) | |
- %model.139 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^^ | |
+ %model.141 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^^ | |
- %transformer_blocks.139 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.139) | |
? ^^ ^^ | |
+ %transformer_blocks.141 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.141) | |
? ^^ ^^ | |
- %_8.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.139) | |
? ^^ | |
+ %_8.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.141) | |
? ^^ | |
%attention.139 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_8.11) | |
%linear_layers.107 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.139) | |
%_2.51 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.107) | |
%bias.69 : Tensor = prim::GetAttr[name="bias"](%_2.51) | |
- %model.137 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
+ %model.139 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
- %transformer_blocks.137 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.137) | |
? ^ ^ | |
+ %transformer_blocks.139 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.139) | |
? ^ ^ | |
- %_8.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.137) | |
? ^ | |
+ %_8.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.139) | |
? ^ | |
%attention.137 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_8.9) | |
%linear_layers.105 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.137) | |
%_2.49 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.105) | |
%weight.73 : Tensor = prim::GetAttr[name="weight"](%_2.49) | |
- %model.135 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
+ %model.137 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
- %transformer_blocks.135 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.135) | |
? ^ ^ | |
+ %transformer_blocks.137 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.137) | |
? ^ ^ | |
- %_8.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.135) | |
? ^ | |
+ %_8.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.137) | |
? ^ | |
%attention.135 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_8.7) | |
%linear_layers.103 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.135) | |
%_1.51 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.103) | |
%bias.67 : Tensor = prim::GetAttr[name="bias"](%_1.51) | |
- %model.133 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
+ %model.135 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
- %transformer_blocks.133 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.133) | |
? ^ ^ | |
+ %transformer_blocks.135 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.135) | |
? ^ ^ | |
- %_8.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.133) | |
? ^ | |
+ %_8.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.135) | |
? ^ | |
%attention.133 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_8.5) | |
%linear_layers.101 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.133) | |
%_1.49 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.101) | |
%weight.71 : Tensor = prim::GetAttr[name="weight"](%_1.49) | |
+ %model.133 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
+ %transformer_blocks.133 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.133) | |
+ %_8.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.133) | |
+ %attention.131 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_8.3) | |
+ %linear_layers.99 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.131) | |
+ %_0.53 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.99) | |
+ %bias.65 : Tensor = prim::GetAttr[name="bias"](%_0.53) | |
%model.131 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.131 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.131) | |
- %_8.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.131) | |
? ^ | |
+ %_8.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.131) | |
? ^ | |
- %attention.131 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_8.3) | |
? ^^ ^ | |
+ %attention.129 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_8.1) | |
? ^^ ^ | |
- %linear_layers.99 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.131) | |
? ^ ^^ | |
+ %linear_layers.97 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.129) | |
? ^ ^^ | |
- %_0.51 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.99) | |
? ^ | |
+ %_0.51 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.97) | |
? ^ | |
- %bias.65 : Tensor = prim::GetAttr[name="bias"](%_0.51) | |
? ^ ^^ ^ ^ ^^ | |
+ %weight.69 : Tensor = prim::GetAttr[name="weight"](%_0.51) | |
? ^^ ^^^ ^ ^^ ^^^ | |
%model.129 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
%transformer_blocks.129 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.129) | |
- %_8.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.129) | |
- %attention.129 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_8.1) | |
- %linear_layers.97 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.129) | |
- %_0.49 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.97) | |
- %weight.69 : Tensor = prim::GetAttr[name="weight"](%_0.49) | |
- %model.127 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
- %transformer_blocks.127 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.127) | |
- %_7.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.127) | |
? ^ | |
+ %_7.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.129) | |
? ^ | |
%attention.127 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_7.15) | |
%output_linear.31 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.127) | |
%bias.63 : Tensor = prim::GetAttr[name="bias"](%output_linear.31) | |
- %model.125 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
+ %model.127 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
- %transformer_blocks.125 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.125) | |
? ^ ^ | |
+ %transformer_blocks.127 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.127)
? ^ ^
- %_7.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.125)
? ^
+ %_7.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.127)
? ^
%attention.125 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_7.13)
%output_linear.29 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.125)
%weight.67 : Tensor = prim::GetAttr[name="weight"](%output_linear.29)
- %model.123 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.125 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.123 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.123)
? ^ ^
+ %transformer_blocks.125 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.125)
? ^ ^
- %_7.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.123)
? ^
+ %_7.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.125)
? ^
%attention.123 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_7.11)
%linear_layers.95 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.123)
%_2.47 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.95)
%bias.61 : Tensor = prim::GetAttr[name="bias"](%_2.47)
- %model.121 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.123 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.121 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.121)
? ^ ^
+ %transformer_blocks.123 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.123)
? ^ ^
- %_7.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.121)
? ^
+ %_7.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.123)
? ^
%attention.121 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_7.9)
%linear_layers.93 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.121)
%_2.45 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.93)
%weight.65 : Tensor = prim::GetAttr[name="weight"](%_2.45)
- %model.119 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? -
+ %model.121 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? +
- %transformer_blocks.119 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.119)
? - -
+ %transformer_blocks.121 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.121)
? + +
- %_7.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.119)
? -
+ %_7.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.121)
? +
%attention.119 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_7.7)
%linear_layers.91 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.119)
%_1.47 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.91)
%bias.59 : Tensor = prim::GetAttr[name="bias"](%_1.47)
- %model.117 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.119 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.117 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.117)
? ^ ^
+ %transformer_blocks.119 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.119)
? ^ ^
- %_7.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.117)
? ^
+ %_7.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.119)
? ^
%attention.117 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_7.5)
%linear_layers.89 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.117)
%_1.45 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.89)
%weight.63 : Tensor = prim::GetAttr[name="weight"](%_1.45)
+ %model.117 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
+ %transformer_blocks.117 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.117)
+ %_7.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.117)
+ %attention.115 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_7.3)
+ %linear_layers.87 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.115)
+ %_0.49 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.87)
+ %bias.57 : Tensor = prim::GetAttr[name="bias"](%_0.49)
%model.115 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.115 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.115)
- %_7.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.115)
? ^
+ %_7.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.115)
? ^
- %attention.115 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_7.3)
? ^ ^
+ %attention.113 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_7.1)
? ^ ^
- %linear_layers.87 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.115)
? ^ ^
+ %linear_layers.85 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.113)
? ^ ^
- %_0.47 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.87)
? ^
+ %_0.47 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.85)
? ^
- %bias.57 : Tensor = prim::GetAttr[name="bias"](%_0.47)
? ^ ^^ ^^ ^ ^^
+ %weight.61 : Tensor = prim::GetAttr[name="weight"](%_0.47)
? ^^ ^^^ ^^ ^^ ^^^
%model.113 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.113 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.113)
- %_7.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.113)
- %attention.113 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_7.1)
- %linear_layers.85 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.113)
- %_0.45 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.85)
- %weight.61 : Tensor = prim::GetAttr[name="weight"](%_0.45)
- %model.111 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
- %transformer_blocks.111 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.111)
- %_6.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.111)
? ^
+ %_6.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.113)
? ^
%attention.111 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_6.15)
%output_linear.27 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.111)
%bias.55 : Tensor = prim::GetAttr[name="bias"](%output_linear.27)
- %model.109 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^^
+ %model.111 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^^
- %transformer_blocks.109 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.109)
? ^^ ^^
+ %transformer_blocks.111 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.111)
? ^^ ^^
- %_6.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.109)
? ^^
+ %_6.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.111)
? ^^
%attention.109 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_6.13)
%output_linear.25 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.109)
%weight.59 : Tensor = prim::GetAttr[name="weight"](%output_linear.25)
- %model.107 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.109 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.107 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.107)
? ^ ^
+ %transformer_blocks.109 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.109)
? ^ ^
- %_6.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.107)
? ^
+ %_6.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.109)
? ^
%attention.107 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_6.11)
%linear_layers.83 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.107)
%_2.43 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.83)
%bias.53 : Tensor = prim::GetAttr[name="bias"](%_2.43)
- %model.105 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.107 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.105 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.105)
? ^ ^
+ %transformer_blocks.107 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.107)
? ^ ^
- %_6.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.105)
? ^
+ %_6.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.107)
? ^
%attention.105 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_6.9)
%linear_layers.81 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.105)
%_2.41 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.81)
%weight.57 : Tensor = prim::GetAttr[name="weight"](%_2.41)
- %model.103 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.105 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.103 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.103)
? ^ ^
+ %transformer_blocks.105 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.105)
? ^ ^
- %_6.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.103)
? ^
+ %_6.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.105)
? ^
%attention.103 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_6.7)
%linear_layers.79 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.103)
%_1.43 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.79)
%bias.51 : Tensor = prim::GetAttr[name="bias"](%_1.43)
- %model.101 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.103 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.101 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.101)
? ^ ^
+ %transformer_blocks.103 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.103)
? ^ ^
- %_6.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.101)
? ^
+ %_6.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.103)
? ^
%attention.101 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_6.5)
%linear_layers.77 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.101)
%_1.41 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.77)
%weight.55 : Tensor = prim::GetAttr[name="weight"](%_1.41)
+ %model.101 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
+ %transformer_blocks.101 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.101)
+ %_6.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.101)
+ %attention.99 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_6.3)
+ %linear_layers.75 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.99)
+ %_0.45 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.75)
+ %bias.49 : Tensor = prim::GetAttr[name="bias"](%_0.45)
%model.99 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.99 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.99)
- %_6.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.99)
? ^
+ %_6.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.99)
? ^
- %attention.99 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_6.3)
? ^ ^
+ %attention.97 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_6.1)
? ^ ^
- %linear_layers.75 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.99)
? ^ ^
+ %linear_layers.73 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.97)
? ^ ^
- %_0.43 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.75)
? ^
+ %_0.43 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.73)
? ^
- %bias.49 : Tensor = prim::GetAttr[name="bias"](%_0.43)
? ^ ^^ ^^ ^ ^^
+ %weight.53 : Tensor = prim::GetAttr[name="weight"](%_0.43)
? ^^ ^^^ ^^ ^^ ^^^
%model.97 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.97 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.97)
- %_6.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.97)
- %attention.97 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_6.1)
- %linear_layers.73 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.97)
- %_0.41 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.73)
- %weight.53 : Tensor = prim::GetAttr[name="weight"](%_0.41)
- %model.95 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
- %transformer_blocks.95 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.95)
- %_5.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.95)
? ^
+ %_5.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.97)
? ^
%attention.95 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_5.15)
%output_linear.23 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.95)
%bias.47 : Tensor = prim::GetAttr[name="bias"](%output_linear.23)
- %model.93 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.95 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.93 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.93)
? ^ ^
+ %transformer_blocks.95 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.95)
? ^ ^
- %_5.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.93)
? ^
+ %_5.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.95)
? ^
%attention.93 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_5.13)
%output_linear.21 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.93)
%weight.51 : Tensor = prim::GetAttr[name="weight"](%output_linear.21)
- %model.91 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.93 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.91 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.91)
? ^ ^
+ %transformer_blocks.93 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.93)
? ^ ^
- %_5.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.91)
? ^
+ %_5.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.93)
? ^
%attention.91 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_5.11)
%linear_layers.71 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.91)
%_2.39 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.71)
%bias.45 : Tensor = prim::GetAttr[name="bias"](%_2.39)
- %model.89 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? -
+ %model.91 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? +
- %transformer_blocks.89 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.89)
? - -
+ %transformer_blocks.91 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.91)
? + +
- %_5.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.89)
? -
+ %_5.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.91)
? +
%attention.89 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_5.9)
%linear_layers.69 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.89)
%_2.37 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.69)
%weight.49 : Tensor = prim::GetAttr[name="weight"](%_2.37)
- %model.87 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.89 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.87 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.87)
? ^ ^
+ %transformer_blocks.89 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.89)
? ^ ^
- %_5.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.87)
? ^
+ %_5.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.89)
? ^
%attention.87 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_5.7)
%linear_layers.67 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.87)
%_1.39 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.67)
%bias.43 : Tensor = prim::GetAttr[name="bias"](%_1.39)
- %model.85 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.87 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.85 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.85)
? ^ ^
+ %transformer_blocks.87 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.87)
? ^ ^
- %_5.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.85)
? ^
+ %_5.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.87)
? ^
%attention.85 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_5.5)
%linear_layers.65 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.85)
%_1.37 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.65)
%weight.47 : Tensor = prim::GetAttr[name="weight"](%_1.37)
+ %model.85 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
+ %transformer_blocks.85 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.85)
+ %_5.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.85)
+ %attention.83 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_5.3)
+ %linear_layers.63 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.83)
+ %_0.41 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.63)
+ %bias.41 : Tensor = prim::GetAttr[name="bias"](%_0.41)
%model.83 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.83 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.83)
- %_5.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.83)
? ^
+ %_5.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.83)
? ^
- %attention.83 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_5.3)
? ^ ^
+ %attention.81 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_5.1)
? ^ ^
- %linear_layers.63 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.83)
? ^ ^
+ %linear_layers.61 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.81)
? ^ ^
- %_0.39 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.63)
? ^
+ %_0.39 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.61)
? ^
- %bias.41 : Tensor = prim::GetAttr[name="bias"](%_0.39)
? ^ ^^ ^ ^ ^^
+ %weight.45 : Tensor = prim::GetAttr[name="weight"](%_0.39)
? ^^ ^^^ ^ ^^ ^^^
%model.81 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.81 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.81)
- %_5.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.81)
- %attention.81 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_5.1)
- %linear_layers.61 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.81)
- %_0.37 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.61)
- %weight.45 : Tensor = prim::GetAttr[name="weight"](%_0.37)
- %model.79 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
- %transformer_blocks.79 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.79)
- %_4.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.79)
? ^^
+ %_4.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.81)
? ^^
%attention.79 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_4.15)
%output_linear.19 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.79)
%bias.39 : Tensor = prim::GetAttr[name="bias"](%output_linear.19)
- %model.77 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.79 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.77 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.77)
? ^ ^
+ %transformer_blocks.79 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.79)
? ^ ^
- %_4.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.77)
? ^
+ %_4.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.79)
? ^
%attention.77 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_4.13)
%output_linear.17 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.77)
%weight.43 : Tensor = prim::GetAttr[name="weight"](%output_linear.17)
- %model.75 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.77 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.75 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.75)
? ^ ^
+ %transformer_blocks.77 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.77)
? ^ ^
- %_4.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.75)
? ^
+ %_4.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.77)
? ^
%attention.75 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_4.11)
%linear_layers.59 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.75)
%_2.35 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.59)
%bias.37 : Tensor = prim::GetAttr[name="bias"](%_2.35)
- %model.73 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.75 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.73 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.73)
? ^ ^
+ %transformer_blocks.75 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.75)
? ^ ^
- %_4.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.73)
? ^
+ %_4.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.75)
? ^
%attention.73 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_4.9)
%linear_layers.57 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.73)
%_2.33 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.57)
%weight.41 : Tensor = prim::GetAttr[name="weight"](%_2.33)
- %model.71 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.73 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.71 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.71)
? ^ ^
+ %transformer_blocks.73 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.73)
? ^ ^
- %_4.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.71)
? ^
+ %_4.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.73)
? ^
%attention.71 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_4.7)
%linear_layers.55 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.71)
%_1.35 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.55)
%bias.35 : Tensor = prim::GetAttr[name="bias"](%_1.35)
- %model.69 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^^
+ %model.71 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^^
- %transformer_blocks.69 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.69)
? ^^ ^^
+ %transformer_blocks.71 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.71)
? ^^ ^^
- %_4.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.69)
? ^^
+ %_4.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.71)
? ^^
%attention.69 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_4.5)
%linear_layers.53 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.69)
%_1.33 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.53)
%weight.39 : Tensor = prim::GetAttr[name="weight"](%_1.33)
+ %model.69 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
+ %transformer_blocks.69 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.69)
+ %_4.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.69)
+ %attention.67 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_4.3)
+ %linear_layers.51 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.67)
+ %_0.37 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.51)
+ %bias.33 : Tensor = prim::GetAttr[name="bias"](%_0.37)
%model.67 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.67 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.67)
- %_4.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.67)
? ^
+ %_4.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.67)
? ^
- %attention.67 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_4.3)
? ^ ^
+ %attention.65 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_4.1)
? ^ ^
- %linear_layers.51 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.67)
? ^^ ^
+ %linear_layers.49 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.65)
? ^^ ^
- %_0.35 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.51)
? ^^
+ %_0.35 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.49)
? ^^
- %bias.33 : Tensor = prim::GetAttr[name="bias"](%_0.35)
? ^ ^^ ^ ^ ^^
+ %weight.37 : Tensor = prim::GetAttr[name="weight"](%_0.35)
? ^^ ^^^ ^ ^^ ^^^
%model.65 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.65 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.65)
- %_4.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.65)
- %attention.65 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_4.1)
- %linear_layers.49 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.65)
- %_0.33 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.49)
- %weight.37 : Tensor = prim::GetAttr[name="weight"](%_0.33)
- %model.63 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
- %transformer_blocks.63 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.63)
- %_3.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.63)
? ^
+ %_3.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.65)
? ^
%attention.63 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_3.15)
%output_linear.15 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.63)
%bias.31 : Tensor = prim::GetAttr[name="bias"](%output_linear.15)
- %model.61 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.63 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.61 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.61)
? ^ ^
+ %transformer_blocks.63 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.63)
? ^ ^
- %_3.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.61)
? ^
+ %_3.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.63)
? ^
%attention.61 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_3.13)
%output_linear.13 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.61)
%weight.35 : Tensor = prim::GetAttr[name="weight"](%output_linear.13)
- %model.59 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^^
+ %model.61 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^^
- %transformer_blocks.59 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.59)
? ^^ ^^
+ %transformer_blocks.61 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.61)
? ^^ ^^
- %_3.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.59)
? ^^
+ %_3.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.61)
? ^^
%attention.59 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_3.11)
%linear_layers.47 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.59)
%_2.31 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.47)
%bias.29 : Tensor = prim::GetAttr[name="bias"](%_2.31)
- %model.57 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.59 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.57 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.57)
? ^ ^
+ %transformer_blocks.59 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.59)
? ^ ^
- %_3.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.57)
? ^
+ %_3.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.59)
? ^
%attention.57 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_3.9)
%linear_layers.45 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.57)
%_2.29 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.45)
%weight.33 : Tensor = prim::GetAttr[name="weight"](%_2.29)
- %model.55 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.57 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.55 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.55)
? ^ ^
+ %transformer_blocks.57 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.57)
? ^ ^
- %_3.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.55)
? ^
+ %_3.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.57)
? ^
%attention.55 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_3.7)
%linear_layers.43 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.55)
%_1.31 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.43)
%bias.27 : Tensor = prim::GetAttr[name="bias"](%_1.31)
- %model.53 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.55 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.53 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.53)
? ^ ^
+ %transformer_blocks.55 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.55)
? ^ ^
- %_3.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.53)
? ^
+ %_3.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.55)
? ^
%attention.53 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_3.5)
%linear_layers.41 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.53)
%_1.29 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.41)
%weight.31 : Tensor = prim::GetAttr[name="weight"](%_1.29)
+ %model.53 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
+ %transformer_blocks.53 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.53)
+ %_3.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.53)
+ %attention.51 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_3.3)
+ %linear_layers.39 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.51)
+ %_0.33 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.39)
+ %bias.25 : Tensor = prim::GetAttr[name="bias"](%_0.33)
%model.51 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.51 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.51)
- %_3.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.51)
? ^
+ %_3.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.51)
? ^
- %attention.51 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_3.3)
? ^^ ^
+ %attention.49 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_3.1)
? ^^ ^
- %linear_layers.39 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.51)
? ^ ^^
+ %linear_layers.37 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.49)
? ^ ^^
- %_0.31 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.39)
? ^
+ %_0.31 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.37)
? ^
- %bias.25 : Tensor = prim::GetAttr[name="bias"](%_0.31)
? ^ ^^ ^ ^ ^^
+ %weight.29 : Tensor = prim::GetAttr[name="weight"](%_0.31)
? ^^ ^^^ ^ ^^ ^^^
%model.49 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.49 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.49)
- %_3.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.49)
- %attention.49 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_3.1)
- %linear_layers.37 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.49)
- %_0.29 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.37)
- %weight.29 : Tensor = prim::GetAttr[name="weight"](%_0.29)
- %model.47 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
- %transformer_blocks.47 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.47)
- %_2.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.47)
? ^
+ %_2.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.49)
? ^
%attention.47 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_2.27)
%output_linear.11 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.47)
%bias.23 : Tensor = prim::GetAttr[name="bias"](%output_linear.11)
- %model.45 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.47 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.45 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.45)
? ^ ^
+ %transformer_blocks.47 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.47)
? ^ ^
- %_2.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.45)
? ^
+ %_2.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.47)
? ^
%attention.45 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_2.25)
%output_linear.9 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.45)
%weight.27 : Tensor = prim::GetAttr[name="weight"](%output_linear.9)
- %model.43 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.45 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.43 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.43)
? ^ ^
+ %transformer_blocks.45 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.45)
? ^ ^
- %_2.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.43)
? ^
+ %_2.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.45)
? ^
%attention.43 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_2.21)
%linear_layers.35 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.43)
%_2.23 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.35)
%bias.21 : Tensor = prim::GetAttr[name="bias"](%_2.23)
- %model.41 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.43 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.41 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.41)
? ^ ^
+ %transformer_blocks.43 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.43)
? ^ ^
- %_2.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.41)
? ^
+ %_2.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.43)
? ^
%attention.41 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_2.17)
%linear_layers.33 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.41)
%_2.19 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.33)
%weight.25 : Tensor = prim::GetAttr[name="weight"](%_2.19)
- %model.39 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^^
+ %model.41 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^^
- %transformer_blocks.39 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.39)
? ^^ ^^
+ %transformer_blocks.41 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.41)
? ^^ ^^
- %_2.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.39)
? ^^
+ %_2.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.41)
? ^^
%attention.39 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_2.15)
%linear_layers.31 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.39)
%_1.27 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.31)
%bias.19 : Tensor = prim::GetAttr[name="bias"](%_1.27)
- %model.37 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.39 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.37 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.37)
? ^ ^
+ %transformer_blocks.39 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.39)
? ^ ^
- %_2.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.37)
? ^
+ %_2.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.39)
? ^
%attention.37 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_2.13)
%linear_layers.29 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.37)
%_1.25 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.29)
%weight.23 : Tensor = prim::GetAttr[name="weight"](%_1.25)
+ %model.37 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
+ %transformer_blocks.37 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.37)
+ %_2.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.37)
+ %attention.35 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_2.11)
+ %linear_layers.27 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.35)
+ %_0.29 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.27)
+ %bias.17 : Tensor = prim::GetAttr[name="bias"](%_0.29)
%model.35 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.35 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.35)
- %_2.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.35)
? ^^
+ %_2.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.35)
? ^
- %attention.35 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_2.11)
? ^ ^^
+ %attention.33 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_2.9)
? ^ ^
- %linear_layers.27 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.35)
? ^ ^
+ %linear_layers.25 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.33)
? ^ ^
- %_0.27 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.27)
? ^
+ %_0.27 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.25)
? ^
- %bias.17 : Tensor = prim::GetAttr[name="bias"](%_0.27)
? ^ ^^ - ^ ^^
+ %weight.21 : Tensor = prim::GetAttr[name="weight"](%_0.27)
? ^^ ^^^ + ^^ ^^^
%model.33 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.33 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.33)
- %_2.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.33)
- %attention.33 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_2.9)
- %linear_layers.25 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.33)
- %_0.25 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.25)
- %weight.21 : Tensor = prim::GetAttr[name="weight"](%_0.25)
- %model.31 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
- %transformer_blocks.31 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.31)
- %_1.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.31)
? ^
+ %_1.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.33)
? ^
%attention.31 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_1.23)
%output_linear.7 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.31)
%bias.15 : Tensor = prim::GetAttr[name="bias"](%output_linear.7)
- %model.29 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^^
+ %model.31 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^^
- %transformer_blocks.29 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.29)
? ^^ ^^
+ %transformer_blocks.31 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.31)
? ^^ ^^
- %_1.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.29)
? ^^
+ %_1.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.31)
? ^^
%attention.29 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_1.21)
%output_linear.5 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.29)
%weight.19 : Tensor = prim::GetAttr[name="weight"](%output_linear.5)
- %model.27 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
+ %model.29 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
? ^ | |
- %transformer_blocks.27 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.27) | |
? ^ ^ | |
+ %transformer_blocks.29 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.29) | |
? ^ ^ | |
- %_1.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.27) | |
? ^ | |
+ %_1.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.29) | |
? ^ | |
%attention.27 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_1.19)
%linear_layers.23 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.27)
%_2.7 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.23)
%bias.13 : Tensor = prim::GetAttr[name="bias"](%_2.7)
- %model.25 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.27 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.25 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.25)
? ^ ^
+ %transformer_blocks.27 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.27)
? ^ ^
- %_1.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.25)
? ^
+ %_1.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.27)
? ^
%attention.25 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_1.17)
%linear_layers.21 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.25)
%_2.5 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.21)
%weight.17 : Tensor = prim::GetAttr[name="weight"](%_2.5)
- %model.23 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.25 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.23 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.23)
? ^ ^
+ %transformer_blocks.25 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.25)
? ^ ^
- %_1.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.23)
? ^
+ %_1.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.25)
? ^
%attention.23 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_1.13)
%linear_layers.19 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.23)
%_1.15 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.19)
%bias.11 : Tensor = prim::GetAttr[name="bias"](%_1.15)
- %model.21 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.23 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.21 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.21)
? ^ ^
+ %transformer_blocks.23 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.23)
? ^ ^
- %_1.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.21)
? ^
+ %_1.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.23)
? ^
%attention.21 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_1.9)
%linear_layers.17 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.21)
%_1.11 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.17)
%weight.15 : Tensor = prim::GetAttr[name="weight"](%_1.11)
+ %model.21 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
+ %transformer_blocks.21 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.21)
+ %_1.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.21)
+ %attention.19 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_1.7)
+ %linear_layers.15 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.19)
+ %_0.25 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.15)
+ %bias.9 : Tensor = prim::GetAttr[name="bias"](%_0.25)
%model.19 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.19 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.19)
- %_1.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.19)
? ^
+ %_1.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.19)
? ^
- %attention.19 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_1.7)
- %linear_layers.15 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.19)
- %_0.23 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.15)
- %bias.9 : Tensor = prim::GetAttr[name="bias"](%_0.23)
- %model.17 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
- %transformer_blocks.17 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.17)
- %_1.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.17)
%attention.17 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_1.5)
%linear_layers.13 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.17)
- %_0.21 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.13)
? ^
+ %_0.23 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.13)
? ^
- %weight.13 : Tensor = prim::GetAttr[name="weight"](%_0.21)
? ^
+ %weight.13 : Tensor = prim::GetAttr[name="weight"](%_0.23)
? ^
%model.15 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.15 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.15)
%_0.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.15)
%attention.15 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_0.19)
%output_linear.3 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.15)
%bias.7 : Tensor = prim::GetAttr[name="bias"](%output_linear.3)
%model.13 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.13 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.13)
%_0.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.13)
%attention.13 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_0.17)
%output_linear.1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.13)
%weight.11 : Tensor = prim::GetAttr[name="weight"](%output_linear.1)
%model.11 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.11 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.11)
%_0.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.11)
%attention.11 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_0.15)
%linear_layers.11 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.11)
%_2.3 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.11)
%bias.5 : Tensor = prim::GetAttr[name="bias"](%_2.3)
%model.9 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.9 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.9)
%_0.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.9)
%attention.9 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_0.13)
%linear_layers.9 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.9)
%_2.1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.9)
%weight.9 : Tensor = prim::GetAttr[name="weight"](%_2.1)
%model.7 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.7 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.7)
%_0.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.7)
%attention.7 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_0.11)
%linear_layers.7 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.7)
%_1.3 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.7)
%bias.3 : Tensor = prim::GetAttr[name="bias"](%_1.3)
%model.5 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.5 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.5)
%_0.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.5)
%attention.5 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_0.9)
%linear_layers.5 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.5)
%_1.1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.5)
%weight.7 : Tensor = prim::GetAttr[name="weight"](%_1.1)
%model.3 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.3 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.3)
%_0.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.3)
%attention.3 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_0.5)
%linear_layers.3 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.3)
%_0.7 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.3)
%bias.1 : Tensor = prim::GetAttr[name="bias"](%_0.7)
%model.1 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.1 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.1)
%_0.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.1)
%attention.1 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_0.1)
%linear_layers.1 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.1)
%_0.3 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.1)
%weight.5 : Tensor = prim::GetAttr[name="weight"](%_0.3)
%1516 : int = prim::Constant[value=0]() # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0
%1517 : Tensor = aten::gt(%tokens, %1516) # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0
%1518 : int = prim::Constant[value=1]() # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0
%1519 : Tensor = aten::unsqueeze(%1517, %1518) # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0
%1520 : int = prim::Constant[value=1]() # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0
%1521 : int = aten::size(%tokens, %1520) # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0
%1522 : Tensor = prim::NumToTensor(%1521)
%1523 : int = aten::Int(%1522)
%1524 : int = prim::Constant[value=1]() # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0
%1525 : int = prim::Constant[value=1]() # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0
%1526 : int[] = prim::ListConstruct(%1524, %1523, %1525)
%1527 : Tensor = aten::repeat(%1519, %1526) # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0
%1528 : int = prim::Constant[value=1]() # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0
- %mask : Tensor = aten::unsqueeze(%1527, %1528) # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0
+ %mask.1 : Tensor = aten::unsqueeze(%1527, %1528) # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0
? ++
%1567 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.embedding/__module.model.embedding.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%1568 : bool = prim::Constant[value=1](), scope: __module.model.embedding/__module.model.embedding.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%1569 : int = prim::Constant[value=1](), scope: __module.model.embedding/__module.model.embedding.position # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/embedding/position.py:25:0
%1570 : int = prim::Constant[value=9223372036854775807](), scope: __module.model.embedding/__module.model.embedding.position # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/embedding/position.py:25:0
%1571 : int = prim::Constant[value=0](), scope: __module.model.embedding/__module.model.embedding.token # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:2148:0
%1572 : bool = prim::Constant[value=0](), scope: __module.model.embedding/__module.model.embedding.token # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:2148:0
%dropout.1 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%embedding)
%segment : __torch__.bert_pytorch.model.embedding.segment.SegmentEmbedding = prim::GetAttr[name="segment"](%embedding)
%position : __torch__.bert_pytorch.model.embedding.position.PositionalEmbedding = prim::GetAttr[name="position"](%embedding)
%token : __torch__.bert_pytorch.model.embedding.token.TokenEmbedding = prim::GetAttr[name="token"](%embedding)
%weight.101 : Tensor = prim::GetAttr[name="weight"](%token)
%1578 : Tensor = aten::embedding(%weight.101, %tokens, %1571, %1572, %1572), scope: __module.model.embedding/__module.model.embedding.token # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:2148:0
%pe : Tensor = prim::GetAttr[name="pe"](%position)
%1580 : int = aten::size(%tokens, %1569), scope: __module.model.embedding/__module.model.embedding.position # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/embedding/position.py:25:0
%1581 : Tensor = aten::slice(%pe, %1571, %1571, %1570, %1569), scope: __module.model.embedding/__module.model.embedding.position # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/embedding/position.py:25:0
%1582 : Tensor = aten::slice(%1581, %1569, %1571, %1580, %1569), scope: __module.model.embedding/__module.model.embedding.position # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/embedding/position.py:25:0
%1583 : Tensor = aten::add(%1578, %1582, %1569), scope: __module.model.embedding # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/embedding/bert.py:32:0
%weight.103 : Tensor = prim::GetAttr[name="weight"](%segment)
%1585 : Tensor = aten::embedding(%weight.103, %tokens, %1571, %1572, %1572), scope: __module.model.embedding/__module.model.embedding.segment # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:2148:0
%input.1 : Tensor = aten::add(%1583, %1585, %1569), scope: __module.model.embedding # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/embedding/bert.py:32:0
%x.1 : Tensor = aten::dropout(%input.1, %1567, %1568), scope: __module.model.embedding/__module.model.embedding.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%1588 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0
%1589 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0
%1590 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0
%1591 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0
%1592 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention/__module.model.transformer_blocks.0.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%1593 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%1594 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%1595 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%1596 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0
%1597 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0
%1598 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0
%1599 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm
%1600 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%1601 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%norm.1 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.1)
%b_2.1 : Tensor = prim::GetAttr[name="b_2"](%norm.1)
%a_2.1 : Tensor = prim::GetAttr[name="a_2"](%norm.1)
%1605 : int[] = prim::ListConstruct(%1597), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm
%mean.1 : Tensor = aten::mean(%x.1, %1605, %1598, %1599), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0
%1607 : int[] = prim::ListConstruct(%1597), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm
%std.1 : Tensor = aten::std(%x.1, %1607, %1598, %1598), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0
%1609 : Tensor = aten::sub(%x.1, %mean.1, %1600), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%1610 : Tensor = aten::mul(%a_2.1, %1609), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%1611 : Tensor = aten::add(%std.1, %1601, %1600), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%1612 : Tensor = aten::div(%1610, %1611), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%query.1 : Tensor = aten::add(%1612, %b_2.1, %1600), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%1614 : int = aten::size(%query.1, %1596), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0
%1615 : Tensor = aten::linear(%query.1, %weight.5, %bias.1), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0
%1616 : int[] = prim::ListConstruct(%1614, %1597, %1595, %1594), scope: __module.model.transformer_blocks.0.input_sublayer
%1617 : Tensor = aten::view(%1615, %1616), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%query.3 : Tensor = aten::transpose(%1617, %1600, %1593), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%1619 : Tensor = aten::linear(%query.1, %weight.7, %bias.3), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0
%1620 : int[] = prim::ListConstruct(%1614, %1597, %1595, %1594), scope: __module.model.transformer_blocks.0.input_sublayer
%1621 : Tensor = aten::view(%1619, %1620), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%key.1 : Tensor = aten::transpose(%1621, %1600, %1593), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%1623 : Tensor = aten::linear(%query.1, %weight.9, %bias.5), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0
%1624 : int[] = prim::ListConstruct(%1614, %1597, %1595, %1594), scope: __module.model.transformer_blocks.0.input_sublayer
%1625 : Tensor = aten::view(%1623, %1624), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%value.1 : Tensor = aten::transpose(%1625, %1600, %1593), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%1627 : Tensor = aten::transpose(%key.1, %1589, %1597), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0
%1628 : Tensor = aten::matmul(%query.3, %1627), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0
%scores.1 : Tensor = aten::div(%1628, %1590), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0
- %1630 : Tensor = aten::eq(%mask, %1596), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0
+ %1630 : Tensor = aten::eq(%mask.1, %1596), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0
? ++
%input.3 : Tensor = aten::masked_fill(%scores.1, %1630, %1591), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0
%input.5 : Tensor = aten::softmax(%input.3, %1597, %1599), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0
%p_attn.1 : Tensor = aten::dropout(%input.5, %1592, %1598), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention/__module.model.transformer_blocks.0.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%x.3 : Tensor = aten::matmul(%p_attn.1, %value.1), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0
%1635 : Tensor = aten::transpose(%x.3, %1600, %1593), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0
%1636 : Tensor = aten::contiguous(%1635, %1596), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0
%1637 : int[] = prim::ListConstruct(%1614, %1597, %1588), scope: __module.model.transformer_blocks.0.input_sublayer
%input.7 : Tensor = aten::view(%1636, %1637), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0
%input.9 : Tensor = aten::linear(%input.7, %weight.11, %bias.7), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0
%1640 : Tensor = aten::dropout(%input.9, %1592, %1598), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%x.5 : Tensor = aten::add(%x.1, %1640, %1600), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0
%1642 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%1643 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0
%1644 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0
%1645 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0
%1646 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm
%1647 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%1648 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%dropout.9 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.1)
%norm.3 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.1)
%b_2.3 : Tensor = prim::GetAttr[name="b_2"](%norm.3)
%a_2.3 : Tensor = prim::GetAttr[name="a_2"](%norm.3)
%1653 : int[] = prim::ListConstruct(%1644), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm
%mean.3 : Tensor = aten::mean(%x.5, %1653, %1645, %1646), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0
%1655 : int[] = prim::ListConstruct(%1644), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm
%std.3 : Tensor = aten::std(%x.5, %1655, %1645, %1645), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0
%1657 : Tensor = aten::sub(%x.5, %mean.3, %1647), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%1658 : Tensor = aten::mul(%a_2.3, %1657), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%1659 : Tensor = aten::add(%std.3, %1648, %1647), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%1660 : Tensor = aten::div(%1658, %1659), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%input.11 : Tensor = aten::add(%1660, %b_2.3, %1647), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%bias.97 : Tensor = prim::GetAttr[name="bias"](%w_1.1)
%weight.105 : Tensor = prim::GetAttr[name="weight"](%w_1.1)
%input.13 : Tensor = aten::linear(%input.11, %weight.105, %bias.97), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0
%input.15 : Tensor = aten::gelu(%input.13, %1643), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0
%input.17 : Tensor = aten::dropout(%input.15, %1642, %1645), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%bias.99 : Tensor = prim::GetAttr[name="bias"](%w_2.1)
%weight.107 : Tensor = prim::GetAttr[name="weight"](%w_2.1)
%input.19 : Tensor = aten::linear(%input.17, %weight.107, %bias.99), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0
%1670 : Tensor = aten::dropout(%input.19, %1642, %1645), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%input.21 : Tensor = aten::add(%x.5, %1670, %1647), scope: __module.model.transformer_blocks.0.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0
%1672 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.0.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%1673 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.0.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%x.7 : Tensor = aten::dropout(%input.21, %1673, %1672), scope: __module.model.transformer_blocks.0.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%1675 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0
%1676 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0
%1677 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0
%1678 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0
%1679 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention/__module.model.transformer_blocks.1.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%1680 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%1681 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%1682 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%1683 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0
%1684 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0
%1685 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0
%1686 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm
%1687 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%1688 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%norm.5 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.3)
%b_2.5 : Tensor = prim::GetAttr[name="b_2"](%norm.5)
%a_2.5 : Tensor = prim::GetAttr[name="a_2"](%norm.5)
%1692 : int[] = prim::ListConstruct(%1684), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm
%mean.5 : Tensor = aten::mean(%x.7, %1692, %1685, %1686), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0
%1694 : int[] = prim::ListConstruct(%1684), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm
%std.5 : Tensor = aten::std(%x.7, %1694, %1685, %1685), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0
%1696 : Tensor = aten::sub(%x.7, %mean.5, %1687), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%1697 : Tensor = aten::mul(%a_2.5, %1696), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%1698 : Tensor = aten::add(%std.5, %1688, %1687), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%1699 : Tensor = aten::div(%1697, %1698), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%query.5 : Tensor = aten::add(%1699, %b_2.5, %1687), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%1701 : int = aten::size(%query.5, %1683), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0
%1702 : Tensor = aten::linear(%query.5, %weight.13, %bias.9), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0
%1703 : int[] = prim::ListConstruct(%1701, %1684, %1682, %1681), scope: __module.model.transformer_blocks.1.input_sublayer
%1704 : Tensor = aten::view(%1702, %1703), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%query.7 : Tensor = aten::transpose(%1704, %1687, %1680), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%1706 : Tensor = aten::linear(%query.5, %weight.15, %bias.11), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0
%1707 : int[] = prim::ListConstruct(%1701, %1684, %1682, %1681), scope: __module.model.transformer_blocks.1.input_sublayer
%1708 : Tensor = aten::view(%1706, %1707), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%key.3 : Tensor = aten::transpose(%1708, %1687, %1680), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%1710 : Tensor = aten::linear(%query.5, %weight.17, %bias.13), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0
%1711 : int[] = prim::ListConstruct(%1701, %1684, %1682, %1681), scope: __module.model.transformer_blocks.1.input_sublayer
%1712 : Tensor = aten::view(%1710, %1711), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%value.3 : Tensor = aten::transpose(%1712, %1687, %1680), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%1714 : Tensor = aten::transpose(%key.3, %1676, %1684), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0
%1715 : Tensor = aten::matmul(%query.7, %1714), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0
%scores.3 : Tensor = aten::div(%1715, %1677), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0
- %1717 : Tensor = aten::eq(%mask, %1683), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0
+ %1717 : Tensor = aten::eq(%mask.1, %1683), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0
? ++
%input.23 : Tensor = aten::masked_fill(%scores.3, %1717, %1678), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0
%input.25 : Tensor = aten::softmax(%input.23, %1684, %1686), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0
%p_attn.3 : Tensor = aten::dropout(%input.25, %1679, %1685), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention/__module.model.transformer_blocks.1.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%x.9 : Tensor = aten::matmul(%p_attn.3, %value.3), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0
%1722 : Tensor = aten::transpose(%x.9, %1687, %1680), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0
%1723 : Tensor = aten::contiguous(%1722, %1683), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0
%1724 : int[] = prim::ListConstruct(%1701, %1684, %1675), scope: __module.model.transformer_blocks.1.input_sublayer
%input.27 : Tensor = aten::view(%1723, %1724), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0
%input.29 : Tensor = aten::linear(%input.27, %weight.19, %bias.15), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0
%1727 : Tensor = aten::dropout(%input.29, %1679, %1685), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%x.11 : Tensor = aten::add(%x.7, %1727, %1687), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0
%1729 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%1730 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0
%1731 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0
%1732 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0
%1733 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm
%1734 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%1735 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%dropout.21 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.3)
%norm.7 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.3)
%b_2.7 : Tensor = prim::GetAttr[name="b_2"](%norm.7)
%a_2.7 : Tensor = prim::GetAttr[name="a_2"](%norm.7)
%1740 : int[] = prim::ListConstruct(%1731), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm
%mean.7 : Tensor = aten::mean(%x.11, %1740, %1732, %1733), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0
%1742 : int[] = prim::ListConstruct(%1731), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm
%std.7 : Tensor = aten::std(%x.11, %1742, %1732, %1732), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0
%1744 : Tensor = aten::sub(%x.11, %mean.7, %1734), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%1745 : Tensor = aten::mul(%a_2.7, %1744), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%1746 : Tensor = aten::add(%std.7, %1735, %1734), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%1747 : Tensor = aten::div(%1745, %1746), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%input.31 : Tensor = aten::add(%1747, %b_2.7, %1734), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%bias.101 : Tensor = prim::GetAttr[name="bias"](%w_1.3)
%weight.109 : Tensor = prim::GetAttr[name="weight"](%w_1.3)
%input.33 : Tensor = aten::linear(%input.31, %weight.109, %bias.101), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0
%input.35 : Tensor = aten::gelu(%input.33, %1730), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0
%input.37 : Tensor = aten::dropout(%input.35, %1729, %1732), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%bias.103 : Tensor = prim::GetAttr[name="bias"](%w_2.3)
%weight.111 : Tensor = prim::GetAttr[name="weight"](%w_2.3)
%input.39 : Tensor = aten::linear(%input.37, %weight.111, %bias.103), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0
%1757 : Tensor = aten::dropout(%input.39, %1729, %1732), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%input.41 : Tensor = aten::add(%x.11, %1757, %1734), scope: __module.model.transformer_blocks.1.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0
%1759 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.1.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%1760 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.1.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%x.13 : Tensor = aten::dropout(%input.41, %1760, %1759), scope: __module.model.transformer_blocks.1.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%1762 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0
%1763 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0
%1764 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0
%1765 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0
%1766 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention/__module.model.transformer_blocks.2.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0
%1767 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%1768 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%1769 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0
%1770 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0
%1771 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0
%1772 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0
%1773 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm
%1774 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%1775 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0
%norm.9 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.5)
%b_2.9 : Tensor = prim::GetAttr[name="b_2"](%norm.9)
%a_2.9 : Tensor = prim::GetAttr[name="a_2"](%norm.9)
%1779 : int[] = prim::ListConstruct(%1771), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm
%mean.9 : Tensor = aten::mean(%x.13, %1779, %1772, %1773), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0
%1781 : int[] = prim::ListConstruct(%1771), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm
%std.9 : Tensor = aten::std(%x.13, %1781, %1772, %1772), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
%1783 : Tensor = aten::sub(%x.13, %mean.9, %1774), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1784 : Tensor = aten::mul(%a_2.9, %1783), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1785 : Tensor = aten::add(%std.9, %1775, %1774), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1786 : Tensor = aten::div(%1784, %1785), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%query.9 : Tensor = aten::add(%1786, %b_2.9, %1774), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
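The dozen ops from %norm.9 down to %query.9 are a hand-rolled layer norm (layer_norm.py:15-17): mean and std over the last dimension, then scale and shift. A sketch reconstructed from the IR; the ones/zeros initialization is the conventional choice and is not visible in the trace:

    import torch
    from torch import nn

    class LayerNorm(nn.Module):
        def __init__(self, features, eps=1e-6):             # eps matches the {1e-06} constant
            super().__init__()
            self.a_2 = nn.Parameter(torch.ones(features))   # %a_2.* above (gain)
            self.b_2 = nn.Parameter(torch.zeros(features))  # %b_2.* above (bias)
            self.eps = eps

        def forward(self, x):
            mean = x.mean(-1, keepdim=True)   # aten::mean(x, [-1], True, None)
            std = x.std(-1, keepdim=True)     # aten::std(x, [-1], unbiased=True, keepdim=True)
            return self.a_2 * (x - mean) / (std + self.eps) + self.b_2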
%1788 : int = aten::size(%query.9, %1770), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
%1789 : Tensor = aten::linear(%query.9, %weight.21, %bias.17), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%1790 : int[] = prim::ListConstruct(%1788, %1771, %1769, %1768), scope: __module.model.transformer_blocks.2.input_sublayer | |
%1791 : Tensor = aten::view(%1789, %1790), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%query.11 : Tensor = aten::transpose(%1791, %1774, %1767), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%1793 : Tensor = aten::linear(%query.9, %weight.23, %bias.19), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%1794 : int[] = prim::ListConstruct(%1788, %1771, %1769, %1768), scope: __module.model.transformer_blocks.2.input_sublayer | |
%1795 : Tensor = aten::view(%1793, %1794), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%key.5 : Tensor = aten::transpose(%1795, %1774, %1767), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%1797 : Tensor = aten::linear(%query.9, %weight.25, %bias.21), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%1798 : int[] = prim::ListConstruct(%1788, %1771, %1769, %1768), scope: __module.model.transformer_blocks.2.input_sublayer | |
%1799 : Tensor = aten::view(%1797, %1798), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%value.5 : Tensor = aten::transpose(%1799, %1774, %1767), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%1801 : Tensor = aten::transpose(%key.5, %1763, %1771), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%1802 : Tensor = aten::matmul(%query.11, %1801), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%scores.5 : Tensor = aten::div(%1802, %1764), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
- %1804 : Tensor = aten::eq(%mask, %1770), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
+ %1804 : Tensor = aten::eq(%mask.1, %1770), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
? ++ | |
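The -/+ pair above is the entire difference the trace checker found in this block (the `?` line is difflib's hint marking the inserted characters): the first graph reads the attention mask as %mask, the re-traced graph as %mask.1; every other op matches. Note also that each aten::dropout in this graph is invoked with its train flag hard-wired to True (the prim::Constant[value=1] bools), so two runs of the traced function cannot agree numerically either. A minimal workaround sketch, assuming the trace is produced by torch.jit.trace_module; `module` and `example_input` are placeholder names:

    import torch

    module.eval()  # placeholder module: flips every aten::dropout train flag above to False
    traced = torch.jit.trace_module(
        module,
        {"forward": example_input},   # placeholder example input
        check_trace=False,            # skips the re-trace + comparison pass that emitted this diff
    )

check_trace=False silences the sanity check rather than fixing nondeterminism, so module.eval() (or otherwise disabling dropout at trace time) is the part that actually makes the graphs agree.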
%input.43 : Tensor = aten::masked_fill(%scores.5, %1804, %1765), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
%input.45 : Tensor = aten::softmax(%input.43, %1771, %1773), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
%p_attn.5 : Tensor = aten::dropout(%input.45, %1766, %1772), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention/__module.model.transformer_blocks.2.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.15 : Tensor = aten::matmul(%p_attn.5, %value.5), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
%1809 : Tensor = aten::transpose(%x.15, %1774, %1767), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%1810 : Tensor = aten::contiguous(%1809, %1770), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%1811 : int[] = prim::ListConstruct(%1788, %1771, %1762), scope: __module.model.transformer_blocks.2.input_sublayer | |
%input.47 : Tensor = aten::view(%1810, %1811), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%input.49 : Tensor = aten::linear(%input.47, %weight.27, %bias.23), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%1814 : Tensor = aten::dropout(%input.49, %1766, %1772), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.17 : Tensor = aten::add(%x.13, %1814, %1774), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
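Ops %1788 through %x.17 are one full multi-head attention sublayer for block 2 (multi_head.py:34-44 and single.py:15-33): three linear projections reshaped to h=12 heads of d_k=64, scaled dot-product with the mask, and a merge back to d_model=768. The tensor constant {8} is sqrt(d_k)=sqrt(64), frozen into the graph at trace time. A reconstruction from the IR, not verbatim source:

    import torch

    # h=12, d_k=64, d_model=768 per the int constants above; linear_layers is the
    # list of three projection modules, output_linear the final projection.
    def attention_sublayer(x, mask, linear_layers, output_linear, dropout, h=12, d_k=64):
        batch_size = x.size(0)                                   # aten::size(query, 0)
        q, k, v = [
            lin(x).view(batch_size, -1, h, d_k).transpose(1, 2)  # multi_head.py:37
            for lin in linear_layers                             # the three aten::linear calls
        ]
        scores = (q @ k.transpose(-2, -1)) / 8.0                 # single.py:15, 8 = sqrt(d_k)
        scores = scores.masked_fill(mask == 0, -1e9)             # single.py:27
        p_attn = dropout(scores.softmax(dim=-1))                 # softmax then aten::dropout
        x = (p_attn @ v).transpose(1, 2).contiguous().view(batch_size, -1, h * d_k)  # :44
        return output_linear(x)

The dropout-and-residual pair just above (%1814, %x.17) is the SublayerConnection wrapper again.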
%1816 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%1817 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
%1818 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%1819 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%1820 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm | |
%1821 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1822 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%dropout.33 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.5) | |
%norm.11 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.5) | |
%b_2.11 : Tensor = prim::GetAttr[name="b_2"](%norm.11) | |
%a_2.11 : Tensor = prim::GetAttr[name="a_2"](%norm.11) | |
%1827 : int[] = prim::ListConstruct(%1818), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm | |
%mean.11 : Tensor = aten::mean(%x.17, %1827, %1819, %1820), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%1829 : int[] = prim::ListConstruct(%1818), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm | |
%std.11 : Tensor = aten::std(%x.17, %1829, %1819, %1819), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
%1831 : Tensor = aten::sub(%x.17, %mean.11, %1821), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1832 : Tensor = aten::mul(%a_2.11, %1831), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1833 : Tensor = aten::add(%std.11, %1822, %1821), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1834 : Tensor = aten::div(%1832, %1833), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%input.51 : Tensor = aten::add(%1834, %b_2.11, %1821), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%bias.105 : Tensor = prim::GetAttr[name="bias"](%w_1.5) | |
%weight.113 : Tensor = prim::GetAttr[name="weight"](%w_1.5) | |
%input.53 : Tensor = aten::linear(%input.51, %weight.113, %bias.105), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%input.55 : Tensor = aten::gelu(%input.53, %1817), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
%input.57 : Tensor = aten::dropout(%input.55, %1816, %1819), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%bias.107 : Tensor = prim::GetAttr[name="bias"](%w_2.5) | |
%weight.115 : Tensor = prim::GetAttr[name="weight"](%w_2.5) | |
%input.59 : Tensor = aten::linear(%input.57, %weight.115, %bias.107), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
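%input.51 through %input.59 are the position-wise feed-forward for block 2: linear, GELU (with approximate="none", the %1817 string constant), dropout, linear. The layer widths are not recorded in the IR, so they stay symbolic in this reconstruction:

    import torch
    from torch import nn

    class PositionwiseFeedForward(nn.Module):
        def __init__(self, d_model, d_ff, dropout=0.1):
            super().__init__()
            self.w_1 = nn.Linear(d_model, d_ff)   # %w_1.* weight/bias GetAttrs above
            self.w_2 = nn.Linear(d_ff, d_model)   # %w_2.* weight/bias GetAttrs above
            self.dropout = nn.Dropout(dropout)
            self.activation = nn.GELU()           # defaults to approximate="none"

        def forward(self, x):
            return self.w_2(self.dropout(self.activation(self.w_1(x))))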
%1844 : Tensor = aten::dropout(%input.59, %1816, %1819), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%input.61 : Tensor = aten::add(%x.17, %1844, %1821), scope: __module.model.transformer_blocks.2.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
%1846 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.2.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%1847 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.2.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.19 : Tensor = aten::dropout(%input.61, %1847, %1846), scope: __module.model.transformer_blocks.2.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%1849 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%1850 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%1851 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%1852 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
%1853 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention/__module.model.transformer_blocks.3.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%1854 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%1855 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%1856 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%1857 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
%1858 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%1859 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%1860 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm | |
%1861 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1862 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%norm.13 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.7) | |
%b_2.13 : Tensor = prim::GetAttr[name="b_2"](%norm.13) | |
%a_2.13 : Tensor = prim::GetAttr[name="a_2"](%norm.13) | |
%1866 : int[] = prim::ListConstruct(%1858), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm | |
%mean.13 : Tensor = aten::mean(%x.19, %1866, %1859, %1860), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%1868 : int[] = prim::ListConstruct(%1858), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm | |
%std.13 : Tensor = aten::std(%x.19, %1868, %1859, %1859), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
%1870 : Tensor = aten::sub(%x.19, %mean.13, %1861), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1871 : Tensor = aten::mul(%a_2.13, %1870), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1872 : Tensor = aten::add(%std.13, %1862, %1861), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1873 : Tensor = aten::div(%1871, %1872), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%query.13 : Tensor = aten::add(%1873, %b_2.13, %1861), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1875 : int = aten::size(%query.13, %1857), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
%1876 : Tensor = aten::linear(%query.13, %weight.29, %bias.25), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%1877 : int[] = prim::ListConstruct(%1875, %1858, %1856, %1855), scope: __module.model.transformer_blocks.3.input_sublayer | |
%1878 : Tensor = aten::view(%1876, %1877), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%query.15 : Tensor = aten::transpose(%1878, %1861, %1854), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%1880 : Tensor = aten::linear(%query.13, %weight.31, %bias.27), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%1881 : int[] = prim::ListConstruct(%1875, %1858, %1856, %1855), scope: __module.model.transformer_blocks.3.input_sublayer | |
%1882 : Tensor = aten::view(%1880, %1881), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%key.7 : Tensor = aten::transpose(%1882, %1861, %1854), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%1884 : Tensor = aten::linear(%query.13, %weight.33, %bias.29), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%1885 : int[] = prim::ListConstruct(%1875, %1858, %1856, %1855), scope: __module.model.transformer_blocks.3.input_sublayer | |
%1886 : Tensor = aten::view(%1884, %1885), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%value.7 : Tensor = aten::transpose(%1886, %1861, %1854), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%1888 : Tensor = aten::transpose(%key.7, %1850, %1858), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%1889 : Tensor = aten::matmul(%query.15, %1888), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%scores.7 : Tensor = aten::div(%1889, %1851), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
- %1891 : Tensor = aten::eq(%mask, %1857), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
+ %1891 : Tensor = aten::eq(%mask.1, %1857), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
? ++ | |
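Same rename, block 3: this -/+/? triplet (and the one for block 4 below) differs from block 2's only in the SSA numbers. Every block consumes the same mask input, so the single %mask -> %mask.1 rename surfaces once per transformer block; the diff reflects one underlying discrepancy, not a new one per block.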
%input.63 : Tensor = aten::masked_fill(%scores.7, %1891, %1852), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
%input.65 : Tensor = aten::softmax(%input.63, %1858, %1860), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
%p_attn.7 : Tensor = aten::dropout(%input.65, %1853, %1859), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention/__module.model.transformer_blocks.3.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.21 : Tensor = aten::matmul(%p_attn.7, %value.7), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
%1896 : Tensor = aten::transpose(%x.21, %1861, %1854), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%1897 : Tensor = aten::contiguous(%1896, %1857), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%1898 : int[] = prim::ListConstruct(%1875, %1858, %1849), scope: __module.model.transformer_blocks.3.input_sublayer | |
%input.67 : Tensor = aten::view(%1897, %1898), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%input.69 : Tensor = aten::linear(%input.67, %weight.35, %bias.31), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%1901 : Tensor = aten::dropout(%input.69, %1853, %1859), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.23 : Tensor = aten::add(%x.19, %1901, %1861), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
%1903 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%1904 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
%1905 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%1906 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%1907 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm | |
%1908 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1909 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%dropout.45 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.7) | |
%norm.15 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.7) | |
%b_2.15 : Tensor = prim::GetAttr[name="b_2"](%norm.15) | |
%a_2.15 : Tensor = prim::GetAttr[name="a_2"](%norm.15) | |
%1914 : int[] = prim::ListConstruct(%1905), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm | |
%mean.15 : Tensor = aten::mean(%x.23, %1914, %1906, %1907), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%1916 : int[] = prim::ListConstruct(%1905), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm | |
%std.15 : Tensor = aten::std(%x.23, %1916, %1906, %1906), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
%1918 : Tensor = aten::sub(%x.23, %mean.15, %1908), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1919 : Tensor = aten::mul(%a_2.15, %1918), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1920 : Tensor = aten::add(%std.15, %1909, %1908), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1921 : Tensor = aten::div(%1919, %1920), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%input.71 : Tensor = aten::add(%1921, %b_2.15, %1908), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%bias.109 : Tensor = prim::GetAttr[name="bias"](%w_1.7) | |
%weight.117 : Tensor = prim::GetAttr[name="weight"](%w_1.7) | |
%input.73 : Tensor = aten::linear(%input.71, %weight.117, %bias.109), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%input.75 : Tensor = aten::gelu(%input.73, %1904), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
%input.77 : Tensor = aten::dropout(%input.75, %1903, %1906), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%bias.111 : Tensor = prim::GetAttr[name="bias"](%w_2.7) | |
%weight.119 : Tensor = prim::GetAttr[name="weight"](%w_2.7) | |
%input.79 : Tensor = aten::linear(%input.77, %weight.119, %bias.111), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%1931 : Tensor = aten::dropout(%input.79, %1903, %1906), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%input.81 : Tensor = aten::add(%x.23, %1931, %1908), scope: __module.model.transformer_blocks.3.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
%1933 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.3.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%1934 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.3.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.25 : Tensor = aten::dropout(%input.81, %1934, %1933), scope: __module.model.transformer_blocks.3.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%1936 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%1937 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%1938 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%1939 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
%1940 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention/__module.model.transformer_blocks.4.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%1941 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%1942 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%1943 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%1944 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
%1945 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%1946 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%1947 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm | |
%1948 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1949 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%norm.17 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.9) | |
%b_2.17 : Tensor = prim::GetAttr[name="b_2"](%norm.17) | |
%a_2.17 : Tensor = prim::GetAttr[name="a_2"](%norm.17) | |
%1953 : int[] = prim::ListConstruct(%1945), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm | |
%mean.17 : Tensor = aten::mean(%x.25, %1953, %1946, %1947), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%1955 : int[] = prim::ListConstruct(%1945), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm | |
%std.17 : Tensor = aten::std(%x.25, %1955, %1946, %1946), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
%1957 : Tensor = aten::sub(%x.25, %mean.17, %1948), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1958 : Tensor = aten::mul(%a_2.17, %1957), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1959 : Tensor = aten::add(%std.17, %1949, %1948), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1960 : Tensor = aten::div(%1958, %1959), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%query.17 : Tensor = aten::add(%1960, %b_2.17, %1948), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1962 : int = aten::size(%query.17, %1944), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
%1963 : Tensor = aten::linear(%query.17, %weight.37, %bias.33), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%1964 : int[] = prim::ListConstruct(%1962, %1945, %1943, %1942), scope: __module.model.transformer_blocks.4.input_sublayer | |
%1965 : Tensor = aten::view(%1963, %1964), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%query.19 : Tensor = aten::transpose(%1965, %1948, %1941), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%1967 : Tensor = aten::linear(%query.17, %weight.39, %bias.35), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%1968 : int[] = prim::ListConstruct(%1962, %1945, %1943, %1942), scope: __module.model.transformer_blocks.4.input_sublayer | |
%1969 : Tensor = aten::view(%1967, %1968), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%key.9 : Tensor = aten::transpose(%1969, %1948, %1941), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%1971 : Tensor = aten::linear(%query.17, %weight.41, %bias.37), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%1972 : int[] = prim::ListConstruct(%1962, %1945, %1943, %1942), scope: __module.model.transformer_blocks.4.input_sublayer | |
%1973 : Tensor = aten::view(%1971, %1972), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%value.9 : Tensor = aten::transpose(%1973, %1948, %1941), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%1975 : Tensor = aten::transpose(%key.9, %1937, %1945), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%1976 : Tensor = aten::matmul(%query.19, %1975), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%scores.9 : Tensor = aten::div(%1976, %1938), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
- %1978 : Tensor = aten::eq(%mask, %1944), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
+ %1978 : Tensor = aten::eq(%mask.1, %1944), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
? ++ | |
%input.83 : Tensor = aten::masked_fill(%scores.9, %1978, %1939), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
%input.85 : Tensor = aten::softmax(%input.83, %1945, %1947), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
%p_attn.9 : Tensor = aten::dropout(%input.85, %1940, %1946), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention/__module.model.transformer_blocks.4.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.27 : Tensor = aten::matmul(%p_attn.9, %value.9), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
%1983 : Tensor = aten::transpose(%x.27, %1948, %1941), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%1984 : Tensor = aten::contiguous(%1983, %1944), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%1985 : int[] = prim::ListConstruct(%1962, %1945, %1936), scope: __module.model.transformer_blocks.4.input_sublayer | |
%input.87 : Tensor = aten::view(%1984, %1985), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%input.89 : Tensor = aten::linear(%input.87, %weight.43, %bias.39), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%1988 : Tensor = aten::dropout(%input.89, %1940, %1946), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.29 : Tensor = aten::add(%x.25, %1988, %1948), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
%1990 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%1991 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
%1992 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%1993 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%1994 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm | |
%1995 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%1996 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%dropout.57 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.9) | |
%norm.19 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.9) | |
%b_2.19 : Tensor = prim::GetAttr[name="b_2"](%norm.19) | |
%a_2.19 : Tensor = prim::GetAttr[name="a_2"](%norm.19) | |
%2001 : int[] = prim::ListConstruct(%1992), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm | |
%mean.19 : Tensor = aten::mean(%x.29, %2001, %1993, %1994), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2003 : int[] = prim::ListConstruct(%1992), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm | |
%std.19 : Tensor = aten::std(%x.29, %2003, %1993, %1993), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
%2005 : Tensor = aten::sub(%x.29, %mean.19, %1995), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2006 : Tensor = aten::mul(%a_2.19, %2005), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2007 : Tensor = aten::add(%std.19, %1996, %1995), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2008 : Tensor = aten::div(%2006, %2007), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%input.91 : Tensor = aten::add(%2008, %b_2.19, %1995), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%bias.113 : Tensor = prim::GetAttr[name="bias"](%w_1.9) | |
%weight.121 : Tensor = prim::GetAttr[name="weight"](%w_1.9) | |
%input.93 : Tensor = aten::linear(%input.91, %weight.121, %bias.113), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%input.95 : Tensor = aten::gelu(%input.93, %1991), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
%input.97 : Tensor = aten::dropout(%input.95, %1990, %1993), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%bias.115 : Tensor = prim::GetAttr[name="bias"](%w_2.9) | |
%weight.123 : Tensor = prim::GetAttr[name="weight"](%w_2.9) | |
%input.99 : Tensor = aten::linear(%input.97, %weight.123, %bias.115), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2018 : Tensor = aten::dropout(%input.99, %1990, %1993), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%input.101 : Tensor = aten::add(%x.29, %2018, %1995), scope: __module.model.transformer_blocks.4.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
%2020 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.4.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2021 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.4.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.31 : Tensor = aten::dropout(%input.101, %2021, %2020), scope: __module.model.transformer_blocks.4.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
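
Every `*.norm` scope in this graph lowers bert_pytorch's hand-written LayerNorm (layer_norm.py:15-17) into the same op chain: aten::mean and aten::std over the last dimension, then sub, mul by the gain, add of the {1e-06} epsilon constant to the std, div, and add of the bias. A minimal sketch reconstructing that module from the traced ops (the parameter names a_2/b_2, eps = 1e-6, and features = 768 all come straight from the graph above):

import torch
import torch.nn as nn

class LayerNorm(nn.Module):
    # a_2 * (x - mean) / (std + eps) + b_2 over the last dim, matching the
    # aten::mean/std/sub/mul/add/div/add chain recorded in the trace.
    def __init__(self, features: int = 768, eps: float = 1e-6):
        super().__init__()
        self.a_2 = nn.Parameter(torch.ones(features))   # %a_2.* above
        self.b_2 = nn.Parameter(torch.zeros(features))  # %b_2.* above
        self.eps = eps                                  # the {1e-06} constant
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        mean = x.mean(-1, keepdim=True)   # aten::mean(x, [-1], True, None)
        std = x.std(-1, keepdim=True)     # aten::std(x, [-1], True, True)
        return self.a_2 * (x - mean) / (std + self.eps) + self.b_2

Note this divides by std + eps rather than sqrt(var + eps), so it is not numerically identical to torch.nn.LayerNorm.
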
%2023 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2024 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2025 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2026 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
%2027 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention/__module.model.transformer_blocks.5.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2028 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2029 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2030 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2031 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
%2032 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2033 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2034 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm | |
%2035 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2036 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%norm.21 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.11) | |
%b_2.21 : Tensor = prim::GetAttr[name="b_2"](%norm.21) | |
%a_2.21 : Tensor = prim::GetAttr[name="a_2"](%norm.21) | |
%2040 : int[] = prim::ListConstruct(%2032), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm | |
%mean.21 : Tensor = aten::mean(%x.31, %2040, %2033, %2034), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2042 : int[] = prim::ListConstruct(%2032), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm | |
%std.21 : Tensor = aten::std(%x.31, %2042, %2033, %2033), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
%2044 : Tensor = aten::sub(%x.31, %mean.21, %2035), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2045 : Tensor = aten::mul(%a_2.21, %2044), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2046 : Tensor = aten::add(%std.21, %2036, %2035), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2047 : Tensor = aten::div(%2045, %2046), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%query.21 : Tensor = aten::add(%2047, %b_2.21, %2035), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2049 : int = aten::size(%query.21, %2031), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
%2050 : Tensor = aten::linear(%query.21, %weight.45, %bias.41), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2051 : int[] = prim::ListConstruct(%2049, %2032, %2030, %2029), scope: __module.model.transformer_blocks.5.input_sublayer | |
%2052 : Tensor = aten::view(%2050, %2051), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%query.23 : Tensor = aten::transpose(%2052, %2035, %2028), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2054 : Tensor = aten::linear(%query.21, %weight.47, %bias.43), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2055 : int[] = prim::ListConstruct(%2049, %2032, %2030, %2029), scope: __module.model.transformer_blocks.5.input_sublayer | |
%2056 : Tensor = aten::view(%2054, %2055), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%key.11 : Tensor = aten::transpose(%2056, %2035, %2028), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2058 : Tensor = aten::linear(%query.21, %weight.49, %bias.45), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2059 : int[] = prim::ListConstruct(%2049, %2032, %2030, %2029), scope: __module.model.transformer_blocks.5.input_sublayer | |
%2060 : Tensor = aten::view(%2058, %2059), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%value.11 : Tensor = aten::transpose(%2060, %2035, %2028), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2062 : Tensor = aten::transpose(%key.11, %2024, %2032), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2063 : Tensor = aten::matmul(%query.23, %2062), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%scores.11 : Tensor = aten::div(%2063, %2025), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
- %2065 : Tensor = aten::eq(%mask, %2031), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
+ %2065 : Tensor = aten::eq(%mask.1, %2031), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
? ++ | |
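
The -/+/? triplet above is difflib output from the trace checker: the graphs recorded on the two invocations differ only in the debug name of the attention-mask value (%mask vs %mask.1), and the ? guide line's ++ marks the two inserted characters. The same hunk recurs at every block's aten::eq(%mask, 0). Note also that every aten::dropout in this graph is traced with its train flag set (the trailing bool-1 constant), so the two recorded invocations sample different dropout masks and produce different outputs. A minimal workaround sketch, using a stand-in module and input (hypothetical names, not from this log):

import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(8, 8), nn.Dropout(0.1))  # stand-in module
example_input = torch.randn(2, 8)                        # stand-in input

model.eval()  # dropout becomes a no-op, so repeated invocations agree
with torch.no_grad():
    traced = torch.jit.trace_module(model, {"forward": example_input})

# Alternative: keep training-mode dropout but skip the trace sanity check
# (only appropriate when the nondeterminism is understood and accepted):
# traced = torch.jit.trace_module(model, {"forward": example_input}, check_trace=False)

Both are sketches of common fixes, not necessarily what this pipeline intends.
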
%input.103 : Tensor = aten::masked_fill(%scores.11, %2065, %2026), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
%input.105 : Tensor = aten::softmax(%input.103, %2032, %2034), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
%p_attn.11 : Tensor = aten::dropout(%input.105, %2027, %2033), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention/__module.model.transformer_blocks.5.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.33 : Tensor = aten::matmul(%p_attn.11, %value.11), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
%2070 : Tensor = aten::transpose(%x.33, %2035, %2028), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2071 : Tensor = aten::contiguous(%2070, %2031), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2072 : int[] = prim::ListConstruct(%2049, %2032, %2023), scope: __module.model.transformer_blocks.5.input_sublayer | |
%input.107 : Tensor = aten::view(%2071, %2072), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%input.109 : Tensor = aten::linear(%input.107, %weight.51, %bias.47), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2075 : Tensor = aten::dropout(%input.109, %2027, %2033), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.35 : Tensor = aten::add(%x.31, %2075, %2035), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
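
The residual adds tagged sublayer.py:19 (e.g. %x.35 just above) close each sublayer with the pre-norm pattern x + dropout(sublayer(norm(x))): the graph norms x first, feeds the wrapped attention or feed-forward, drops out its output, and adds the raw input back. A sketch, reusing the LayerNorm sketch above (size = 768 and dropout p = 0.1 are the constants from the trace):

import torch.nn as nn

class SublayerConnection(nn.Module):
    # Pre-norm residual: x + dropout(sublayer(norm(x))).
    def __init__(self, size: int = 768, dropout: float = 0.1):
        super().__init__()
        self.norm = LayerNorm(size)        # LayerNorm sketch above
        self.dropout = nn.Dropout(dropout)
    def forward(self, x, sublayer):
        return x + self.dropout(sublayer(self.norm(x)))
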
%2077 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2078 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
%2079 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2080 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2081 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm | |
%2082 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2083 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%dropout.69 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.11) | |
%norm.23 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.11) | |
%b_2.23 : Tensor = prim::GetAttr[name="b_2"](%norm.23) | |
%a_2.23 : Tensor = prim::GetAttr[name="a_2"](%norm.23) | |
%2088 : int[] = prim::ListConstruct(%2079), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm | |
%mean.23 : Tensor = aten::mean(%x.35, %2088, %2080, %2081), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2090 : int[] = prim::ListConstruct(%2079), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm | |
%std.23 : Tensor = aten::std(%x.35, %2090, %2080, %2080), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
%2092 : Tensor = aten::sub(%x.35, %mean.23, %2082), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2093 : Tensor = aten::mul(%a_2.23, %2092), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2094 : Tensor = aten::add(%std.23, %2083, %2082), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2095 : Tensor = aten::div(%2093, %2094), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%input.111 : Tensor = aten::add(%2095, %b_2.23, %2082), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%bias.117 : Tensor = prim::GetAttr[name="bias"](%w_1.11) | |
%weight.125 : Tensor = prim::GetAttr[name="weight"](%w_1.11) | |
%input.113 : Tensor = aten::linear(%input.111, %weight.125, %bias.117), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%input.115 : Tensor = aten::gelu(%input.113, %2078), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
%input.117 : Tensor = aten::dropout(%input.115, %2077, %2080), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%bias.119 : Tensor = prim::GetAttr[name="bias"](%w_2.11) | |
%weight.127 : Tensor = prim::GetAttr[name="weight"](%w_2.11) | |
%input.119 : Tensor = aten::linear(%input.117, %weight.127, %bias.119), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2105 : Tensor = aten::dropout(%input.119, %2077, %2080), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%input.121 : Tensor = aten::add(%x.35, %2105, %2082), scope: __module.model.transformer_blocks.5.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
%2107 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.5.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2108 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.5.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.37 : Tensor = aten::dropout(%input.121, %2108, %2107), scope: __module.model.transformer_blocks.5.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
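
Inside each lambda_module.attention.attention scope the trace records the scaled dot-product attention of single.py: matmul of query with the transposed key, division by the constant tensor {8} (sqrt of d_k = 64; the Python-side sqrt was frozen into a constant at trace time, so the graph cannot generalize to other head sizes), masked_fill of positions where mask == 0 with -1e9, softmax over the last dim, dropout at p = 0.1, and a final matmul with value. A functional sketch of that sequence, reconstructed from the ops above:

import math
import torch
import torch.nn.functional as F

def attention(query, key, value, mask=None, p: float = 0.1, training: bool = True):
    # scores = q @ k^T / sqrt(d_k); the trace bakes sqrt(64) in as {8}.
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(query.size(-1))
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)  # aten::eq + aten::masked_fill
    p_attn = F.dropout(F.softmax(scores, dim=-1), p=p, training=training)
    return torch.matmul(p_attn, value), p_attn
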
%2110 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2111 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2112 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2113 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
%2114 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention/__module.model.transformer_blocks.6.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2115 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2116 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2117 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2118 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
%2119 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2120 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2121 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm | |
%2122 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2123 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%norm.25 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.13) | |
%b_2.25 : Tensor = prim::GetAttr[name="b_2"](%norm.25) | |
%a_2.25 : Tensor = prim::GetAttr[name="a_2"](%norm.25) | |
%2127 : int[] = prim::ListConstruct(%2119), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm | |
%mean.25 : Tensor = aten::mean(%x.37, %2127, %2120, %2121), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2129 : int[] = prim::ListConstruct(%2119), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm | |
%std.25 : Tensor = aten::std(%x.37, %2129, %2120, %2120), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
%2131 : Tensor = aten::sub(%x.37, %mean.25, %2122), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2132 : Tensor = aten::mul(%a_2.25, %2131), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2133 : Tensor = aten::add(%std.25, %2123, %2122), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2134 : Tensor = aten::div(%2132, %2133), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%query.25 : Tensor = aten::add(%2134, %b_2.25, %2122), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2136 : int = aten::size(%query.25, %2118), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
%2137 : Tensor = aten::linear(%query.25, %weight.53, %bias.49), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2138 : int[] = prim::ListConstruct(%2136, %2119, %2117, %2116), scope: __module.model.transformer_blocks.6.input_sublayer | |
%2139 : Tensor = aten::view(%2137, %2138), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%query.27 : Tensor = aten::transpose(%2139, %2122, %2115), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2141 : Tensor = aten::linear(%query.25, %weight.55, %bias.51), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2142 : int[] = prim::ListConstruct(%2136, %2119, %2117, %2116), scope: __module.model.transformer_blocks.6.input_sublayer | |
%2143 : Tensor = aten::view(%2141, %2142), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%key.13 : Tensor = aten::transpose(%2143, %2122, %2115), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2145 : Tensor = aten::linear(%query.25, %weight.57, %bias.53), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2146 : int[] = prim::ListConstruct(%2136, %2119, %2117, %2116), scope: __module.model.transformer_blocks.6.input_sublayer | |
%2147 : Tensor = aten::view(%2145, %2146), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%value.13 : Tensor = aten::transpose(%2147, %2122, %2115), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2149 : Tensor = aten::transpose(%key.13, %2111, %2119), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2150 : Tensor = aten::matmul(%query.27, %2149), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%scores.13 : Tensor = aten::div(%2150, %2112), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
- %2152 : Tensor = aten::eq(%mask, %2118), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
+ %2152 : Tensor = aten::eq(%mask.1, %2118), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
? ++ | |
%input.123 : Tensor = aten::masked_fill(%scores.13, %2152, %2113), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
%input.125 : Tensor = aten::softmax(%input.123, %2119, %2121), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
%p_attn.13 : Tensor = aten::dropout(%input.125, %2114, %2120), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention/__module.model.transformer_blocks.6.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.39 : Tensor = aten::matmul(%p_attn.13, %value.13), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
%2157 : Tensor = aten::transpose(%x.39, %2122, %2115), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2158 : Tensor = aten::contiguous(%2157, %2118), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2159 : int[] = prim::ListConstruct(%2136, %2119, %2110), scope: __module.model.transformer_blocks.6.input_sublayer | |
%input.127 : Tensor = aten::view(%2158, %2159), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%input.129 : Tensor = aten::linear(%input.127, %weight.59, %bias.55), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2162 : Tensor = aten::dropout(%input.129, %2114, %2120), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.41 : Tensor = aten::add(%x.37, %2162, %2122), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
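
Around each attention call, the multi_head.py:34-44 ops do the head bookkeeping: aten::size(query, 0) reads the batch dimension, the three aten::linear projections are each viewed to (batch, -1, 12, 64) and transposed to put the 12 heads ahead of the sequence, and the result is transposed back, made contiguous, and viewed to (batch, -1, 768) before the output projection. A module-level sketch using the attention() sketch above (h = 12, d_k = 64, d_model = 768 are the constants every block materializes):

import torch
import torch.nn as nn

class MultiHeadedAttention(nn.Module):
    # Head split/merge around scaled dot-product attention.
    def __init__(self, h: int = 12, d_model: int = 768):
        super().__init__()
        self.h, self.d_k = h, d_model // h
        self.linear_layers = nn.ModuleList(nn.Linear(d_model, d_model) for _ in range(3))
        self.output_linear = nn.Linear(d_model, d_model)
    def forward(self, query, key, value, mask=None):
        batch_size = query.size(0)                      # aten::size(%query, 0)
        query, key, value = [
            l(x).view(batch_size, -1, self.h, self.d_k).transpose(1, 2)
            for l, x in zip(self.linear_layers, (query, key, value))
        ]
        x, _ = attention(query, key, value, mask=mask)  # sketch above
        x = x.transpose(1, 2).contiguous().view(batch_size, -1, self.h * self.d_k)
        return self.output_linear(x)
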
%2164 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2165 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
%2166 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2167 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2168 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm | |
%2169 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2170 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%dropout.81 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.13) | |
%norm.27 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.13) | |
%b_2.27 : Tensor = prim::GetAttr[name="b_2"](%norm.27) | |
%a_2.27 : Tensor = prim::GetAttr[name="a_2"](%norm.27) | |
%2175 : int[] = prim::ListConstruct(%2166), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm | |
%mean.27 : Tensor = aten::mean(%x.41, %2175, %2167, %2168), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2177 : int[] = prim::ListConstruct(%2166), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm | |
%std.27 : Tensor = aten::std(%x.41, %2177, %2167, %2167), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
%2179 : Tensor = aten::sub(%x.41, %mean.27, %2169), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2180 : Tensor = aten::mul(%a_2.27, %2179), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2181 : Tensor = aten::add(%std.27, %2170, %2169), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2182 : Tensor = aten::div(%2180, %2181), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%input.131 : Tensor = aten::add(%2182, %b_2.27, %2169), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%bias.121 : Tensor = prim::GetAttr[name="bias"](%w_1.13) | |
%weight.129 : Tensor = prim::GetAttr[name="weight"](%w_1.13) | |
%input.133 : Tensor = aten::linear(%input.131, %weight.129, %bias.121), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%input.135 : Tensor = aten::gelu(%input.133, %2165), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
%input.137 : Tensor = aten::dropout(%input.135, %2164, %2167), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%bias.123 : Tensor = prim::GetAttr[name="bias"](%w_2.13) | |
%weight.131 : Tensor = prim::GetAttr[name="weight"](%w_2.13) | |
%input.139 : Tensor = aten::linear(%input.137, %weight.131, %bias.123), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2192 : Tensor = aten::dropout(%input.139, %2164, %2167), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%input.141 : Tensor = aten::add(%x.41, %2192, %2169), scope: __module.model.transformer_blocks.6.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
%2194 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.6.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2195 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.6.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.43 : Tensor = aten::dropout(%input.141, %2195, %2194), scope: __module.model.transformer_blocks.6.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
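
Each output_sublayer wraps the feed_forward chain w_1 -> gelu -> dropout -> w_2; the aten::gelu carries the "none" approximation string, i.e. the exact erf-based gelu rather than the tanh approximation. The trace does not record the inner width of w_1, so d_ff below is an assumption; everything else mirrors the ops above:

import torch.nn as nn

class PositionwiseFeedForward(nn.Module):
    # w_2(dropout(gelu(w_1(x)))); d_ff = 3072 (4 * d_model) is assumed, not traced.
    def __init__(self, d_model: int = 768, d_ff: int = 3072, dropout: float = 0.1):
        super().__init__()
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)
        self.activation = nn.GELU()  # approximate="none", as in the trace
    def forward(self, x):
        return self.w_2(self.dropout(self.activation(self.w_1(x))))
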
%2197 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2198 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2199 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2200 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
%2201 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention/__module.model.transformer_blocks.7.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2202 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2203 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2204 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2205 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
%2206 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2207 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2208 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm | |
%2209 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2210 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%norm.29 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.15) | |
%b_2.29 : Tensor = prim::GetAttr[name="b_2"](%norm.29) | |
%a_2.29 : Tensor = prim::GetAttr[name="a_2"](%norm.29) | |
%2214 : int[] = prim::ListConstruct(%2206), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm | |
%mean.29 : Tensor = aten::mean(%x.43, %2214, %2207, %2208), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2216 : int[] = prim::ListConstruct(%2206), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm | |
%std.29 : Tensor = aten::std(%x.43, %2216, %2207, %2207), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
%2218 : Tensor = aten::sub(%x.43, %mean.29, %2209), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2219 : Tensor = aten::mul(%a_2.29, %2218), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2220 : Tensor = aten::add(%std.29, %2210, %2209), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2221 : Tensor = aten::div(%2219, %2220), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%query.29 : Tensor = aten::add(%2221, %b_2.29, %2209), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2223 : int = aten::size(%query.29, %2205), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
%2224 : Tensor = aten::linear(%query.29, %weight.61, %bias.57), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2225 : int[] = prim::ListConstruct(%2223, %2206, %2204, %2203), scope: __module.model.transformer_blocks.7.input_sublayer | |
%2226 : Tensor = aten::view(%2224, %2225), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%query.31 : Tensor = aten::transpose(%2226, %2209, %2202), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2228 : Tensor = aten::linear(%query.29, %weight.63, %bias.59), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2229 : int[] = prim::ListConstruct(%2223, %2206, %2204, %2203), scope: __module.model.transformer_blocks.7.input_sublayer | |
%2230 : Tensor = aten::view(%2228, %2229), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%key.15 : Tensor = aten::transpose(%2230, %2209, %2202), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2232 : Tensor = aten::linear(%query.29, %weight.65, %bias.61), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2233 : int[] = prim::ListConstruct(%2223, %2206, %2204, %2203), scope: __module.model.transformer_blocks.7.input_sublayer | |
%2234 : Tensor = aten::view(%2232, %2233), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%value.15 : Tensor = aten::transpose(%2234, %2209, %2202), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2236 : Tensor = aten::transpose(%key.15, %2198, %2206), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2237 : Tensor = aten::matmul(%query.31, %2236), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%scores.15 : Tensor = aten::div(%2237, %2199), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
- %2239 : Tensor = aten::eq(%mask, %2205), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
+ %2239 : Tensor = aten::eq(%mask.1, %2205), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
? ++ | |
%input.143 : Tensor = aten::masked_fill(%scores.15, %2239, %2200), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
%input.145 : Tensor = aten::softmax(%input.143, %2206, %2208), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
%p_attn.15 : Tensor = aten::dropout(%input.145, %2201, %2207), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention/__module.model.transformer_blocks.7.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.45 : Tensor = aten::matmul(%p_attn.15, %value.15), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
%2244 : Tensor = aten::transpose(%x.45, %2209, %2202), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2245 : Tensor = aten::contiguous(%2244, %2205), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2246 : int[] = prim::ListConstruct(%2223, %2206, %2197), scope: __module.model.transformer_blocks.7.input_sublayer | |
%input.147 : Tensor = aten::view(%2245, %2246), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%input.149 : Tensor = aten::linear(%input.147, %weight.67, %bias.63), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2249 : Tensor = aten::dropout(%input.149, %2201, %2207), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.47 : Tensor = aten::add(%x.43, %2249, %2209), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
%2251 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2252 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
%2253 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2254 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2255 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm | |
%2256 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2257 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%dropout.93 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.15) | |
%norm.31 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.15) | |
%b_2.31 : Tensor = prim::GetAttr[name="b_2"](%norm.31) | |
%a_2.31 : Tensor = prim::GetAttr[name="a_2"](%norm.31) | |
%2262 : int[] = prim::ListConstruct(%2253), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm | |
%mean.31 : Tensor = aten::mean(%x.47, %2262, %2254, %2255), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2264 : int[] = prim::ListConstruct(%2253), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm | |
%std.31 : Tensor = aten::std(%x.47, %2264, %2254, %2254), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
%2266 : Tensor = aten::sub(%x.47, %mean.31, %2256), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2267 : Tensor = aten::mul(%a_2.31, %2266), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2268 : Tensor = aten::add(%std.31, %2257, %2256), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2269 : Tensor = aten::div(%2267, %2268), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%input.151 : Tensor = aten::add(%2269, %b_2.31, %2256), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%bias.125 : Tensor = prim::GetAttr[name="bias"](%w_1.15) | |
%weight.133 : Tensor = prim::GetAttr[name="weight"](%w_1.15) | |
%input.153 : Tensor = aten::linear(%input.151, %weight.133, %bias.125), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%input.155 : Tensor = aten::gelu(%input.153, %2252), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
%input.157 : Tensor = aten::dropout(%input.155, %2251, %2254), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%bias.127 : Tensor = prim::GetAttr[name="bias"](%w_2.15) | |
%weight.135 : Tensor = prim::GetAttr[name="weight"](%w_2.15) | |
%input.159 : Tensor = aten::linear(%input.157, %weight.135, %bias.127), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2279 : Tensor = aten::dropout(%input.159, %2251, %2254), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%input.161 : Tensor = aten::add(%x.47, %2279, %2256), scope: __module.model.transformer_blocks.7.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
%2281 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.7.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2282 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.7.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.49 : Tensor = aten::dropout(%input.161, %2282, %2281), scope: __module.model.transformer_blocks.7.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
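
%x.49 is the output of transformer block 7. The aten::mean / aten::std / aten::sub / aten::mul / aten::div / aten::add run above it is bert_pytorch's hand-rolled LayerNorm (layer_norm.py:15-17), wrapped in the residual pattern from sublayer.py:19; the bool constant 1 fed to every aten::dropout suggests the module was traced in train mode. A minimal sketch of that computation — names here are illustrative, with a_2/b_2 being the learned scale and shift fetched by the prim::GetAttr lines:

import torch
import torch.nn.functional as F

def layer_norm_sketch(x, a_2, b_2, eps=1e-6):
    # Mirrors the traced sequence a_2 * (x - mean) / (std + 1e-6) + b_2,
    # with aten::std(..., unbiased=True, keepdim=True).
    mean = x.mean(-1, keepdim=True)
    std = x.std(-1, keepdim=True)
    return a_2 * (x - mean) / (std + eps) + b_2

def sublayer_sketch(x, sublayer, a_2, b_2, p=0.1, training=True):
    # sublayer.py:19 pattern: x + dropout(sublayer(norm(x))), p = 0.1.
    return x + F.dropout(sublayer(layer_norm_sketch(x, a_2, b_2)),
                         p=p, training=training)
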
%2284 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2285 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2286 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2287 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
%2288 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention/__module.model.transformer_blocks.8.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2289 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2290 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2291 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2292 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
%2293 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2294 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2295 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm | |
%2296 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2297 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%norm.33 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.17) | |
%b_2.33 : Tensor = prim::GetAttr[name="b_2"](%norm.33) | |
%a_2.33 : Tensor = prim::GetAttr[name="a_2"](%norm.33) | |
%2301 : int[] = prim::ListConstruct(%2293), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm | |
%mean.33 : Tensor = aten::mean(%x.49, %2301, %2294, %2295), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2303 : int[] = prim::ListConstruct(%2293), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm | |
%std.33 : Tensor = aten::std(%x.49, %2303, %2294, %2294), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
%2305 : Tensor = aten::sub(%x.49, %mean.33, %2296), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2306 : Tensor = aten::mul(%a_2.33, %2305), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2307 : Tensor = aten::add(%std.33, %2297, %2296), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2308 : Tensor = aten::div(%2306, %2307), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%query.33 : Tensor = aten::add(%2308, %b_2.33, %2296), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2310 : int = aten::size(%query.33, %2292), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
%2311 : Tensor = aten::linear(%query.33, %weight.69, %bias.65), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2312 : int[] = prim::ListConstruct(%2310, %2293, %2291, %2290), scope: __module.model.transformer_blocks.8.input_sublayer | |
%2313 : Tensor = aten::view(%2311, %2312), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%query.35 : Tensor = aten::transpose(%2313, %2296, %2289), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2315 : Tensor = aten::linear(%query.33, %weight.71, %bias.67), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2316 : int[] = prim::ListConstruct(%2310, %2293, %2291, %2290), scope: __module.model.transformer_blocks.8.input_sublayer | |
%2317 : Tensor = aten::view(%2315, %2316), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%key.17 : Tensor = aten::transpose(%2317, %2296, %2289), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2319 : Tensor = aten::linear(%query.33, %weight.73, %bias.69), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2320 : int[] = prim::ListConstruct(%2310, %2293, %2291, %2290), scope: __module.model.transformer_blocks.8.input_sublayer | |
%2321 : Tensor = aten::view(%2319, %2320), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%value.17 : Tensor = aten::transpose(%2321, %2296, %2289), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2323 : Tensor = aten::transpose(%key.17, %2285, %2293), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2324 : Tensor = aten::matmul(%query.35, %2323), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%scores.17 : Tensor = aten::div(%2324, %2286), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
- %2326 : Tensor = aten::eq(%mask, %2292), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
+ %2326 : Tensor = aten::eq(%mask.1, %2292), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
? ++ | |
%input.163 : Tensor = aten::masked_fill(%scores.17, %2326, %2287), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
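
The `-`/`+` hunk just above (and its twins for blocks 9 and 10 further down) is the only divergence in this stretch of the diff: the second invocation names the mask value %mask.1 instead of %mask, and the `? ++` marker points at the two added characters. The computation itself is unchanged. An illustrative sketch of the masking step single.py:27 traces here:

def apply_mask_sketch(scores, mask):
    # aten::eq(%mask, 0) followed by aten::masked_fill(scores, ..., -1e9);
    # the traced constant -1000000000. is the fill value.
    return scores.masked_fill(mask == 0, -1e9)
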
%input.165 : Tensor = aten::softmax(%input.163, %2293, %2295), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
%p_attn.17 : Tensor = aten::dropout(%input.165, %2288, %2294), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention/__module.model.transformer_blocks.8.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.51 : Tensor = aten::matmul(%p_attn.17, %value.17), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
%2331 : Tensor = aten::transpose(%x.51, %2296, %2289), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2332 : Tensor = aten::contiguous(%2331, %2292), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2333 : int[] = prim::ListConstruct(%2310, %2293, %2284), scope: __module.model.transformer_blocks.8.input_sublayer | |
%input.167 : Tensor = aten::view(%2332, %2333), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%input.169 : Tensor = aten::linear(%input.167, %weight.75, %bias.71), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2336 : Tensor = aten::dropout(%input.169, %2288, %2294), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.53 : Tensor = aten::add(%x.49, %2336, %2296), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
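
%x.53 closes block 8's attention sublayer. The shape plumbing traced between %2310 and %input.167 pins the model dimensions as graph constants: 12 heads, d_k = 64, d_model = 768, and a scale divisor of {8} = sqrt(64). A sketch of that multi-head wiring under those assumptions (helper names are hypothetical, not the library's API):

import math
import torch
import torch.nn.functional as F

def multi_head_sketch(x, linear_layers, output_linear, mask,
                      h=12, d_k=64, p=0.1, training=True):
    # aten::size(%query, 0): the batch dimension read at multi_head.py:34.
    batch_size = x.size(0)
    # One aten::linear / aten::view / aten::transpose triple per projection.
    q, k, v = [l(x).view(batch_size, -1, h, d_k).transpose(1, 2)
               for l in linear_layers]
    # single.py:15: scores = q @ k^T / sqrt(d_k); sqrt(64) = 8 is the
    # tensor constant {8} baked into the trace.
    scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(d_k)
    scores = scores.masked_fill(mask == 0, -1e9)          # single.py:27
    p_attn = F.dropout(scores.softmax(-1), p=p, training=training)
    out = torch.matmul(p_attn, v)                         # single.py:33
    # multi_head.py:44: transpose back, flatten the heads, then project.
    out = out.transpose(1, 2).contiguous().view(batch_size, -1, h * d_k)
    return output_linear(out)
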
%2338 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2339 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
%2340 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2341 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2342 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm | |
%2343 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2344 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%dropout.105 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.17) | |
%norm.35 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.17) | |
%b_2.35 : Tensor = prim::GetAttr[name="b_2"](%norm.35) | |
%a_2.35 : Tensor = prim::GetAttr[name="a_2"](%norm.35) | |
%2349 : int[] = prim::ListConstruct(%2340), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm | |
%mean.35 : Tensor = aten::mean(%x.53, %2349, %2341, %2342), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2351 : int[] = prim::ListConstruct(%2340), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm | |
%std.35 : Tensor = aten::std(%x.53, %2351, %2341, %2341), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
%2353 : Tensor = aten::sub(%x.53, %mean.35, %2343), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2354 : Tensor = aten::mul(%a_2.35, %2353), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2355 : Tensor = aten::add(%std.35, %2344, %2343), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2356 : Tensor = aten::div(%2354, %2355), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%input.171 : Tensor = aten::add(%2356, %b_2.35, %2343), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%bias.129 : Tensor = prim::GetAttr[name="bias"](%w_1.17) | |
%weight.137 : Tensor = prim::GetAttr[name="weight"](%w_1.17) | |
%input.173 : Tensor = aten::linear(%input.171, %weight.137, %bias.129), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%input.175 : Tensor = aten::gelu(%input.173, %2339), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
%input.177 : Tensor = aten::dropout(%input.175, %2338, %2341), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%bias.131 : Tensor = prim::GetAttr[name="bias"](%w_2.17) | |
%weight.139 : Tensor = prim::GetAttr[name="weight"](%w_2.17) | |
%input.179 : Tensor = aten::linear(%input.177, %weight.139, %bias.131), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2366 : Tensor = aten::dropout(%input.179, %2338, %2341), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%input.181 : Tensor = aten::add(%x.53, %2366, %2343), scope: __module.model.transformer_blocks.8.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
%2368 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.8.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2369 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.8.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.55 : Tensor = aten::dropout(%input.181, %2369, %2368), scope: __module.model.transformer_blocks.8.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2371 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2372 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2373 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2374 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
%2375 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention/__module.model.transformer_blocks.9.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2376 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2377 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2378 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2379 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
%2380 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2381 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2382 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm | |
%2383 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2384 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%norm.37 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.19) | |
%b_2.37 : Tensor = prim::GetAttr[name="b_2"](%norm.37) | |
%a_2.37 : Tensor = prim::GetAttr[name="a_2"](%norm.37) | |
%2388 : int[] = prim::ListConstruct(%2380), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm | |
%mean.37 : Tensor = aten::mean(%x.55, %2388, %2381, %2382), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2390 : int[] = prim::ListConstruct(%2380), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm | |
%std.37 : Tensor = aten::std(%x.55, %2390, %2381, %2381), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
%2392 : Tensor = aten::sub(%x.55, %mean.37, %2383), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2393 : Tensor = aten::mul(%a_2.37, %2392), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2394 : Tensor = aten::add(%std.37, %2384, %2383), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2395 : Tensor = aten::div(%2393, %2394), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%query.37 : Tensor = aten::add(%2395, %b_2.37, %2383), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2397 : int = aten::size(%query.37, %2379), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
%2398 : Tensor = aten::linear(%query.37, %weight.77, %bias.73), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2399 : int[] = prim::ListConstruct(%2397, %2380, %2378, %2377), scope: __module.model.transformer_blocks.9.input_sublayer | |
%2400 : Tensor = aten::view(%2398, %2399), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%query.39 : Tensor = aten::transpose(%2400, %2383, %2376), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2402 : Tensor = aten::linear(%query.37, %weight.79, %bias.75), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2403 : int[] = prim::ListConstruct(%2397, %2380, %2378, %2377), scope: __module.model.transformer_blocks.9.input_sublayer | |
%2404 : Tensor = aten::view(%2402, %2403), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%key.19 : Tensor = aten::transpose(%2404, %2383, %2376), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2406 : Tensor = aten::linear(%query.37, %weight.81, %bias.77), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2407 : int[] = prim::ListConstruct(%2397, %2380, %2378, %2377), scope: __module.model.transformer_blocks.9.input_sublayer | |
%2408 : Tensor = aten::view(%2406, %2407), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%value.19 : Tensor = aten::transpose(%2408, %2383, %2376), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2410 : Tensor = aten::transpose(%key.19, %2372, %2380), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2411 : Tensor = aten::matmul(%query.39, %2410), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%scores.19 : Tensor = aten::div(%2411, %2373), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
- %2413 : Tensor = aten::eq(%mask, %2379), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
+ %2413 : Tensor = aten::eq(%mask.1, %2379), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
? ++ | |
%input.183 : Tensor = aten::masked_fill(%scores.19, %2413, %2374), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
%input.185 : Tensor = aten::softmax(%input.183, %2380, %2382), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
%p_attn.19 : Tensor = aten::dropout(%input.185, %2375, %2381), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention/__module.model.transformer_blocks.9.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.57 : Tensor = aten::matmul(%p_attn.19, %value.19), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
%2418 : Tensor = aten::transpose(%x.57, %2383, %2376), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2419 : Tensor = aten::contiguous(%2418, %2379), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2420 : int[] = prim::ListConstruct(%2397, %2380, %2371), scope: __module.model.transformer_blocks.9.input_sublayer | |
%input.187 : Tensor = aten::view(%2419, %2420), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%input.189 : Tensor = aten::linear(%input.187, %weight.83, %bias.79), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2423 : Tensor = aten::dropout(%input.189, %2375, %2381), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.59 : Tensor = aten::add(%x.55, %2423, %2383), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
%2425 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2426 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
%2427 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2428 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2429 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm | |
%2430 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2431 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%dropout.117 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.19) | |
%norm.39 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.19) | |
%b_2.39 : Tensor = prim::GetAttr[name="b_2"](%norm.39) | |
%a_2.39 : Tensor = prim::GetAttr[name="a_2"](%norm.39) | |
%2436 : int[] = prim::ListConstruct(%2427), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm | |
%mean.39 : Tensor = aten::mean(%x.59, %2436, %2428, %2429), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2438 : int[] = prim::ListConstruct(%2427), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm | |
%std.39 : Tensor = aten::std(%x.59, %2438, %2428, %2428), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
%2440 : Tensor = aten::sub(%x.59, %mean.39, %2430), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2441 : Tensor = aten::mul(%a_2.39, %2440), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2442 : Tensor = aten::add(%std.39, %2431, %2430), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2443 : Tensor = aten::div(%2441, %2442), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%input.191 : Tensor = aten::add(%2443, %b_2.39, %2430), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%bias.133 : Tensor = prim::GetAttr[name="bias"](%w_1.19) | |
%weight.141 : Tensor = prim::GetAttr[name="weight"](%w_1.19) | |
%input.193 : Tensor = aten::linear(%input.191, %weight.141, %bias.133), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%input.195 : Tensor = aten::gelu(%input.193, %2426), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
%input.197 : Tensor = aten::dropout(%input.195, %2425, %2428), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%bias.135 : Tensor = prim::GetAttr[name="bias"](%w_2.19) | |
%weight.143 : Tensor = prim::GetAttr[name="weight"](%w_2.19) | |
%input.199 : Tensor = aten::linear(%input.197, %weight.143, %bias.135), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2453 : Tensor = aten::dropout(%input.199, %2425, %2428), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%input.201 : Tensor = aten::add(%x.59, %2453, %2430), scope: __module.model.transformer_blocks.9.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
%2455 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.9.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2456 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.9.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.61 : Tensor = aten::dropout(%input.201, %2456, %2455), scope: __module.model.transformer_blocks.9.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
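
Block 9 ends here with %x.61. Its output sublayer (%input.171 through %2453) is the position-wise feed-forward: a linear up-projection, GELU with the "none" (exact, non-tanh) approximation string from activation.py:681, dropout, and a second linear back to width 768. An illustrative sketch, with w_1/w_2 standing in for the traced nn.Linear modules:

import torch.nn as nn
import torch.nn.functional as F

def feed_forward_sketch(x, w_1: nn.Linear, w_2: nn.Linear,
                        p=0.1, training=True):
    # Traced as aten::linear -> aten::gelu(..., "none") -> aten::dropout
    # -> aten::linear, with p = 0.1 as in the rest of the graph.
    return w_2(F.dropout(F.gelu(w_1(x)), p=p, training=training))
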
%2458 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2459 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2460 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2461 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
%2462 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention/__module.model.transformer_blocks.10.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2463 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2464 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2465 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2466 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
%2467 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2468 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2469 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm | |
%2470 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2471 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%norm.41 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.21) | |
%b_2.41 : Tensor = prim::GetAttr[name="b_2"](%norm.41) | |
%a_2.41 : Tensor = prim::GetAttr[name="a_2"](%norm.41) | |
%2475 : int[] = prim::ListConstruct(%2467), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm | |
%mean.41 : Tensor = aten::mean(%x.61, %2475, %2468, %2469), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2477 : int[] = prim::ListConstruct(%2467), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm | |
%std.41 : Tensor = aten::std(%x.61, %2477, %2468, %2468), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
%2479 : Tensor = aten::sub(%x.61, %mean.41, %2470), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2480 : Tensor = aten::mul(%a_2.41, %2479), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2481 : Tensor = aten::add(%std.41, %2471, %2470), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2482 : Tensor = aten::div(%2480, %2481), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%query.41 : Tensor = aten::add(%2482, %b_2.41, %2470), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2484 : int = aten::size(%query.41, %2466), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
%2485 : Tensor = aten::linear(%query.41, %weight.85, %bias.81), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2486 : int[] = prim::ListConstruct(%2484, %2467, %2465, %2464), scope: __module.model.transformer_blocks.10.input_sublayer | |
%2487 : Tensor = aten::view(%2485, %2486), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%query.43 : Tensor = aten::transpose(%2487, %2470, %2463), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2489 : Tensor = aten::linear(%query.41, %weight.87, %bias.83), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2490 : int[] = prim::ListConstruct(%2484, %2467, %2465, %2464), scope: __module.model.transformer_blocks.10.input_sublayer | |
%2491 : Tensor = aten::view(%2489, %2490), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%key.21 : Tensor = aten::transpose(%2491, %2470, %2463), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2493 : Tensor = aten::linear(%query.41, %weight.89, %bias.85), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2494 : int[] = prim::ListConstruct(%2484, %2467, %2465, %2464), scope: __module.model.transformer_blocks.10.input_sublayer | |
%2495 : Tensor = aten::view(%2493, %2494), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%value.21 : Tensor = aten::transpose(%2495, %2470, %2463), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2497 : Tensor = aten::transpose(%key.21, %2459, %2467), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2498 : Tensor = aten::matmul(%query.43, %2497), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%scores.21 : Tensor = aten::div(%2498, %2460), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
- %2500 : Tensor = aten::eq(%mask, %2466), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
+ %2500 : Tensor = aten::eq(%mask.1, %2466), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
? ++ | |
%input.203 : Tensor = aten::masked_fill(%scores.21, %2500, %2461), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
%input.205 : Tensor = aten::softmax(%input.203, %2467, %2469), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
%p_attn.21 : Tensor = aten::dropout(%input.205, %2462, %2468), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention/__module.model.transformer_blocks.10.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.63 : Tensor = aten::matmul(%p_attn.21, %value.21), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
%2505 : Tensor = aten::transpose(%x.63, %2470, %2463), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2506 : Tensor = aten::contiguous(%2505, %2466), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2507 : int[] = prim::ListConstruct(%2484, %2467, %2458), scope: __module.model.transformer_blocks.10.input_sublayer | |
%input.207 : Tensor = aten::view(%2506, %2507), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%input.209 : Tensor = aten::linear(%input.207, %weight.91, %bias.87), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2510 : Tensor = aten::dropout(%input.209, %2462, %2468), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.65 : Tensor = aten::add(%x.61, %2510, %2470), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
%2512 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2513 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
%2514 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2515 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2516 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm | |
%2517 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2518 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%dropout.129 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.21) | |
%norm.43 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.21) | |
%b_2.43 : Tensor = prim::GetAttr[name="b_2"](%norm.43) | |
%a_2.43 : Tensor = prim::GetAttr[name="a_2"](%norm.43) | |
%2523 : int[] = prim::ListConstruct(%2514), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm | |
%mean.43 : Tensor = aten::mean(%x.65, %2523, %2515, %2516), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2525 : int[] = prim::ListConstruct(%2514), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm | |
%std.43 : Tensor = aten::std(%x.65, %2525, %2515, %2515), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
%2527 : Tensor = aten::sub(%x.65, %mean.43, %2517), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2528 : Tensor = aten::mul(%a_2.43, %2527), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2529 : Tensor = aten::add(%std.43, %2518, %2517), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2530 : Tensor = aten::div(%2528, %2529), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%input.211 : Tensor = aten::add(%2530, %b_2.43, %2517), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%bias.137 : Tensor = prim::GetAttr[name="bias"](%w_1.21) | |
%weight.145 : Tensor = prim::GetAttr[name="weight"](%w_1.21) | |
%input.213 : Tensor = aten::linear(%input.211, %weight.145, %bias.137), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%input.215 : Tensor = aten::gelu(%input.213, %2513), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
%input.217 : Tensor = aten::dropout(%input.215, %2512, %2515), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%bias.139 : Tensor = prim::GetAttr[name="bias"](%w_2.21) | |
%weight.147 : Tensor = prim::GetAttr[name="weight"](%w_2.21) | |
%input.219 : Tensor = aten::linear(%input.217, %weight.147, %bias.139), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2540 : Tensor = aten::dropout(%input.219, %2512, %2515), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%input.221 : Tensor = aten::add(%x.65, %2540, %2517), scope: __module.model.transformer_blocks.10.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
%2542 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.10.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2543 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.10.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.67 : Tensor = aten::dropout(%input.221, %2543, %2542), scope: __module.model.transformer_blocks.10.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
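
Note: the ops up to this point complete transformer block 10; block 11 below repeats the same pattern. Most of these ops come from two small hand-written helpers in bert_pytorch (the layer_norm.py and sublayer.py files named in the trailing # location comments). A minimal sketch of what the IR encodes, assuming those sources; names are illustrative:

    import torch

    def layer_norm(x, a_2, b_2, eps=1e-6):
        # The aten::mean / aten::std / aten::sub / aten::mul / aten::add /
        # aten::div runs scoped to *.norm above:
        # a_2 * (x - mean) / (std + eps) + b_2, with mean and (unbiased)
        # std taken over the last dimension, keepdim=True, eps from the
        # Constant[value={1e-06}] tensors.
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        return a_2 * (x - mean) / (std + eps) + b_2

    def sublayer_connection(x, sublayer, dropout, a_2, b_2):
        # Pre-norm residual matching sublayer.py:19 in the location
        # comments: the final aten::add adds x to the dropout of the
        # sublayer output.
        return x + dropout(sublayer(layer_norm(x, a_2, b_2)))

Each block applies this connection twice, once around multi-head attention (the input_sublayer scopes) and once around the feed-forward (the output_sublayer scopes), then a block-level aten::dropout produces %x.67.
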
%2545 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2546 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2547 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2548 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
%2549 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention/__module.model.transformer_blocks.11.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2550 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2551 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2552 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2553 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
%2554 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2555 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2556 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm | |
%2557 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2558 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%norm.45 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer) | |
%b_2.45 : Tensor = prim::GetAttr[name="b_2"](%norm.45) | |
%a_2.45 : Tensor = prim::GetAttr[name="a_2"](%norm.45) | |
%2562 : int[] = prim::ListConstruct(%2554), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm | |
%mean.45 : Tensor = aten::mean(%x.67, %2562, %2555, %2556), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2564 : int[] = prim::ListConstruct(%2554), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm | |
%std.45 : Tensor = aten::std(%x.67, %2564, %2555, %2555), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
%2566 : Tensor = aten::sub(%x.67, %mean.45, %2557), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2567 : Tensor = aten::mul(%a_2.45, %2566), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2568 : Tensor = aten::add(%std.45, %2558, %2557), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2569 : Tensor = aten::div(%2567, %2568), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%query.45 : Tensor = aten::add(%2569, %b_2.45, %2557), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2571 : int = aten::size(%query.45, %2553), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
%2572 : Tensor = aten::linear(%query.45, %weight.93, %bias.89), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2573 : int[] = prim::ListConstruct(%2571, %2554, %2552, %2551), scope: __module.model.transformer_blocks.11.input_sublayer | |
%2574 : Tensor = aten::view(%2572, %2573), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%query : Tensor = aten::transpose(%2574, %2557, %2550), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2576 : Tensor = aten::linear(%query.45, %weight.95, %bias.91), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2577 : int[] = prim::ListConstruct(%2571, %2554, %2552, %2551), scope: __module.model.transformer_blocks.11.input_sublayer | |
%2578 : Tensor = aten::view(%2576, %2577), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%key : Tensor = aten::transpose(%2578, %2557, %2550), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2580 : Tensor = aten::linear(%query.45, %weight.97, %bias.93), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2581 : int[] = prim::ListConstruct(%2571, %2554, %2552, %2551), scope: __module.model.transformer_blocks.11.input_sublayer | |
%2582 : Tensor = aten::view(%2580, %2581), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%value : Tensor = aten::transpose(%2582, %2557, %2550), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
%2584 : Tensor = aten::transpose(%key, %2546, %2554), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%2585 : Tensor = aten::matmul(%query, %2584), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
%scores : Tensor = aten::div(%2585, %2547), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
- %2587 : Tensor = aten::eq(%mask, %2553), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
+ %2587 : Tensor = aten::eq(%mask.1, %2553), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
? ++ | |
%input.223 : Tensor = aten::masked_fill(%scores, %2587, %2548), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
%input.225 : Tensor = aten::softmax(%input.223, %2554, %2556), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
%p_attn : Tensor = aten::dropout(%input.225, %2549, %2555), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention/__module.model.transformer_blocks.11.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x.69 : Tensor = aten::matmul(%p_attn, %value), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
%2592 : Tensor = aten::transpose(%x.69, %2557, %2550), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2593 : Tensor = aten::contiguous(%2592, %2553), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%2594 : int[] = prim::ListConstruct(%2571, %2554, %2545), scope: __module.model.transformer_blocks.11.input_sublayer | |
%input.227 : Tensor = aten::view(%2593, %2594), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
%input.229 : Tensor = aten::linear(%input.227, %weight.99, %bias.95), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2597 : Tensor = aten::dropout(%input.229, %2549, %2555), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%x : Tensor = aten::add(%x.67, %2597, %2557), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
%2599 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2600 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
%2601 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2602 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2603 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm | |
%2604 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2605 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%dropout.141 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer) | |
%norm : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer) | |
%b_2 : Tensor = prim::GetAttr[name="b_2"](%norm) | |
%a_2 : Tensor = prim::GetAttr[name="a_2"](%norm) | |
%2610 : int[] = prim::ListConstruct(%2601), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm | |
%mean : Tensor = aten::mean(%x, %2610, %2602, %2603), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
%2612 : int[] = prim::ListConstruct(%2601), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm | |
%std : Tensor = aten::std(%x, %2612, %2602, %2602), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
%2614 : Tensor = aten::sub(%x, %mean, %2604), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2615 : Tensor = aten::mul(%a_2, %2614), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2616 : Tensor = aten::add(%std, %2605, %2604), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%2617 : Tensor = aten::div(%2615, %2616), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%input.231 : Tensor = aten::add(%2617, %b_2, %2604), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
%bias.141 : Tensor = prim::GetAttr[name="bias"](%w_1) | |
%weight.149 : Tensor = prim::GetAttr[name="weight"](%w_1) | |
%input.233 : Tensor = aten::linear(%input.231, %weight.149, %bias.141), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%input.235 : Tensor = aten::gelu(%input.233, %2600), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
%input.237 : Tensor = aten::dropout(%input.235, %2599, %2602), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%bias : Tensor = prim::GetAttr[name="bias"](%w_2) | |
%weight : Tensor = prim::GetAttr[name="weight"](%w_2) | |
%input.239 : Tensor = aten::linear(%input.237, %weight, %bias), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
%2627 : Tensor = aten::dropout(%input.239, %2599, %2602), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%input : Tensor = aten::add(%x, %2627, %2604), scope: __module.model.transformer_blocks.11.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
%2629 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.11.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2630 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.11.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
%2631 : Tensor = aten::dropout(%input, %2630, %2629), scope: __module.model.transformer_blocks.11.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
return (%2631) | |
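
Note: the returned %2631 is the block-level dropout of transformer block 11, so the graph returns the output of the last transformer block. For reference, the attention ops repeated in every block (single.py:15, :27 and :33 in the location comments) are ordinary scaled dot-product attention. A sketch under the assumption that bert_pytorch's single.py matches these ops; observe that the scale shows up in the IR as the frozen Tensor constant {8}, i.e. sqrt(64) for the 12 heads of size 64 declared by the value=12 / value=64 constants, because tracing bakes Python scalars into the graph as constants:

    import math
    import torch
    import torch.nn.functional as F

    def attention(query, key, value, mask, dropout):
        # aten::matmul(query, key.transpose(-2, -1)) then aten::div by
        # sqrt(d_k); under tracing this scalar is captured as {8}.
        scores = torch.matmul(query, key.transpose(-2, -1)) \
            / math.sqrt(query.size(-1))
        # aten::eq(mask, 0) feeding aten::masked_fill(scores, ..., -1e9)
        scores = scores.masked_fill(mask == 0, -1e9)
        # aten::softmax over dim -1, then aten::dropout with p=0.1
        p_attn = dropout(F.softmax(scores, dim=-1))
        # aten::matmul(p_attn, value) gives the per-head outputs
        return torch.matmul(p_attn, value), p_attn
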
First diverging operator: | |
Node diff: | |
- %model : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
+ %model : __torch__.bert_pytorch.model.bert.___torch_mangle_764.BERT = prim::GetAttr[name="model"](%self.1) | |
? ++++++++++++++++++++ | |
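
Note: the node diff above is the actual failure. The tracer runs the module a second time for its sanity check, and the two recorded graphs do not compare equal; the %mask vs %mask.1 and ___torch_mangle_764 changes are value and type renamings that fall out of re-tracing. A common cause, consistent with what the IR shows, is that the aten::dropout calls here all take a train flag that is a bool prim::Constant[value=1], meaning the module was traced in training mode, where dropout cannot produce identical results twice. A minimal sketch of the usual fixes, assuming a module/input pair like the one passed to torch.jit.trace_module; names such as model and tokens are illustrative:

    import torch

    # Preferred: put the model in eval mode before tracing, so dropout
    # becomes a no-op and both check invocations record the same graph.
    model.eval()
    traced = torch.jit.trace_module(model, {"forward": (tokens,)})

    # Alternative, if training-mode behavior must be captured: skip the
    # re-run comparison that raised TracingCheckError.
    traced = torch.jit.trace_module(model, {"forward": (tokens,)},
                                    check_trace=False)

Either change belongs at the call site that invokes torch.jit.trace_module, not in the bert_pytorch model itself.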