Created May 10, 2023 00:25
Save HDCharles/17300b0c0e2cd2e7a3e49d546dc9e19a to your computer and use it in GitHub Desktop.
dynamically_quantize_per_tensor triton graph
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
===== __compiled_fn_21 ===== | |
<eval_with_key>.144 class GraphModule(torch.nn.Module): | |
def forward(self, L_x_ : torch.Tensor): | |
l_x_ = L_x_ | |
# File: /fsx/users/cdhernandez/protoquant/ao_experimental/quant_primitives.py:29, code: min_val = torch.min(x) | |
min_1 = torch.min(l_x_) | |
# File: /fsx/users/cdhernandez/protoquant/ao_experimental/quant_primitives.py:30, code: max_val = torch.max(x) | |
max_1 = torch.max(l_x_) | |
# File: /fsx/users/cdhernandez/protoquant/ao_experimental/quant_primitives.py:32, code: min_val_neg = torch.min(min_val, torch.zeros_like(min_val)) | |
zeros_like = torch.zeros_like(min_1) | |
min_2 = torch.min(min_1, zeros_like); min_1 = zeros_like = None | |
# File: /fsx/users/cdhernandez/protoquant/ao_experimental/quant_primitives.py:33, code: max_val_pos = torch.max(max_val, torch.zeros_like(max_val)) | |
zeros_like_1 = torch.zeros_like(max_1) | |
max_2 = torch.max(max_1, zeros_like_1); max_1 = zeros_like_1 = None | |
# File: /fsx/users/cdhernandez/protoquant/ao_experimental/quant_primitives.py:37, code: scale = (max_val_pos.to(torch.float64) - min_val_neg) / torch.tensor([quant_max - quant_min], dtype=torch.float64).to(x.device) | |
to = max_2.to(torch.float64); max_2 = None | |
sub = to - min_2; to = None | |
tensor = torch.tensor([255], dtype = torch.float64) | |
to_1 = tensor.to(device(type='cuda', index=0)); tensor = None | |
truediv = sub / to_1; sub = to_1 = None | |
# File: /fsx/users/cdhernandez/protoquant/ao_experimental/quant_primitives.py:38, code: scale = torch.clamp(scale, min=eps) | |
clamp = torch.clamp(truediv, min = 1.1920928955078125e-07); truediv = None | |
# File: /fsx/users/cdhernandez/protoquant/ao_experimental/quant_primitives.py:40, code: zero_point = quant_min - torch.round(min_val_neg / scale).to(torch.int32) | |
truediv_1 = min_2 / clamp; min_2 = None | |
round_1 = torch.round(truediv_1); truediv_1 = None | |
to_2 = round_1.to(torch.int32); round_1 = None | |
sub_1 = -128 - to_2; to_2 = None | |
# File: /fsx/users/cdhernandez/protoquant/ao_experimental/quant_primitives.py:41, code: zero_point = torch.clamp(zero_point, quant_min, quant_max) | |
clamp_1 = torch.clamp(sub_1, -128, 127); sub_1 = None | |
# File: /fsx/users/cdhernandez/protoquant/ao_experimental/quant_primitives.py:45, code: x_q = torch.clamp(torch.round(x / scale) + zero_point, quant_min, quant_max).to(target_dtype) | |
truediv_2 = l_x_ / clamp; l_x_ = None | |
round_2 = torch.round(truediv_2); truediv_2 = None | |
add = round_2 + clamp_1; round_2 = None | |
clamp_2 = torch.clamp(add, -128, 127); add = None | |
to_3 = clamp_2.to(torch.int8); clamp_2 = None | |
return (to_3, clamp, clamp_1) | |
[2023-05-10 00:24:14,488] torch._dynamo.output_graph.__graph: [DEBUG] TRACED GRAPH | |
__compiled_fn_21 <eval_with_key>.144 opcode name target args kwargs | |
------------- ------------ ------------------------------------------------------------- -------------------------------------- ------------------------------- | |
placeholder l_x_ L_x_ () {} | |
call_function min_1 <built-in method min of type object at 0x7f193520b8a0> (l_x_,) {} | |
call_function max_1 <built-in method max of type object at 0x7f193520b8a0> (l_x_,) {} | |
call_function zeros_like <built-in method zeros_like of type object at 0x7f193520b8a0> (min_1,) {} | |
call_function min_2 <built-in method min of type object at 0x7f193520b8a0> (min_1, zeros_like) {} | |
call_function zeros_like_1 <built-in method zeros_like of type object at 0x7f193520b8a0> (max_1,) {} | |
call_function max_2 <built-in method max of type object at 0x7f193520b8a0> (max_1, zeros_like_1) {} | |
call_method to to (max_2, torch.float64) {} | |
call_function sub <built-in function sub> (to, min_2) {} | |
call_function tensor <built-in method tensor of type object at 0x7f193520b8a0> ([255],) {'dtype': torch.float64} | |
call_method to_1 to (tensor, device(type='cuda', index=0)) {} | |
call_function truediv <built-in function truediv> (sub, to_1) {} | |
call_function clamp <built-in method clamp of type object at 0x7f193520b8a0> (truediv,) {'min': 1.1920928955078125e-07} | |
call_function truediv_1 <built-in function truediv> (min_2, clamp) {} | |
call_function round_1 <built-in method round of type object at 0x7f193520b8a0> (truediv_1,) {} | |
call_method to_2 to (round_1, torch.int32) {} | |
call_function sub_1 <built-in function sub> (-128, to_2) {} | |
call_function clamp_1 <built-in method clamp of type object at 0x7f193520b8a0> (sub_1, -128, 127) {} | |
call_function truediv_2 <built-in function truediv> (l_x_, clamp) {} | |
call_function round_2 <built-in method round of type object at 0x7f193520b8a0> (truediv_2,) {} | |
call_function add <built-in function add> (round_2, clamp_1) {} | |
call_function clamp_2 <built-in method clamp of type object at 0x7f193520b8a0> (add, -128, 127) {} | |
call_method to_3 to (clamp_2, torch.int8) {} | |
output output output ((to_3, clamp, clamp_1),) {} |
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.