class <lambda>(torch.nn.Module):
    def forward(self, arg0_1: "Sym(s36)", arg1_1: "bf16[s36, 128256][128256, 1]cuda:0", arg2_1: "f32[s36][1]cuda:0"):
        # File: /home/shunting/ws/vllm/vllm/v1/sample/sampler.py:87 in forward, code: logits = logits.to(torch.float32)
        convert_element_type: "f32[s36, 128256][128256, 1]cuda:0" = torch.ops.prims.convert_element_type.default(arg1_1, torch.float32); arg1_1 = None
        # File: /home/shunting/ws/vllm/vllm/v1/sample/sampler.py:138 in apply_temperature, code: return logits.div_(temp.unsqueeze(dim=1))
        unsqueeze: "f32[s36, 1][1, 1]cuda:0" = torch.ops.aten.unsqueeze.default(arg2_1, 1); arg2_1 = None
        # No stacktrace found for following nodes
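        # (Added note, not part of the original dump: the block below appears to
        # be a numerically stable rewrite of the div_ + softmax pair applied by
        # torch.compile. It multiplies the logits by sign(temp), subtracts the
        # row max, then divides by sign(temp) * temp; this equals
        # logits/temp - max(logits/temp) and stays correct for negative temp.)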
        ge_scalar: "b8[s36, 1][1, 1]cuda:0" = torch.ops.aten.ge.Scalar(unsqueeze, 0)
        scalar_tensor_default: "f32[][]cuda:0" = torch.ops.aten.scalar_tensor.default(1, dtype = torch.float32, device = device(type='cuda', index=0), pin_memory = False)
        neg_default: "f32[][]cuda:0" = torch.ops.aten.neg.default(scalar_tensor_default)
        where_self: "f32[s36, 1][1, 1]cuda:0" = torch.ops.aten.where.self(ge_scalar, scalar_tensor_default, neg_default); ge_scalar = scalar_tensor_default = neg_default = None
        mul_tensor: "f32[s36, 128256][128256, 1]cuda:0" = torch.ops.aten.mul.Tensor(convert_element_type, where_self); convert_element_type = None
        amax_default: "f32[s36, 1][1, 1]cuda:0" = torch.ops.aten.amax.default(mul_tensor, [-1], True)
        sub_tensor: "f32[s36, 128256][128256, 1]cuda:0" = torch.ops.aten.sub.Tensor(mul_tensor, amax_default); mul_tensor = amax_default = None
        mul_tensor_1: "f32[s36, 1][1, 1]cuda:0" = torch.ops.aten.mul.Tensor(where_self, unsqueeze); where_self = unsqueeze = None
        div_tensor: "f32[s36, 128256][128256, 1]cuda:0" = torch.ops.aten.div.Tensor(sub_tensor, mul_tensor_1); sub_tensor = mul_tensor_1 = None
        # File: /home/shunting/ws/vllm/vllm/v1/sample/ops/topk_topp_sampler.py:100 in forward_native, code: probs = logits.softmax(dim=-1, dtype=torch.float32)
        exp: "f32[s36, 128256][128256, 1]cuda:0" = torch.ops.aten.exp.default(div_tensor); div_tensor = None
        sum_1: "f32[s36, 1][1, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(exp, [-1], True)
        div_1: "f32[s36, 128256][128256, 1]cuda:0" = torch.ops.aten.div.Tensor(exp, sum_1); exp = sum_1 = None
        # No stacktrace found for following nodes
        inductor_seeds_default: "i64[1][1]cuda:0" = torch.ops.prims.inductor_seeds.default(1, device(type='cuda', index=0))
        # File: /home/shunting/ws/vllm/vllm/v1/sample/ops/topk_topp_sampler.py:312 in random_sample, code: q.exponential_()
        inductor_lookup_seed_default: "i64[][]cuda:0" = torch.ops.prims.inductor_lookup_seed.default(inductor_seeds_default, 0); inductor_seeds_default = None
        inductor_random_default: "f32[s36, 128256][128256, 1]cuda:0" = torch.ops.prims.inductor_random.default([arg0_1, 128256], inductor_lookup_seed_default, 'rand'); arg0_1 = inductor_lookup_seed_default = None
        ge_10: "b8[s36, 128256][128256, 1]cuda:0" = torch.ops.aten.ge.Scalar(inductor_random_default, 0.9999999403953552)
        full_default: "f32[][]cuda:0" = torch.ops.aten.full.default([], -5.960464477539063e-08, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)
        log: "f32[s36, 128256][128256, 1]cuda:0" = torch.ops.aten.log.default(inductor_random_default); inductor_random_default = None
        where: "f32[s36, 128256][128256, 1]cuda:0" = torch.ops.aten.where.self(ge_10, full_default, log); ge_10 = full_default = log = None
        mul_53: "f32[s36, 128256][128256, 1]cuda:0" = torch.ops.aten.mul.Tensor(where, -1.0); where = None
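        # (Added note, not part of the original dump: inductor_random plus the
        # log/where/mul above look like the decomposition of q.exponential_()
        # via inverse-CDF sampling: q = -log(u) for u ~ Uniform[0, 1), with u
        # within 2**-24 of 1.0 redirected to -2**-24 before negation so the
        # sample is never exactly zero.)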
        # File: /home/shunting/ws/vllm/vllm/v1/sample/ops/topk_topp_sampler.py:318 in random_sample, code: return probs.div_(q).argmax(dim=-1).view(-1)
        div_2: "f32[s36, 128256][128256, 1]cuda:0" = torch.ops.aten.div.Tensor(div_1, mul_53); div_1 = mul_53 = None
        argmax: "i64[s36][1]cuda:0" = torch.ops.aten.argmax.default(div_2, -1); div_2 = None
        # File: /home/shunting/ws/vllm/vllm/v1/sample/sampler.py:116 in forward, code: sampled = sampled.to(torch.int32)
        convert_element_type_1: "i32[s36][1]cuda:0" = torch.ops.prims.convert_element_type.default(argmax, torch.int32); argmax = None
        # File: /home/shunting/ws/vllm/vllm/v1/sample/sampler.py:123 in forward, code: sampled_token_ids=sampled.unsqueeze(-1),
        unsqueeze_1: "i32[s36, 1][1, 1]cuda:0" = torch.ops.aten.unsqueeze.default(convert_element_type_1, -1); convert_element_type_1 = None
        return (unsqueeze_1,)
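
Reading back through the "# File:" comments, the whole graph reduces to a few lines of eager PyTorch. Below is a minimal sketch of that computation, reconstructed purely from those comments; the sample wrapper name is illustrative, not vLLM's API.

import torch

def sample(logits: torch.Tensor, temp: torch.Tensor) -> torch.Tensor:
    # sampler.py:87 -- upcast logits to float32
    logits = logits.to(torch.float32)
    # sampler.py:138 -- apply_temperature divides each row by its temperature
    logits = logits.div_(temp.unsqueeze(dim=1))
    # topk_topp_sampler.py:100 -- softmax over the vocab dimension
    probs = logits.softmax(dim=-1, dtype=torch.float32)
    # topk_topp_sampler.py:312/318 -- exponential-race (Gumbel-max) sampling:
    # dividing probs by iid Exponential(1) noise and taking the argmax draws
    # one token per row from the categorical distribution given by probs
    q = torch.empty_like(probs).exponential_()
    sampled = probs.div_(q).argmax(dim=-1).view(-1)
    # sampler.py:116/123 -- int32 token ids, one column per request
    return sampled.to(torch.int32).unsqueeze(-1)

The sign/amax block and the inductor_random decomposition in the graph are how torch.compile lowers the div_ + softmax and exponential_ steps of this routine.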