vndee · November 2, 2024 10:27
diff --git a/token_sampling_03.py b/token_sampling_03.py
 def generate(
    self,
    prompt_tokens: List[List[int]],
    max_gen_len: int,
    temperature: float = 0.6,
    top_p: float = 0.9,
    logprobs: bool = False,
    echo: bool = False,
 ) -> Tuple[List[List[int]], Optional[List[List[float]]]]:
    # Excerpt of a generation routine: only the per-position next-token
    # selection step is shown; the parts marked "# ...." are elided.
    # NOTE(review): `tokens`, `prev_pos`, `min_prompt_len` and `total_len` are
    # presumably prepared in the elided code -- confirm against the full source.
    # ....
    # ....
    
    for cur_pos in range(min_prompt_len, total_len):
        # Run the model on the columns [prev_pos, cur_pos) of `tokens`,
        # passing `prev_pos` as the second argument.
        logits = self.model.forward(tokens[:, prev_pos:cur_pos], prev_pos)
        if temperature > 0:
            # Divide the logits at the last index of axis 1 by the temperature,
            # normalise them with softmax, then draw via top-p sampling.
            probs = torch.softmax(logits[:, -1] / temperature, dim=-1)
            next_token = sample_top_p(probs, top_p)
        else:
            # Non-positive temperature: deterministically take the index of
            # the largest logit.
            # NOTE(review): argmax over dim=-1 drops that dimension, whereas
            # sample_top_p keeps a trailing size-1 sample dimension; presumably
            # the elided code reconciles the two shapes -- confirm.
            next_token = torch.argmax(logits[:, -1], dim=-1)
            
    # ....
    # ....
            
 def sample_top_p(probs, p):
    """
    Draw one token index per row using nucleus (top-p) sampling.

    Args:
        probs (torch.Tensor): Probabilities laid out along the last dimension.
        p (float): Cumulative-probability cutoff that bounds the nucleus.

    Returns:
        torch.Tensor: Sampled token indices (positions in ``probs``), with a
        trailing dimension of size 1.

    Note:
        Candidates are ranked from most to least likely. A candidate is
        discarded when the probability mass of the candidates ranked ahead of
        it already exceeds ``p``; the survivors are renormalized to sum to one
        before a single index is drawn from them.

    """
    ranked, ranked_idx = torch.sort(probs, dim=-1, descending=True)
    # Mass held by the strictly higher-ranked candidates; always 0 for the top one.
    mass_before = torch.cumsum(ranked, dim=-1) - ranked
    nucleus = ranked.masked_fill(mass_before > p, 0.0)
    nucleus = nucleus / nucleus.sum(dim=-1, keepdim=True)
    pick = torch.multinomial(nucleus, num_samples=1)
    # `pick` indexes the sorted order; translate it back to original positions.
    return torch.gather(ranked_idx, -1, pick)
	def generate(
	    self,
	    prompt_tokens: List[List[int]],
	    max_gen_len: int,
	    temperature: float = 0.6,
	    top_p: float = 0.9,
	    logprobs: bool = False,
	    echo: bool = False,
	) -> Tuple[List[List[int]], Optional[List[List[float]]]]:
	    # Excerpt of a generation routine: only the per-position next-token
	    # selection step is shown; the parts marked "# ...." are elided.
	    # NOTE(review): `tokens`, `prev_pos`, `min_prompt_len` and `total_len` are
	    # presumably prepared in the elided code -- confirm against the full source.
	    # ....
	    # ....

	    for cur_pos in range(min_prompt_len, total_len):
	        # Run the model on the columns [prev_pos, cur_pos) of `tokens`,
	        # passing `prev_pos` as the second argument.
	        logits = self.model.forward(tokens[:, prev_pos:cur_pos], prev_pos)
	        if temperature > 0:
	            # Divide the logits at the last index of axis 1 by the
	            # temperature, normalise with softmax, then draw via top-p sampling.
	            probs = torch.softmax(logits[:, -1] / temperature, dim=-1)
	            next_token = sample_top_p(probs, top_p)
	        else:
	            # Non-positive temperature: deterministically take the index
	            # of the largest logit.
	            next_token = torch.argmax(logits[:, -1], dim=-1)

	    # ....
	    # ....

	def sample_top_p(probs, p):
	    """
	    Perform top-p (nucleus) sampling on a probability distribution.

	    Args:
	        probs (torch.Tensor): Probability distribution tensor, with the
	            candidates laid out along the last dimension.
	        p (float): Probability threshold for top-p sampling.

	    Returns:
	        torch.Tensor: Sampled token indices (positions in ``probs``), with a
	        trailing dimension of size 1.

	    Note:
	        Top-p sampling selects the smallest set of tokens whose cumulative
	        probability mass exceeds the threshold p. The distribution is
	        renormalized based on the selected tokens.

	    """
	    # torch.sort returns new tensors, so the in-place edits below do not
	    # touch the caller's `probs`.
	    probs_sort, probs_idx = torch.sort(probs, dim=-1, descending=True)
	    probs_sum = torch.cumsum(probs_sort, dim=-1)
	    # Drop a candidate when the mass of the higher-ranked candidates alone
	    # already exceeds p; that preceding mass is 0 for the top-ranked one.
	    mask = probs_sum - probs_sort > p
	    probs_sort[mask] = 0.0
	    # Renormalize the surviving candidates so they sum to one.
	    probs_sort.div_(probs_sort.sum(dim=-1, keepdim=True))
	    next_token = torch.multinomial(probs_sort, num_samples=1)
	    # The draw indexes the sorted order; map it back to original positions.
	    next_token = torch.gather(probs_idx, -1, next_token)
	    return next_token