Apologies for the snarky title, but there has been a huge amount of discussion around so-called "Prompt Engineering" these past few months on all kinds of platforms. Much of it comes from individuals peddling an awful lot of "Prompting" and very little "Engineering".
Most of these discussions are little more than users finding that writing more creative and complicated prompts can help them solve a task that a simpler prompt could not. I claim this is not Prompt Engineering. This is not to say that crafting good prompts is easy, but it does not involve any kind of sophisticated modification to the general "template" of a prompt.
Others, who I think do deserve to call themselves "Prompt Engineers" (and an awful lot more than that), have been writing about and utilizing the rich new ecosystem of tooling around LLMs for features such as templates, additional memory, and custom decoders. Examples include Langchain, VectorDB technologies, txtai/txtchat, my own work on token-level constrained text generation, Hugging Face's work on sequence-level constrained text generation, and many others. Many of these tool builders are finding that they can form entire, well-funded companies around their tools. Despite the money and hype around LLMs, it is still shockingly difficult to prompt them, but this doesn't have to be the case!
We are fortunate that an awful lot of very smart people have implemented many really neat prompt engineering techniques within Stable Diffusion, and more specifically within the Automatic1111 webui. I am going to highlight some of these packages/techniques, because I will carefully explain and demonstrate that there are LLM analogues for them which are being unjustly forgotten about and are not implemented in any LLM front-end. Some naysayers seem to think this is not the case. My hope is that this gist puts the final nail in the coffin of our current, non-creative approach to prompting LLMs. Let's list the techniques they've pioneered which are broadly possible for us to use in NLP and which, to my knowledge, have not been implemented in any serious capacity in any repo.
We can implement prompt alternating by alternating the base input prompt, at each generation step, between two user-given prompts.
Imagine that you want to generate 20 tokens of output while alternating between two prompts, "I like apple" and "I like bananas", making the input prompt look like this: [I like apple:I like bananas]
For the first token generated, the input is "I like apple"; let's say it generates "because". For the second token generated, the input is "I like bananas because"; let's say it generates "I". For the third token generated, the input is "I like apple because I", and it generates... and so on.
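To make this concrete, here is a minimal sketch of prompt alternating with Hugging Face transformers, generating one token at a time with greedy decoding. The model choice ("gpt2") and the function name are placeholders of mine, not an established implementation:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "gpt2"  # placeholder choice; any causal LM should work
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model.eval()

def alternate_prompts(prompt_a, prompt_b, num_tokens=20):
    """Generate num_tokens tokens, swapping the base prompt every step
    while both prompts share the same generated continuation."""
    generated = ""
    for step in range(num_tokens):
        base = prompt_a if step % 2 == 0 else prompt_b
        input_ids = tokenizer(base + generated, return_tensors="pt").input_ids
        with torch.no_grad():
            logits = model(input_ids).logits
        next_id = torch.argmax(logits[0, -1]).item()  # greedy decoding for simplicity
        generated += tokenizer.decode([next_id])
    return generated

print(alternate_prompts("I like apple", "I like bananas"))
```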
Similar to the above, but we can choose how many tokens to generate with one prompt before switching to the other.
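A sketch of that variant, reusing the imports, model, and tokenizer from the prompt-alternating sketch above; the `switch_at` parameter name is my own placeholder:

```python
# Assumes torch, `model`, and `tokenizer` from the prompt-alternating sketch above.
def switch_prompts(prompt_a, prompt_b, switch_at=10, num_tokens=20):
    """Generate with prompt_a for the first switch_at tokens, then keep
    the continuation but swap the base prompt to prompt_b."""
    generated = ""
    for step in range(num_tokens):
        base = prompt_a if step < switch_at else prompt_b
        input_ids = tokenizer(base + generated, return_tensors="pt").input_ids
        with torch.no_grad():
            logits = model(input_ids).logits
        next_id = torch.argmax(logits[0, -1]).item()
        generated += tokenizer.decode([next_id])
    return generated

print(switch_prompts("I like apple", "I like bananas", switch_at=5))
```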
In an LLM front-end, I should be able to use "()" in the prompt to increase the model's attention to the enclosed words, and "[]" to decrease it. I should be able to combine multiple modifiers.
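As a rough sketch of what an LLM analogue could look like: in Automatic1111 the emphasis weights rescale the text-encoder embeddings, so one plausible (but unverified) translation is to scale the LLM's input embeddings for the emphasized tokens. Parsing of the ()/[] syntax is omitted here; segments are passed as explicit (text, weight) pairs, and the model/tokenizer come from the first sketch:

```python
# Assumes torch, `model`, and `tokenizer` from the first sketch.
def emphasized_next_token(segments):
    """segments: list of (text, weight) pairs,
    e.g. [("I really like ", 1.0), ("apples", 1.5), (" because", 1.0)]"""
    embed_layer = model.get_input_embeddings()
    pieces = []
    for text, weight in segments:
        ids = tokenizer(text, return_tensors="pt").input_ids
        pieces.append(embed_layer(ids) * weight)  # up- or down-weight these tokens
    inputs_embeds = torch.cat(pieces, dim=1)
    with torch.no_grad():
        logits = model(inputs_embeds=inputs_embeds).logits
    return tokenizer.decode([torch.argmax(logits[0, -1]).item()])

# ()/[] style emphasis, expressed here as explicit weights:
print(emphasized_next_token([("I really like ", 1.0), ("apples", 1.5), (" because", 1.0)]))
```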
I should be able to average, or compute a weighted average of, the embeddings of multiple tokens in a prompt. This enables us to get a model to answer the question "What is the definition of {apple|orange}", where {apple|orange} is the mathematical average in embedding space of those two words.
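A minimal sketch of prompt blending along these lines, again assuming the model and tokenizer from the first sketch; for simplicity each blended word is reduced to a single vector by averaging its sub-word embeddings, which is my own simplification:

```python
# Assumes torch, `model`, and `tokenizer` from the first sketch.
def blended_next_token(prefix, words, weights=None):
    """Replace one slot in the prompt with the (weighted) average of the
    words' input embeddings, e.g. prefix="What is the definition of",
    words=["apple", "orange"]."""
    embed_layer = model.get_input_embeddings()
    weights = weights or [1.0 / len(words)] * len(words)

    def word_vec(word):
        ids = tokenizer(" " + word, return_tensors="pt").input_ids
        return embed_layer(ids).mean(dim=1, keepdim=True)  # (1, 1, hidden)

    blend = sum(w * word_vec(word) for word, w in zip(words, weights))
    prefix_embeds = embed_layer(tokenizer(prefix, return_tensors="pt").input_ids)
    inputs_embeds = torch.cat([prefix_embeds, blend], dim=1)
    with torch.no_grad():
        logits = model(inputs_embeds=inputs_embeds).logits
    return tokenizer.decode([torch.argmax(logits[0, -1]).item()])

# "What is the definition of {apple|orange}"
print(blended_next_token("What is the definition of", ["apple", "orange"]))
```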
Prompt blending, but with far more flexibility: use a third point as an "anchor".
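One way this could look, sketched under my own assumption that the anchor acts as "anchor plus weighted offsets of each blended word from the anchor" in embedding space (the Stable Diffusion extensions may define it differently):

```python
# Assumes torch, `model`, and `tokenizer` from the first sketch.
def anchored_blend_vec(words, anchor, weights=None):
    """Blend `words` relative to a third `anchor` word: start at the anchor's
    embedding and add weighted offsets toward each blended word."""
    embed_layer = model.get_input_embeddings()
    weights = weights or [1.0 / len(words)] * len(words)

    def word_vec(word):
        ids = tokenizer(" " + word, return_tensors="pt").input_ids
        return embed_layer(ids).mean(dim=1, keepdim=True)  # (1, 1, hidden)

    anchor_vec = word_vec(anchor)
    return anchor_vec + sum(w * (word_vec(word) - anchor_vec)
                            for word, w in zip(words, weights))

# The resulting vector can be spliced into inputs_embeds exactly as in the
# prompt-blending sketch above, e.g. for "{apple|orange}" anchored on "fruit":
blend = anchored_blend_vec(["apple", "orange"], anchor="fruit")
```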
This gist will include extremely basic LLM implementations for these 5 techniques, written by yours truly, all contributed with the hope that this spurs the community at large to implement and experiment with these features. Two are given below, but I will finish the other 3 in the next few days as my time allows.