@imohitmayank
Created November 5, 2025 15:18
Test Different LoRA Adapter Saved Sizes
#!/usr/bin/env python3
"""
Script to test the size of saved models with different LoRA configurations:

1. Normal LoRA (attention layers only)
2. LoRA with embedding space adapter (modules_to_save)
3. LoRA with trainable_token_indices (specific token indices)
4. LoRA with new tokens added (train only the new tokens)

Requirements:
    pip install transformers peft torch
"""
import os
import shutil

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

try:
    from peft import LoraConfig, get_peft_model, TaskType
except ImportError:
    raise ImportError("PEFT library is required. Install it with: pip install peft")
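# Hedged note: trainable_token_indices in LoraConfig is a relatively recent PEFT
# feature (assumed here to need roughly peft>=0.15); warn early if the installed
# release looks older so tests 3 and 4 don't fail with a cryptic TypeError.
try:
    from packaging import version

    import peft

    if version.parse(peft.__version__) < version.parse("0.15.0"):
        print(f"⚠️ PEFT {peft.__version__} detected; "
              "trainable_token_indices may not be supported by this version.")
except Exception:
    pass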
def get_directory_size(directory_path):
    """Calculate the total size of a directory in bytes."""
    total_size = 0
    for dirpath, dirnames, filenames in os.walk(directory_path):
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)
            if os.path.exists(filepath):
                total_size += os.path.getsize(filepath)
    return total_size
def format_size(size_bytes):
    """Format bytes to human-readable format."""
    for unit in ['B', 'KB', 'MB', 'GB']:
        if size_bytes < 1024.0:
            return f"{size_bytes:.2f} {unit}"
        size_bytes /= 1024.0
    return f"{size_bytes:.2f} TB"
def print_model_info(model, tokenizer, save_path):
    """Print information about the model and its saved size."""
    print("\n" + "=" * 60)
    print(f"Model saved at: {save_path}")
    print("=" * 60)

    # Model parameters
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("\n📊 Model Statistics:")
    print(f"  Total parameters: {total_params:,}")
    print(f"  Trainable parameters: {trainable_params:,}")
    print(f"  Vocabulary size: {len(tokenizer):,}")

    # File sizes
    if os.path.exists(save_path):
        dir_size = get_directory_size(save_path)
        print("\n💾 Saved Model Size:")
        print(f"  Total size: {format_size(dir_size)}")

        # List individual files
        print("\n📁 Files in saved directory:")
        for root, dirs, files in os.walk(save_path):
            for file in files:
                file_path = os.path.join(root, file)
                file_size = os.path.getsize(file_path)
                rel_path = os.path.relpath(file_path, save_path)
                print(f"  {rel_path}: {format_size(file_size)}")

    # Print trainable parameters breakdown
    print("\n🔧 Trainable Parameters Breakdown:")
    trainable_count = 0
    for name, param in model.named_parameters():
        if param.requires_grad:
            trainable_count += param.numel()
            print(f"  {name}: {param.shape} ({param.numel():,} params)")

    print("\n" + "=" * 60)
def setup_normal_lora(model, tokenizer):
    """Set up normal LoRA on attention layers only."""
    print("\n" + "=" * 60)
    print("Setting up NORMAL LoRA (attention layers only)")
    print("=" * 60)

    lora_config = LoraConfig(
        r=16,                                 # LoRA attention dimension
        lora_alpha=32,                        # Alpha parameter for LoRA scaling
        lora_dropout=0.05,                    # Dropout probability for LoRA layers
        bias="none",                          # Bias type for LoRA
        task_type=TaskType.CAUSAL_LM,
        target_modules=["q_proj", "v_proj"],  # Standard attention layers
    )
    print("\nLoRA Configuration:")
    print(f"  r (rank): {lora_config.r}")
    print(f"  lora_alpha: {lora_config.lora_alpha}")
    print(f"  lora_dropout: {lora_config.lora_dropout}")
    print(f"  target_modules: {lora_config.target_modules}")

    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()
    return model, lora_config
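# Rough size intuition (a sketch under simple assumptions): a LoRA adapter for
# q_proj/v_proj stores two low-rank factors per targeted module, A (r x in_features)
# and B (out_features x r), so the checkpoint size is roughly the trainable
# parameter count times bytes-per-parameter plus small config overhead. The helper
# below is illustrative only and is not called by the tests.
def estimate_adapter_bytes(peft_model, bytes_per_param=4):
    """Estimate adapter size on disk.

    bytes_per_param: 4 for fp32 (PEFT may upcast adapter weights), 2 if the
    adapter weights are kept in bf16/fp16.
    """
    trainable = sum(p.numel() for p in peft_model.parameters() if p.requires_grad)
    return trainable * bytes_per_param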
def setup_embedding_lora(model, tokenizer):
    """Set up LoRA with an embedding space adapter using modules_to_save."""
    print("\n" + "=" * 60)
    print("Setting up LoRA with EMBEDDING SPACE ADAPTER (modules_to_save)")
    print("=" * 60)

    # Record the vocabulary size for reporting
    original_vocab_size = len(tokenizer)

    # Option 1: use modules_to_save to train the embedding layer.
    # This makes the entire embedding matrix trainable (and saves a full copy of it).
    lora_config = LoraConfig(
        r=16,                                 # LoRA attention dimension
        lora_alpha=32,                        # Alpha parameter for LoRA scaling
        lora_dropout=0.05,                    # Dropout probability for LoRA layers
        bias="none",                          # Bias type for LoRA
        task_type=TaskType.CAUSAL_LM,
        target_modules=["q_proj", "v_proj"],  # Still target attention layers
        modules_to_save=["embed_tokens"],     # Also train the embedding layer
    )
    print("\nLoRA Configuration:")
    print(f"  r (rank): {lora_config.r}")
    print(f"  lora_alpha: {lora_config.lora_alpha}")
    print(f"  lora_dropout: {lora_config.lora_dropout}")
    print(f"  modules_to_save: {lora_config.modules_to_save}")
    print(f"  target_modules: {lora_config.target_modules}")
    print(f"  Original vocab size: {original_vocab_size:,}")

    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()
    return model, lora_config
def setup_embedding_lora_trainable_indices(model, tokenizer):
    """Set up LoRA with an embedding space adapter using trainable_token_indices."""
    print("\n" + "=" * 60)
    print("Setting up LoRA with EMBEDDING SPACE ADAPTER (trainable_token_indices)")
    print("=" * 60)

    vocab_size = len(tokenizer)

    # Option 2: use trainable_token_indices to train only a subset of token embeddings.
    # This is useful when you've added new tokens and only want to train those.
    # For demonstration, train the last 10% of the vocabulary.
    trainable_indices_start = int(vocab_size * 0.9)
    trainable_indices = list(range(trainable_indices_start, vocab_size))  # up to and including index vocab_size - 1

    print(f"\n  Vocabulary size: {vocab_size:,}")
    print(f"  Training tokens from index {trainable_indices_start} to {vocab_size - 1}")
    print(f"  Number of trainable token indices: {len(trainable_indices):,}")

    lora_config = LoraConfig(
        r=16,                                 # LoRA attention dimension
        lora_alpha=32,                        # Alpha parameter for LoRA scaling
        lora_dropout=0.05,                    # Dropout probability for LoRA layers
        bias="none",                          # Bias type for LoRA
        task_type=TaskType.CAUSAL_LM,
        target_modules=["q_proj", "v_proj"],  # Still target attention layers
        trainable_token_indices={"embed_tokens": trainable_indices},  # Train specific token rows
    )
    print("\nLoRA Configuration:")
    print(f"  r (rank): {lora_config.r}")
    print(f"  lora_alpha: {lora_config.lora_alpha}")
    print(f"  lora_dropout: {lora_config.lora_dropout}")
    print(f"  target_modules: {lora_config.target_modules}")
    print(f"  trainable_token_indices: {len(trainable_indices):,} tokens")
    print(f"  (indices {trainable_indices_start} to {vocab_size - 1})")

    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()

    # Store trainable_indices for later use in summaries
    lora_config.trainable_indices_list = trainable_indices
    return model, lora_config
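# Alternative selection strategy (illustrative sketch, not wired into the tests):
# rather than slicing "the last 10%" of the vocabulary, look up the embedding-row
# indices of specific tokens of interest and pass those to trainable_token_indices.
# Tokens missing from the vocabulary map to the unk id (or None) and are dropped here.
def token_indices_for(tokenizer, tokens):
    """Return sorted, de-duplicated embedding indices for the given token strings."""
    ids = tokenizer.convert_tokens_to_ids(tokens)
    unk_id = tokenizer.unk_token_id
    return sorted({i for i in ids if i is not None and i != unk_id})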
def setup_new_tokens_lora(model, tokenizer):
    """Set up LoRA with new tokens added to the tokenizer and model; train only the new tokens."""
    print("\n" + "=" * 60)
    print("Setting up LoRA with NEW TOKENS (train only new tokens)")
    print("=" * 60)

    # Store the original vocabulary size
    original_vocab_size = len(tokenizer)
    print(f"\n  Original vocabulary size: {original_vocab_size:,}")

    # Add new tokens to the tokenizer.
    # For demonstration, add some example tokens (e.g., special tokens for an audio codec).
    new_tokens = [
        "<audio_start>", "<audio_end>", "<layer_sep>",
        "<snac_pad>", "<snac_token_1>", "<snac_token_2>",
        "<snac_token_3>", "<snac_token_4>", "<snac_token_5>",
        "<snac_token_6>", "<snac_token_7>", "<snac_token_8>",
        "<snac_token_9>", "<snac_token_10>", "<snac_token_11>",
        "<snac_token_12>", "<snac_token_13>", "<snac_token_14>",
        "<snac_token_15>", "<snac_token_16>", "<snac_token_17>",
        "<snac_token_18>", "<snac_token_19>", "<snac_token_20>",
    ]
    num_added = tokenizer.add_tokens(new_tokens)
    print(f"  Added {num_added} new tokens to tokenizer")
    print(f"  New vocabulary size: {len(tokenizer):,}")

    # Resize model embeddings to accommodate the new tokens
    model.resize_token_embeddings(len(tokenizer))
    print(f"  Resized model embeddings to {len(tokenizer):,} tokens")

    # Get the indices of the newly added tokens
    new_token_indices = list(range(original_vocab_size, len(tokenizer)))
    print(f"  New token indices: {original_vocab_size} to {len(tokenizer) - 1}")
    print(f"  Number of new token indices: {len(new_token_indices):,}")

    # Set up LoRA with trainable_token_indices covering only the new tokens
    lora_config = LoraConfig(
        r=16,                                 # LoRA attention dimension
        lora_alpha=32,                        # Alpha parameter for LoRA scaling
        lora_dropout=0.05,                    # Dropout probability for LoRA layers
        bias="none",                          # Bias type for LoRA
        task_type=TaskType.CAUSAL_LM,
        target_modules=["q_proj", "v_proj"],  # Still target attention layers
        trainable_token_indices={"embed_tokens": new_token_indices},  # Train only the new tokens
    )
    print("\nLoRA Configuration:")
    print(f"  r (rank): {lora_config.r}")
    print(f"  lora_alpha: {lora_config.lora_alpha}")
    print(f"  lora_dropout: {lora_config.lora_dropout}")
    print(f"  target_modules: {lora_config.target_modules}")
    print(f"  trainable_token_indices: {len(new_token_indices):,} new tokens")
    print(f"  (indices {original_vocab_size} to {len(tokenizer) - 1})")

    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()

    # Store new-token metadata for later use in summaries
    lora_config.new_token_indices_list = new_token_indices
    lora_config.original_vocab_size = original_vocab_size
    lora_config.num_new_tokens = num_added
    return model, lora_config
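# Reloading sketch (hedged, not exercised by this script): how one of the saved
# adapters might be attached back onto a fresh base model for inference. For the
# new-tokens variant the base embeddings must be resized to the saved tokenizer's
# vocabulary *before* the adapter is loaded; base_model_name/save_path simply
# mirror the values used in main() below.
def load_saved_adapter(base_model_name, save_path):
    """Load a base model plus a saved PEFT adapter from save_path."""
    from peft import PeftModel

    tok = AutoTokenizer.from_pretrained(save_path)
    base = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.bfloat16,
        device_map="cpu",
    )
    if base.get_input_embeddings().weight.shape[0] < len(tok):
        base.resize_token_embeddings(len(tok))  # needed when new tokens were added
    return PeftModel.from_pretrained(base, save_path), tok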
def main():
    """Main function to test model sizes."""
    print("=" * 60)
    print("MODEL SIZE TESTING SCRIPT")
    print("=" * 60)

    # Configuration: candidate checkpoints, tried in order (uncomment alternatives as needed)
    model_candidates = [
        "google/gemma-3-270m",
        # "google/gemma-3-2b",
        # "google/gemma-2-2b",
        # "google/gemma-2-9b",
    ]
    base_output_dir = "./test_model_sizes"

    # Load the first candidate that works
    model_name_used = None
    model = None
    tokenizer = None
    for candidate in model_candidates:
        try:
            print(f"\n🔍 Attempting to load: {candidate}")
            model = AutoModelForCausalLM.from_pretrained(
                candidate,
                torch_dtype=torch.bfloat16,
                trust_remote_code=True,
                device_map="cpu",
            )
            tokenizer = AutoTokenizer.from_pretrained(
                candidate,
                trust_remote_code=True,
            )
            model_name_used = candidate
            print(f"✅ Successfully loaded: {candidate}")
            break
        except Exception as e:
            print(f"⚠️ Could not load {candidate}: {e}")
            continue

    if model is None or tokenizer is None:
        raise RuntimeError(f"Failed to load any model from candidates: {model_candidates}")

    # Add a padding token if not present
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.pad_token_id = tokenizer.eos_token_id

    print(f"\n✅ Loaded model: {model_name_used}")
    print(f"  Vocabulary size: {len(tokenizer):,}")

    # Remove any existing output directory, then recreate it
    if os.path.exists(base_output_dir):
        print(f"\n🧹 Cleaning existing output directory: {base_output_dir}")
        shutil.rmtree(base_output_dir)
        print("✅ Removed existing directory")
    os.makedirs(base_output_dir, exist_ok=True)
    # Test 1: Normal LoRA
    print("\n" + "=" * 60)
    print("TEST 1: NORMAL LoRA (Attention Layers Only)")
    print("=" * 60)

    # Load a fresh model for test 1
    model1 = AutoModelForCausalLM.from_pretrained(
        model_name_used,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="cpu",
    )
    tokenizer1 = AutoTokenizer.from_pretrained(
        model_name_used,
        trust_remote_code=True,
    )
    if tokenizer1.pad_token is None:
        tokenizer1.pad_token = tokenizer1.eos_token
        tokenizer1.pad_token_id = tokenizer1.eos_token_id

    model1, lora_config1 = setup_normal_lora(model1, tokenizer1)
    save_path1 = os.path.join(base_output_dir, "normal_lora")
    print(f"\n💾 Saving normal LoRA model to: {save_path1}")
    model1.save_pretrained(save_path1)
    tokenizer1.save_pretrained(save_path1)
    print_model_info(model1, tokenizer1, save_path1)
    # Test 2: LoRA with Embedding Adapter
    print("\n" + "=" * 60)
    print("TEST 2: LoRA with EMBEDDING SPACE ADAPTER")
    print("=" * 60)

    # Load a fresh model for test 2
    model2 = AutoModelForCausalLM.from_pretrained(
        model_name_used,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="cpu",
    )
    tokenizer2 = AutoTokenizer.from_pretrained(
        model_name_used,
        trust_remote_code=True,
    )
    if tokenizer2.pad_token is None:
        tokenizer2.pad_token = tokenizer2.eos_token
        tokenizer2.pad_token_id = tokenizer2.eos_token_id

    model2, lora_config2 = setup_embedding_lora(model2, tokenizer2)
    save_path2 = os.path.join(base_output_dir, "embedding_lora")
    print(f"\n💾 Saving embedding LoRA model to: {save_path2}")
    model2.save_pretrained(save_path2)
    tokenizer2.save_pretrained(save_path2)
    print_model_info(model2, tokenizer2, save_path2)
    # Test 3: LoRA with Trainable Token Indices
    print("\n" + "=" * 60)
    print("TEST 3: LoRA with TRAINABLE TOKEN INDICES")
    print("=" * 60)

    # Load a fresh model for test 3
    model3 = AutoModelForCausalLM.from_pretrained(
        model_name_used,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="cpu",
    )
    tokenizer3 = AutoTokenizer.from_pretrained(
        model_name_used,
        trust_remote_code=True,
    )
    if tokenizer3.pad_token is None:
        tokenizer3.pad_token = tokenizer3.eos_token
        tokenizer3.pad_token_id = tokenizer3.eos_token_id

    model3, lora_config3 = setup_embedding_lora_trainable_indices(model3, tokenizer3)
    save_path3 = os.path.join(base_output_dir, "trainable_indices_lora")
    print(f"\n💾 Saving trainable indices LoRA model to: {save_path3}")
    model3.save_pretrained(save_path3)
    tokenizer3.save_pretrained(save_path3)
    print_model_info(model3, tokenizer3, save_path3)
    # Test 4: LoRA with New Tokens Added
    print("\n" + "=" * 60)
    print("TEST 4: LoRA with NEW TOKENS ADDED")
    print("=" * 60)

    # Load a fresh model for test 4
    model4 = AutoModelForCausalLM.from_pretrained(
        model_name_used,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="cpu",
    )
    tokenizer4 = AutoTokenizer.from_pretrained(
        model_name_used,
        trust_remote_code=True,
    )
    if tokenizer4.pad_token is None:
        tokenizer4.pad_token = tokenizer4.eos_token
        tokenizer4.pad_token_id = tokenizer4.eos_token_id

    model4, lora_config4 = setup_new_tokens_lora(model4, tokenizer4)
    save_path4 = os.path.join(base_output_dir, "new_tokens_lora")
    print(f"\n💾 Saving new tokens LoRA model to: {save_path4}")
    model4.save_pretrained(save_path4)
    tokenizer4.save_pretrained(save_path4)
    print_model_info(model4, tokenizer4, save_path4)
    # Comparison
    print("\n" + "=" * 60)
    print("SIZE COMPARISON")
    print("=" * 60)

    size1 = get_directory_size(save_path1)
    size2 = get_directory_size(save_path2)
    size3 = get_directory_size(save_path3)
    size4 = get_directory_size(save_path4)

    print("\n📊 Model Size Comparison:")
    print(f"  Normal LoRA: {format_size(size1)}")
    print(f"  Embedding LoRA (modules_to_save): {format_size(size2)}")
    print(f"  Embedding LoRA (trainable_indices): {format_size(size3)}")
    print(f"  New Tokens LoRA: {format_size(size4)}")

    sizes = [
        ("Normal LoRA", size1),
        ("Embedding LoRA (modules_to_save)", size2),
        ("Embedding LoRA (trainable_indices)", size3),
        ("New Tokens LoRA", size4),
    ]
    sizes_sorted = sorted(sizes, key=lambda x: x[1])
    print("\n📈 Size Ranking (smallest to largest):")
    for i, (name, size) in enumerate(sizes_sorted, 1):
        print(f"  {i}. {name}: {format_size(size)}")

    if size2 > size1:
        print(f"\n  Embedding LoRA (modules_to_save) is {size2 / size1:.2f}x larger than Normal LoRA")
    if size3 > size1:
        print(f"  Embedding LoRA (trainable_indices) is {size3 / size1:.2f}x larger than Normal LoRA")
    if size4 > size1:
        print(f"  New Tokens LoRA is {size4 / size1:.2f}x larger than Normal LoRA")
    if size3 > size2:
        print(f"  Embedding LoRA (trainable_indices) is {size3 / size2:.2f}x larger than modules_to_save")
    elif size2 > size3:
        print(f"  Embedding LoRA (modules_to_save) is {size2 / size3:.2f}x larger than trainable_indices")
    if size4 > size3:
        print(f"  New Tokens LoRA is {size4 / size3:.2f}x larger than trainable_indices")
    elif size3 > size4:
        print(f"  Embedding LoRA (trainable_indices) is {size3 / size4:.2f}x larger than New Tokens LoRA")

    print("\n" + "=" * 60)
    print("TESTING COMPLETE")
    print("=" * 60)
if __name__ == "__main__":
    main()