Test Different LoRA Adapter Saved Sizes
#!/usr/bin/env python3
"""
Script to test the size of saved models with different LoRA configurations:
1. Normal LoRA (attention layers only)
2. LoRA with embedding space adapter (modules_to_save)
3. LoRA with trainable_token_indices (specific token indices)
4. LoRA with new tokens added (train only new tokens)

Requirements:
    pip install transformers peft torch
"""

import os
import shutil

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

try:
    from peft import LoraConfig, get_peft_model, TaskType
except ImportError:
    raise ImportError("PEFT library is required. Install it with: pip install peft")
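
# Note on what drives the saved adapter sizes (PEFT serialization behavior;
# exact numbers depend on the model chosen in main()):
# - plain LoRA saves only the low-rank A/B matrices for the targeted modules,
# - modules_to_save=["embed_tokens"] additionally saves a full copy of the
#   embedding matrix (roughly vocab_size * hidden_size extra parameters),
# - trainable_token_indices saves delta rows only for the listed token indices
#   (roughly num_indices * hidden_size extra parameters).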

def get_directory_size(directory_path):
    """Calculate the total size of a directory in bytes."""
    total_size = 0
    for dirpath, dirnames, filenames in os.walk(directory_path):
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)
            if os.path.exists(filepath):
                total_size += os.path.getsize(filepath)
    return total_size


def format_size(size_bytes):
    """Format bytes to human-readable format."""
    for unit in ['B', 'KB', 'MB', 'GB']:
        if size_bytes < 1024.0:
            return f"{size_bytes:.2f} {unit}"
        size_bytes /= 1024.0
    return f"{size_bytes:.2f} TB"

def print_model_info(model, tokenizer, save_path):
    """Print information about the model and its saved size."""
    print("\n" + "="*60)
    print(f"Model saved at: {save_path}")
    print("="*60)

    # Model parameters
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    print("\n📊 Model Statistics:")
    print(f" Total parameters: {total_params:,}")
    print(f" Trainable parameters: {trainable_params:,}")
    print(f" Vocabulary size: {len(tokenizer):,}")

    # File sizes
    if os.path.exists(save_path):
        dir_size = get_directory_size(save_path)
        print("\n💾 Saved Model Size:")
        print(f" Total size: {format_size(dir_size)}")

        # List individual files
        print("\n📁 Files in saved directory:")
        for root, dirs, files in os.walk(save_path):
            for file in files:
                file_path = os.path.join(root, file)
                file_size = os.path.getsize(file_path)
                rel_path = os.path.relpath(file_path, save_path)
                print(f" {rel_path}: {format_size(file_size)}")

    # Print trainable parameters breakdown (the total is already reported above)
    print("\n🔧 Trainable Parameters Breakdown:")
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(f" {name}: {param.shape} ({param.numel():,} params)")

    print("\n" + "="*60)

def setup_normal_lora(model, tokenizer):
    """Setup normal LoRA on attention layers only."""
    print("\n" + "="*60)
    print("Setting up NORMAL LoRA (attention layers only)")
    print("="*60)

    lora_config = LoraConfig(
        r=16,  # LoRA attention dimension
        lora_alpha=32,  # Alpha parameter for LoRA scaling
        lora_dropout=0.05,  # Dropout probability for LoRA layers
        bias="none",  # Bias type for LoRA
        task_type=TaskType.CAUSAL_LM,
        target_modules=["q_proj", "v_proj"],  # Standard attention layers
    )

    print("\nLoRA Configuration:")
    print(f" r (rank): {lora_config.r}")
    print(f" lora_alpha: {lora_config.lora_alpha}")
    print(f" lora_dropout: {lora_config.lora_dropout}")
    print(f" target_modules: {lora_config.target_modules}")

    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()

    return model, lora_config

def setup_embedding_lora(model, tokenizer):
    """Setup LoRA with embedding space adapter using modules_to_save."""
    print("\n" + "="*60)
    print("Setting up LoRA with EMBEDDING SPACE ADAPTER (modules_to_save)")
    print("="*60)

    # Record the vocabulary size for reporting
    original_vocab_size = len(tokenizer)

    # Option 1: Use modules_to_save to train the embedding layer.
    # This trains the entire embedding layer, and a full copy of it is stored
    # in the saved adapter.
    lora_config = LoraConfig(
        r=16,  # LoRA attention dimension
        lora_alpha=32,  # Alpha parameter for LoRA scaling
        lora_dropout=0.05,  # Dropout probability for LoRA layers
        bias="none",  # Bias type for LoRA
        task_type=TaskType.CAUSAL_LM,
        target_modules=["q_proj", "v_proj"],  # Still target attention layers
        modules_to_save=["embed_tokens"],  # Also train the embedding layer
    )

    print("\nLoRA Configuration:")
    print(f" r (rank): {lora_config.r}")
    print(f" lora_alpha: {lora_config.lora_alpha}")
    print(f" lora_dropout: {lora_config.lora_dropout}")
    print(f" target_modules: {lora_config.target_modules}")
    print(f" modules_to_save: {lora_config.modules_to_save}")
    print(f" Original vocab size: {original_vocab_size:,}")

    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()

    return model, lora_config

def setup_embedding_lora_trainable_indices(model, tokenizer):
    """Setup LoRA with embedding space adapter using trainable_token_indices."""
    print("\n" + "="*60)
    print("Setting up LoRA with EMBEDDING SPACE ADAPTER (trainable_token_indices)")
    print("="*60)

    vocab_size = len(tokenizer)

    # Option 2: Use trainable_token_indices for specific tokens.
    # Train only a subset of tokens (e.g., last 10% or a specific range).
    # This is useful when you've added new tokens and only want to train those.
    # For demonstration, we'll train the last 10% of tokens.
    trainable_indices_start = int(vocab_size * 0.9)  # Last 10% of tokens
    trainable_indices = list(range(trainable_indices_start, vocab_size))

    print(f"\n Vocabulary size: {vocab_size:,}")
    print(f" Training tokens from index {trainable_indices_start} to {vocab_size-1}")
    print(f" Number of trainable token indices: {len(trainable_indices):,}")

    lora_config = LoraConfig(
        r=16,  # LoRA attention dimension
        lora_alpha=32,  # Alpha parameter for LoRA scaling
        lora_dropout=0.05,  # Dropout probability for LoRA layers
        bias="none",  # Bias type for LoRA
        task_type=TaskType.CAUSAL_LM,
        target_modules=["q_proj", "v_proj"],  # Still target attention layers
        trainable_token_indices={"embed_tokens": trainable_indices},  # Train specific token indices
    )

    print("\nLoRA Configuration:")
    print(f" r (rank): {lora_config.r}")
    print(f" lora_alpha: {lora_config.lora_alpha}")
    print(f" lora_dropout: {lora_config.lora_dropout}")
    print(f" target_modules: {lora_config.target_modules}")
    print(f" trainable_token_indices: {len(trainable_indices):,} tokens")
    print(f" (indices {trainable_indices_start} to {vocab_size-1})")

    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()

    # Keep the trainable indices on the config for reference
    lora_config.trainable_indices_list = trainable_indices

    return model, lora_config

def setup_new_tokens_lora(model, tokenizer):
    """Setup LoRA with new tokens added to tokenizer and model, train only new tokens."""
    print("\n" + "="*60)
    print("Setting up LoRA with NEW TOKENS (train only new tokens)")
    print("="*60)

    # Store original vocabulary size
    original_vocab_size = len(tokenizer)
    print(f"\n Original vocabulary size: {original_vocab_size:,}")

    # Add new tokens to the tokenizer
    # For demonstration, add some example tokens (e.g., special tokens for an audio codec)
    new_tokens = [
        "<audio_start>", "<audio_end>", "<layer_sep>",
        "<snac_pad>", "<snac_token_1>", "<snac_token_2>",
        "<snac_token_3>", "<snac_token_4>", "<snac_token_5>",
        "<snac_token_6>", "<snac_token_7>", "<snac_token_8>",
        "<snac_token_9>", "<snac_token_10>", "<snac_token_11>",
        "<snac_token_12>", "<snac_token_13>", "<snac_token_14>",
        "<snac_token_15>", "<snac_token_16>", "<snac_token_17>",
        "<snac_token_18>", "<snac_token_19>", "<snac_token_20>",
    ]

    num_added = tokenizer.add_tokens(new_tokens)
    print(f" Added {num_added} new tokens to tokenizer")
    print(f" New vocabulary size: {len(tokenizer):,}")

    # Resize model embeddings to accommodate the new tokens
    model.resize_token_embeddings(len(tokenizer))
    print(f" Resized model embeddings to {len(tokenizer):,} tokens")

    # Get the indices of the newly added tokens
    new_token_indices = list(range(original_vocab_size, len(tokenizer)))
    print(f" New token indices: {original_vocab_size} to {len(tokenizer)-1}")
    print(f" Number of new token indices: {len(new_token_indices):,}")

    # Setup LoRA with trainable_token_indices for only the new tokens
    lora_config = LoraConfig(
        r=16,  # LoRA attention dimension
        lora_alpha=32,  # Alpha parameter for LoRA scaling
        lora_dropout=0.05,  # Dropout probability for LoRA layers
        bias="none",  # Bias type for LoRA
        task_type=TaskType.CAUSAL_LM,
        target_modules=["q_proj", "v_proj"],  # Still target attention layers
        trainable_token_indices={"embed_tokens": new_token_indices},  # Train only new tokens
    )

    print("\nLoRA Configuration:")
    print(f" r (rank): {lora_config.r}")
    print(f" lora_alpha: {lora_config.lora_alpha}")
    print(f" lora_dropout: {lora_config.lora_dropout}")
    print(f" target_modules: {lora_config.target_modules}")
    print(f" trainable_token_indices: {len(new_token_indices):,} new tokens")
    print(f" (indices {original_vocab_size} to {len(tokenizer)-1})")

    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()

    # Keep the new-token details on the config for reference
    lora_config.new_token_indices_list = new_token_indices
    lora_config.original_vocab_size = original_vocab_size
    lora_config.num_new_tokens = num_added

    return model, lora_config

def load_model_and_tokenizer(model_name):
    """Load a fresh copy of the base model and tokenizer (CPU, bfloat16)."""
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="cpu"
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True
    )
    # Add padding token if not present
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.pad_token_id = tokenizer.eos_token_id
    return model, tokenizer


def main():
    """Main function to test model sizes."""
    print("="*60)
    print("MODEL SIZE TESTING SCRIPT")
    print("="*60)

    # Configuration
    # Try different gemma-3 variants, fall back to gemma-2
    model_candidates = [
        "google/gemma-3-270m",
        # "google/gemma-3-2b",
        # "google/gemma-2-2b",
        # "google/gemma-2-9b",
    ]
    base_output_dir = "./test_model_sizes"

    # Try to load gemma-3 (or fall back to gemma-2)
    model_name_used = None
    model = None
    tokenizer = None
    for candidate in model_candidates:
        try:
            print(f"\n🔍 Attempting to load: {candidate}")
            model, tokenizer = load_model_and_tokenizer(candidate)
            model_name_used = candidate
            print(f"✅ Successfully loaded: {candidate}")
            break
        except Exception as e:
            print(f"⚠️ Could not load {candidate}: {e}")
            continue

    if model is None or tokenizer is None:
        raise RuntimeError(f"Failed to load any model from candidates: {model_candidates}")

    print(f"\n✅ Loaded model: {model_name_used}")
    print(f" Vocabulary size: {len(tokenizer):,}")

    # Clean and remove existing output directory if it exists
    if os.path.exists(base_output_dir):
        print(f"\n🧹 Cleaning existing output directory: {base_output_dir}")
        shutil.rmtree(base_output_dir)
        print("✅ Removed existing directory")

    # Create output directory
    os.makedirs(base_output_dir, exist_ok=True)

    # Test 1: Normal LoRA
    print("\n" + "="*60)
    print("TEST 1: NORMAL LoRA (Attention Layers Only)")
    print("="*60)
    model1, tokenizer1 = load_model_and_tokenizer(model_name_used)  # fresh copy for test 1
    model1, lora_config1 = setup_normal_lora(model1, tokenizer1)
    save_path1 = os.path.join(base_output_dir, "normal_lora")
    print(f"\n💾 Saving normal LoRA model to: {save_path1}")
    model1.save_pretrained(save_path1)
    tokenizer1.save_pretrained(save_path1)
    print_model_info(model1, tokenizer1, save_path1)

    # Test 2: LoRA with Embedding Adapter
    print("\n" + "="*60)
    print("TEST 2: LoRA with EMBEDDING SPACE ADAPTER")
    print("="*60)
    model2, tokenizer2 = load_model_and_tokenizer(model_name_used)  # fresh copy for test 2
    model2, lora_config2 = setup_embedding_lora(model2, tokenizer2)
    save_path2 = os.path.join(base_output_dir, "embedding_lora")
    print(f"\n💾 Saving embedding LoRA model to: {save_path2}")
    model2.save_pretrained(save_path2)
    tokenizer2.save_pretrained(save_path2)
    print_model_info(model2, tokenizer2, save_path2)

    # Test 3: LoRA with Trainable Token Indices
    print("\n" + "="*60)
    print("TEST 3: LoRA with TRAINABLE TOKEN INDICES")
    print("="*60)
    model3, tokenizer3 = load_model_and_tokenizer(model_name_used)  # fresh copy for test 3
    model3, lora_config3 = setup_embedding_lora_trainable_indices(model3, tokenizer3)
    save_path3 = os.path.join(base_output_dir, "trainable_indices_lora")
    print(f"\n💾 Saving trainable indices LoRA model to: {save_path3}")
    model3.save_pretrained(save_path3)
    tokenizer3.save_pretrained(save_path3)
    print_model_info(model3, tokenizer3, save_path3)

    # Test 4: LoRA with New Tokens Added
    print("\n" + "="*60)
    print("TEST 4: LoRA with NEW TOKENS ADDED")
    print("="*60)
    model4, tokenizer4 = load_model_and_tokenizer(model_name_used)  # fresh copy for test 4
    model4, lora_config4 = setup_new_tokens_lora(model4, tokenizer4)
    save_path4 = os.path.join(base_output_dir, "new_tokens_lora")
    print(f"\n💾 Saving new tokens LoRA model to: {save_path4}")
    model4.save_pretrained(save_path4)
    tokenizer4.save_pretrained(save_path4)
    print_model_info(model4, tokenizer4, save_path4)

    # Comparison
    print("\n" + "="*60)
    print("SIZE COMPARISON")
    print("="*60)
    size1 = get_directory_size(save_path1)
    size2 = get_directory_size(save_path2)
    size3 = get_directory_size(save_path3)
    size4 = get_directory_size(save_path4)

    print("\n📊 Model Size Comparison:")
    print(f" Normal LoRA: {format_size(size1)}")
    print(f" Embedding LoRA (modules_to_save): {format_size(size2)}")
    print(f" Embedding LoRA (trainable_indices): {format_size(size3)}")
    print(f" New Tokens LoRA: {format_size(size4)}")

    sizes = [
        ("Normal LoRA", size1),
        ("Embedding LoRA (modules_to_save)", size2),
        ("Embedding LoRA (trainable_indices)", size3),
        ("New Tokens LoRA", size4)
    ]
    sizes_sorted = sorted(sizes, key=lambda x: x[1])

    print("\n📈 Size Ranking (smallest to largest):")
    for i, (name, size) in enumerate(sizes_sorted, 1):
        print(f" {i}. {name}: {format_size(size)}")

    if size2 > size1:
        print(f"\n Embedding LoRA (modules_to_save) is {size2/size1:.2f}x larger than Normal LoRA")
    if size3 > size1:
        print(f" Embedding LoRA (trainable_indices) is {size3/size1:.2f}x larger than Normal LoRA")
    if size4 > size1:
        print(f" New Tokens LoRA is {size4/size1:.2f}x larger than Normal LoRA")
    if size3 > size2:
        print(f" Embedding LoRA (trainable_indices) is {size3/size2:.2f}x larger than modules_to_save")
    elif size2 > size3:
        print(f" Embedding LoRA (modules_to_save) is {size2/size3:.2f}x larger than trainable_indices")
    if size4 > size3:
        print(f" New Tokens LoRA is {size4/size3:.2f}x larger than trainable_indices")
    elif size3 > size4:
        print(f" Embedding LoRA (trainable_indices) is {size3/size4:.2f}x larger than New Tokens LoRA")

    print("\n" + "="*60)
    print("TESTING COMPLETE")
    print("="*60)


if __name__ == "__main__":
    main()
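
To sanity-check where the bytes go, a saved adapter can be loaded back with PEFT. Below is a minimal sketch, separate from the script above, assuming the script has already been run so that ./test_model_sizes/normal_lora exists. The differences between the four output directories come almost entirely from adapter_model.safetensors, since the tokenizer files are nearly identical across them.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Reload the base model the same way the script does (CPU, bfloat16).
base = AutoModelForCausalLM.from_pretrained(
    "google/gemma-3-270m",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="cpu",
)
tokenizer = AutoTokenizer.from_pretrained("./test_model_sizes/normal_lora")

# Attach the saved "normal LoRA" adapter and inspect what was stored.
model = PeftModel.from_pretrained(base, "./test_model_sizes/normal_lora")
print(model.peft_config["default"])  # r, lora_alpha, target_modules of the loaded adapter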