How to freeze and train Huggingface models
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_name = "t5-base"  # example checkpoint (any T5-style encoder-decoder model); replace as needed

model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Count the transformer blocks in the encoder and decoder
num_encoder_layers = len(model.encoder.block)
num_decoder_layers = len(model.decoder.block)

# # Option 1: freeze the upper 3 layers of the encoder (lower layers stay trainable)
# for i in range(num_encoder_layers - 1, num_encoder_layers - 4, -1):
#     for param in model.encoder.block[i].parameters():
#         param.requires_grad = False
# # and freeze all layers of the decoder
# for i in range(num_decoder_layers):
#     for param in model.decoder.block[i].parameters():
#         param.requires_grad = False

# OR

# Option 2: freeze everything ...
for param in model.parameters():
    param.requires_grad = False

# # ... then un-freeze the lower 4 layers of the encoder
# for i in range(0, 4, 1):
#     for param in model.encoder.block[i].parameters():
#         param.requires_grad = True

# ... then un-freeze only the top layer of the encoder
for i in range(num_encoder_layers - 1, num_encoder_layers - 2, -1):
    for param in model.encoder.block[i].parameters():
        param.requires_grad = True

# Verify which parameters will actually be trained
for name, param in model.named_parameters():
    print(name, param.requires_grad)
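With the freezing in place, only the un-frozen parameters need to be handed to the optimizer. Below is a minimal sketch of that step, assuming a standard PyTorch AdamW optimizer; the optimizer choice and the learning rate are illustrative and not part of the original gist.

from torch.optim import AdamW

# Build the optimizer only over parameters that still require gradients
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = AdamW(trainable_params, lr=1e-4)  # example learning rate

# Sanity check: how much of the model is actually being trained
print(f"Trainable parameters: {sum(p.numel() for p in trainable_params):,}")
print(f"Total parameters:     {sum(p.numel() for p in model.parameters()):,}")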