Implementation: simple fine-tuning for LLMs by freezing all layers except the last few Linear layers
# Imports
from transformers import AutoModelForSequenceClassification

###################
model_uri = "distilbert/distilbert-base-uncased"
num_classes = 2

# Initialise the model with a sequence-classification head
model = AutoModelForSequenceClassification.from_pretrained(
    model_uri, num_labels=num_classes
)

# Freeze all the layers
for param in model.parameters():
    param.requires_grad = False

# Unfreeze the pre-classifier (penultimate Linear layer)
# Note: `pre_classifier` and `classifier` are attribute names specific to
# DistilBertForSequenceClassification; other architectures name their
# classification heads differently.
for param in model.pre_classifier.parameters():
    param.requires_grad = True

# Unfreeze the classifier (final Linear layer)
for param in model.classifier.parameters():
    param.requires_grad = True