# Transfer Learning
## Fine-tuning with Hugging Face
from transformers import (
AutoModelForSequenceClassification,
AutoTokenizer,
TrainingArguments,
Trainer
)
from datasets import Dataset
import torch
class TransferLearningPipeline:
    """Fine-tune a pre-trained Hugging Face sequence classifier.

    Bundles three steps: loading a model/tokenizer pair, tokenizing raw text
    into a ``datasets.Dataset``, and running a ``Trainer`` with per-epoch
    evaluation.
    """

    def __init__(self, model_name: str, num_labels: int):
        """Load the pre-trained classifier and its matching tokenizer.

        Args:
            model_name: Identifier handed to both ``from_pretrained`` calls.
            num_labels: Forwarded to the model's ``from_pretrained`` call.
        """
        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_name, num_labels=num_labels
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def prepare_dataset(self, texts, labels, max_length=128):
        """Tokenize ``texts`` and pair them with ``labels``.

        Args:
            texts: Sized collection of input strings.
            labels: One label per text, in the same order.
            max_length: Token limit; longer inputs are truncated.

        Returns:
            A ``Dataset`` with ``input_ids``, ``attention_mask`` and
            ``labels`` columns.

        Raises:
            ValueError: If ``texts`` and ``labels`` differ in length.
        """
        # Fail early with a clear message instead of a column-length error
        # raised from inside the dataset construction.
        if len(texts) != len(labels):
            raise ValueError(
                f"texts and labels must have the same length "
                f"(got {len(texts)} and {len(labels)})"
            )
        encodings = self.tokenizer(
            texts, truncation=True, padding=True, max_length=max_length
        )
        return Dataset.from_dict({
            "input_ids": encodings["input_ids"],
            "attention_mask": encodings["attention_mask"],
            "labels": labels,
        })

    def train(
        self,
        train_dataset,
        val_dataset,
        output_dir="./model",
        num_train_epochs=3,
        train_batch_size=16,
        eval_batch_size=64,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir="./logs",
    ):
        """Fine-tune ``self.model`` and evaluate after every epoch.

        Args:
            train_dataset: Dataset used for optimization.
            val_dataset: Dataset evaluated at the end of each epoch.
            output_dir: Directory passed to ``TrainingArguments``.
            num_train_epochs: Number of passes over ``train_dataset``.
            train_batch_size: Per-device training batch size.
            eval_batch_size: Per-device evaluation batch size.
            warmup_steps: Learning-rate warmup steps. NOTE(review): the
                default of 500 can exceed the total step count on small
                datasets - confirm it suits your data size.
            weight_decay: Weight decay passed to ``TrainingArguments``.
            logging_dir: Directory passed to ``TrainingArguments``.

        Returns:
            The ``Trainer`` instance after ``train()`` has completed.
        """
        import inspect

        # Newer transformers releases name this argument `eval_strategy`;
        # older ones only accept `evaluation_strategy`. Use whichever the
        # installed version declares so the call works on both.
        accepted = inspect.signature(TrainingArguments.__init__).parameters
        if "eval_strategy" in accepted:
            strategy_key = "eval_strategy"
        else:
            strategy_key = "evaluation_strategy"

        training_args = TrainingArguments(
            output_dir=output_dir,
            num_train_epochs=num_train_epochs,
            per_device_train_batch_size=train_batch_size,
            per_device_eval_batch_size=eval_batch_size,
            warmup_steps=warmup_steps,
            weight_decay=weight_decay,
            logging_dir=logging_dir,
            **{strategy_key: "epoch"},
        )
        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
        )
        trainer.train()
        return trainer
# Example run: fine-tune "bert-base-uncased" with a two-label head.
# `train_texts`, `train_labels`, `val_texts` and `val_labels` must already
# be defined by the caller.
pipeline = TransferLearningPipeline(model_name="bert-base-uncased", num_labels=2)
train_ds = pipeline.prepare_dataset(texts=train_texts, labels=train_labels)
val_ds = pipeline.prepare_dataset(texts=val_texts, labels=val_labels)
trainer = pipeline.train(train_dataset=train_ds, val_dataset=val_ds)
## LoRA Fine-tuning
import torch
import torch.nn as nn
from typing import Optional
class LoRALayer(nn.Module):
    """Low-rank update computing ``B(A(x)) * (alpha / rank)``.

    ``A`` maps ``in_features`` down to ``rank`` and ``B`` maps ``rank`` back
    up to ``out_features``; neither projection has a bias. ``B`` is
    zero-initialized, so a freshly constructed layer outputs zeros.
    """

    def __init__(self, in_features, out_features, rank=8, alpha=16):
        """Build both projections and initialize their weights.

        Args:
            in_features: Size of the input's last dimension.
            out_features: Size of the output's last dimension.
            rank: Width of the bottleneck between ``A`` and ``B``.
            alpha: Numerator of the output scaling factor ``alpha / rank``.
        """
        super().__init__()
        down_proj = nn.Linear(in_features, rank, bias=False)
        up_proj = nn.Linear(rank, out_features, bias=False)
        self.A, self.B = down_proj, up_proj
        self.scaling = alpha / rank
        # Random down-projection, zero up-projection.
        nn.init.kaiming_uniform_(down_proj.weight)
        nn.init.zeros_(up_proj.weight)

    def forward(self, x):
        """Return the scaled low-rank projection of ``x``."""
        bottleneck = self.A(x)
        delta = self.B(bottleneck)
        return delta * self.scaling
class LoRALinear(nn.Module):
    """Wrap a linear layer with a trainable LoRA update.

    The wrapped layer's parameters are frozen; only the ``LoRALayer`` held in
    ``self.lora`` remains trainable. The forward pass returns the wrapped
    layer's output plus the LoRA output.
    """

    def __init__(self, original_layer, rank=8, alpha=16):
        """Attach a LoRA adapter to ``original_layer`` and freeze the layer.

        Args:
            original_layer: Layer exposing ``in_features``, ``out_features``
                and ``weight`` (e.g. ``nn.Linear``).
            rank: Bottleneck width passed to ``LoRALayer``.
            alpha: Scaling numerator passed to ``LoRALayer``.
        """
        super().__init__()
        self.original = original_layer
        self.lora = LoRALayer(
            original_layer.in_features,
            original_layer.out_features,
            rank,
            alpha,
        )

        # A new module is created on the default device/dtype. Move the
        # adapter next to the wrapped weights so that adding the two outputs
        # does not fail when the base layer is already on an accelerator or
        # in reduced precision.
        base_weight = original_layer.weight
        if base_weight.is_floating_point():
            self.lora.to(device=base_weight.device, dtype=base_weight.dtype)
        else:
            # Non-float (e.g. integer-packed) weights: the adapter must stay
            # in a floating dtype, so only the device is matched.
            self.lora.to(device=base_weight.device)

        # Freeze the pre-trained parameters; only the adapter is trained.
        for param in self.original.parameters():
            param.requires_grad = False

    def forward(self, x):
        """Return ``original(x) + lora(x)``."""
        return self.original(x) + self.lora(x)
def apply_lora(
    model,
    target_modules=("q_proj", "v_proj"),
    rank=8,
    alpha=16,
    freeze_base=False,
):
    """Replace matching ``nn.Linear`` submodules of ``model`` with ``LoRALinear``.

    A submodule is wrapped when it is an ``nn.Linear`` and any entry of
    ``target_modules`` occurs as a substring of its dotted module name.
    Modules that already live inside a ``LoRALinear`` are skipped, so calling
    this function twice does not stack adapters.

    Args:
        model: Module to modify in place.
        target_modules: Name fragments selecting the layers to wrap. A single
            string is treated as one fragment.
        rank: LoRA rank passed to each ``LoRALinear``.
        alpha: LoRA scaling numerator passed to each ``LoRALinear``.
        freeze_base: When true, every pre-existing parameter of ``model`` is
            frozen before the adapters are added. When false (the default,
            matching the previous behavior), only the wrapped layers are
            frozen and all other parameters stay trainable.

    Returns:
        The same ``model`` object, after modification. A summary of trainable
        versus total parameter counts is printed.
    """
    if isinstance(target_modules, str):
        # Iterating a bare string would match on single characters.
        target_modules = (target_modules,)
    targets = tuple(target_modules)

    # Snapshot the module tree once: it is both the iteration source and the
    # parent lookup table, so the tree is never mutated mid-iteration and the
    # lookup dict is not rebuilt for every match.
    modules = dict(model.named_modules())

    # Name prefixes of adapters that already exist; their internals
    # ("<name>.original", "<name>.lora.A", ...) must not be wrapped again.
    wrapped_prefixes = tuple(
        f"{name}." if name else ""
        for name, module in modules.items()
        if isinstance(module, LoRALinear)
    )

    if freeze_base:
        for param in model.parameters():
            param.requires_grad = False

    for name, module in modules.items():
        if not name or not isinstance(module, nn.Linear):
            # The root module has no parent attribute to replace.
            continue
        if not any(target in name for target in targets):
            continue
        if name.startswith(wrapped_prefixes):
            continue
        parent_name, _, child_name = name.rpartition(".")
        setattr(modules[parent_name], child_name, LoRALinear(module, rank, alpha))

    trainable_params = 0
    total_params = 0
    for param in model.parameters():
        count = param.numel()
        total_params += count
        if param.requires_grad:
            trainable_params += count
    # Guard against a parameter-free model.
    percent = 100 * trainable_params / total_params if total_params else 0.0
    print(f"Trainable: {trainable_params:,} / {total_params:,} ({percent:.2f}%)")
    return model
# Wrap the targeted linear layers of `base_model` (defined by the caller)
# with rank-16 adapters; the default target modules are used.
model = apply_lora(model=base_model, rank=16)
## Domain Adaptation
class _GradientReversal(torch.autograd.Function):
    """Identity in the forward pass; negated, scaled gradient in backward."""

    @staticmethod
    def forward(ctx, features, scale):
        ctx.scale = scale
        # Return a view so autograd records this function in the graph.
        return features.view_as(features)

    @staticmethod
    def backward(ctx, grad_output):
        # One gradient per forward input; `scale` is a plain number.
        return -ctx.scale * grad_output, None


class DomainAdapter:
    """Adversarial domain adaptation with a domain classifier.

    A small classifier is trained to tell source features (label 0) from
    target features (label 1). Gradients flowing back into the feature
    extractor are reversed and scaled by ``gradient_reversal_scale``, pushing
    the extractor toward features the classifier cannot separate.
    """

    def __init__(self, source_model, feature_dim):
        """Store the feature extractor and build the domain classifier.

        Args:
            source_model: Object providing ``parameters()`` and
                ``get_features(data)``.
            feature_dim: Size of the feature vectors fed to the classifier.
        """
        self.source_model = source_model
        self.domain_classifier = nn.Sequential(
            nn.Linear(feature_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 2),
        )
        self.gradient_reversal_scale = 1.0

    def _predict_domain(self, features):
        """Return domain logits for ``features`` through gradient reversal."""
        reversed_features = _GradientReversal.apply(
            features, self.gradient_reversal_scale
        )
        return self.domain_classifier(reversed_features)

    def adapt(self, source_data, target_data, epochs=10):
        """Run ``epochs`` full-batch adversarial updates.

        Args:
            source_data: Input accepted by ``source_model.get_features``.
            target_data: Input accepted by ``source_model.get_features``.
            epochs: Number of optimization steps.

        Returns:
            List with the domain loss (float) of each epoch.
        """
        encoder_params = list(self.source_model.parameters())
        if encoder_params:
            # Keep the classifier on the extractor's device; done before the
            # optimizer is created.
            self.domain_classifier.to(encoder_params[0].device)
        optimizer = torch.optim.Adam(
            encoder_params + list(self.domain_classifier.parameters()),
            lr=0.001,
        )
        criterion = nn.CrossEntropyLoss()
        losses = []

        for _ in range(epochs):
            source_features = self.source_model.get_features(source_data)
            target_features = self.source_model.get_features(target_data)
            source_domain_pred = self._predict_domain(source_features)
            target_domain_pred = self._predict_domain(target_features)

            # Class-index targets must be integer (long) tensors on the same
            # device as the logits; the batch size is taken from the logits
            # rather than len(data), which is wrong for non-tensor inputs.
            source_labels = torch.zeros(
                source_domain_pred.shape[0],
                dtype=torch.long,
                device=source_domain_pred.device,
            )
            target_labels = torch.ones(
                target_domain_pred.shape[0],
                dtype=torch.long,
                device=target_domain_pred.device,
            )
            domain_loss = (
                criterion(source_domain_pred, source_labels)
                + criterion(target_domain_pred, target_labels)
            ) / 2

            optimizer.zero_grad()
            domain_loss.backward()
            optimizer.step()
            losses.append(domain_loss.item())

        return losses
# Example: adapt `model` using 768-dimensional features.
# `source_train_data` and `target_train_data` must be provided by the caller.
adapter = DomainAdapter(source_model=model, feature_dim=768)
adapter.adapt(source_data=source_train_data, target_data=target_train_data)
## Few-shot Learning
class FewShotLearner:
    """Few-shot classifier that prompts a generative model with examples."""

    def __init__(self, base_model, tokenizer):
        """Store the generative model and its tokenizer.

        Args:
            base_model: Object providing ``generate(**inputs, max_new_tokens=...)``.
            tokenizer: Callable producing model inputs, with a ``decode`` method.
        """
        self.model = base_model
        self.tokenizer = tokenizer

    def create_prompt(self, support_set, query, task_description="Classify the text"):
        """Build the prompt: description, labeled examples, unlabeled query.

        Args:
            support_set: Iterable of mappings with ``"text"`` and ``"label"`` keys.
            query: Text to classify.
            task_description: Instruction placed at the top of the prompt.

        Returns:
            The prompt string, ending in ``"Label:"`` for the model to complete.
        """
        pieces = [f"{task_description}\n\n"]
        pieces.extend(
            f"Text: {example['text']}\nLabel: {example['label']}\n\n"
            for example in support_set
        )
        pieces.append(f"Text: {query}\nLabel:")
        return "".join(pieces)

    @staticmethod
    def _extract_label(prompt, response):
        """Pick the query's label out of the decoded model output."""
        marker = "Label:"
        segments = response.split(marker)
        # When the output echoes the prompt, the query's answer is the segment
        # right after the prompt's last marker. Taking the very last segment
        # instead would return the label of any extra example the model
        # invented after answering.
        answer_index = prompt.count(marker)
        if answer_index < len(segments):
            answer_lines = segments[answer_index].strip().splitlines()
            return answer_lines[0].strip() if answer_lines else ""
        # Output does not contain the full prompt (e.g. the model returns only
        # new text): fall back to the text after the last marker.
        return segments[-1].strip()

    def predict(self, support_set, query, max_new_tokens=10):
        """Generate a label for ``query`` given the labeled ``support_set``.

        Args:
            support_set: Labeled examples, as accepted by ``create_prompt``.
            query: Text to classify.
            max_new_tokens: Generation budget passed to ``generate``.

        Returns:
            The predicted label text, stripped of surrounding whitespace.
        """
        prompt = self.create_prompt(support_set, query)
        inputs = self.tokenizer(prompt, return_tensors="pt")

        # Move the inputs next to the model when both sides support it, so
        # generation does not fail for a model placed on an accelerator.
        device = getattr(self.model, "device", None)
        if device is not None and hasattr(inputs, "to"):
            inputs = inputs.to(device)

        with torch.no_grad():
            outputs = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        return self._extract_label(prompt, response)
# Example: two labeled examples, then classify one new text.
# `model` and `tokenizer` must already be defined by the caller.
learner = FewShotLearner(base_model=model, tokenizer=tokenizer)
support = [
    dict(text="Great product!", label="positive"),
    dict(text="Terrible service", label="negative"),
]
prediction = learner.predict(support_set=support, query="Average experience")
## Best Practices
- Start with pre-trained models from similar domains
- Use parameter-efficient methods for large models
- Monitor for catastrophic forgetting
- Validate on domain-specific test sets
- Use learning rate warmup and scheduling
- Implement early stopping based on validation metrics