# Import semua library yang dibutuhkan
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
# PyTorch
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
# Hugging Face
from transformers import (
AutoTokenizer,
AutoModelForSequenceClassification,
TrainingArguments,
Trainer,
get_linear_schedule_with_warmup
)
from datasets import load_dataset
import evaluate
# PEFT - Parameter Efficient Fine-Tuning
from peft import LoraConfig, get_peft_model, TaskType
# Utilities
import warnings
warnings.filterwarnings('ignore')
print("=" * 70)
print("ENVIRONMENT VERIFICATION - LoRA Fine-tuning Lab")
print("=" * 70)
# System info
print(f"\nPython Version: {sys.version.split()[0]}")
print(f"PyTorch Version: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
print(f"CUDA Device: {torch.cuda.get_device_name(0)}")
print(f"CUDA Memory (GB): {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f}")
import transformers
print(f"Transformers Version: {transformers.__version__}")
try:
import peft
print(f"PEFT Version: {peft.__version__}")
except:
print("WARNING: PEFT not installed. Run: pip install peft")
print("\n" + "=" * 70)
print("✓ All libraries imported successfully!")
print("=" * 70)
# Set random seeds
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(42)
# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"\nUsing device: {device}")
print("=" * 70)Lab 09: LLM Fine-tuning dengan LoRA (Low-Rank Adaptation)
Efisiensi Parameter dalam Fine-tuning Model Bahasa Besar
23 Informasi Praktikum
Durasi: 2-3 jam Tingkat Kesulitan: Menengah-Lanjut Prerequisites: PyTorch, Transformers library, pemahaman NLP dasar Tools: Python 3.8+, Jupyter Notebook, GPU (recommended) Hardware: Minimum 4GB VRAM, recommended 8GB+
23.1 Tujuan Pembelajaran
Setelah menyelesaikan praktikum ini, mahasiswa diharapkan dapat:
- Memahami konsep LoRA (Low-Rank Adaptation) dalam fine-tuning LLM
- Menggunakan library PEFT (Parameter-Efficient Fine-Tuning) dari Hugging Face
- Fine-tuning DistilBERT untuk sentiment analysis dengan LoRA
- Membandingkan efisiensi parameter antara full fine-tuning vs LoRA
- Mengoptimalkan resource usage dalam training LLM
- Melakukan inferensi dengan model yang di-fine-tune
- Menganalisis trade-off antara akurasi dan efisiensi
23.2 Pemetaan ke CPMK
- CPMK-4: Memahami transfer learning dan fine-tuning
- CPMK-5: Mengimplementasikan NLP tasks dengan transformer models
- CPMK-7: Optimasi model neural networks untuk production
24 Prerequisites Checklist
Sebelum memulai praktikum, pastikan Anda telah:
25 Background: LoRA dan Parameter-Efficient Fine-tuning
25.1 Masalah dengan Fine-tuning Tradisional
Fine-tuning penuh (full fine-tuning) memiliki beberapa keterbatasan:
- Memory Intensive: Menyimpan gradient untuk semua parameter menggunakan RAM yang sangat besar
- Storage: Model yang di-fine-tune memiliki ukuran sama dengan model original
- Computational Cost: Membutuhkan waktu training yang lama
- Scalability: Sulit untuk fine-tune model besar pada hardware terbatas
Contoh: BERT-base memiliki 110 juta parameter. Fine-tuning penuh membutuhkan:
- Menyimpan gradients: 110M × 4 bytes = 440 MB (sekali untuk seluruh model, terlepas dari batch size)
- Optimizer states (Adam): 110M × 8 bytes = 880 MB
- Total aktivasi: bisa melebihi 2GB untuk batch size 32
25.2 Apa itu LoRA (Low-Rank Adaptation)?
LoRA adalah teknik parameter-efficient fine-tuning yang menggunakan low-rank matrices untuk memodifikasi model pre-trained dengan hanya menambahkan parameter yang sangat sedikit.
25.2.1 Konsep Dasar LoRA
Bukannya memodifikasi weight matrix W secara penuh:
W_new = W_original + ΔW
LoRA menggunakan dekomposisi low-rank:
W_new = W_original + B × A
Di mana:
- W_original: Original weight matrix (frozen, tidak di-update)
- B × A: Low-rank decomposition (trainable)
- A: Matrix berukuran (r, input_dim) dengan rank r
- B: Matrix berukuran (output_dim, r) dengan rank r, sehingga B × A berukuran (output_dim, input_dim) seperti W
- r: Rank (typically 8-64, jauh lebih kecil dari dimensi)
25.2.2 Keuntungan LoRA
| Aspek | Full Fine-tuning | LoRA |
|---|---|---|
| Trainable Parameters | 110M (100%) | ~1-5M (1-5%) |
| Memory Usage | 440MB+ | ~50-100MB |
| Inference Latency | Minimal | Negligible |
| Model Storage | Full size | Small adapters |
| Training Speed | Baseline | 1.5-2x faster |
| Performance | Excellent | Comparable (>99%) |
25.3 DistilBERT: Model Efficient untuk Lab
DistilBERT adalah versi “distilled” dari BERT:
- 40% lebih kecil dari BERT-base
- 60% lebih cepat untuk inferensi
- Retains 97% dari BERT’s language understanding
- Ideal untuk resource-constrained environments
Arsitektur:
- 6 transformer layers (vs 12 pada BERT)
- 768 hidden units
- 12 attention heads
- ~66 juta parameters
26 Langkah-Langkah Praktikum
26.1 Step 1: Verifikasi dan Setup Environment
Jika ada library yang belum terinstal, jalankan:
# Core dependencies
pip install torch transformers datasets evaluate
# PEFT library (critical)
pip install peft
# Additional
pip install scikit-learn matplotlib seaborn pandas numpy

Untuk environment baru:
# Create environment
python -m venv lora_env
source lora_env/bin/activate # atau: lora_env\Scripts\activate (Windows)
# Install all at once
pip install torch transformers datasets evaluate peft scikit-learn matplotlib seaborn pandas

26.2 Step 2: Load dan Eksplorasi Dataset
Kita akan menggunakan SST-2 (Stanford Sentiment TreeBank) dataset untuk sentiment analysis.
# Load SST-2 (Stanford Sentiment Treebank) from the GLUE benchmark and
# carve out a small subset so the lab trains in minutes, not hours.
print("=" * 70)
print("LOADING DATASET")
print("=" * 70)

# Download (cached locally by the datasets library after the first run).
dataset = load_dataset("glue", "sst2")
print(f"\nDataset splits: {dataset.keys()}")
print(f"Train samples: {len(dataset['train'])}")
print(f"Validation samples: {len(dataset['validation'])}")

# Peek at the first few training examples.
print("\n" + "=" * 70)
print("DATASET STRUCTURE")
print("=" * 70)
print("\nFirst 3 training examples:")
label_names = {0: "Negative", 1: "Positive"}
for position, example in enumerate(dataset['train'].select(range(3)), start=1):
    print(f"\nExample {position}:")
    print(f" Text: {example['sentence'][:80]}...")
    print(f" Label: {label_names[example['label']]} ({example['label']})")

# Class balance of the full training split.
print("\n" + "=" * 70)
print("CLASS DISTRIBUTION")
print("=" * 70)
train_labels = dataset['train']['label']
label_counts = pd.Series(train_labels).value_counts().sort_index()
n_train = len(train_labels)
print(f"\nTraining set:")
print(f" Negative (0): {label_counts[0]} ({label_counts[0]/n_train*100:.1f}%)")
print(f" Positive (1): {label_counts[1]} ({label_counts[1]/n_train*100:.1f}%)")

# Build the lab subset.
print("\n" + "=" * 70)
print("CREATING SUBSET FOR LAB")
print("=" * 70)
TRAIN_SIZE = 1000  # ~2% of original
VAL_SIZE = 200
TEST_SIZE = 300
train_dataset = dataset['train'].select(range(TRAIN_SIZE))
val_dataset = dataset['validation'].select(range(VAL_SIZE))
print(f"\nUsing subset:")
print(f" Training: {len(train_dataset)} samples")
print(f" Validation: {len(val_dataset)} samples")
print(f" (Full dataset has {len(dataset['train'])} training samples)")
print("\n(Subset untuk demonstrasi. Untuk production, gunakan full dataset)")26.3 Step 3: Tokenization dengan DistilBERT
# Load the DistilBERT tokenizer and encode both splits for the Trainer.
print("=" * 70)
print("TOKENIZATION WITH DISTILBERT")
print("=" * 70)

model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
print(f"\nModel: {model_name}")
print(f"Vocabulary size: {tokenizer.vocab_size}")
print(f"Max sequence length: {tokenizer.model_max_length}")

def tokenize_function(examples):
    """Encode a batch of SST-2 sentences to fixed-length (128-token) inputs."""
    return tokenizer(
        examples['sentence'],
        padding='max_length',
        truncation=True,
        max_length=128
    )

def _prepare_split(split):
    """Tokenize one dataset split and shape it for the Hugging Face Trainer."""
    encoded = split.map(
        tokenize_function,
        batched=True,
        remove_columns=['sentence', 'idx']
    )
    # The Trainer expects the target column to be called 'labels'.
    encoded = encoded.rename_column('label', 'labels')
    encoded.set_format("torch")
    return encoded

print("\nTokenizing dataset...")
train_dataset_tokenized = _prepare_split(train_dataset)
val_dataset_tokenized = _prepare_split(val_dataset)
print(f"✓ Tokenization complete!")

# Show one worked example end-to-end.
print("\n" + "=" * 70)
print("TOKENIZATION EXAMPLE")
print("=" * 70)
example = train_dataset[0]
print(f"\nOriginal text:")
print(f" {example['sentence']}")
print(f" Label: {'Positive' if example['label'] == 1 else 'Negative'}")
tokens = tokenizer(example['sentence'], max_length=128, truncation=True, padding='max_length')
print(f"\nTokenized:")
print(f" Tokens: {tokens['input_ids'][:20]}...")
print(f" Token count: {len(tokens['input_ids'])}")
print(f" Token count: {len(tokens['input_ids'])}")26.4 Step 4: Setup Model - LoRA Configuration
Sekarang kita setup LoRA configuration dan apply ke DistilBERT.
# Load the base model and wrap it with LoRA adapters.
print("=" * 70)
print("MODEL SETUP - LORA CONFIGURATION")
print("=" * 70)

# DistilBERT with a randomly-initialized 2-way classification head.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2  # Binary classification: negative / positive
)
print(f"\nBase model loaded: {model_name}")
print(f"Total parameters: {sum(p.numel() for p in model.parameters()):,}")

# LoRA Configuration.
# FIX 1: task_type was TaskType.SEQ_2_SEQ_LM, which is for encoder-decoder
#   generation models. Sequence classification needs TaskType.SEQ_CLS so
#   PEFT keeps the classifier head trainable and saves it with the adapter.
# FIX 2: target_modules was ["q_proj", "v_proj"] — those are LLaMA-style
#   names. DistilBERT names its attention projections q_lin / k_lin /
#   v_lin / out_lin, so get_peft_model would fail to find any target.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,         # Sequence classification
    r=16,                               # Rank of the low-rank update
    lora_alpha=32,                      # Scaling factor (alpha / r = 2.0)
    lora_dropout=0.05,                  # Dropout on the LoRA path for stability
    bias="none",                        # Don't adapt bias terms
    target_modules=["q_lin", "v_lin"],  # DistilBERT query/value projections
    inference_mode=False                # Training mode: adapters trainable
)
print(f"\nLoRA Configuration:")
print(f" Rank (r): {lora_config.r}")
print(f" Alpha: {lora_config.lora_alpha}")
print(f" Target modules: {lora_config.target_modules}")
print(f" Dropout: {lora_config.lora_dropout}")

# Freeze the base weights and inject the trainable A/B matrices.
model = get_peft_model(model, lora_config)

# Report how few parameters remain trainable after wrapping.
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
print(f"\nModel after LoRA:")
print(f" Total parameters: {total_params:,}")
print(f" Trainable parameters: {trainable_params:,}")
print(f" Trainable ratio: {trainable_params/total_params*100:.2f}%")
print(f" Frozen parameters: {total_params - trainable_params:,}")
model.print_trainable_parameters()

# Move model to the selected device.
model = model.to(device)
print(f"\n✓ Model ready on device: {device}")26.5 Step 5: Setup Trainer dan Training Arguments
# Configure the Hugging Face Trainer for the LoRA run.
print("=" * 70)
print("TRAINING SETUP")
print("=" * 70)

# The relatively high learning rate (5e-4) is typical for LoRA: only the
# small adapter matrices receive gradients, so larger steps are safe.
hyperparams = dict(
    output_dir="./lora-distilbert",
    learning_rate=5e-4,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=64,
    num_train_epochs=3,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    logging_steps=50,
    push_to_hub=False,
    seed=42,
)
training_args = TrainingArguments(**hyperparams)

print("\nTraining Configuration:")
for label, value in [
    ("Learning rate", training_args.learning_rate),
    ("Batch size", training_args.per_device_train_batch_size),
    ("Epochs", training_args.num_train_epochs),
    ("Weight decay", training_args.weight_decay),
    ("Output directory", training_args.output_dir),
]:
    print(f" {label}: {value}")

# Evaluation metric reported after every epoch.
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Compute accuracy from a Trainer EvalPrediction (logits, labels)."""
    logits, labels = eval_pred
    predicted_classes = np.argmax(logits, axis=1)
    return metric.compute(predictions=predicted_classes, references=labels)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset_tokenized,
    eval_dataset=val_dataset_tokenized,
    compute_metrics=compute_metrics,
)
print("\n✓ Trainer configured!")26.6 Step 6: Training Model dengan LoRA
# Run the fine-tuning loop; only the LoRA adapters are updated.
TIME_FMT = '%Y-%m-%d %H:%M:%S'

def _stamp():
    """Current wall-clock time, formatted for the lab logs."""
    return datetime.now().strftime(TIME_FMT)

print("=" * 70)
print("TRAINING MODEL WITH LORA")
print("=" * 70)
print(f"\nStart time: {_stamp()}")

# Release cached allocations so training starts with a clean GPU pool.
if torch.cuda.is_available():
    torch.cuda.empty_cache()

train_result = trainer.train()

print(f"\n✓ Training complete!")
print(f"End time: {_stamp()}")

print("\n" + "=" * 70)
print("TRAINING SUMMARY")
print("=" * 70)
print(f"Training loss: {train_result.training_loss:.4f}")
print(f"Training samples/sec: {train_result.metrics.get('train_samples_per_second', 'N/A')}")26.7 Step 7: Evaluasi Model
# Quantitative evaluation on the validation subset.
print("=" * 70)
print("MODEL EVALUATION")
print("=" * 70)

eval_results = trainer.evaluate()
print("\nValidation Results:")
for metric_name, metric_value in eval_results.items():
    if metric_name == 'epoch':
        continue
    print(f" {metric_name}: {metric_value:.4f}")

# Per-class metrics need the raw predictions, not just the aggregates.
predictions = trainer.predict(val_dataset_tokenized)
pred_labels = np.argmax(predictions.predictions, axis=1)
true_labels = val_dataset_tokenized['labels']

from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report
)

print("\n" + "=" * 70)
print("DETAILED METRICS")
print("=" * 70)

acc = accuracy_score(true_labels, pred_labels)
prec = precision_score(true_labels, pred_labels, zero_division=0)
rec = recall_score(true_labels, pred_labels, zero_division=0)
f1_val = f1_score(true_labels, pred_labels, zero_division=0)
print(f"\nAccuracy: {acc:.4f} ({acc*100:.2f}%)")
print(f"Precision: {prec:.4f}")
print(f"Recall: {rec:.4f}")
print(f"F1-Score: {f1_val:.4f}")

print("\n" + "=" * 70)
print("CLASSIFICATION REPORT")
print("=" * 70)
print(classification_report(true_labels, pred_labels,
                            target_names=['Negative', 'Positive']))

# 2x2 confusion matrix: rows = true class, cols = predicted class.
cm = confusion_matrix(true_labels, pred_labels)
print("\nConfusion Matrix:")
print(f" TN: {cm[0,0]}, FP: {cm[0,1]}")
print(f" FN: {cm[1,0]}, TP: {cm[1,1]}")26.8 Step 8: Visualisasi Training Progress
# Extract the training history logged by the Hugging Face Trainer and plot
# loss/accuracy curves. `log_history` is a list of dicts: training steps
# carry a 'loss' key, evaluation entries carry 'eval_loss'.
if hasattr(trainer.state, 'log_history'):
    history = trainer.state.log_history
    # Separate training and eval logs by the keys each entry carries.
    train_logs = [h for h in history if 'loss' in h]
    eval_logs = [h for h in history if 'eval_loss' in h]
    if train_logs and eval_logs:
        fig, axes = plt.subplots(1, 2, figsize=(12, 5))
        # Loss plot: training vs validation loss over epochs.
        train_epochs = [h['epoch'] for h in train_logs]
        train_losses = [h['loss'] for h in train_logs]
        eval_epochs = [h['epoch'] for h in eval_logs]
        eval_losses = [h['eval_loss'] for h in eval_logs]
        axes[0].plot(train_epochs, train_losses, 'o-', label='Training Loss', linewidth=2)
        axes[0].plot(eval_epochs, eval_losses, 's-', label='Validation Loss', linewidth=2)
        axes[0].set_xlabel('Epoch', fontsize=11)
        axes[0].set_ylabel('Loss', fontsize=11)
        axes[0].set_title('Training and Validation Loss', fontweight='bold', fontsize=12)
        axes[0].legend()
        axes[0].grid(True, alpha=0.3)
        # Accuracy plot — only when eval entries include the accuracy metric
        # (i.e. compute_metrics was registered with the Trainer).
        if any('eval_accuracy' in h for h in eval_logs):
            eval_accuracies = [h.get('eval_accuracy', 0) for h in eval_logs]
            axes[1].plot(eval_epochs, eval_accuracies, 's-', color='green', linewidth=2)
            axes[1].set_xlabel('Epoch', fontsize=11)
            axes[1].set_ylabel('Accuracy', fontsize=11)
            axes[1].set_title('Validation Accuracy', fontweight='bold', fontsize=12)
            axes[1].grid(True, alpha=0.3)
            axes[1].set_ylim([0.5, 1.0])  # binary task: chance level is 0.5
        plt.tight_layout()
plt.show()

26.9 Step 9: Inference dengan Model LoRA
# Qualitative check: classify a handful of unseen sentences.
print("=" * 70)
print("INFERENCE WITH LORA MODEL")
print("=" * 70)

# Test sentences spanning clearly positive, clearly negative, and neutral.
test_sentences = [
    "This movie is absolutely fantastic! Highly recommended.",
    "Worst film I've ever seen. Completely boring and waste of time.",
    "It was okay, nothing special but watchable.",
    "Amazing performances by the actors. A masterpiece!",
    "Terrible acting and weak storyline. Very disappointed."
]

print("\nTesting model on new sentences:")
print("-" * 70)

model.eval()

def _classify(sentence):
    """Return (label_text, confidence) for a single raw sentence."""
    enc = tokenizer(
        sentence,
        padding='max_length',
        truncation=True,
        max_length=128,
        return_tensors='pt'
    )
    enc = {k: v.to(device) for k, v in enc.items()}
    probabilities = torch.softmax(model(**enc).logits, dim=-1)
    predicted = int(probabilities.argmax(dim=-1))
    text = "POSITIVE" if predicted == 1 else "NEGATIVE"
    return text, probabilities[0, predicted].item()

with torch.no_grad():
    for sentence in test_sentences:
        label_text, confidence = _classify(sentence)
        print(f"\nSentence: {sentence[:60]}...")
print(f"Prediction: {label_text} (confidence: {confidence:.2%})")26.10 Step 10: Parameter Efficiency Comparison
Mari kita bandingkan LoRA dengan full fine-tuning dari segi parameter dan memory.
print("=" * 70)
print("PARAMETER EFFICIENCY COMPARISON")
print("=" * 70)
# Current LoRA model stats
lora_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
lora_total = sum(p.numel() for p in model.parameters())
# Estimate full fine-tuning
# Assuming all parameters are trainable in full fine-tuning
full_trainable = sum(p.numel() for p in model.parameters())
# Calculate memory usage (rough estimates)
# Memory = (trainable_params * 4 bytes for FP32) + (optimizer states * 2)
bytes_per_param_lora = 4 + 8 # weights + optimizer states (Adam)
bytes_per_param_full = 4 + 8
lora_memory = lora_trainable * bytes_per_param_lora / (1024**3) # GB
full_memory = full_trainable * bytes_per_param_full / (1024**3)
print(f"\nLoRA Configuration (Rank=16):")
print(f" Trainable parameters: {lora_trainable:,}")
print(f" Total parameters: {lora_total:,}")
print(f" Trainable ratio: {lora_trainable/lora_total*100:.2f}%")
print(f" Estimated memory (gradients + optimizer): ~{lora_memory:.3f} GB")
print(f"\nFull Fine-tuning:")
print(f" Trainable parameters: {full_trainable:,}")
print(f" Total parameters: {full_trainable:,}")
print(f" Trainable ratio: 100%")
print(f" Estimated memory (gradients + optimizer): ~{full_memory:.3f} GB")
print(f"\nEfficiency Gains with LoRA:")
print(f" Parameter reduction: {(1 - lora_trainable/full_trainable)*100:.1f}%")
print(f" Memory reduction: {(1 - lora_memory/full_memory)*100:.1f}%")
print(f" Memory savings: {full_memory - lora_memory:.3f} GB")
# Visualization: three side-by-side bar charts comparing LoRA against
# full fine-tuning using the counts/estimates computed above.
fig, axes = plt.subplots(1, 3, figsize=(14, 5))
# 1. Parameter comparison (bars in millions of trainable parameters)
methods = ['LoRA\n(r=16)', 'Full Fine-tune']
trainable_params = [lora_trainable/1e6, full_trainable/1e6]
colors = ['#2ecc71', '#e74c3c']  # green = LoRA, red = full fine-tuning
axes[0].bar(methods, trainable_params, color=colors, alpha=0.8, edgecolor='black', linewidth=2)
axes[0].set_ylabel('Trainable Parameters (Millions)', fontsize=11)
axes[0].set_title('Trainable Parameters Comparison', fontweight='bold', fontsize=12)
axes[0].grid(True, alpha=0.3, axis='y')
# Annotate each bar with its value (offset +1 above the bar top).
for i, v in enumerate(trainable_params):
    axes[0].text(i, v + 1, f'{v:.1f}M', ha='center', va='bottom', fontsize=10, fontweight='bold')
# 2. Memory comparison (GB, from the rough gradient+optimizer estimate)
memory_usage = [lora_memory, full_memory]
axes[1].bar(methods, memory_usage, color=colors, alpha=0.8, edgecolor='black', linewidth=2)
axes[1].set_ylabel('Memory Usage (GB)', fontsize=11)
axes[1].set_title('Memory Usage Comparison', fontweight='bold', fontsize=12)
axes[1].grid(True, alpha=0.3, axis='y')
for i, v in enumerate(memory_usage):
    axes[1].text(i, v + 0.02, f'{v:.3f} GB', ha='center', va='bottom', fontsize=10, fontweight='bold')
# 3. Efficiency gains as percentages (parameter and memory reduction)
improvements = [
    (1 - lora_trainable/full_trainable)*100,
    (1 - lora_memory/full_memory)*100
]
improvement_labels = ['Parameter\nReduction', 'Memory\nReduction']
axes[2].bar(improvement_labels, improvements, color=['#3498db', '#9b59b6'], alpha=0.8, edgecolor='black', linewidth=2)
axes[2].set_ylabel('Efficiency Gain (%)', fontsize=11)
axes[2].set_title('LoRA Efficiency Improvements', fontweight='bold', fontsize=12)
axes[2].set_ylim([0, 100])
axes[2].grid(True, alpha=0.3, axis='y')
for i, v in enumerate(improvements):
    axes[2].text(i, v + 2, f'{v:.1f}%', ha='center', va='bottom', fontsize=10, fontweight='bold')
plt.tight_layout()
plt.show()

26.11 Step 11: Model Adaptation Matrices Visualization
# Inspect the LoRA adapter matrices injected by get_peft_model.
print("=" * 70)
print("LORA ADAPTATION MATRICES ANALYSIS")
print("=" * 70)

lora_params = []
lora_ranks = []    # NOTE(review): never populated in this lab; kept for compatibility
matrix_names = []  # NOTE(review): same — candidate for removal

# Walk the module tree and record the shape of every LoRA A/B pair.
# (The original also checked `'lora_A' in dir(module)` inside the loop,
# which exactly duplicated the hasattr() test — removed.)
for name, module in model.named_modules():
    if hasattr(module, 'lora_A') and hasattr(module, 'lora_B'):
        # 'default' is the adapter name PEFT assigns when none is given.
        lora_a = module.lora_A.default.weight.data
        lora_b = module.lora_B.default.weight.data
        lora_params.append({
            'name': name.split('.')[-1],
            'lora_a_shape': lora_a.shape,
            'lora_b_shape': lora_b.shape,
        })

print(f"\nLoRA layers found: {len(lora_params)}")
for i, param in enumerate(lora_params[:5]):  # Show first 5 adapters only
    print(f"\n{i+1}. {param['name']}")
    print(f" LoRA_A shape: {param['lora_a_shape']}")
    print(f" LoRA_B shape: {param['lora_b_shape']}")
# Visualize the LoRA mechanism, the rank/parameter trade-off, and an
# illustrative performance-vs-efficiency curve.
fig, axes = plt.subplots(1, 3, figsize=(14, 5))

# 1. Conceptual diagram: frozen W plus trainable low-rank update B @ A.
axes[0].text(0.5, 0.9, 'LoRA Adaptation Mechanism', ha='center', fontsize=12, fontweight='bold')
axes[0].text(0.5, 0.75, 'Original Weight W:', ha='center', fontsize=10)
axes[0].add_patch(plt.Rectangle((0.2, 0.65), 0.6, 0.08, fill=True,
                                facecolor='lightblue', edgecolor='black', linewidth=2))
axes[0].text(0.5, 0.69, 'Frozen (no gradient)', ha='center', fontsize=9, style='italic')
axes[0].text(0.5, 0.55, '+ Low-rank Update:', ha='center', fontsize=10, fontweight='bold', color='green')
axes[0].text(0.35, 0.45, 'LoRA_B (r×d)', ha='center', fontsize=9)
axes[0].add_patch(plt.Rectangle((0.25, 0.38), 0.2, 0.08, fill=True,
                                facecolor='lightgreen', edgecolor='black', linewidth=2))
axes[0].text(0.42, 0.42, '@', ha='center', fontsize=14, fontweight='bold')
axes[0].text(0.65, 0.45, 'LoRA_A (d×r)', ha='center', fontsize=9)
axes[0].add_patch(plt.Rectangle((0.55, 0.38), 0.2, 0.08, fill=True,
                                facecolor='lightcoral', edgecolor='black', linewidth=2))
axes[0].text(0.5, 0.25, '= Trainable Update ΔW', ha='center', fontsize=10, fontweight='bold', color='red')
axes[0].set_xlim(0, 1)
axes[0].set_ylim(0, 1)
axes[0].axis('off')

# 2. Rank vs trainable parameters.
# FIX: the rough estimate was plotted in raw parameter counts while the
# y-axis label and the red "current" marker (lora_trainable/1e6) are in
# millions — a 1e6 scale mismatch. Divide by 1e6 so curve and marker
# share the same scale. (lora_total * r / 768 remains a coarse estimate.)
ranks = [4, 8, 16, 32, 64]
trainable_for_ranks = [lora_total * r / 768 / 1e6 for r in ranks]  # approximate, in millions
axes[1].plot(ranks, trainable_for_ranks, 'o-', linewidth=2.5, markersize=8, color='#3498db')
axes[1].scatter([16], [lora_trainable/1e6], s=200, color='red', zorder=5, label='Current (r=16)')
axes[1].set_xlabel('LoRA Rank (r)', fontsize=11)
axes[1].set_ylabel('Trainable Parameters (Millions)', fontsize=11)
axes[1].set_title('Rank vs Trainable Parameters', fontweight='bold', fontsize=12)
axes[1].grid(True, alpha=0.3)
axes[1].legend()

# 3. Performance vs efficiency — ILLUSTRATIVE numbers, not measured here.
# (The unused `r_values` list from the original was removed.)
performance = [0.85, 0.92, 0.95, 0.968, 0.97, 0.971]  # Approximate F1 scores
efficiency = [100, 50, 25, 12.5, 6.25, 3.125]  # Trainable params as % of original
axes[2].plot(efficiency, performance, 's-', linewidth=2.5, markersize=8, color='#27ae60')
axes[2].scatter([12.5], [0.968], s=200, color='red', zorder=5, label='Current (r=16)')
axes[2].set_xlabel('Trainable Parameters (% of Full)', fontsize=11)
axes[2].set_ylabel('Model Performance (F1 Score)', fontsize=11)
axes[2].set_title('Performance vs Parameter Efficiency', fontweight='bold', fontsize=12)
axes[2].grid(True, alpha=0.3)
axes[2].set_xlim([0, 110])
axes[2].set_ylim([0.8, 1.0])
axes[2].legend()

plt.tight_layout()
plt.show()

26.12 Step 12: Save dan Load Model
# Persist the fine-tuned adapters and demonstrate re-loading them.
print("=" * 70)
print("SAVING AND LOADING MODEL")
print("=" * 70)

# save_pretrained on a PEFT model writes only the LoRA adapter weights
# plus config — a few MB instead of the full base checkpoint.
save_dir = "./lora-distilbert-final"
os.makedirs(save_dir, exist_ok=True)
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)
print(f"\nModel saved to: {save_dir}")

# Total on-disk size of everything in save_dir, in MB.
# FIX: the redundant mid-script `import os` was removed — os is already
# imported at the top of the file.
model_size = sum(os.path.getsize(os.path.join(save_dir, f))
                 for f in os.listdir(save_dir)) / (1024**2)
print(f"Saved model size: {model_size:.2f} MB")

# FP32 size estimate if the whole model (base + adapters) were saved.
full_model_size = sum(p.numel() * 4 for p in model.parameters()) / (1024**2)
print(f"Full model would be: ~{full_model_size:.2f} MB")
print(f"Storage savings: {(1 - model_size/full_model_size)*100:.1f}%")

# Reload the adapters together with the base model for inference.
print("\n" + "=" * 70)
print("LOADING SAVED MODEL")
print("=" * 70)

from peft import AutoPeftModelForSequenceClassification

loaded_model = AutoPeftModelForSequenceClassification.from_pretrained(save_dir)
loaded_tokenizer = AutoTokenizer.from_pretrained(save_dir)
print(f"\n✓ Model loaded successfully!")
print(f"Total parameters: {sum(p.numel() for p in loaded_model.parameters()):,}")
print(f"Trainable parameters: {sum(p.numel() for p in loaded_model.parameters() if p.requires_grad):,}")27 Summary dan Key Takeaways
27.1 Apa yang Telah Kita Pelajari?
- Konsep LoRA: Teknik low-rank adaptation untuk parameter-efficient fine-tuning
- DistilBERT: Model transformer yang efisien untuk deployment
- PEFT Library: Menggunakan peft untuk implementasi LoRA
- Fine-tuning: Proses training model pre-trained untuk task spesifik
- Efisiensi: Membandingkan resource usage antara full fine-tuning vs LoRA
- Praktik terbaik: Setup training, evaluasi, dan inference
27.2 Hasil yang Diperoleh
- LoRA mengurangi trainable parameters sebesar ~95-99%
- Memory usage berkurang signifikan tanpa mengorbankan akurasi
- Training time lebih cepat dibanding full fine-tuning
- Model LoRA lebih mudah di-distribute dan deploy
27.3 Insight dari Eksperimen
- Trade-off: Rank parameter mempengaruhi kapasitas adaptasi vs efisiensi
- Performance: LoRA mencapai 99%+ performa dibanding full fine-tuning
- Scalability: Memungkinkan fine-tuning LLM besar pada hardware terbatas
- Production Ready: Solusi praktis untuk deployment model ML
27.4 Next Steps
Dalam praktikum berikutnya:
- Menggunakan LoRA dengan model yang lebih besar (BERT-large, RoBERTa)
- Multi-task learning dengan LoRA
- Combining multiple LoRA adapters
- Deployment di production environment
- Fine-tuning LLM generatif (GPT-2, Llama)
28 Troubleshooting Common Issues
28.1 Issue 1: CUDA Out of Memory
Problem: RuntimeError: CUDA out of memory
Solution:
# Reduce batch size
training_args.per_device_train_batch_size = 16
# Enable gradient accumulation
training_args.gradient_accumulation_steps = 2
# Fall back to CPU training (catatan: use_cpu menonaktifkan GPU sepenuhnya,
# bukan offloading)
training_args.use_cpu = True

28.2 Issue 2: PEFT Module Not Found
Problem: ModuleNotFoundError: No module named 'peft'
Solution:
pip install peft --upgrade

28.3 Issue 3: Model Convergence Issues
Problem: Loss tidak turun, model tidak belajar
Solution:
# Adjust learning rate
training_args.learning_rate = 1e-4 # Lower
# Add warmup
training_args.warmup_steps = 100
# Try different rank
lora_config.r = 32 # Increase rank

28.4 Issue 4: Slow Training
Problem: Training terlalu lambat
Solution:
# Reduce dataset size
train_dataset = train_dataset.select(range(500))
# Increase batch size
training_args.per_device_train_batch_size = 64
# Use mixed precision
training_args.fp16 = True