graph TD
    A[Setup & Installation] --> B[Data Exploration]
    B --> C[Tokenization]
    C --> D[Model Loading]
    D --> E[Fine-tuning]
    E --> F[Evaluation]
    F --> G[Inference]
    G --> H[Optimization]
    style A fill:#e6f3ff
    style B fill:#ffe6e6
    style C fill:#ffffcc
    style D fill:#ccffcc
    style E fill:#e6ccff
    style F fill:#ffcccc
    style G fill:#ccffe6
    style H fill:#ffccff
graph TD
A[Setup & Installation] --> B[Data Exploration]
B --> C[Tokenization]
C --> D[Model Loading]
D --> E[Fine-tuning]
E --> F[Evaluation]
F --> G[Inference]
G --> H[Optimization]
style A fill:#e6f3ff
style B fill:#ffe6e6
style C fill:#ffffcc
style D fill:#ccffcc
style E fill:#e6ccff
style F fill:#ffcccc
style G fill:#ccffe6
style H fill:#ffccff
def setup_device():
    """Select the computation device, preferring CUDA when available.

    Returns:
        torch.device: 'cuda' when a GPU is visible, otherwise 'cpu'.
        Also prints the device name and total memory (GPU) or a warning (CPU).
    """
    if not torch.cuda.is_available():
        print("⚠ GPU not available. Using CPU")
        return torch.device('cpu')
    print(f"✓ Using GPU: {torch.cuda.get_device_name(0)}")
    print(f" Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    return torch.device('cuda')

device = setup_device()
19.3.5 Global Configuration
# Hyperparameters
CONFIG = {
    'model_name': 'bert-base-uncased',
    'max_length': 256,
    'batch_size': 16,
    'learning_rate': 2e-5,
    'num_epochs': 3,
    'warmup_steps': 500,
    'weight_decay': 0.01,
    'seed': 42,
    'num_labels': 2,
    'train_subset': None,  # None for the full dataset, int for a subset
    'eval_subset': None,
}

print("Configuration:")
for key, value in CONFIG.items():
    print(f" {key}: {value}")

# Set random seeds for reproducibility (both torch and numpy)
torch.manual_seed(CONFIG['seed'])
np.random.seed(CONFIG['seed'])
def show_sample_reviews(dataset, num_samples=3):
    """Print up to `num_samples` training reviews per sentiment class (0/1).

    Long reviews are truncated to 300 characters for readability.
    """
    print("\n" + "=" * 80)
    print("SAMPLE REVIEWS")
    print("=" * 80)
    for label_id in (0, 1):
        label_name = "POSITIVE" if label_id == 1 else "NEGATIVE"
        print(f"\n{label_name} REVIEWS:")
        # Filter the training split down to this sentiment, keep a few
        matching = [row for row in dataset['train'] if row['label'] == label_id]
        for idx, row in enumerate(matching[:num_samples], 1):
            review = row['text']
            # Truncate long reviews for display
            if len(review) > 300:
                review = review[:300] + "..."
            print(f"\n [{idx}] {review}")
    print("=" * 80)

show_sample_reviews(dataset, num_samples=2)
from transformers import DataCollatorWithPadding

# Create data collator: pads each batch dynamically to its longest sequence
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

print("✓ Data collator created")
24 Part 5: Fine-tuning
24.1 Create Trainer
def create_trainer(model, training_args, train_dataset, eval_dataset, tokenizer):
    """Build a Hugging Face Trainer, optionally sub-sampling both datasets.

    When CONFIG['train_subset'] / CONFIG['eval_subset'] are set to ints,
    the corresponding dataset is shuffled (with the global seed) and
    truncated to that many samples for faster experimentation.
    """
    # Optional subset for faster training
    if CONFIG['train_subset']:
        train_dataset = train_dataset.shuffle(seed=CONFIG['seed']).select(range(CONFIG['train_subset']))
        print(f"Using subset of training data: {len(train_dataset):,} samples")
    # Optional subset for faster evaluation
    if CONFIG['eval_subset']:
        eval_dataset = eval_dataset.shuffle(seed=CONFIG['seed']).select(range(CONFIG['eval_subset']))
        print(f"Using subset of evaluation data: {len(eval_dataset):,} samples")

    hf_trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    print("✓ Trainer created!")
    print(f" Training samples: {len(train_dataset):,}")
    print(f" Evaluation samples: {len(eval_dataset):,}")
    return hf_trainer

trainer = create_trainer(
    model,
    training_args,
    tokenized_dataset['train'],
    tokenized_dataset['test'],
    tokenizer,
)
24.2 Train Model
def train_model(trainer):
    """Run trainer.train(), print the resulting metrics, and save the model.

    The final model is written to dirs['models']/'final_model'.
    Returns the TrainOutput from trainer.train().
    """
    print("\n" + "=" * 80)
    print("STARTING TRAINING")
    print("=" * 80)

    result = trainer.train()

    print("\n" + "=" * 80)
    print("TRAINING COMPLETE")
    print("=" * 80)
    print(f"\nTraining metrics:")
    for key, value in result.metrics.items():
        print(f" {key}: {value}")

    # Persist the fine-tuned model for later inference
    trainer.save_model(str(dirs['models'] / 'final_model'))
    print(f"\n✓ Model saved to: {dirs['models'] / 'final_model'}")
    return result

# Execute training
train_result = train_model(trainer)
24.3 Training History Visualization
def plot_training_history(trainer):
    """Plot training/validation loss and validation metrics from Trainer logs.

    Reads trainer.state.log_history, draws two panels (loss vs. steps,
    metrics vs. epoch), saves the figure to dirs['figures']/'training_history.png'
    and shows it.

    Bug fix: the original referenced `train_steps` inside the eval branch even
    when there were no training logs (NameError), and divided by the last eval
    epoch without checking it for zero (ZeroDivisionError). Both cases now fall
    back to steps_per_epoch = 1.
    """
    log_history = trainer.state.log_history
    # Separate train and eval logs: eval entries carry an 'eval_loss' key
    train_logs = [log for log in log_history if 'loss' in log and 'eval_loss' not in log]
    eval_logs = [log for log in log_history if 'eval_loss' in log]

    fig, axes = plt.subplots(1, 2, figsize=(16, 6))

    # --- Loss plot ---
    train_steps = []  # defined unconditionally: the eval branch below reads it
    if train_logs:
        train_steps = [log['step'] for log in train_logs]
        train_loss = [log['loss'] for log in train_logs]
        axes[0].plot(train_steps, train_loss, label='Training Loss', linewidth=2)
    eval_epochs = []  # reused by the metrics panel below
    if eval_logs:
        eval_epochs = [log['epoch'] for log in eval_logs]
        eval_loss = [log['eval_loss'] for log in eval_logs]
        # Convert epochs to approximate steps so both curves share an x-axis
        if train_steps and eval_epochs[-1]:
            steps_per_epoch = train_steps[-1] / eval_epochs[-1]
        else:
            steps_per_epoch = 1
        eval_steps = [e * steps_per_epoch for e in eval_epochs]
        axes[0].plot(eval_steps, eval_loss, label='Validation Loss', linewidth=2, marker='o', markersize=8)
    axes[0].set_xlabel('Training Steps', fontsize=12, fontweight='bold')
    axes[0].set_ylabel('Loss', fontsize=12, fontweight='bold')
    axes[0].set_title('Training & Validation Loss', fontsize=14, fontweight='bold')
    axes[0].legend(fontsize=11)
    axes[0].grid(alpha=0.3)

    # --- Metrics plot ---
    if eval_logs:
        # Missing metric keys default to 0 so the curves stay the same length
        eval_accuracy = [log.get('eval_accuracy', 0) for log in eval_logs]
        eval_f1 = [log.get('eval_f1', 0) for log in eval_logs]
        axes[1].plot(eval_epochs, eval_accuracy, label='Accuracy', linewidth=2, marker='o')
        axes[1].plot(eval_epochs, eval_f1, label='F1 Score', linewidth=2, marker='s')
        axes[1].set_xlabel('Epoch', fontsize=12, fontweight='bold')
        axes[1].set_ylabel('Score', fontsize=12, fontweight='bold')
        axes[1].set_title('Validation Metrics', fontsize=14, fontweight='bold')
        axes[1].legend(fontsize=11)
        axes[1].grid(alpha=0.3)
        axes[1].set_ylim([0, 1])

    plt.tight_layout()
    plt.savefig(dirs['figures'] / 'training_history.png', dpi=300, bbox_inches='tight')
    plt.show()

plot_training_history(trainer)
25 Part 6: Evaluation
25.1 Evaluate on Test Set
def evaluate_model(trainer):
    """Run trainer.evaluate() on the test set and print each eval_* metric.

    Returns the raw metrics dict from trainer.evaluate().
    """
    print("\n" + "=" * 80)
    print("EVALUATION ON TEST SET")
    print("=" * 80)

    metrics = trainer.evaluate()

    print(f"\nTest set metrics:")
    for key, value in metrics.items():
        if 'eval_' in key:
            # Strip the 'eval_' prefix and upper-case for display
            metric_name = key.replace('eval_', '').upper()
            print(f" {metric_name:15s}: {value:.4f}")
    return metrics

eval_results = evaluate_model(trainer)
def test_inference(pipeline, test_texts):
    """Run the sentiment pipeline on each text and print the prediction.

    Assumes the pipeline returns [{'label': 'LABEL_0'|'LABEL_1', 'score': float}]
    where LABEL_1 means positive sentiment.
    """
    print("\n" + "=" * 80)
    print("INFERENCE TESTING")
    print("=" * 80)
    for idx, text in enumerate(test_texts, 1):
        prediction = pipeline(text)[0]
        # LABEL_1 -> positive, everything else -> negative
        sentiment = "Positive" if prediction['label'] == 'LABEL_1' else "Negative"
        print(f"\n[{idx}] Text:")
        print(f" '{text}'")
        print(f" Prediction: {sentiment} (confidence: {prediction['score']:.4f})")
    print("=" * 80)

# Custom test cases
test_texts = [
    "This movie is absolutely fantastic! Best film I've seen this year!",
    "Terrible acting, boring plot. Complete waste of time.",
    "It was okay, nothing special but not terrible either.",
    "Amazing cinematography and brilliant performances throughout.",
    "I fell asleep halfway through. Very disappointing.",
    "The special effects were mind-blowing!",
    "Not recommended. Poor script and direction.",
]

test_inference(inference_pipeline, test_texts)
26.3 Batch Inference
def batch_inference(pipeline, texts, batch_size=32):
    """Run the pipeline over `texts` in batches and report wall-clock throughput.

    Returns the list of pipeline predictions.
    """
    print(f"\nRunning batch inference on {len(texts)} samples...")
    import time
    started = time.time()
    predictions = pipeline(texts, batch_size=batch_size)
    duration = time.time() - started
    print(f"✓ Inference complete!")
    print(f" Time: {duration:.2f}s")
    print(f" Throughput: {len(texts)/duration:.1f} samples/sec")
    return predictions

# Test batch inference on the first 100 test reviews
sample_texts = [dataset['test'][i]['text'] for i in range(100)]
batch_results = batch_inference(inference_pipeline, sample_texts, batch_size=16)
26.4 Save Predictions
def save_predictions(texts, predictions, output_path):
    """Write texts with their predicted labels/scores to a CSV file.

    Returns the DataFrame that was written (columns: text, label, score).
    """
    frame = pd.DataFrame({
        'text': texts,
        'label': [pred['label'] for pred in predictions],
        'score': [pred['score'] for pred in predictions],
    })
    frame.to_csv(output_path, index=False)
    print(f"✓ Predictions saved to: {output_path}")
    return frame

# Save sample predictions
predictions_df = save_predictions(
    sample_texts,
    batch_results,
    dirs['predictions'] / 'sample_predictions.csv',
)

print("\nSample predictions:")
print(predictions_df.head(10))
27 Part 8: Model Optimization
27.1 Model Quantization
def quantize_model(model):
    """Apply dynamic int8 quantization to the model's Linear layers.

    Prints the serialized size of the original and quantized models and the
    relative reduction, then returns the quantized model.

    Fix vs. the original: model sizes were measured via a hard-coded
    "temp.pth" in the current directory, which can clobber an existing file
    and leaks the file if saving raises; a TemporaryDirectory is used instead.
    """
    import tempfile

    print("Applying dynamic quantization...")
    # Quantize only nn.Linear modules (the bulk of a transformer's weights)
    quantized_model = torch.quantization.quantize_dynamic(
        model, {torch.nn.Linear}, dtype=torch.qint8
    )
    print("✓ Quantization complete!")

    def get_model_size(m):
        # Serialize the state_dict to a throwaway temp file and return MB
        with tempfile.TemporaryDirectory() as tmpdir:
            tmp_path = Path(tmpdir) / "model.pth"
            torch.save(m.state_dict(), tmp_path)
            return tmp_path.stat().st_size / (1024 * 1024)

    original_size = get_model_size(model)
    quantized_size = get_model_size(quantized_model)
    print(f"\n Original model: {original_size:.2f} MB")
    print(f" Quantized model: {quantized_size:.2f} MB")
    print(f" Reduction: {(1 - quantized_size/original_size)*100:.1f}%")
    return quantized_model

# Note: Quantization may not work on all BERT models
# Uncomment to test:
# quantized_model = quantize_model(model)
27.2 Model Export (ONNX)
def export_to_onnx(model, tokenizer, output_path, max_length=256):
    """Export the model to ONNX with dynamic batch/sequence axes.

    A dummy text is tokenized (padded to max_length) purely to trace the
    graph; the exported model takes (input_ids, attention_mask) and emits
    'logits'. The resulting file size is printed in MB.
    """
    print(f"Exporting model to ONNX format...")

    # Dummy input used only for tracing
    dummy_text = "This is a sample text for export."
    encoded = tokenizer(
        dummy_text,
        padding='max_length',
        max_length=max_length,
        truncation=True,
        return_tensors='pt',
    )

    # ONNX export must run on CPU tensors
    cpu_model = model.cpu()
    cpu_inputs = {name: tensor.cpu() for name, tensor in encoded.items()}

    torch.onnx.export(
        cpu_model,
        (cpu_inputs['input_ids'], cpu_inputs['attention_mask']),
        output_path,
        input_names=['input_ids', 'attention_mask'],
        output_names=['logits'],
        # Let batch size and sequence length vary at inference time
        dynamic_axes={
            'input_ids': {0: 'batch', 1: 'sequence'},
            'attention_mask': {0: 'batch', 1: 'sequence'},
            'logits': {0: 'batch'},
        },
        opset_version=11,
    )
    print(f"✓ Model exported to: {output_path}")

    size_mb = Path(output_path).stat().st_size / (1024 * 1024)
    print(f" ONNX model size: {size_mb:.2f} MB")

# Export model
# export_to_onnx(model, tokenizer, dirs['models'] / 'model.onnx')
28 Part 9: Interpretability
28.1 Attention Visualization
def visualize_attention(text, model, tokenizer, layer=11, head=0):
    """Plot one layer/head self-attention matrix for `text` as a heatmap.

    Saves the figure to dirs['figures']/attention_layer{layer}_head{head}.png
    and shows it. Rows are query tokens, columns are key tokens.
    """
    print(f"\nVisualizing attention for text:")
    print(f" '{text}'")

    encoded = tokenizer(text, return_tensors='pt').to(device)

    # Single forward pass, requesting attention weights for every layer
    with torch.no_grad():
        outputs = model(**encoded, output_attentions=True)

    # (seq_len, seq_len) matrix for the requested layer and head
    attn = outputs.attentions[layer][0, head].cpu().numpy()
    token_labels = tokenizer.convert_ids_to_tokens(encoded['input_ids'][0])

    fig, ax = plt.subplots(figsize=(12, 10))
    heatmap = ax.imshow(attn, cmap='YlOrRd')
    ax.set_xticks(range(len(token_labels)))
    ax.set_yticks(range(len(token_labels)))
    ax.set_xticklabels(token_labels, rotation=90)
    ax.set_yticklabels(token_labels)
    plt.colorbar(heatmap, ax=ax)
    ax.set_xlabel('Key', fontsize=12, fontweight='bold')
    ax.set_ylabel('Query', fontsize=12, fontweight='bold')
    ax.set_title(f'Attention Weights (Layer {layer}, Head {head})', fontsize=14, fontweight='bold')
    plt.tight_layout()
    plt.savefig(dirs['figures'] / f'attention_layer{layer}_head{head}.png', dpi=300, bbox_inches='tight')
    plt.show()

# Visualize attention for sample text
sample_text = "This movie is absolutely amazing and wonderful!"
visualize_attention(sample_text, model, tokenizer, layer=11, head=0)
28.2 Feature Importance
def analyze_important_words(text, model, tokenizer):
    """Rank input tokens by gradient-based importance and plot the result.

    Importance = L2 norm of the gradient of the predicted-class logit with
    respect to each token's embedding. Saves a horizontal bar chart to
    dirs['figures']/'word_importance.png' and prints the top-5 tokens.

    Bug fix: the original called model.bert.embeddings(...) separately from
    the forward pass, so that tensor never received a gradient
    (embeddings.grad was None and the function crashed). A forward hook now
    captures the embedding tensor the model actually uses.
    """
    print(f"\nAnalyzing important words for:")
    print(f" '{text}'")

    inputs = tokenizer(text, return_tensors='pt').to(device)

    # Capture the embedding output that participates in the forward pass,
    # so backward() populates its .grad.
    captured = {}

    def _grab_embeddings(module, module_in, module_out):
        module_out.retain_grad()
        captured['embeddings'] = module_out

    handle = model.bert.embeddings.register_forward_hook(_grab_embeddings)
    try:
        outputs = model(**inputs)
    finally:
        handle.remove()
    logits = outputs.logits

    # Backprop the predicted class's logit down to the embeddings
    predicted_class = torch.argmax(logits, dim=1)
    model.zero_grad()
    logits[0, predicted_class].backward()

    gradients = captured['embeddings'].grad.cpu().numpy()[0]
    # Importance score: L2 norm of each token's embedding gradient
    importance = np.linalg.norm(gradients, axis=1)
    tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])

    # Plot: tokens above the median importance are highlighted in green
    fig, ax = plt.subplots(figsize=(14, 6))
    colors = ['green' if imp > np.median(importance) else 'gray' for imp in importance]
    ax.barh(range(len(tokens)), importance, color=colors, alpha=0.7)
    ax.set_yticks(range(len(tokens)))
    ax.set_yticklabels(tokens)
    ax.set_xlabel('Importance Score', fontsize=12, fontweight='bold')
    ax.set_title('Word Importance (Gradient-based)', fontsize=14, fontweight='bold')
    ax.grid(axis='x', alpha=0.3)
    plt.tight_layout()
    plt.savefig(dirs['figures'] / 'word_importance.png', dpi=300, bbox_inches='tight')
    plt.show()

    # Print top words
    top_indices = np.argsort(importance)[::-1][:5]
    print("\nTop 5 important words:")
    for i, idx in enumerate(top_indices, 1):
        print(f" {i}. {tokens[idx]:15s}: {importance[idx]:.4f}")

analyze_important_words(sample_text, model, tokenizer)
29 Kesimpulan
29.1 Summary
Dalam lab ini, kita telah:
✓ Memuat dan mengeksplorasi IMDB dataset
✓ Melakukan tokenization dengan BERT tokenizer
✓ Fine-tuning pre-trained BERT untuk sentiment analysis
✓ Mengevaluasi model dengan berbagai metrics
✓ Membuat inference pipeline untuk production
✓ Mengoptimalkan model untuk deployment
✓ Memvisualisasikan attention dan word importance
29.2 Key Takeaways
Transfer learning sangat powerful untuk berbagai NLP task
Hugging Face Transformers mempermudah bekerja dengan pre-trained models
Fine-tuning hanya membutuhkan sedikit data untuk mencapai hasil yang sangat baik
Tokenization yang tepat sangat penting untuk performance model
Optimasi model penting untuk production deployment
29.3 Next Steps
Experiment dengan model lain (RoBERTa, DistilBERT, ALBERT)