Lab 03 - Classical ML Algorithms: Comparison & Model Selection
Pembelajaran Mesin - Semester 6
Learning Outcomes
After completing this lab, students are expected to be able to:
- Implement a range of classical machine learning algorithms for classification
- Compare algorithm performance objectively using appropriate metrics
- Perform hyperparameter tuning to optimize models
- Apply cross-validation for robust model validation
- Select the best model based on metrics and business requirements
- Analyze the trade-offs between algorithms (accuracy vs. speed vs. interpretability)
Duration: 3-4 hours. Dataset: Breast Cancer Wisconsin (built into sklearn). Tools: Python, scikit-learn, pandas, matplotlib, seaborn.
Why This Lab Matters:
In machine learning practice, no single algorithm is best for every problem. This lab teaches a systematic approach to comparing algorithms and selecting the model that best fits the data and the business requirements.
9.1 Background & Motivation
9.1.1 The Problem to Solve
You are a data scientist at a hospital, asked to develop a system that supports breast cancer diagnosis based on tumor cell features. The dataset contains 30 numeric features extracted from Fine Needle Aspirate (FNA) images of breast cells.
Challenges:
- The dataset has 30 features on very different scales
- The model must be highly accurate (patient health is at stake)
- The model must be explainable to physicians
- There is a trade-off between accuracy and interpretability
9.1.2 Algorithms to Compare
| Algorithm | Strengths | Weaknesses | Use Case |
|---|---|---|---|
| Logistic Regression | Fast, interpretable, probabilistic | Linear decision boundary only | Baseline; when interpretation matters |
| Decision Tree | Highly interpretable, non-linear | Prone to overfitting | Exploratory analysis |
| Random Forest | Accurate, robust, provides feature importance | Black box, slower | Production systems needing high accuracy |
| SVM | Effective in high-dimensional spaces | Requires scaling, harder to tune | High-dimensional data |
| K-Nearest Neighbors | Simple, non-parametric | Slow at prediction time, requires scaling | Small datasets |
9.2 Environment Setup
9.2.1 Import Libraries
# Data manipulation
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
# Machine Learning
import sklearn
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report, roc_auc_score, roc_curve
)
# Algorithms
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.dummy import DummyClassifier
# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
# Timing
import time
# Set style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
np.random.seed(42)
print("✓ All libraries imported successfully")
print(f"scikit-learn version: {sklearn.__version__}")
9.3 Step 1: Load and Explore Dataset
# Load dataset
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target, name='diagnosis')
# Basic information
print("=" * 60)
print("BREAST CANCER WISCONSIN DATASET")
print("=" * 60)
print(f"\nDataset Shape: {X.shape}")
print(f"Number of samples: {X.shape[0]}")
print(f"Number of features: {X.shape[1]}")
print(f"\nClass Distribution:")
print(f" Malignant (0): {(y == 0).sum()} ({(y == 0).sum() / len(y) * 100:.1f}%)")
print(f" Benign (1): {(y == 1).sum()} ({(y == 1).sum() / len(y) * 100:.1f}%)")
print(f"\nTarget Names: {data.target_names}")
print(f"\nFeature Statistics:")
X.describe().loc[['mean', 'std', 'min', 'max']]
9.3.1 Visualize Class Distribution
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# Class distribution
axes[0].bar(['Malignant', 'Benign'], [sum(y == 0), sum(y == 1)],
color=['#e74c3c', '#2ecc71'], alpha=0.7, edgecolor='black')
axes[0].set_ylabel('Count', fontsize=12)
axes[0].set_title('Class Distribution', fontsize=14, fontweight='bold')
axes[0].grid(axis='y', alpha=0.3)
# Add count labels
for i, v in enumerate([sum(y == 0), sum(y == 1)]):
    axes[0].text(i, v + 5, str(v), ha='center', fontweight='bold')
# Feature correlation heatmap (first 10 features)
top_features = X.columns[:10]
corr = X[top_features].corr()
sns.heatmap(corr, annot=True, fmt='.2f', cmap='coolwarm', center=0,
            ax=axes[1], cbar_kws={'label': 'Correlation'})
axes[1].set_title('Feature Correlation (First 10 Features)', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()
print("\n✓ Data exploration completed")9.4 Step 2: Data Splitting
# Split data: 60% train, 20% validation, 20% test
X_temp, X_test, y_temp, y_test = train_test_split(
X, y, test_size=0.2, random_state=42, stratify=y
)
X_train, X_val, y_train, y_val = train_test_split(
X_temp, y_temp, test_size=0.25, random_state=42, stratify=y_temp # 0.25 of 0.8 = 0.2
)
print("=" * 60)
print("DATA SPLITTING")
print("=" * 60)
print(f"\nTraining set: {X_train.shape[0]} samples ({X_train.shape[0]/len(X)*100:.1f}%)")
print(f"Validation set: {X_val.shape[0]} samples ({X_val.shape[0]/len(X)*100:.1f}%)")
print(f"Test set: {X_test.shape[0]} samples ({X_test.shape[0]/len(X)*100:.1f}%)")
print("\nClass distribution in splits:")
for name, target in [('Train', y_train), ('Validation', y_val), ('Test', y_test)]:
    print(f" {name}: Malignant={sum(target==0)}, Benign={sum(target==1)}")
9.5 Step 3: Feature Scaling
SVM and KNN are highly sensitive to feature scale because they rely on distance metrics. Random Forest and Decision Tree do not require scaling, while Logistic Regression also benefits from it through faster convergence.
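To make this concrete, here is a small optional check (a sketch using X_train from Step 2): 'mean area' is on the order of hundreds while 'mean smoothness' is around 0.1, so before standardization a few large-scale features contribute almost the entire Euclidean distance. StandardScaler removes this by mapping each feature to z = (x - mean) / std, with the statistics computed on the training set only.
# Share of the squared Euclidean distance between the first two training
# samples contributed by each raw (unscaled) feature
diff = X_train.iloc[0] - X_train.iloc[1]
contrib = (diff ** 2) / (diff ** 2).sum()
print(contrib.nlargest(3))  # a handful of large-scale features dominate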
# Create scaler and fit on training data only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)
# Convert back to DataFrame for easier handling
X_train_scaled = pd.DataFrame(X_train_scaled, columns=X.columns)
X_val_scaled = pd.DataFrame(X_val_scaled, columns=X.columns)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=X.columns)
# Verify scaling
print("Before scaling (first feature):")
print(f" Mean: {X_train.iloc[:, 0].mean():.2f}")
print(f" Std: {X_train.iloc[:, 0].std():.2f}")
print("\nAfter scaling (first feature):")
print(f" Mean: {X_train_scaled.iloc[:, 0].mean():.2e}")
print(f" Std: {X_train_scaled.iloc[:, 0].std():.2f}")
print("\n✓ Feature scaling completed")9.6 Step 4: Baseline Model (Dummy Classifier)
# Create baseline using most frequent class
baseline = DummyClassifier(strategy='most_frequent', random_state=42)
baseline.fit(X_train_scaled, y_train)
# Predict
y_val_pred_baseline = baseline.predict(X_val_scaled)
# Evaluate
baseline_accuracy = accuracy_score(y_val, y_val_pred_baseline)
print("=" * 60)
print("BASELINE MODEL (Most Frequent Class)")
print("=" * 60)
print(f"\nValidation Accuracy: {baseline_accuracy:.4f}")
print(f"\nStrategy: Always predict '{data.target_names[1]}' (majority class)")
print(f"\n⚠️ Any model below {baseline_accuracy:.4f} accuracy is worse than random guessing!")
print("\n✓ Baseline established")9.7 Step 5: Model 1 - Logistic Regression
print("=" * 60)
print("MODEL 1: LOGISTIC REGRESSION")
print("=" * 60)
# Train
start_time = time.time()
lr_model = LogisticRegression(random_state=42, max_iter=10000)
lr_model.fit(X_train_scaled, y_train)
train_time = time.time() - start_time
# Predict
start_time = time.time()
y_val_pred_lr = lr_model.predict(X_val_scaled)
y_val_proba_lr = lr_model.predict_proba(X_val_scaled)[:, 1]
predict_time = time.time() - start_time
# Metrics
lr_accuracy = accuracy_score(y_val, y_val_pred_lr)
lr_precision = precision_score(y_val, y_val_pred_lr)
lr_recall = recall_score(y_val, y_val_pred_lr)
lr_f1 = f1_score(y_val, y_val_pred_lr)
lr_auc = roc_auc_score(y_val, y_val_proba_lr)
print(f"\nTraining time: {train_time:.4f} seconds")
print(f"Prediction time: {predict_time:.4f} seconds")
print(f"\nPerformance Metrics:")
print(f" Accuracy: {lr_accuracy:.4f}")
print(f" Precision: {lr_precision:.4f}")
print(f" Recall: {lr_recall:.4f}")
print(f" F1-Score: {lr_f1:.4f}")
print(f" ROC AUC: {lr_auc:.4f}")
print("\nConfusion Matrix:")
print(confusion_matrix(y_val, y_val_pred_lr))
print("\nClassification Report:")
print(classification_report(y_val, y_val_pred_lr, target_names=data.target_names))
9.8 Step 6: Model 2 - Decision Tree
print("=" * 60)
print("MODEL 2: DECISION TREE")
print("=" * 60)
# Train (use unscaled data - trees don't need scaling)
start_time = time.time()
dt_model = DecisionTreeClassifier(random_state=42, max_depth=5)
dt_model.fit(X_train, y_train)
train_time = time.time() - start_time
# Predict
start_time = time.time()
y_val_pred_dt = dt_model.predict(X_val)
y_val_proba_dt = dt_model.predict_proba(X_val)[:, 1]
predict_time = time.time() - start_time
# Metrics
dt_accuracy = accuracy_score(y_val, y_val_pred_dt)
dt_precision = precision_score(y_val, y_val_pred_dt)
dt_recall = recall_score(y_val, y_val_pred_dt)
dt_f1 = f1_score(y_val, y_val_pred_dt)
dt_auc = roc_auc_score(y_val, y_val_proba_dt)
print(f"\nTraining time: {train_time:.4f} seconds")
print(f"Prediction time: {predict_time:.4f} seconds")
print(f"\nPerformance Metrics:")
print(f" Accuracy: {dt_accuracy:.4f}")
print(f" Precision: {dt_precision:.4f}")
print(f" Recall: {dt_recall:.4f}")
print(f" F1-Score: {dt_f1:.4f}")
print(f" ROC AUC: {dt_auc:.4f}")
print("\nConfusion Matrix:")
print(confusion_matrix(y_val, y_val_pred_dt))
print(f"\nTree depth: {dt_model.get_depth()}")
print(f"Number of leaves: {dt_model.get_n_leaves()}")9.9 Step 7: Model 3 - Random Forest
print("=" * 60)
print("MODEL 3: RANDOM FOREST")
print("=" * 60)
# Train
start_time = time.time()
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
rf_model.fit(X_train, y_train)
train_time = time.time() - start_time
# Predict
start_time = time.time()
y_val_pred_rf = rf_model.predict(X_val)
y_val_proba_rf = rf_model.predict_proba(X_val)[:, 1]
predict_time = time.time() - start_time
# Metrics
rf_accuracy = accuracy_score(y_val, y_val_pred_rf)
rf_precision = precision_score(y_val, y_val_pred_rf)
rf_recall = recall_score(y_val, y_val_pred_rf)
rf_f1 = f1_score(y_val, y_val_pred_rf)
rf_auc = roc_auc_score(y_val, y_val_proba_rf)
print(f"\nTraining time: {train_time:.4f} seconds")
print(f"Prediction time: {predict_time:.4f} seconds")
print(f"\nPerformance Metrics:")
print(f" Accuracy: {rf_accuracy:.4f}")
print(f" Precision: {rf_precision:.4f}")
print(f" Recall: {rf_recall:.4f}")
print(f" F1-Score: {rf_f1:.4f}")
print(f" ROC AUC: {rf_auc:.4f}")
print("\nConfusion Matrix:")
print(confusion_matrix(y_val, y_val_pred_rf))
print(f"\nNumber of trees: {rf_model.n_estimators}")9.10 Step 8: Model 4 - Support Vector Machine (SVM)
print("=" * 60)
print("MODEL 4: SUPPORT VECTOR MACHINE (RBF Kernel)")
print("=" * 60)
# Train (MUST use scaled data)
start_time = time.time()
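# Note: probability=True enables predict_proba via an internal cross-validated
# Platt calibration, which makes SVC training noticeably slower.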
svm_model = SVC(kernel='rbf', random_state=42, probability=True)
svm_model.fit(X_train_scaled, y_train)
train_time = time.time() - start_time
# Predict
start_time = time.time()
y_val_pred_svm = svm_model.predict(X_val_scaled)
y_val_proba_svm = svm_model.predict_proba(X_val_scaled)[:, 1]
predict_time = time.time() - start_time
# Metrics
svm_accuracy = accuracy_score(y_val, y_val_pred_svm)
svm_precision = precision_score(y_val, y_val_pred_svm)
svm_recall = recall_score(y_val, y_val_pred_svm)
svm_f1 = f1_score(y_val, y_val_pred_svm)
svm_auc = roc_auc_score(y_val, y_val_proba_svm)
print(f"\nTraining time: {train_time:.4f} seconds")
print(f"Prediction time: {predict_time:.4f} seconds")
print(f"\nPerformance Metrics:")
print(f" Accuracy: {svm_accuracy:.4f}")
print(f" Precision: {svm_precision:.4f}")
print(f" Recall: {svm_recall:.4f}")
print(f" F1-Score: {svm_f1:.4f}")
print(f" ROC AUC: {svm_auc:.4f}")
print("\nConfusion Matrix:")
print(confusion_matrix(y_val, y_val_pred_svm))
print(f"\nNumber of support vectors: {svm_model.n_support_.sum()}")9.11 Step 9: Model 5 - K-Nearest Neighbors (KNN)
print("=" * 60)
print("MODEL 5: K-NEAREST NEIGHBORS (k=5)")
print("=" * 60)
# Train (MUST use scaled data)
start_time = time.time()
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train_scaled, y_train)
train_time = time.time() - start_time
# Predict
start_time = time.time()
y_val_pred_knn = knn_model.predict(X_val_scaled)
y_val_proba_knn = knn_model.predict_proba(X_val_scaled)[:, 1]
predict_time = time.time() - start_time
# Metrics
knn_accuracy = accuracy_score(y_val, y_val_pred_knn)
knn_precision = precision_score(y_val, y_val_pred_knn)
knn_recall = recall_score(y_val, y_val_pred_knn)
knn_f1 = f1_score(y_val, y_val_pred_knn)
knn_auc = roc_auc_score(y_val, y_val_proba_knn)
print(f"\nTraining time: {train_time:.4f} seconds")
print(f"Prediction time: {predict_time:.4f} seconds")
print(f"\nPerformance Metrics:")
print(f" Accuracy: {knn_accuracy:.4f}")
print(f" Precision: {knn_precision:.4f}")
print(f" Recall: {knn_recall:.4f}")
print(f" F1-Score: {knn_f1:.4f}")
print(f" ROC AUC: {knn_auc:.4f}")
print("\nConfusion Matrix:")
print(confusion_matrix(y_val, y_val_pred_knn))
print(f"\nNumber of neighbors: {knn_model.n_neighbors}")9.12 Step 10: Performance Comparison
# Create comparison DataFrame
results = pd.DataFrame({
'Algorithm': ['Baseline', 'Logistic Regression', 'Decision Tree',
'Random Forest', 'SVM (RBF)', 'KNN (k=5)'],
'Accuracy': [baseline_accuracy, lr_accuracy, dt_accuracy,
rf_accuracy, svm_accuracy, knn_accuracy],
'Precision': [precision_score(y_val, y_val_pred_baseline), lr_precision, dt_precision, rf_precision, svm_precision, knn_precision],
'Recall': [recall_score(y_val, y_val_pred_baseline), lr_recall, dt_recall, rf_recall, svm_recall, knn_recall],
'F1-Score': [f1_score(y_val, y_val_pred_baseline), lr_f1, dt_f1, rf_f1, svm_f1, knn_f1],
'ROC AUC': [0.5, lr_auc, dt_auc, rf_auc, svm_auc, knn_auc]  # a constant predictor has AUC 0.5
})
print("=" * 80)
print("ALGORITHM PERFORMANCE COMPARISON (Validation Set)")
print("=" * 80)
print(results.to_string(index=False))
# Find best model
best_idx = results['F1-Score'].idxmax()
print(f"\n🏆 Best Model (by F1-Score): {results.loc[best_idx, 'Algorithm']}")
print(f" F1-Score: {results.loc[best_idx, 'F1-Score']:.4f}")9.12.1 Visualization: Metric Comparison
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
# Accuracy comparison
ax1 = axes[0, 0]
bars1 = ax1.barh(results['Algorithm'][1:], results['Accuracy'][1:], color='steelblue', alpha=0.7)
ax1.axvline(baseline_accuracy, color='red', linestyle='--', linewidth=2, label='Baseline')
ax1.set_xlabel('Accuracy', fontsize=12)
ax1.set_title('Model Accuracy Comparison', fontsize=14, fontweight='bold')
ax1.set_xlim(0, 1)
ax1.legend()
ax1.grid(axis='x', alpha=0.3)
# Add value labels
for i, bar in enumerate(bars1):
    width = bar.get_width()
    ax1.text(width, bar.get_y() + bar.get_height()/2,
             f'{width:.4f}', ha='left', va='center', fontweight='bold')
# F1-Score comparison
ax2 = axes[0, 1]
bars2 = ax2.barh(results['Algorithm'][1:], results['F1-Score'][1:], color='seagreen', alpha=0.7)
ax2.set_xlabel('F1-Score', fontsize=12)
ax2.set_title('Model F1-Score Comparison', fontsize=14, fontweight='bold')
ax2.set_xlim(0, 1)
ax2.grid(axis='x', alpha=0.3)
for i, bar in enumerate(bars2):
    width = bar.get_width()
    ax2.text(width, bar.get_y() + bar.get_height()/2,
             f'{width:.4f}', ha='left', va='center', fontweight='bold')
# ROC AUC comparison
ax3 = axes[1, 0]
bars3 = ax3.barh(results['Algorithm'][1:], results['ROC AUC'][1:], color='coral', alpha=0.7)
ax3.set_xlabel('ROC AUC', fontsize=12)
ax3.set_title('Model ROC AUC Comparison', fontsize=14, fontweight='bold')
ax3.set_xlim(0, 1)
ax3.grid(axis='x', alpha=0.3)
for i, bar in enumerate(bars3):
    width = bar.get_width()
    ax3.text(width, bar.get_y() + bar.get_height()/2,
             f'{width:.4f}', ha='left', va='center', fontweight='bold')
# Metrics radar chart (top 3 models by F1)
axes[1, 1].remove()  # swap the fourth Cartesian axes for a polar one
top3 = results.iloc[1:].nlargest(3, 'F1-Score')  # drop the baseline row, then take the top 3
categories = ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC AUC']
angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
angles += angles[:1]
ax4 = fig.add_subplot(2, 2, 4, projection='polar')
colors = ['#e74c3c', '#3498db', '#2ecc71']
for idx, (i, row) in enumerate(top3.iterrows()):
    values = [row['Accuracy'], row['Precision'], row['Recall'], row['F1-Score'], row['ROC AUC']]
    values += values[:1]
    ax4.plot(angles, values, 'o-', linewidth=2, label=row['Algorithm'], color=colors[idx])
    ax4.fill(angles, values, alpha=0.15, color=colors[idx])
ax4.set_xticks(angles[:-1])
ax4.set_xticklabels(categories, fontsize=10)
ax4.set_ylim(0, 1)
ax4.set_title('Top 3 Models - Metrics Radar', fontsize=14, fontweight='bold', pad=20)
ax4.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
ax4.grid(True)
plt.tight_layout()
plt.show()
9.12.2 ROC Curves Comparison
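Each point on a ROC curve is the (false positive rate, true positive rate) pair obtained at one decision threshold; sweeping the threshold traces out the curve, and the AUC equals the probability that the model ranks a randomly chosen positive (benign) case above a randomly chosen negative (malignant) one.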
fig, ax = plt.subplots(figsize=(10, 8))
# Plot ROC curves for all models
models_data = [
('Logistic Regression', y_val_proba_lr, '#3498db'),
('Decision Tree', y_val_proba_dt, '#e74c3c'),
('Random Forest', y_val_proba_rf, '#2ecc71'),
('SVM (RBF)', y_val_proba_svm, '#9b59b6'),
('KNN (k=5)', y_val_proba_knn, '#f39c12')
]
for name, proba, color in models_data:
    fpr, tpr, _ = roc_curve(y_val, proba)
    auc = roc_auc_score(y_val, proba)
    ax.plot(fpr, tpr, label=f'{name} (AUC={auc:.4f})', linewidth=2, color=color)
# Diagonal line (random classifier)
ax.plot([0, 1], [0, 1], 'k--', linewidth=2, label='Random Classifier (AUC=0.5000)')
ax.set_xlabel('False Positive Rate', fontsize=12)
ax.set_ylabel('True Positive Rate', fontsize=12)
ax.set_title('ROC Curves Comparison - All Models', fontsize=14, fontweight='bold')
ax.legend(loc='lower right', fontsize=10)
ax.grid(alpha=0.3)
plt.tight_layout()
plt.show()
9.13 Step 11: Hyperparameter Tuning (Top 2 Models)
We will run a Grid Search for the two best models by F1-Score. Grid Search tries every combination of the listed parameter values and evaluates each one with cross-validation.
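Conceptually, GridSearchCV is just an exhaustive loop like the sketch below (shown with a deliberately tiny, illustrative grid; the real grids in this step are larger). After the search, GridSearchCV also refits the best configuration on the full training set (refit=True by default), which is why best_estimator_ can be used directly later.
from itertools import product

best_score, best_params = -1.0, None
for n_est, depth in product([50, 100], [5, None]):  # illustrative values only
    candidate = RandomForestClassifier(n_estimators=n_est, max_depth=depth,
                                       random_state=42, n_jobs=-1)
    score = cross_val_score(candidate, X_train, y_train, cv=5, scoring='f1').mean()
    if score > best_score:
        best_score, best_params = score, {'n_estimators': n_est, 'max_depth': depth}
print(f"Best (sketch): {best_params} with CV F1 = {best_score:.4f}")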
# Identify top 2 models by F1-Score
top2_models = results.iloc[1:].nlargest(2, 'F1-Score')  # drop the baseline row, then take the top 2
print("Top 2 models to tune:")
print(top2_models[['Algorithm', 'F1-Score']].to_string(index=False))
9.13.1 Tuning Random Forest
print("\n" + "=" * 60)
print("HYPERPARAMETER TUNING: RANDOM FOREST")
print("=" * 60)
# Define parameter grid
rf_param_grid = {
'n_estimators': [50, 100, 200],
'max_depth': [5, 10, 15, None],
'min_samples_split': [2, 5, 10],
'min_samples_leaf': [1, 2, 4]
}
print(f"\nParameter grid: {len(rf_param_grid['n_estimators']) * len(rf_param_grid['max_depth']) * len(rf_param_grid['min_samples_split']) * len(rf_param_grid['min_samples_leaf'])} combinations")
# Grid Search with cross-validation
rf_grid_search = GridSearchCV(
RandomForestClassifier(random_state=42, n_jobs=-1),
rf_param_grid,
cv=5,
scoring='f1',
n_jobs=-1,
verbose=1
)
start_time = time.time()
rf_grid_search.fit(X_train, y_train)
tuning_time = time.time() - start_time
print(f"\nTuning completed in {tuning_time:.2f} seconds")
print(f"\nBest parameters:")
for param, value in rf_grid_search.best_params_.items():
print(f" {param}: {value}")
print(f"\nBest cross-validation F1-Score: {rf_grid_search.best_score_:.4f}")
# Evaluate on validation set
y_val_pred_rf_tuned = rf_grid_search.predict(X_val)
rf_tuned_f1 = f1_score(y_val, y_val_pred_rf_tuned)
rf_tuned_accuracy = accuracy_score(y_val, y_val_pred_rf_tuned)
print(f"\nValidation set performance:")
print(f" Accuracy: {rf_tuned_accuracy:.4f}")
print(f" F1-Score: {rf_tuned_f1:.4f}")
print(f"\nImprovement over default:")
print(f" Accuracy: {rf_tuned_accuracy - rf_accuracy:+.4f}")
print(f" F1-Score: {rf_tuned_f1 - rf_f1:+.4f}")9.13.2 Tuning SVM
print("\n" + "=" * 60)
print("HYPERPARAMETER TUNING: SVM")
print("=" * 60)
# Define parameter grid
svm_param_grid = {
'C': [0.1, 1, 10, 100],
'gamma': ['scale', 'auto', 0.001, 0.01, 0.1],
'kernel': ['rbf', 'linear']
}
print(f"\nParameter grid: {len(svm_param_grid['C']) * len(svm_param_grid['gamma']) * len(svm_param_grid['kernel'])} combinations")
# Grid Search with cross-validation
svm_grid_search = GridSearchCV(
SVC(random_state=42, probability=True),
svm_param_grid,
cv=5,
scoring='f1',
n_jobs=-1,
verbose=1
)
start_time = time.time()
svm_grid_search.fit(X_train_scaled, y_train)
tuning_time = time.time() - start_time
print(f"\nTuning completed in {tuning_time:.2f} seconds")
print(f"\nBest parameters:")
for param, value in svm_grid_search.best_params_.items():
print(f" {param}: {value}")
print(f"\nBest cross-validation F1-Score: {svm_grid_search.best_score_:.4f}")
# Evaluate on validation set
y_val_pred_svm_tuned = svm_grid_search.predict(X_val_scaled)
svm_tuned_f1 = f1_score(y_val, y_val_pred_svm_tuned)
svm_tuned_accuracy = accuracy_score(y_val, y_val_pred_svm_tuned)
print(f"\nValidation set performance:")
print(f" Accuracy: {svm_tuned_accuracy:.4f}")
print(f" F1-Score: {svm_tuned_f1:.4f}")
print(f"\nImprovement over default:")
print(f" Accuracy: {svm_tuned_accuracy - svm_accuracy:+.4f}")
print(f" F1-Score: {svm_tuned_f1 - svm_f1:+.4f}")9.14 Step 12: Cross-Validation Comparison
print("=" * 60)
print("CROSS-VALIDATION COMPARISON (5-Fold Stratified)")
print("=" * 60)
# Define cross-validation strategy
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
# Models to compare (including tuned versions)
models = {
'Logistic Regression': LogisticRegression(random_state=42, max_iter=10000),
'Decision Tree': DecisionTreeClassifier(random_state=42, max_depth=5),
'Random Forest (Default)': RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1),
'Random Forest (Tuned)': rf_grid_search.best_estimator_,
'SVM (Default)': SVC(kernel='rbf', random_state=42),
'SVM (Tuned)': svm_grid_search.best_estimator_,
'KNN (k=5)': KNeighborsClassifier(n_neighbors=5)
}
cv_results = []
for name, model in models.items():
    # Use scaled data for models that need it
    if 'SVM' in name or 'KNN' in name or 'Logistic' in name:
        X_data = X_train_scaled
    else:
        X_data = X_train
    # Perform cross-validation
    scores = cross_val_score(model, X_data, y_train, cv=cv, scoring='f1', n_jobs=-1)
    cv_results.append({
        'Algorithm': name,
        'Mean F1': scores.mean(),
        'Std F1': scores.std(),
        'Min F1': scores.min(),
        'Max F1': scores.max()
    })
    print(f"\n{name}:")
    print(f" F1-Score: {scores.mean():.4f} (+/- {scores.std():.4f})")
    print(f" Range: [{scores.min():.4f}, {scores.max():.4f}]")
# Create DataFrame
cv_df = pd.DataFrame(cv_results).sort_values('Mean F1', ascending=False)
print("\n" + "=" * 60)
print("CROSS-VALIDATION SUMMARY (Sorted by Mean F1)")
print("=" * 60)
print(cv_df.to_string(index=False))
9.14.1 Visualization: Cross-Validation Results
fig, ax = plt.subplots(figsize=(12, 7))
# Create bar plot with error bars
x = range(len(cv_df))
bars = ax.barh(cv_df['Algorithm'], cv_df['Mean F1'],
xerr=cv_df['Std F1'], capsize=5,
color='skyblue', alpha=0.7, edgecolor='black')
# Color the best model
bars[0].set_color('#2ecc71')
bars[0].set_alpha(0.9)
ax.set_xlabel('F1-Score (5-Fold Cross-Validation)', fontsize=12)
ax.set_title('Cross-Validation Performance Comparison', fontsize=14, fontweight='bold')
ax.set_xlim(0, 1)
ax.grid(axis='x', alpha=0.3)
# Add value labels
for i, (mean, std) in enumerate(zip(cv_df['Mean F1'], cv_df['Std F1'])):
    ax.text(mean, i, f'{mean:.4f}±{std:.4f}',
            va='center', ha='left', fontweight='bold', fontsize=9)
plt.tight_layout()
plt.show()
9.15 Step 13: Feature Importance Analysis
print("=" * 60)
print("FEATURE IMPORTANCE ANALYSIS (Random Forest - Tuned)")
print("=" * 60)
# Get feature importances
feature_importance = pd.DataFrame({
'Feature': X.columns,
'Importance': rf_grid_search.best_estimator_.feature_importances_
}).sort_values('Importance', ascending=False)
print("\nTop 15 Most Important Features:")
print(feature_importance.head(15).to_string(index=False))
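# A hedged aside (not part of the original lab): impurity-based importances
# can favor features with many distinct values. A quick cross-check with
# permutation importance on the validation set might look like this:
from sklearn.inspection import permutation_importance
perm = permutation_importance(rf_grid_search.best_estimator_, X_val, y_val,
                              n_repeats=10, random_state=42, n_jobs=-1)
print("\nTop 5 features by permutation importance:")
print(pd.Series(perm.importances_mean, index=X.columns).nlargest(5))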
# Visualization
fig, ax = plt.subplots(figsize=(10, 8))
top_features = feature_importance.head(15)
bars = ax.barh(range(len(top_features)), top_features['Importance'],
color='steelblue', alpha=0.7, edgecolor='black')
# Color top 3
for i in range(3):
    bars[i].set_color('#e74c3c')
    bars[i].set_alpha(0.9)
ax.set_yticks(range(len(top_features)))
ax.set_yticklabels(top_features['Feature'])
ax.set_xlabel('Importance Score', fontsize=12)
ax.set_title('Top 15 Feature Importances (Random Forest)', fontsize=14, fontweight='bold')
ax.invert_yaxis()
ax.grid(axis='x', alpha=0.3)
# Add value labels
for i, v in enumerate(top_features['Importance']):
    ax.text(v, i, f'{v:.4f}', va='center', ha='left', fontweight='bold')
plt.tight_layout()
plt.show()
9.16 Step 14: Final Model Selection and Test Set Evaluation
The test set is used ONLY ONCE, at the very end, for the final evaluation. Never use the test set for tuning or model selection!
print("=" * 80)
print("FINAL MODEL SELECTION")
print("=" * 80)
# Based on cross-validation, select best model
best_model_name = cv_df.iloc[0]['Algorithm']
best_model_f1 = cv_df.iloc[0]['Mean F1']
print(f"\n🏆 Selected Model: {best_model_name}")
print(f" Cross-Validation F1: {best_model_f1:.4f}")
# Get the best model
if best_model_name == 'Random Forest (Tuned)':
    final_model = rf_grid_search.best_estimator_
    X_test_final = X_test  # trees do not need scaled features
elif best_model_name == 'SVM (Tuned)':
    final_model = svm_grid_search.best_estimator_
    X_test_final = X_test_scaled  # SVM requires the scaled features
else:
    # Fallback: other winners would need their own handling here;
    # default to the tuned Random Forest
    final_model = rf_grid_search.best_estimator_
    X_test_final = X_test
print("\nSelection Criteria:")
print(" ✓ Highest cross-validation F1-Score")
print(" ✓ Low variance across folds (stability)")
print(" ✓ Interpretability through feature importance")
print(" ✓ Reasonable training/prediction time")
print("\n" + "=" * 80)
print("FINAL TEST SET EVALUATION")
print("=" * 80)
# Predict on test set
y_test_pred = final_model.predict(X_test_final)
y_test_proba = final_model.predict_proba(X_test_final)[:, 1]
# Calculate all metrics
test_accuracy = accuracy_score(y_test, y_test_pred)
test_precision = precision_score(y_test, y_test_pred)
test_recall = recall_score(y_test, y_test_pred)
test_f1 = f1_score(y_test, y_test_pred)
test_auc = roc_auc_score(y_test, y_test_proba)
print(f"\nTest Set Performance:")
print(f" Accuracy: {test_accuracy:.4f}")
print(f" Precision: {test_precision:.4f}")
print(f" Recall: {test_recall:.4f}")
print(f" F1-Score: {test_f1:.4f}")
print(f" ROC AUC: {test_auc:.4f}")
print("\nConfusion Matrix:")
cm = confusion_matrix(y_test, y_test_pred)
print(cm)
print("\nDetailed Classification Report:")
print(classification_report(y_test, y_test_pred, target_names=data.target_names))
# Calculate additional insights
tn, fp, fn, tp = cm.ravel()
print("\nClinical Interpretation:")
print(f" True Positives (Benign correctly identified): {tp}")
print(f" True Negatives (Malignant correctly identified): {tn}")
print(f" False Positives (Malignant misclassified as Benign): {fp} ⚠️")
print(f" False Negatives (Benign misclassified as Malignant): {fn}")
print(f"\n False Positive Rate: {fp/(fp+tn):.4f}")
print(f" False Negative Rate: {fn/(fn+tp):.4f}")
if fp > 0:
    print(f"\n⚠️ WARNING: {fp} malignant cases were misclassified as benign!")
    print(" This is critical in medical diagnosis - missing cancer cases.")
9.16.1 Final Model Visualization
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
# Confusion Matrix Heatmap
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
xticklabels=data.target_names,
yticklabels=data.target_names,
ax=axes[0], cbar_kws={'label': 'Count'})
axes[0].set_ylabel('True Label', fontsize=12)
axes[0].set_xlabel('Predicted Label', fontsize=12)
axes[0].set_title('Confusion Matrix (Test Set)', fontsize=14, fontweight='bold')
# ROC Curve
fpr, tpr, thresholds = roc_curve(y_test, y_test_proba)
axes[1].plot(fpr, tpr, linewidth=3, label=f'{best_model_name} (AUC={test_auc:.4f})', color='#2ecc71')
axes[1].plot([0, 1], [0, 1], 'k--', linewidth=2, label='Random Classifier')
axes[1].set_xlabel('False Positive Rate', fontsize=12)
axes[1].set_ylabel('True Positive Rate', fontsize=12)
axes[1].set_title('ROC Curve (Test Set)', fontsize=14, fontweight='bold')
axes[1].legend(fontsize=11)
axes[1].grid(alpha=0.3)
plt.tight_layout()
plt.show()
9.17 Summary and Conclusions
print("=" * 80)
print("LAB SUMMARY AND KEY TAKEAWAYS")
print("=" * 80)
print(f"""
📊 EXPERIMENTAL RESULTS:
1. Dataset: Breast Cancer Wisconsin (569 samples, 30 features)
- Training: {len(X_train)} samples
- Validation: {len(X_val)} samples
- Test: {len(X_test)} samples
2. Algorithms Compared: 5 classical ML algorithms
- Logistic Regression
- Decision Tree
- Random Forest ⭐
- Support Vector Machine
- K-Nearest Neighbors
3. Best Model: {best_model_name}
- Cross-Validation F1: {best_model_f1:.4f}
- Test Set F1: {test_f1:.4f}
- Test Set Accuracy: {test_accuracy:.4f}
4. Hyperparameter Tuning:
- Random Forest: {rf_tuned_f1 - rf_f1:+.4f} F1 improvement
- SVM: {svm_tuned_f1 - svm_f1:+.4f} F1 improvement
🎯 KEY LEARNINGS:
1. No Single Best Algorithm:
- Performance depends on dataset characteristics
- Must compare multiple algorithms empirically
2. Feature Scaling is Critical:
- SVM and KNN require scaled features
- Forgetting to scale → poor performance
3. Hyperparameter Tuning Matters:
- Default parameters often suboptimal
- Grid Search improves performance significantly
4. Cross-Validation > Single Split:
- More reliable performance estimate
- Detects overfitting and variance issues
5. Model Selection Criteria:
- Not just accuracy - consider precision/recall trade-offs
- In medical diagnosis: minimize False Negatives!
- Consider interpretability and deployment constraints
6. Feature Importance:
- Top features: {', '.join(feature_importance.head(3)['Feature'].tolist())}
- Domain knowledge + feature importance → better insights
⚠️ CRITICAL CONSIDERATIONS FOR PRODUCTION:
1. Class Imbalance: Dataset is {(y==1).sum()/(y==0).sum():.2f}:1 (Benign:Malignant)
- May need class weighting or resampling
2. False Positives vs False Negatives:
- False Negative = Missing cancer (CRITICAL!)
- May need to adjust decision threshold
3. Model Interpretability:
- Doctors need to understand predictions
- Random Forest provides feature importance
- Consider SHAP/LIME for explanations
4. Regular Retraining:
- Medical data evolves
- Monitor model performance over time
✅ LAB OBJECTIVES ACHIEVED:
✓ Implemented 5 classical ML algorithms
✓ Performed fair comparison with same train/test splits
✓ Conducted hyperparameter tuning with GridSearchCV
✓ Validated models using cross-validation
✓ Selected best model with clear justification
✓ Analyzed feature importance
""")9.18 Refleksi dan Diskusi
9.19 Further Exploration
Further Exploration (Optional):
More Detailed Hyperparameter Tuning:
- Try RandomizedSearchCV for larger search spaces
- Implement Bayesian optimization
Ensemble Methods (see the sketch after this list):
- Voting Classifier (combining several models)
- Stacking Classifier
Feature Engineering:
- PCA for dimensionality reduction
- Polynomial features
Advanced Evaluation:
- Learning curves to diagnose overfitting
- Precision-Recall curves for imbalanced data
- Calibration curves
Model Interpretability:
- SHAP (SHapley Additive exPlanations)
- LIME (Local Interpretable Model-agnostic Explanations)
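As a starting point for the ensemble idea above, here is a minimal, hedged sketch of a soft-voting ensemble built from models trained in this lab (it assumes rf_grid_search and svm_grid_search from Step 11 are in scope; the estimator list and weights are illustrative, not tuned). Random Forest is scale-invariant, so fitting the whole ensemble on the scaled features is harmless and keeps a single input matrix:
from sklearn.ensemble import VotingClassifier

voting = VotingClassifier(
    estimators=[
        ('lr', LogisticRegression(random_state=42, max_iter=10000)),
        ('rf', rf_grid_search.best_estimator_),
        ('svm', svm_grid_search.best_estimator_),  # trained with probability=True
    ],
    voting='soft'  # average the predicted class probabilities
)
voting.fit(X_train_scaled, y_train)
print(f"Voting ensemble validation F1: {f1_score(y_val, voting.predict(X_val_scaled)):.4f}")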
9.20 References
- Pedregosa et al. (2011). Scikit-learn: Machine Learning in Python. JMLR 12, pp. 2825-2830
- Breast Cancer Wisconsin Dataset: UCI ML Repository
- Hastie, T., Tibshirani, R., & Friedman, J. (2009). The Elements of Statistical Learning
- Scikit-learn Documentation: https://scikit-learn.org/
Lab Created by: Pembelajaran Mesin Course Team
Last Updated: 2025
License: Educational Use Only