Introduction
Hyperparameter tuning optimizes model performance by systematically searching over parameter combinations.
Grid Search CV
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.datasets import make_classification
# Small synthetic classification dataset; the fixed seed keeps runs reproducible.
X, y = make_classification(n_samples=100, random_state=42)

# Candidate values per SVC hyperparameter. Grid search tries every
# combination: 3 (C) x 2 (kernel) x 2 (gamma) = 12 candidates.
param_grid = dict(
    C=[0.1, 1, 10],
    kernel=['linear', 'rbf'],
    gamma=['scale', 'auto'],
)

# Each candidate is scored by mean accuracy over 5 cross-validation folds;
# n_jobs=-1 lets scikit-learn run the fits in parallel on all processors.
grid_search = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X, y)

# Report the winning combination and its mean cross-validated accuracy.
print(
    f"Best parameters: {grid_search.best_params_}",
    f"Best score: {grid_search.best_score_:.3f}",
    sep="\n",
)
Randomized Search
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform, randint
# Search spaces for RandomizedSearchCV. A *list* of dicts is used because a
# single entry cannot mix string presets with a continuous distribution:
# items inside a list are sampled as literal values, so a frozen
# distribution placed in a list would be handed to SVC(gamma=...) unsampled
# and fail at fit time. For each candidate one dict is picked uniformly at
# random, then every parameter is drawn from that dict.
#
# scipy's uniform(loc, scale) spans [loc, loc + scale], so C is drawn from
# [0.1, 10.1] and the continuous gamma from [0.01, 0.11].
# 'degree' (used only by the 'poly' kernel, ignored otherwise) replaces the
# former 'max_depth' entry, which is not an SVC parameter and made the
# search raise an invalid-parameter error.
param_dist = [
    {
        'C': uniform(0.1, 10),
        'kernel': ['linear', 'rbf', 'poly'],
        'gamma': ['scale', 'auto'],
        'degree': randint(2, 6),  # integers 2..5 (upper bound exclusive)
    },
    {
        'C': uniform(0.1, 10),
        'kernel': ['linear', 'rbf', 'poly'],
        'gamma': uniform(0.01, 0.1),
        'degree': randint(2, 6),
    },
]

# Evaluate 30 randomly sampled candidates with 5-fold cross-validation.
# random_state fixes the sampling so the same candidates are drawn each run.
random_search = RandomizedSearchCV(
    SVC(),
    param_dist,
    n_iter=30,
    cv=5,
    random_state=42,
    n_jobs=-1,
)
random_search.fit(X, y)
Cross-Validation Splitter
# RandomForestClassifier lives in sklearn.ensemble; it must be imported here
# because the snippet below instantiates it.
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold

# Explicit splitter instead of an integer `cv`: stratified folds keep the
# class proportions of y in every fold, and shuffling with a fixed seed makes
# the fold assignment reproducible.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 2 x 2 = 4 candidate forests, each scored with F1 on the custom folds.
# NOTE: scoring='f1' is the binary F1 score, so this assumes y is the
# two-class target produced by make_classification earlier in the tutorial.
grid_search = GridSearchCV(
    RandomForestClassifier(random_state=42),
    {'n_estimators': [50, 100], 'max_depth': [5, 10]},
    cv=cv,
    scoring='f1',
)
grid_search.fit(X, y)
Nested CV
# Nested cross-validation: the outer loop evaluates, the inner loop tunes.
# Tuning and scoring on the same folds gives an optimistically biased
# estimate, so the hyperparameter search is re-run inside every outer
# training split and scored on the held-out outer fold.
# KFold is imported here because both splitters below are built from it.
from sklearn.model_selection import GridSearchCV, KFold, cross_val_score

inner_cv = KFold(n_splits=3, shuffle=True, random_state=42)  # tuning folds
outer_cv = KFold(n_splits=5, shuffle=True, random_state=42)  # evaluation folds

# GridSearchCV behaves like an estimator: fitting it runs the inner search.
tuned_clf = GridSearchCV(SVC(), {'C': [0.1, 1]}, cv=inner_cv)

# One score per outer fold (5 values), each from a freshly tuned model.
scores = cross_val_score(tuned_clf, X, y, cv=outer_cv)
Practice Problems
- Perform a grid search for an SVM
- Use randomized search for a large parameter space
- Customize the CV splitter
- Use nested cross-validation
- Access the best estimator