Estimate generalization reliably with K-Fold/Stratified K-Fold and find the best hyperparameters using GridSearchCV.
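GridSearchCV runs an exhaustive search over the parameter grid, scoring every combination with cross-validation. Start with a coarse grid and refine it around the best values later.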
from sklearn.datasets import load_wine
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import (
    GridSearchCV,
    StratifiedKFold,
    train_test_split,
)
# Load the wine dataset as a feature matrix X and a label vector y.
X, y = load_wine(return_X_y=True)

# Hold out a stratified test split that the search never sees. best_score_ is
# the score that selected the winning configuration, so on its own it is an
# optimistically biased estimate of generalization.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Shuffled, stratified 5-fold CV; the fixed seed makes the folds reproducible.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
rf = RandomForestClassifier(random_state=42)

# Coarse grid: 2 * 3 * 3 * 2 = 36 candidates, each fitted once per fold.
param_grid = {
    "n_estimators": [100, 200],
    "max_depth": [None, 6, 10],
    "min_samples_leaf": [1, 2, 4],
    "max_features": ["sqrt", "log2"],
}

# Macro-F1 averages the per-class F1 scores with equal weight per class;
# n_jobs=-1 runs the fits on all available processors.
grid = GridSearchCV(rf, param_grid, cv=skf, n_jobs=-1, scoring="f1_macro")
grid.fit(X_train, y_train)

print("Best params:", grid.best_params_)
print("Best CV score:", round(grid.best_score_, 3))

# With the default refit=True the best configuration is retrained on the whole
# training split, so grid.predict uses that final model. Scoring it on the
# held-out split gives an estimate that played no part in model selection.
print(classification_report(y_test, grid.predict(X_test)))
