"""Visualize classification outcomes and evaluate ranking quality across thresholds.

The confusion matrix is a 2x2 table of TP, FP, TN, FN; it drives precision/recall
and error analysis. The ROC curve plots TPR vs. FPR over all decision thresholds,
and the AUC summarizes ranking quality (1.0 = perfect, 0.5 = random).
"""
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Synthetic, imbalanced binary problem (~90% negatives) so the confusion
# matrix and ROC behave like a realistic rare-positive task.
X, y = make_classification(n_samples=2000, weights=[0.9, 0.1], random_state=42)

# Stratified split preserves the 90/10 class ratio in both partitions.
X_tr, X_te, y_tr, y_te = train_test_split(X, y, stratify=y, random_state=42)

# Plain logistic regression; max_iter raised so the solver converges cleanly.
clf = LogisticRegression(max_iter=5000).fit(X_tr, y_tr)

# Positive-class scores drive both the hard predictions and the ROC sweep.
proba = clf.predict_proba(X_te)[:, 1]

# Hard labels at the default 0.5 cut-off -> confusion matrix.
# sklearn layout: rows = true class, cols = predicted: [[TN, FP], [FN, TP]].
y_pred = (proba >= 0.5).astype(int)
print(confusion_matrix(y_te, y_pred))

# Full ROC sweep over every threshold; AUC condenses it to one number.
fpr, tpr, thr = roc_curve(y_te, proba)
print("ROC-AUC:", roc_auc_score(y_te, proba))

# Fix: fpr/tpr/thr were computed but never used. Put the curve to work by
# reporting the threshold maximizing Youden's J (TPR - FPR), a standard
# operating point for imbalanced problems where 0.5 is rarely optimal.
j = tpr - fpr
best = j.argmax()
print(f"Best threshold (Youden's J): {thr[best]:.3f} "
      f"(TPR={tpr[best]:.3f}, FPR={fpr[best]:.3f})")
