Reduce the feature space to 2 or 3 dimensions with PCA to inspect class separation, outliers, and cluster structure before modeling.
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D # noqa: F401
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_wine
# Load the wine dataset as a (features, labels) pair.
X, y = load_wine(return_X_y=True)
# Standardize the features before PCA so that no single feature dominates
# the principal components merely because of its raw scale.
Xs = StandardScaler().fit(X).transform(X)
# 2D PCA: project the standardized data onto the first two components.
pca2 = PCA(n_components=2, random_state=42)
X2 = pca2.fit_transform(Xs)

# Scatter the two component columns against each other, colored by label.
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(*X2.T, c=y, s=28)  # X2.T unpacks into the (PC1, PC2) columns
ax.set(xlabel="PC1", ylabel="PC2", title="PCA 2D")
fig.tight_layout()
plt.show()
# 3D PCA: project the standardized data onto the first three components.
pca3 = PCA(n_components=3, random_state=42)
X3 = pca3.fit_transform(Xs)

# Create the figure and its single 3D axes in one call.
fig, ax = plt.subplots(figsize=(7, 5), subplot_kw={"projection": "3d"})
ax.scatter(*X3.T, c=y, s=18)  # X3.T unpacks into the (PC1, PC2, PC3) columns
ax.set(xlabel="PC1", ylabel="PC2", zlabel="PC3", title="PCA 3D")
fig.tight_layout()
plt.show()
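Tip: Use PCA plots to detect outliers and check rough separability before choosing algorithms (e.g., linear vs. non-linear). Because PCA is itself a linear projection, classes that overlap in these plots may still be separable in the full feature space, so treat the plots as a quick sanity check rather than proof.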
