Introduction
Dimensionality reduction techniques transform high-dimensional data into lower dimensions while preserving important structure.
PCA
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris
import numpy as np
# Load the iris dataset and keep only its feature matrix.
iris = load_iris()
X = iris.data

# Fit a two-component PCA and project the data in a single step.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# Report how much variance each component captures, their combined share,
# and the shape of the fitted component matrix.
print(
    f"Explained variance ratio: {pca.explained_variance_ratio_}",
    f"Total variance explained: {sum(pca.explained_variance_ratio_):.3f}",
    f"Components shape: {pca.components_.shape}",
    sep="\n",
)
Incremental PCA
from sklearn.decomposition import IncrementalPCA
import numpy as np

# X_large was never defined in the original snippet (NameError). This seeded
# synthetic array stands in for a dataset too big to decompose in one pass;
# replace it with your own (n_samples, n_features) data.
rng = np.random.default_rng(42)
X_large = rng.normal(size=(10_000, 50))

# batch_size controls how many rows are processed per incremental update.
ipca = IncrementalPCA(n_components=2, batch_size=100)
# NOTE: this rebinds X_pca, replacing the iris projection computed earlier.
X_pca = ipca.fit_transform(X_large)
t-SNE
from sklearn.manifold import TSNE
from sklearn.datasets import load_digits

# scikit-learn exposes the handwritten-digits dataset through load_digits();
# sklearn.datasets has no make_digits, and load_digits accepts neither
# n_samples nor random_state.
digits = load_digits()
# Keep the first 1000 samples so the embedding stays quick to compute;
# digits.target[:1000] holds the matching labels for colouring a plot.
X_digits = digits.data[:1000]
tsne = TSNE(n_components=2, perplexity=30, random_state=42)
X_tsne = tsne.fit_transform(X_digits)
UMAP
import umap
import numpy as np

# X_high_dim and X_large were never defined in the original snippet
# (NameError). These seeded synthetic arrays make it runnable on its own;
# replace them with your own (n_samples, n_features) data.
rng = np.random.default_rng(42)
X_high_dim = rng.normal(size=(500, 64))
X_large = rng.normal(size=(10_000, 50))

# Seeded 2-D embedding.
reducer = umap.UMAP(n_components=2, random_state=42)
X_umap = reducer.fit_transform(X_high_dim)

# For large datasets
# NOTE(review): no random_state is set here, so this 3-D embedding can vary
# between runs. UMAP's documentation says fixing the seed disables
# parallelism -- confirm that leaving it unset is the intended trade-off.
reducer = umap.UMAP(n_neighbors=15, min_dist=0.1, n_components=3)
X_3d = reducer.fit_transform(X_large)
Practice Problems
- Reduce dimensions with PCA
- Visualize digits with t-SNE
- Compare PCA vs UMAP
- Determine optimal number of components
- Use inverse_transform to reconstruct