Introduction
Semantic segmentation assigns a class label to every pixel in an image, enabling pixel-level understanding of the scene.
U-Net
import torch
import torch.nn as nn
class DoubleConv(nn.Module):
    """Two stacked 3x3 convolutions, each followed by batch norm and ReLU.

    Both convolutions use ``padding=1`` with a 3x3 kernel, so the spatial
    size of the input is preserved; only the channel count changes from
    ``in_ch`` to ``out_ch``.

    Args:
        in_ch: Number of channels of the incoming feature map.
        out_ch: Number of channels produced by each of the two convolutions.
    """

    def __init__(self, in_ch: int, out_ch: int) -> None:
        super().__init__()
        self.conv = nn.Sequential(
            # First conv changes the channel count in_ch -> out_ch.
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            # Second conv keeps the channel count at out_ch.
            nn.Conv2d(out_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the conv -> BN -> ReLU stack twice and return it."""
        return self.conv(x)
class UNet(nn.Module):
    """U-Net style encoder-decoder producing per-pixel class logits.

    The encoder applies four ``DoubleConv`` stages (64, 128, 256, 512
    channels) separated by 2x2 max pooling. The decoder mirrors the three
    pooling steps: each stage up-samples with a stride-2 transposed
    convolution, concatenates the encoder feature map of the same
    resolution (skip connection) and refines the result with a
    ``DoubleConv``. A final 1x1 convolution maps the 64 decoder channels
    to ``num_classes``.

    Args:
        in_channels: Number of channels of the input image.
        num_classes: Number of output channels (one logit map per class).
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        # Encoder: channel count doubles at every stage.
        self.enc1 = DoubleConv(in_channels, 64)
        self.enc2 = DoubleConv(64, 128)
        self.enc3 = DoubleConv(128, 256)
        self.enc4 = DoubleConv(256, 512)
        self.pool = nn.MaxPool2d(2)

        # Decoder: each DoubleConv takes the up-sampled map concatenated
        # with the matching encoder output, hence twice the up-conv's
        # output channels on its input side.
        self.up4 = nn.ConvTranspose2d(512, 256, 2, stride=2)
        self.dec4 = DoubleConv(512, 256)
        self.up3 = nn.ConvTranspose2d(256, 128, 2, stride=2)
        self.dec3 = DoubleConv(256, 128)
        # Third decoder stage undoes the first pooling so the logits come
        # out at the input resolution and enc1 gets its skip connection.
        self.up2 = nn.ConvTranspose2d(128, 64, 2, stride=2)
        self.dec2 = DoubleConv(128, 64)

        self.final = nn.Conv2d(64, num_classes, 1)

    def forward(self, x):
        """Return class logits with the same height and width as ``x``.

        The input height and width should be divisible by 8 (three 2x2
        poolings); otherwise the up-sampled maps and the skip connections
        differ in size and ``torch.cat`` raises.
        """
        # Contracting path; keep each stage's output for the skip links.
        skip1 = self.enc1(x)
        skip2 = self.enc2(self.pool(skip1))
        skip3 = self.enc3(self.pool(skip2))
        bottleneck = self.enc4(self.pool(skip3))

        # Expanding path: up-sample, concatenate along channels, refine.
        dec = self.dec4(torch.cat([self.up4(bottleneck), skip3], dim=1))
        dec = self.dec3(torch.cat([self.up3(dec), skip2], dim=1))
        dec = self.dec2(torch.cat([self.up2(dec), skip1], dim=1))
        return self.final(dec)
Segmentation Models
# Using segmentation_models_pytorch
import segmentation_models_pytorch as smp

# U-Net decoder on top of a ResNet-34 encoder: 3-channel input images,
# 10 output classes, encoder weights selected by the "imagenet" preset.
model = smp.Unet(
    in_channels=3,
    classes=10,
    encoder_name="resnet34",
    encoder_weights="imagenet",
)
Mask R-CNN
import torch
import torchvision
from torchvision.models.detection import maskrcnn_resnet50_fpn

# NOTE: recent torchvision releases deprecate `pretrained=True` in favour of
# the `weights=` argument; the flag is kept here so the snippet also runs on
# older releases.
model = maskrcnn_resnet50_fpn(pretrained=True)
model.eval()  # put BatchNorm/Dropout layers into inference behaviour

# Get masks
# `img_tensor` is not defined in this snippet and must be prepared by the
# caller (presumably a list of CHW float image tensors - confirm against the
# torchvision detection docs).
with torch.no_grad():  # inference only: skip building the autograd graph
    outputs = model(img_tensor)
# One result dict per input image; take the masks predicted for the first one.
masks = outputs[0]['masks']
DeepLab
import torch
import torchvision
from torchvision.models.segmentation import deeplabv3_resnet50

# NOTE: recent torchvision releases deprecate `pretrained=True` in favour of
# the `weights=` argument; the flag is kept here so the snippet also runs on
# older releases.
model = deeplabv3_resnet50(pretrained=True)
model.eval()  # put BatchNorm/Dropout layers into inference behaviour

# Get segmentation
# `img_tensor` is not defined in this snippet and must be prepared by the
# caller (presumably a normalised NCHW float batch - confirm against the
# torchvision segmentation docs).
with torch.no_grad():  # inference only: skip building the autograd graph
    # The model returns a dict; the 'out' entry is the main prediction.
    output = model(img_tensor)['out']
Practice Problems
- Build U-Net architecture
- Train on segmentation dataset
- Use pre-trained segmentation models
- Evaluate with IoU (Intersection over Union)
- Visualize segmentation masks