Project 3: Deploy a Deep Learning Model
End-to-end project: train a deep learning model, build a FastAPI service, containerize with Docker, and deploy to cloud infrastructure.
Project Architecture
Phase 1: Model Training
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import DataLoader
class ImageClassifier(nn.Module):
    """Transfer-learning image classifier built on a frozen ResNet-18.

    The ImageNet-pretrained backbone is frozen and only the replacement
    fully connected head is left trainable.

    Args:
        num_classes: Number of output classes for the new head.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # `pretrained=True` is deprecated since torchvision 0.13; the explicit
        # weights enum selects the same ImageNet checkpoint without a warning.
        self.backbone = models.resnet18(
            weights=models.ResNet18_Weights.IMAGENET1K_V1
        )
        # Freeze every pretrained parameter before swapping the head, so the
        # new layer created below is the only one with requires_grad=True.
        for param in self.backbone.parameters():
            param.requires_grad = False
        self.backbone.fc = nn.Linear(self.backbone.fc.in_features, num_classes)

    def forward(self, x):
        """Return the backbone's class logits for the input batch `x`."""
        return self.backbone(x)
# Preprocessing pipeline: PIL image -> normalized float tensor of shape
# (3, 224, 224).
transform = transforms.Compose([
    transforms.Resize(224),
    # Resize(int) only scales the shorter side to 224 and keeps the aspect
    # ratio, so non-square images would not be 224x224. Crop the centre so
    # every sample has the same shape and can be batched.
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    # ImageNet channel statistics used by the pretrained torchvision weights.
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])
Phase 2: Export Model
# Export to TorchScript for production
# NOTE(review): `model` is not defined in this snippet; it is presumably the
# trained ImageClassifier instance from Phase 1 -- confirm before running.
# Switch to eval mode so the trace records inference-time layer behaviour.
model.eval()
dummy = torch.randn(1, 3, 224, 224)
scripted = torch.jit.trace(model, dummy)
scripted.save("model.pt")

# Or export to ONNX
torch.onnx.export(
    model,
    dummy,
    "model.onnx",
    input_names=["input"],
    output_names=["output"],
    # Mark the batch dimension as dynamic on BOTH tensors; declaring it only
    # on the input can leave the output batch size fixed at the dummy's 1.
    dynamic_axes={
        "input": {0: "batch_size"},
        "output": {0: "batch_size"},
    },
)
Phase 3: FastAPI Server
import base64
import io

import numpy as np
import torch
from fastapi import FastAPI, HTTPException
from PIL import Image
from pydantic import BaseModel
app = FastAPI()

# Load the TorchScript artifact once at import time so every request reuses
# the same in-memory model; eval() keeps layers in inference mode.
model = torch.jit.load("model.pt")
model.eval()


@app.get("/health")
async def health():
    """Liveness probe.

    The container health check in the ECS task definition requests
    `/health`; without this route the check would always receive a 404.
    """
    return {"status": "ok"}
# Request body accepted by POST /predict.
class PredictionRequest(BaseModel):
    # Base64-encoded bytes of the image file to classify; the handler decodes
    # this string and opens the result with PIL.
    image_base64: str
@app.post("/predict")
async def predict(request: PredictionRequest):
    """Classify a base64-encoded image with the loaded TorchScript model.

    Args:
        request: Payload whose `image_base64` field holds the encoded image.

    Returns:
        A dict with the predicted class index under "class" and its softmax
        probability under "confidence".

    Raises:
        HTTPException: 400 if the payload is not valid base64 or does not
            decode to an image PIL can read.
    """
    # Client-supplied data: turn decode failures into a 400 instead of letting
    # them escape as an unhandled 500. binascii.Error is a ValueError and
    # PIL's UnidentifiedImageError is an OSError.
    try:
        image_bytes = base64.b64decode(request.image_base64)
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    except (ValueError, OSError) as exc:
        raise HTTPException(
            status_code=400,
            detail="image_base64 is not a valid base64-encoded image",
        ) from exc

    # Add the batch dimension the model expects: (C, H, W) -> (1, C, H, W).
    tensor = transform(image).unsqueeze(0)
    # NOTE: the forward pass is CPU-bound and runs on the event loop, so it
    # blocks other requests handled by this worker while it executes.
    with torch.no_grad():
        output = model(tensor)
        probs = torch.softmax(output, dim=1)
        pred = torch.argmax(probs, dim=1).item()
    return {"class": pred, "confidence": probs[0][pred].item()}
Phase 4: Docker Build
# ---- Build stage: install dependencies and export the model ----
FROM python:3.11-slim AS builder
WORKDIR /app

# Install into a virtualenv so the runtime stage can copy the installed
# packages as one directory instead of running pip a second time.
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Copy requirements before the source so this layer stays cached when only
# application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .
RUN python export_model.py

# ---- Runtime stage: only the venv, the model artifact and the API ----
FROM python:3.11-slim
WORKDIR /app

COPY --from=builder /opt/venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

COPY --from=builder /app/model.pt .
COPY --from=builder /app/app.py .

# Run the service as an unprivileged user rather than root.
RUN useradd --create-home appuser
USER appuser

EXPOSE 8000
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "2"]
Phase 5: Cloud Deploy
# AWS ECS Task Definition
{
  "family": "dl-model-service",
  "networkMode": "awsvpc",
  "requiresCompatibilities": ["FARGATE"],
  "cpu": "1024",
  "memory": "2048",
  "containerDefinitions": [{
    "name": "model-api",
    "image": "123456789.dkr.ecr.us-east-1.amazonaws.com/dl-model:latest",
    "portMappings": [{"containerPort": 8000, "protocol": "tcp"}],
    "healthCheck": {
      "command": ["CMD-SHELL", "python -c \"import urllib.request; urllib.request.urlopen('http://localhost:8000/health', timeout=3)\" || exit 1"],
      "interval": 30,
      "timeout": 5,
      "retries": 3,
      "startPeriod": 60
    }
  }]
}
Evaluation Criteria
- Model accuracy meets threshold
- Inference latency under 100ms (p95)
- API handles 100+ concurrent requests
- Docker image under 1GB
- Health checks pass consistently
- Monitoring dashboards operational
Key Takeaways
- TorchScript/ONNX: Export models for production inference
- Multi-stage Docker: Minimize image size
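- Async API: Handle concurrent requests efficiently, and keep CPU-bound inference from blocking the event loop (e.g., run it in a worker thread or scale out with multiple workers)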
- Health checks: Enable orchestrator-managed deployments