Multi-Tenant Architecture

Difficulty: Senior Level | Companies: AWS, Google, Microsoft, Netflix, Uber

Multi-Tenant Models

Multi-tenancy shares infrastructure across customers while maintaining isolation. Choose your model based on isolation requirements and cost constraints.

ℹ️

The isolation model determines security, cost, and operational complexity. Start with shared resources and isolate only when required.

Tenant Isolation Models

Architecture Diagram

Model 1: Shared Everything          Model 2: Shared App, Separate Data
┌─────────────────────────┐        ┌─────────────────────────┐
│      Application        │        │      Application        │
│  ┌───────┐ ┌───────┐   │        │  ┌───────┐ ┌───────┐   │
│  │Tenant │ │Tenant │   │        │  │Tenant │ │Tenant │   │
│  │  A    │ │  B    │   │        │  │  A    │ │  B    │   │
│  └───┬───┘ └───┬───┘   │        │  └───┬───┘ └───┬───┘   │
│      │         │        │        │      │         │        │
│  ┌───▼─────────▼───┐   │        │  ┌───▼───┐ ┌───▼───┐   │
│  │   Shared DB     │   │        │  │ DB-A  │ │ DB-B  │   │
│  └─────────────────┘   │        │  └───────┘ └───────┘   │
└─────────────────────────┘        └─────────────────────────┘

Model 3: Separate Everything
┌─────────────────────────┐
│      Application        │
│  ┌───────┐ ┌───────┐   │
│  │Tenant │ │Tenant │   │
│  │  A    │ │  B    │   │
│  └───┬───┘ └───┬───┘   │
│  ┌───▼───┐ ┌───▼───┐   │
│  │ App-A │ │ App-B │   │
│  └───┬───┘ └───┬───┘   │
│  ┌───▼───┐ ┌───▼───┐   │
│  │ DB-A  │ │ DB-B  │   │
│  └───────┘ └───────┘   │
└─────────────────────────┘

Pattern 1: Tenant Context Propagation

Pass tenant context through all service layers.

// Middleware to extract and propagate tenant context
import { Request, Response, NextFunction } from 'express';
import { AsyncLocalStorage } from 'async_hooks';

/**
 * Per-request tenant information. An instance is bound to the request by
 * `tenantMiddleware` and read back through `getTenantContext()`.
 */
interface TenantContext {
  /** Tenant identifier; `OrderService.createOrder` stamps it onto created orders. */
  tenantId: string;
  /** Subscription tier; `OrderService.createOrder` applies an order cap when this is 'free'. */
  plan: 'free' | 'starter' | 'professional' | 'enterprise';
  /** NOTE(review): not read anywhere in the visible code — presumably the tenant's home/deployment region; confirm. */
  region: string;
  /** NOTE(review): not read anywhere in the visible code — presumably the feature flags enabled for the tenant; confirm. */
  features: string[];
}

// Holds the TenantContext for the current asynchronous call chain; populated by
// tenantMiddleware via run() and read by getTenantContext() via getStore().
const tenantStorage = new AsyncLocalStorage<TenantContext>();

/**
 * Express middleware that resolves the tenant for a request and binds it to
 * the async context so that code running downstream of `next()` can call
 * `getTenantContext()`.
 *
 * Responds with 400 when the `x-tenant-id` header is absent or blank, and with
 * 404 when no configuration can be resolved for the supplied ID.
 *
 * NOTE(security): the header is client-controlled. In production the tenant
 * should be derived from (or checked against) the authenticated principal,
 * otherwise any caller can impersonate any tenant.
 *
 * @param req  Incoming request; `req.tenant` is set for downstream handlers.
 * @param res  Response, used only to reject invalid requests.
 * @param next Continuation, invoked inside the tenant's async context.
 */
export function tenantMiddleware(req: Request, res: Response, next: NextFunction) {
  // Node types a header as string | string[] | undefined, so normalise it
  // instead of asserting; trimming makes a whitespace-only value count as missing.
  const rawTenantId = req.headers['x-tenant-id'];
  const tenantId = (Array.isArray(rawTenantId) ? rawTenantId[0] : rawTenantId)?.trim();

  if (!tenantId) {
    return res.status(400).json({ error: 'Missing tenant ID' });
  }

  // Load tenant configuration
  const tenantConfig = loadTenantConfig(tenantId);

  // Binding an empty store would make getTenantContext() hand out `undefined`
  // further down the stack, so reject unknown tenants here.
  if (!tenantConfig) {
    return res.status(404).json({ error: 'Unknown tenant' });
  }

  // Everything started from within run() — including awaited continuations —
  // observes tenantConfig through tenantStorage.getStore().
  tenantStorage.run(tenantConfig, () => {
    // NOTE(review): `tenant` is not part of Express's Request type; this
    // assumes a declaration-merging augmentation exists elsewhere.
    req.tenant = tenantConfig;
    next();
  });
}

/**
 * Returns the tenant context bound to the current request.
 *
 * Can be called from any code that runs inside the `tenantStorage.run()` scope
 * opened by `tenantMiddleware`.
 *
 * @returns The active tenant context.
 * @throws Error when no context is bound (e.g. the middleware was not
 *   installed, or the call happens outside a request such as in a background
 *   job). Failing here gives a clear message instead of a later
 *   "cannot read properties of undefined".
 */
export function getTenantContext(): TenantContext {
  const context = tenantStorage.getStore();
  if (context == null) {
    throw new Error(
      'Tenant context is not available; ensure tenantMiddleware ran for this request',
    );
  }
  return context;
}

// Usage in service layer
export class OrderService {
  /**
   * Creates an order owned by the tenant bound to the current request.
   *
   * Tenants on the free plan are refused once their existing order count has
   * reached 100; other plans are not capped here.
   *
   * @param input Order fields supplied by the caller.
   * @returns Whatever `this.db.orders.create` returns for the new record.
   * @throws Error 'Free plan order limit reached' when a free tenant is at the cap.
   */
  async createOrder(input: CreateOrderInput) {
    const { plan, tenantId } = getTenantContext();

    // The count is only fetched for free-plan tenants.
    const isCapped = plan === 'free';
    if (isCapped && (await this.getOrderCount(tenantId)) >= 100) {
      throw new Error('Free plan order limit reached');
    }

    // tenantId comes after the spread so it overrides any tenantId in `input`.
    const data = { ...input, tenantId };
    return this.db.orders.create({ data });
  }
}

ℹ️

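AsyncLocalStorage provides request-scoped tenant context that follows the asynchronous call chain, so concurrent requests never see each other's tenant and you don't have to pass the context as a parameter through every function call.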

Pattern 2: Row-Level Security for Data Isolation

Enforce tenant isolation at the database level.

-- PostgreSQL Row-Level Security
CREATE TABLE orders (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,
    customer_id UUID NOT NULL,
    total DECIMAL(10,2),
    created_at TIMESTAMP DEFAULT NOW()
);

-- Enable RLS.
-- FORCE applies the policies to the table owner as well; without it the
-- owning role silently bypasses every policy on the table.
ALTER TABLE orders ENABLE ROW LEVEL SECURITY;
ALTER TABLE orders FORCE ROW LEVEL SECURITY;

-- Create policy for tenant isolation.
-- USING filters the rows a statement can see (SELECT/UPDATE/DELETE);
-- WITH CHECK rejects INSERTed or UPDATEd rows that would belong to another tenant.
-- current_setting() raises an error when the setting was never set, so a
-- request that forgot to set the tenant fails instead of seeing data.
CREATE POLICY tenant_isolation ON orders
    USING (tenant_id = current_setting('app.current_tenant')::uuid)
    WITH CHECK (tenant_id = current_setting('app.current_tenant')::uuid);

-- Create application role.
-- It must not be a superuser or have BYPASSRLS: both skip row-level security.
CREATE ROLE app_user;
GRANT SELECT, INSERT, UPDATE, DELETE ON orders TO app_user;

-- Application sets tenant context per transaction.
-- SET LOCAL is reverted at COMMIT/ROLLBACK, so a pooled connection cannot
-- carry one tenant's ID over into the next request that reuses it.
BEGIN;
SET LOCAL app.current_tenant = 'tenant-uuid-here';

-- Now queries are automatically filtered
SELECT * FROM orders;  -- Only returns current tenant's orders
COMMIT;

# Application code that sets tenant context
class TenantAwareDatabase:
    """Runs queries on pooled connections after setting the tenant context.

    Each connection handed out has the ``app.current_tenant`` setting assigned,
    which is the value the row-level-security policy reads.

    The pool is expected to expose awaitable ``acquire()`` / ``release(conn)``
    and connections awaitable ``execute(sql, *args)`` / ``fetch(sql, *args)``.
    NOTE(review): the ``$1`` placeholder assumes a driver with PostgreSQL-native
    positional parameters (e.g. asyncpg) — confirm against the driver in use.
    """

    def __init__(self, connection_pool):
        self.pool = connection_pool

    async def get_connection(self, tenant_id: str):
        """Acquire a connection and bind ``tenant_id`` as its tenant context.

        The caller owns the returned connection and must release it back to
        the pool. If binding the tenant fails, the connection is released here
        before the error propagates, so it is not leaked.
        """
        conn = await self.pool.acquire()
        try:
            # tenant_id is untrusted input: pass it as a bound parameter rather
            # than interpolating it into the statement. set_config(..., false)
            # is the function form of a session-level SET, which cannot take
            # bind parameters itself.
            await conn.execute(
                "SELECT set_config('app.current_tenant', $1, false)",
                tenant_id,
            )
        except BaseException:
            await self.pool.release(conn)
            raise
        return conn

    async def query(self, tenant_id: str, sql: str, params=None):
        """Run ``sql`` under ``tenant_id``'s context and return the fetched rows.

        ``params``, when given and non-empty, is unpacked as positional query
        arguments. The connection is always released, even if the query fails.
        """
        conn = await self.get_connection(tenant_id)
        try:
            if params:
                return await conn.fetch(sql, *params)
            return await conn.fetch(sql)
        finally:
            await self.pool.release(conn)

Pattern 3: Tenant-Aware Caching

Isolate cache namespaces per tenant.

# Redis-based tenant-aware cache
import redis
import json
from typing import Any, Optional

class TenantCache:
    """Redis cache whose keys are namespaced per tenant (``tenant:<id>:<key>``).

    Every method is a coroutine, so the client must be the asyncio flavour
    (``redis.asyncio.Redis``), not the blocking ``redis.Redis``.

    Key enumeration uses incremental SCAN rather than KEYS: KEYS walks the
    whole keyspace in one blocking call and stalls every other tenant on a
    large instance.
    """

    # SCAN page-size hint, and the maximum number of keys sent in one DEL.
    _SCAN_BATCH = 500

    def __init__(self, redis_client: "redis.asyncio.Redis"):
        self.redis = redis_client

    def _get_key(self, tenant_id: str, key: str) -> str:
        """Build the namespaced cache key for ``key`` under ``tenant_id``."""
        return f"tenant:{tenant_id}:{key}"

    def _get_pattern(self, tenant_id: str, pattern: str) -> str:
        """Build a glob pattern limited to ``tenant_id``'s namespace.

        Glob metacharacters in the tenant ID are escaped so that an ID such as
        ``*`` cannot widen the match to other tenants; ``pattern`` itself is
        left untouched because it is meant to be a glob.
        """
        escaped = "".join("\\" + ch if ch in "*?[]\\" else ch for ch in tenant_id)
        return self._get_key(escaped, pattern)

    async def get(self, tenant_id: str, key: str) -> Optional[Any]:
        """Return the JSON-decoded value stored for ``key``, or None if absent."""
        cache_key = self._get_key(tenant_id, key)
        value = await self.redis.get(cache_key)
        return json.loads(value) if value else None

    async def set(
        self,
        tenant_id: str,
        key: str,
        value: Any,
        ttl: int = 300,
    ):
        """Store ``value`` as JSON under ``key`` with a TTL of ``ttl`` seconds.

        Objects that are not JSON-serialisable are stored via ``str()``.
        """
        cache_key = self._get_key(tenant_id, key)
        await self.redis.setex(
            cache_key,
            ttl,
            json.dumps(value, default=str),
        )

    async def invalidate_pattern(self, tenant_id: str, pattern: str):
        """Invalidate all keys matching pattern for a tenant."""
        full_pattern = self._get_pattern(tenant_id, pattern)
        batch = []
        async for key in self.redis.scan_iter(
            match=full_pattern, count=self._SCAN_BATCH
        ):
            batch.append(key)
            # Delete in bounded batches so neither memory use nor a single
            # DEL command grows with the size of the tenant's keyspace.
            if len(batch) >= self._SCAN_BATCH:
                await self.redis.delete(*batch)
                batch.clear()
        if batch:
            await self.redis.delete(*batch)

    async def get_tenant_stats(self, tenant_id: str) -> dict:
        """Get cache statistics for a tenant."""
        pattern = self._get_pattern(tenant_id, "*")

        # SCAN may report a key more than once; de-duplicate so that the
        # count and the size total stay accurate.
        seen = set()
        total_size = 0
        async for key in self.redis.scan_iter(
            match=pattern, count=self._SCAN_BATCH
        ):
            if key in seen:
                continue
            seen.add(key)
            # memory_usage can return None (e.g. key expired mid-scan).
            total_size += await self.redis.memory_usage(key) or 0

        return {
            'key_count': len(seen),
            'total_size_bytes': total_size,
            'total_size_mb': round(total_size / 1024 / 1024, 2),
        }

Pattern 4: Resource Quotas per Tenant

Enforce limits based on tenant plan.

// Tenant resource quota management
/**
 * Numeric limits attached to a plan. `QuotaEnforcer.checkQuota` allows a
 * resource only while recorded usage is strictly below the matching field;
 * `PLAN_QUOTAS` uses `Infinity` for limits that never trip.
 */
interface TenantQuotas {
  /** API request allowance; per the name, counted per month. */
  apiRequestsPerMonth: number;
  /** Storage allowance; per the name, in gigabytes. */
  storageGB: number;
  /** Compute allowance in hours — NOTE(review): accounting period is not shown here; confirm. */
  computeHours: number;
  /** Upper bound on users for the tenant. */
  maxUsers: number;
  /** Upper bound on projects for the tenant. */
  maxProjects: number;
}

/**
 * Quota table keyed by plan name. Each tier is ten times the previous one for
 * requests, storage and compute; enterprise is unbounded except for storage.
 */
const PLAN_QUOTAS: Record<string, TenantQuotas> = {
  // Entry tier: a single small project.
  free: {
    apiRequestsPerMonth: 10000,
    storageGB: 1,
    computeHours: 10,
    maxUsers: 3,
    maxProjects: 1,
  },
  // Small teams.
  starter: {
    apiRequestsPerMonth: 100000,
    storageGB: 10,
    computeHours: 100,
    maxUsers: 10,
    maxProjects: 5,
  },
  // Larger teams.
  professional: {
    apiRequestsPerMonth: 1000000,
    storageGB: 100,
    computeHours: 1000,
    maxUsers: 50,
    maxProjects: 25,
  },
  // Unbounded limits are expressed as +Infinity so `usage < limit` is always true.
  enterprise: {
    apiRequestsPerMonth: Number.POSITIVE_INFINITY,
    storageGB: 1000,
    computeHours: Number.POSITIVE_INFINITY,
    maxUsers: Number.POSITIVE_INFINITY,
    maxProjects: Number.POSITIVE_INFINITY,
  },
};

/**
 * Checks and records per-tenant resource usage against the limits of the
 * tenant's plan (`PLAN_QUOTAS`).
 *
 * NOTE: `enforceQuota` reads usage and then increments it as two separate
 * steps, so concurrent calls can both pass the check. Where the limit must be
 * exact, the usage tracker needs an atomic increment-and-compare.
 */
export class QuotaEnforcer {
  constructor(private usageTracker: UsageTracker) {}

  /**
   * Reports whether the tenant is still below its limit for `resource`.
   *
   * @returns `true` while recorded usage is strictly less than the limit.
   * @throws Error when the tenant's plan has no entry in `PLAN_QUOTAS`.
   */
  async checkQuota(tenantId: string, resource: keyof TenantQuotas): Promise<boolean> {
    const limit = await this.resolveLimit(tenantId, resource);
    const usage = await this.usageTracker.getUsage(tenantId, resource);

    return usage < limit;
  }

  /**
   * Consumes one unit of `resource` for the tenant, or fails if the limit has
   * been reached.
   *
   * @throws QuotaExceededError carrying the tenant, resource and limit when
   *   the quota is exhausted; usage is not incremented in that case.
   * @throws Error when the tenant's plan has no entry in `PLAN_QUOTAS`.
   */
  async enforceQuota(tenantId: string, resource: keyof TenantQuotas): Promise<void> {
    const allowed = await this.checkQuota(tenantId, resource);

    if (!allowed) {
      throw new QuotaExceededError(
        tenantId,
        resource,
        await this.resolveLimit(tenantId, resource),
      );
    }

    await this.usageTracker.incrementUsage(tenantId, resource);
  }

  /**
   * Looks up the numeric limit for `resource` on the tenant's current plan.
   * Single lookup path shared by the check and the error report, so both
   * always agree on how the plan is resolved.
   */
  private async resolveLimit(tenantId: string, resource: keyof TenantQuotas): Promise<number> {
    const tenant = await this.getTenant(tenantId);
    const quotas = PLAN_QUOTAS[tenant.plan];

    // PLAN_QUOTAS is keyed by arbitrary string, so an unrecognised plan yields
    // undefined; fail with a clear message instead of a property-access crash.
    if (quotas === undefined) {
      throw new Error(`No quotas defined for plan "${tenant.plan}"`);
    }

    return quotas[resource];
  }
}

⚠️

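Quota enforcement must be atomic. A separate "read usage, then increment" sequence lets two concurrent requests both pass the check and exceed the limit. Perform the increment and the comparison as one operation instead — for example, inside a database transaction, or with a Redis INCR whose returned value is compared against the limit — to prevent race conditions.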

Pattern 5: Tenant-Aware Deployment

Deploy updates selectively based on tenant requirements.

# Kubernetes deployment with tenant awareness
# Serves every tenant that is not routed to the enterprise deployment below.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: api-service
spec:
  replicas: 3
  selector:
    matchLabels:
      app: api-service
      # The tier label keeps this selector disjoint from the enterprise
      # deployment's; selecting on `app` alone would also match its pods.
      tier: standard
  template:
    metadata:
      labels:
        app: api-service
        tier: standard
    spec:
      containers:
        - name: api
          image: myregistry/api:v2.0.0
          env:
            - name: TENANT_ROUTING
              value: "enabled"
            # Feature flags are read from the tenant-features ConfigMap.
            - name: FEATURE_FLAGS
              valueFrom:
                configMapKeyRef:
                  name: tenant-features
                  key: features.json
---
# Separate deployment for enterprise tenants
apiVersion: apps/v1
kind: Deployment
metadata:
  name: api-service-enterprise
spec:
  replicas: 5  # More replicas for enterprise
  selector:
    matchLabels:
      app: api-service
      tier: enterprise
  template:
    metadata:
      labels:
        app: api-service
        tier: enterprise
    spec:
      containers:
        - name: api
          image: myregistry/api:v2.0.0-enterprise
          # Explicit requests/limits give enterprise pods reserved, bounded capacity.
          resources:
            requests:
              cpu: "500m"
              memory: "512Mi"
            limits:
              cpu: "1000m"
              memory: "1Gi"

Multi-Tenant Decision Matrix

Aspect	Shared	Pool	Isolate
Cost	Lowest	Medium	Highest
Isolation	Weakest	Medium	Strongest
Compliance	Limited	Moderate	Full
Customization	Limited	Moderate	Full
Operational Effort	Low	Medium	High

Follow-Up Questions

How do you handle tenant-specific customizations without forking the codebase?
What strategies would you use to migrate a tenant from shared to isolated infrastructure?
How do you implement tenant-aware logging and monitoring for debugging?