🎉 75% of content is free forever — Unlock Premium from $10/mo →
CW
Search courses…
💼 Services · ℹ️ About · ✉️ Contact · View Pricing Plans from $10

Multi-Tenancy AI

🟢 Free Lesson

Advertisement

Multi-Tenancy AI

API Gateway (Auth + Routing) | Tenant Router (Identify Tenant · Load Config · Apply Limits) | Tenant A: GPT-4 + Custom RAG (Healthcare Domain) | Tenant B: Claude + Vector DB (Legal Domain) | Tenant C: Llama + Fine-tuned (Finance Domain) | Shared Services (Vector Store · Cache Layer · Rate Limiting) | Data Isolation (Per-Tenant DBs · Encrypted Storage) | Monitoring (Per-Tenant Metrics)

Multi-tenant AI architectures enable serving multiple customers from a shared infrastructure while maintaining data isolation, custom configurations, and per-tenant billing.

Tenant Configuration Manager

from dataclasses import dataclass, field
from typing import Dict, Optional
import json
from pathlib import Path

@dataclass
class TenantConfig:
    """Settings record for a single tenant.

    Only ``tenant_id`` and ``name`` are required. Every dict-valued field
    defaults to its own fresh empty dict, and ``billing_plan`` defaults to
    ``"standard"``.
    """

    # Identity of the tenant.
    tenant_id: str
    name: str

    # Option groups; ``default_factory`` gives each instance its own dict
    # so instances never share mutable state.
    model_config: Dict = field(default_factory=dict)
    rag_config: Dict = field(default_factory=dict)
    limits: Dict = field(default_factory=dict)

    # Billing plan label.
    billing_plan: str = "standard"

    # Free-form extra settings.
    custom_settings: Dict = field(default_factory=dict)

class TenantManager:
    """Load, cache and persist per-tenant configuration as JSON files.

    Each tenant's configuration is stored in
    ``<config_path>/<tenant_id>.json``. Configurations that have been loaded
    or created are kept in ``self.cache`` for the lifetime of the manager.
    """

    def __init__(self, config_path: str = "tenants"):
        """Create the manager and make sure the config directory exists.

        Args:
            config_path: Directory that holds one JSON file per tenant.
        """
        self.config_path = Path(config_path)
        # parents=True so a nested path such as "var/data/tenants" works on
        # a fresh checkout instead of raising FileNotFoundError.
        self.config_path.mkdir(parents=True, exist_ok=True)
        self.cache: Dict[str, "TenantConfig"] = {}

    def _config_file(self, tenant_id: str) -> Path:
        """Return the JSON path for *tenant_id*, rejecting unsafe ids.

        The id becomes part of a file name, so an id containing a path
        separator (e.g. ``"../other"``) would read or write outside the
        config directory.

        Raises:
            ValueError: If the id is empty or contains ``/``, ``\\`` or NUL.
        """
        text = str(tenant_id)
        if not text or any(ch in text for ch in ("/", "\\", "\0")):
            raise ValueError(f"Invalid tenant id: {tenant_id!r}")
        return self.config_path / f"{text}.json"

    def get_config(self, tenant_id: str) -> "TenantConfig":
        """Return the configuration for *tenant_id*.

        Lookup order: in-memory cache, then the tenant's JSON file. When
        neither exists, a default configuration is created, written to disk
        and cached.

        Raises:
            ValueError: If *tenant_id* is not usable as a file name.
        """
        if tenant_id in self.cache:
            return self.cache[tenant_id]

        config_file = self._config_file(tenant_id)
        if config_file.exists():
            with open(config_file, encoding="utf-8") as f:
                data = json.load(f)
            config = TenantConfig(**data)
        else:
            config = self._default_config(tenant_id)
            self.save_config(config)

        self.cache[tenant_id] = config
        return config

    def _default_config(self, tenant_id: str) -> "TenantConfig":
        """Build the configuration given to a tenant that has no file yet."""
        return TenantConfig(
            tenant_id=tenant_id,
            name=f"Tenant {tenant_id}",
            model_config={
                "provider": "openai",
                "model": "gpt-4",
                "temperature": 0.7,
                "max_tokens": 2000
            },
            rag_config={
                "top_k": 5,
                "similarity_threshold": 0.7,
                "chunk_size": 512
            },
            limits={
                "requests_per_minute": 60,
                "tokens_per_day": 100000,
                "max_documents": 1000
            },
            billing_plan="standard"
        )

    def save_config(self, config: "TenantConfig"):
        """Write *config* to its tenant's JSON file, replacing any old file.

        Raises:
            ValueError: If ``config.tenant_id`` is not usable as a file name.
        """
        config_file = self._config_file(config.tenant_id)
        with open(config_file, "w", encoding="utf-8") as f:
            json.dump(config.__dict__, f, indent=2)

    def update_config(self, tenant_id: str, updates: Dict):
        """Apply *updates* to a tenant's configuration and persist it.

        Keys that are not fields of the configuration are ignored.

        Args:
            tenant_id: Tenant whose configuration is updated.
            updates: Mapping of field name to new value.

        Raises:
            ValueError: If *updates* tries to change ``tenant_id``; doing so
                would save under a different file while the cache still maps
                the old id to the renamed object.
        """
        config = self.get_config(tenant_id)
        if "tenant_id" in updates and updates["tenant_id"] != config.tenant_id:
            raise ValueError("tenant_id cannot be changed through update_config")

        # Only instance fields may be set; hasattr() would also match
        # methods and dunder attributes such as __class__ or __dict__.
        known_fields = vars(config)
        for key, value in updates.items():
            if key in known_fields:
                setattr(config, key, value)
        self.save_config(config)
        self.cache[tenant_id] = config

Data Isolation Layer

from typing import Any
import hashlib

class TenantDataIsolator:
    """Give every tenant its own SQLite database and FAISS vector store.

    Database connections are opened lazily and cached per tenant id in
    ``self.connections``. Vector stores live under
    ``vector_stores/<tenant_id>`` relative to the working directory.
    """

    def __init__(self, base_db_url: str):
        """Store *base_db_url* and start with no open connections."""
        self.base_db_url = base_db_url
        self.connections = {}

    def _get_tenant_db(self, tenant_id: str):
        """Return the cached connection for *tenant_id*, opening it if needed."""
        if tenant_id not in self.connections:
            # NOTE(review): only the first 8 hex digits (32 bits) of the MD5
            # digest are kept, so two tenant ids can collide and end up
            # sharing one database. The scheme is left unchanged because
            # altering it would orphan existing database files.
            db_name = f"tenant_{hashlib.md5(tenant_id.encode()).hexdigest()[:8]}"
            self.connections[tenant_id] = self._create_connection(db_name)
        return self.connections[tenant_id]

    def _create_connection(self, db_name: str):
        """Open the SQLite file ``<db_name>.db``.

        ``self.base_db_url`` is stored by ``__init__`` but is not used here.
        """
        import sqlite3
        return sqlite3.connect(f"{db_name}.db")

    @staticmethod
    def _quote_identifier(name: str) -> str:
        """Validate and double-quote a table or column name.

        Identifiers cannot be bound as ``?`` parameters, so they are
        restricted to letters, digits and underscores before being placed in
        the SQL text.

        Raises:
            ValueError: If *name* is not a plain identifier.
        """
        import re

        if not isinstance(name, str) or re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", name) is None:
            raise ValueError(f"Invalid SQL identifier: {name!r}")
        return f'"{name}"'

    @staticmethod
    def _vector_store_path(tenant_id: str) -> str:
        """Return the vector store directory for *tenant_id*.

        Raises:
            ValueError: If the id is empty, is ``.``/``..`` or contains a
                path separator; such an id would resolve to another tenant's
                store or to a location outside ``vector_stores``.
        """
        text = str(tenant_id)
        if (
            not text
            or text in (".", "..")
            or any(ch in text for ch in ("/", "\\", "\0"))
        ):
            raise ValueError(f"Invalid tenant id: {tenant_id!r}")
        return f"vector_stores/{text}"

    def query(self, tenant_id: str, sql: str, params: tuple = ()) -> list:
        """Run *sql* with bound *params* on the tenant's database.

        Returns:
            All rows produced by the statement (empty for non-SELECT
            statements). No commit is issued here.
        """
        conn = self._get_tenant_db(tenant_id)
        cursor = conn.execute(sql, params)
        try:
            return cursor.fetchall()
        finally:
            cursor.close()

    def insert(self, tenant_id: str, table: str, data: dict):
        """Insert one row into *table* of the tenant's database and commit.

        Args:
            tenant_id: Tenant whose database receives the row.
            table: Table name; must be a plain identifier.
            data: Mapping of column name to value. An empty mapping inserts
                a row made of the table's default values.

        Raises:
            ValueError: If *table* or a column name is not a plain
                identifier.
        """
        conn = self._get_tenant_db(tenant_id)
        table_sql = self._quote_identifier(table)
        if data:
            columns = ", ".join(self._quote_identifier(column) for column in data)
            placeholders = ", ".join("?" for _ in data)
            sql = f"INSERT INTO {table_sql} ({columns}) VALUES ({placeholders})"
        else:
            # "INSERT INTO t () VALUES ()" is not valid SQLite syntax.
            sql = f"INSERT INTO {table_sql} DEFAULT VALUES"

        # The connection context manager commits on success and rolls back
        # if the statement raises.
        with conn:
            conn.execute(sql, tuple(data.values()))

    def get_vector_store(self, tenant_id: str):
        """Return the tenant's FAISS store, or a new empty one if none is saved.

        An existing store that fails to load raises instead of being
        silently replaced by an empty store.

        Raises:
            ValueError: If *tenant_id* is not usable as a directory name.
        """
        import os

        store_path = self._vector_store_path(tenant_id)

        from langchain_community.vectorstores import FAISS
        from langchain_openai import OpenAIEmbeddings

        embeddings = OpenAIEmbeddings()

        if os.path.isdir(store_path):
            # NOTE(review): some langchain versions require
            # allow_dangerous_deserialization=True here - confirm against the
            # installed version.
            return FAISS.load_local(store_path, embeddings)
        return FAISS.from_documents([], embeddings)

    def add_documents(self, tenant_id: str, documents: list):
        """Add *documents* to the tenant's FAISS store and save it.

        A new store is created only when no store directory exists yet.
        Errors while loading or extending an existing store propagate, so a
        failure can never overwrite the saved store with just the new
        documents. An empty *documents* list is a no-op.

        Raises:
            ValueError: If *tenant_id* is not usable as a directory name.
        """
        import os

        store_path = self._vector_store_path(tenant_id)
        if not documents:
            return

        from langchain_community.vectorstores import FAISS
        from langchain_openai import OpenAIEmbeddings

        embeddings = OpenAIEmbeddings()

        if os.path.isdir(store_path):
            vector_store = FAISS.load_local(store_path, embeddings)
            vector_store.add_documents(documents)
        else:
            vector_store = FAISS.from_documents(documents, embeddings)

        vector_store.save_local(store_path)

Per-Tenant Rate Limiter

import time
from collections import defaultdict
from dataclasses import dataclass
from typing import Dict

@dataclass
class RateLimit:
    """Quota settings for one tenant.

    ``requests_per_minute`` and ``tokens_per_day`` are required;
    ``burst_limit`` defaults to 10.
    """

    # Request quota, per minute.
    requests_per_minute: int

    # Token quota, per day.
    tokens_per_day: int

    # Burst allowance.
    burst_limit: int = 10

class TenantRateLimiter:
    """In-memory request and token rate limiter keyed by tenant id.

    Requests are tracked as timestamps over a sliding 60-second window.
    Tokens are tracked as a running total that is only cleared by
    ``reset_daily``.
    """

    def __init__(self):
        """Start with no recorded usage and no tenant-specific limits."""
        self.request_counts: Dict[str, list] = defaultdict(list)
        self.token_counts: Dict[str, int] = defaultdict(int)
        self.limits: Dict[str, "RateLimit"] = {}

    def set_limits(self, tenant_id: str, limits: "RateLimit"):
        """Register the limits that apply to *tenant_id*."""
        self.limits[tenant_id] = limits

    def check_rate_limit(self, tenant_id: str, tokens: int = 0) -> dict:
        """Report whether *tenant_id* may make a request right now.

        This only checks; call ``record_request`` to count usage.

        Args:
            tenant_id: Tenant to check.
            tokens: Tokens the upcoming request is expected to use.

        Returns:
            A dict with:
              * ``allowed`` - True when both the request window and the
                token budget have room.
              * ``requests_remaining`` / ``tokens_remaining`` - quota left,
                never below zero.
              * ``retry_after`` - seconds until the oldest request leaves
                the 60-second window when the request is denied; 0 when the
                request is allowed or the window is empty.
        """
        limits = self.limits.get(tenant_id)
        if limits is None:
            # Default quota for tenants without explicit limits; built only
            # when actually needed.
            limits = RateLimit(60, 100000)
        now = time.time()

        # Drop timestamps that have fallen out of the 60-second window.
        recent = [t for t in self.request_counts[tenant_id] if now - t < 60]
        self.request_counts[tenant_id] = recent
        used_tokens = self.token_counts[tenant_id]

        requests_ok = len(recent) < limits.requests_per_minute
        tokens_ok = used_tokens + tokens <= limits.tokens_per_day
        allowed = requests_ok and tokens_ok

        if allowed or not recent:
            retry_after = 0
        else:
            # Timestamps are appended in order, so recent[0] is the oldest.
            retry_after = 60 - (now - recent[0])

        return {
            "allowed": allowed,
            # Usage can exceed the quota (record_request is unconditional),
            # so clamp instead of reporting negative remainders.
            "requests_remaining": max(0, limits.requests_per_minute - len(recent)),
            "tokens_remaining": max(0, limits.tokens_per_day - used_tokens),
            "retry_after": retry_after
        }

    def record_request(self, tenant_id: str, tokens_used: int):
        """Count one request made now and add *tokens_used* to the total."""
        self.request_counts[tenant_id].append(time.time())
        self.token_counts[tenant_id] += tokens_used

    def reset_daily(self):
        """Clear every tenant's token total; request timestamps are kept."""
        self.token_counts.clear()

Tenant-Aware LLM Router

class TenantAwareLLMRouter:
    """Send chat requests to the model configured for each tenant.

    Clients are created lazily and cached in ``self.model_clients`` under a
    ``"<provider>:<model>"`` key, so tenants with the same provider and
    model share one client.
    """

    def __init__(self, tenant_manager, rate_limiter, data_isolator):
        """Keep references to the collaborators and start with no clients.

        Args:
            tenant_manager: Object whose ``get_config(tenant_id)`` returns a
                config with a ``model_config`` dict.
            rate_limiter: Object providing ``check_rate_limit`` and
                ``record_request``.
            data_isolator: Stored on the instance; not used by this class.
        """
        self.tenant_manager = tenant_manager
        self.rate_limiter = rate_limiter
        self.data_isolator = data_isolator
        self.model_clients = {}

    def _get_client(self, tenant_id: str):
        """Return the chat client for the tenant's provider and model.

        The provider defaults to ``"openai"`` and the model to ``"gpt-4"``
        when the tenant's ``model_config`` does not set them.

        Raises:
            ValueError: If the configured provider is neither ``"openai"``
                nor ``"anthropic"``.
        """
        config = self.tenant_manager.get_config(tenant_id)
        provider = config.model_config.get("provider", "openai")
        model = config.model_config.get("model", "gpt-4")

        key = f"{provider}:{model}"
        client = self.model_clients.get(key)
        if client is None:
            if provider == "openai":
                from langchain_openai import ChatOpenAI
                client = ChatOpenAI(model=model)
            elif provider == "anthropic":
                from langchain_anthropic import ChatAnthropic
                client = ChatAnthropic(model=model)
            else:
                # Fail with a clear message instead of an opaque KeyError
                # from the cache lookup.
                raise ValueError(
                    f"Unsupported model provider {provider!r} for tenant {tenant_id!r}"
                )
            self.model_clients[key] = client

        return client

    def route_request(self, tenant_id: str, messages: list, **kwargs) -> dict:
        """Run *messages* through the tenant's model, honouring rate limits.

        Args:
            tenant_id: Tenant making the request.
            messages: Messages passed to the client's ``invoke``.
            **kwargs: Per-call options; they override the tenant's
                ``model_config`` values.

        Returns:
            On success, a dict with ``response``, ``model`` and
            ``tokens_used``. When the rate limiter denies the request, a
            dict with ``error`` and ``retry_after`` and no model call.

        Raises:
            ValueError: If the tenant's provider is not supported.
        """
        config = self.tenant_manager.get_config(tenant_id)

        rate_check = self.rate_limiter.check_rate_limit(tenant_id)
        if not rate_check["allowed"]:
            return {"error": "Rate limit exceeded", "retry_after": rate_check["retry_after"]}

        client = self._get_client(tenant_id)
        call_options = {**config.model_config, **kwargs}
        # "provider" only selects the client class; it is not a generation
        # option. NOTE(review): LangChain chat models appear to forward extra
        # invoke kwargs to the provider call, where an unknown "provider"
        # argument would be rejected - confirm against the installed version.
        call_options.pop("provider", None)

        response = client.invoke(messages, **call_options)
        # Rough estimate: ~1.3 tokens per whitespace-separated word.
        # Assumes response.content is a string - TODO confirm for providers
        # that return structured content.
        tokens_used = int(len(response.content.split()) * 1.3)
        self.rate_limiter.record_request(tenant_id, tokens_used)

        return {
            "response": response.content,
            # Same default as _get_client, so a config without "model" does
            # not raise KeyError after the call has already been made.
            "model": config.model_config.get("model", "gpt-4"),
            "tokens_used": tokens_used
        }

Key Takeaways

  • Tenant isolation ensures data privacy and security
  • Per-tenant configs enable customization without code changes
  • Rate limiting prevents abuse and ensures fair usage
  • Model routing optimizes cost per tenant based on their plan
  • Centralized monitoring provides visibility across all tenants
⭐

Premium Content

Multi-Tenancy AI

Unlock this lesson and 900+ advanced tutorials with a Premium plan.

🎯 End-to-end Projects
💼 Interview Prep
📜 Certificates
🤝 Community Access

Already a member? Log in

Need Expert Generative AI Help?

Get personalized tutoring, project support, or professional consulting.

Advertisement