Multi-Tenancy AI

Multi-tenant AI architectures serve multiple customers from shared infrastructure while maintaining data isolation, per-tenant configuration, and per-tenant billing.

Tenant Configuration Manager

from dataclasses import dataclass, field
from typing import Dict, Optional
import json
from pathlib import Path

@dataclass
class TenantConfig:
    """Settings record for a single tenant.

    TenantManager serializes one instance per tenant to a JSON file and
    rebuilds it with ``TenantConfig(**data)``, so the field names double as
    the JSON keys.
    """

    # Unique tenant key; TenantManager also uses it as the JSON file stem.
    tenant_id: str
    # Display name; the default config sets it to "Tenant <tenant_id>".
    name: str
    # LLM settings; the default config fills provider/model/temperature/max_tokens.
    model_config: Dict = field(default_factory=dict)
    # Retrieval settings; the default config fills top_k/similarity_threshold/chunk_size.
    rag_config: Dict = field(default_factory=dict)
    # Quotas; the default config fills requests_per_minute/tokens_per_day/max_documents.
    limits: Dict = field(default_factory=dict)
    # Billing plan label.
    billing_plan: str = "standard"
    # Free-form extras; not populated by the default config.
    custom_settings: Dict = field(default_factory=dict)

class TenantManager:
    """Load, cache and persist per-tenant configuration as JSON files.

    Each tenant lives in ``<config_path>/<tenant_id>.json``. A config is cached
    in memory after its first read, so later edits to the file made outside
    this object are not seen until the cache entry is replaced.
    """

    def __init__(self, config_path: str = "tenants"):
        """Create the manager and make sure the config directory exists.

        Args:
            config_path: Directory holding one ``<tenant_id>.json`` per tenant.
        """
        self.config_path = Path(config_path)
        # parents=True so a nested path such as "data/tenants" works on first run.
        self.config_path.mkdir(parents=True, exist_ok=True)
        self.cache = {}

    def _config_file(self, tenant_id: str) -> Path:
        """Return the JSON path for *tenant_id*, rejecting unsafe ids.

        Raises:
            ValueError: If the id is empty or contains a path separator or a
                NUL byte, any of which could point outside ``config_path``.
        """
        text = str(tenant_id)
        if not text or any(ch in text for ch in ("/", "\\", "\x00")):
            raise ValueError(f"Invalid tenant_id: {tenant_id!r}")
        return self.config_path / f"{tenant_id}.json"

    def get_config(self, tenant_id: str) -> "TenantConfig":
        """Return the config for *tenant_id*, creating a default if none exists.

        Lookup order: in-memory cache, then the tenant's JSON file, then a
        freshly built default config (which is written to disk immediately).

        Raises:
            ValueError: If *tenant_id* is not safe to use as a file name.
        """
        if tenant_id in self.cache:
            return self.cache[tenant_id]

        config_file = self._config_file(tenant_id)
        if config_file.exists():
            with open(config_file, encoding="utf-8") as f:
                data = json.load(f)
            config = TenantConfig(**data)
        else:
            config = self._default_config(tenant_id)
            self.save_config(config)

        self.cache[tenant_id] = config
        return config

    def _default_config(self, tenant_id: str) -> "TenantConfig":
        """Build the starter config given to a tenant seen for the first time."""
        return TenantConfig(
            tenant_id=tenant_id,
            name=f"Tenant {tenant_id}",
            model_config={
                "provider": "openai",
                "model": "gpt-4",
                "temperature": 0.7,
                "max_tokens": 2000
            },
            rag_config={
                "top_k": 5,
                "similarity_threshold": 0.7,
                "chunk_size": 512
            },
            limits={
                "requests_per_minute": 60,
                "tokens_per_day": 100000,
                "max_documents": 1000
            },
            billing_plan="standard"
        )

    def save_config(self, config: "TenantConfig"):
        """Write *config* to its tenant's JSON file, replacing any previous file.

        Raises:
            ValueError: If ``config.tenant_id`` is not safe to use as a file name.
        """
        from dataclasses import asdict

        config_file = self._config_file(config.tenant_id)
        with open(config_file, "w", encoding="utf-8") as f:
            # asdict() emits only declared fields, so the file always
            # round-trips through TenantConfig(**data).
            json.dump(asdict(config), f, indent=2)

    def update_config(self, tenant_id: str, updates: Dict):
        """Apply *updates* to a tenant's config, then persist and re-cache it.

        Keys that are not declared config fields are ignored.

        Raises:
            ValueError: If *updates* tries to change ``tenant_id``; doing so
                would leave the cache key and the file name out of sync.
        """
        from dataclasses import fields

        config = self.get_config(tenant_id)
        # Validate before mutating so a rejected update leaves the config untouched.
        if "tenant_id" in updates and updates["tenant_id"] != config.tenant_id:
            raise ValueError("tenant_id cannot be changed through update_config")

        field_names = {f.name for f in fields(config)}
        for key, value in updates.items():
            if key in field_names:
                setattr(config, key, value)
        self.save_config(config)
        self.cache[tenant_id] = config

Data Isolation Layer

from typing import Any
import hashlib

class TenantDataIsolator:
    """Give each tenant its own SQLite database and its own FAISS vector store."""

    def __init__(self, base_db_url: str):
        """Store the base URL and start with no open connections.

        Args:
            base_db_url: Kept on the instance; no method in this class reads it.
        """
        self.base_db_url = base_db_url
        self.connections = {}

    def _get_tenant_db(self, tenant_id: str):
        """Return the tenant's cached connection, opening it on first use."""
        if tenant_id not in self.connections:
            # NOTE(review): only 8 hex chars (32 bits) of the MD5 digest are
            # kept, so two tenant ids can map to the same database file. Left
            # unchanged because renaming would orphan existing databases.
            db_name = f"tenant_{hashlib.md5(tenant_id.encode()).hexdigest()[:8]}"
            self.connections[tenant_id] = self._create_connection(db_name)
        return self.connections[tenant_id]

    def _create_connection(self, db_name: str):
        """Open ``<db_name>.db`` (relative to the working directory) with sqlite3."""
        import sqlite3
        return sqlite3.connect(f"{db_name}.db")

    @staticmethod
    def _quote_identifier(name: str) -> str:
        """Return *name* as a double-quoted SQL identifier.

        Raises:
            ValueError: If the name is empty or contains a NUL byte.
        """
        text = str(name)
        if not text or "\x00" in text:
            raise ValueError(f"Invalid SQL identifier: {name!r}")
        # An embedded double quote is escaped by doubling it.
        return '"' + text.replace('"', '""') + '"'

    @staticmethod
    def _vector_store_path(tenant_id: str) -> str:
        """Return the tenant's vector store directory, rejecting unsafe ids.

        Raises:
            ValueError: If the id is empty, is ``.``/``..``, or contains a path
                separator or NUL byte; such ids could reach another tenant's
                directory.
        """
        text = str(tenant_id)
        if (
            not text
            or text in {".", ".."}
            or any(ch in text for ch in ("/", "\\", "\x00"))
        ):
            raise ValueError(f"Invalid tenant_id for vector store path: {tenant_id!r}")
        return f"vector_stores/{tenant_id}"

    def query(self, tenant_id: str, sql: str, params: tuple = ()) -> list:
        """Run *sql* with bound *params* on the tenant's database.

        Returns:
            All rows fetched from the resulting cursor. Nothing is committed
            here; callers that modify data must commit themselves.
        """
        conn = self._get_tenant_db(tenant_id)
        cursor = conn.execute(sql, params)
        return cursor.fetchall()

    def insert(self, tenant_id: str, table: str, data: dict):
        """Insert one row into *table* and commit.

        Args:
            tenant_id: Tenant whose database receives the row.
            table: Table name, treated as a single (unqualified) identifier.
            data: Mapping of column name to value. An empty mapping inserts a
                row made entirely of column defaults.

        Raises:
            ValueError: If the table or a column name is empty or contains NUL.
        """
        table_sql = self._quote_identifier(table)
        if data:
            # Identifiers cannot be bound as parameters, so they are quoted;
            # the values still go through "?" placeholders.
            columns = ", ".join(self._quote_identifier(col) for col in data)
            placeholders = ", ".join("?" for _ in data)
            sql = f"INSERT INTO {table_sql} ({columns}) VALUES ({placeholders})"
        else:
            # "INSERT INTO t () VALUES ()" is not valid SQL.
            sql = f"INSERT INTO {table_sql} DEFAULT VALUES"
        self.query(tenant_id, sql, tuple(data.values()))
        self._get_tenant_db(tenant_id).commit()

    def get_vector_store(self, tenant_id: str):
        """Load the tenant's FAISS store, falling back to a new empty one.

        Raises:
            ValueError: If *tenant_id* is not safe to use in a path.
        """
        store_path = self._vector_store_path(tenant_id)

        from langchain_community.vectorstores import FAISS
        from langchain_openai import OpenAIEmbeddings

        embeddings = OpenAIEmbeddings()

        try:
            return FAISS.load_local(store_path, embeddings)
        except Exception:
            # Best-effort fallback kept from the original design.
            # NOTE(review): building a FAISS store from an empty document list
            # may itself fail in some langchain versions - confirm.
            return FAISS.from_documents([], embeddings)

    def add_documents(self, tenant_id: str, documents: list):
        """Add *documents* to the tenant's FAISS store and save it to disk.

        A new store is created only when none exists on disk yet. If a store
        exists but cannot be loaded, or adding to it fails, the error is
        propagated instead of overwriting the tenant's existing data.

        Raises:
            ValueError: If *tenant_id* is not safe to use in a path.
        """
        store_path = self._vector_store_path(tenant_id)

        from pathlib import Path

        from langchain_community.vectorstores import FAISS
        from langchain_openai import OpenAIEmbeddings

        embeddings = OpenAIEmbeddings()

        try:
            vector_store = FAISS.load_local(store_path, embeddings)
        except Exception:
            if Path(store_path).exists():
                # Something is on disk but unreadable: do not clobber it.
                raise
            vector_store = FAISS.from_documents(documents, embeddings)
        else:
            vector_store.add_documents(documents)

        vector_store.save_local(store_path)

Per-Tenant Rate Limiter

import time
from collections import defaultdict
from dataclasses import dataclass
from typing import Dict

@dataclass
class RateLimit:
    """Quota values that TenantRateLimiter enforces for one tenant."""

    # Maximum requests allowed inside the limiter's 60-second window.
    requests_per_minute: int
    # Maximum tokens allowed until the limiter's reset_daily() is called.
    tokens_per_day: int
    # NOTE(review): not read by TenantRateLimiter in this file - confirm intended use.
    burst_limit: int = 10

class TenantRateLimiter:
    """In-memory per-tenant limiter.

    Requests are limited over a sliding window of ``WINDOW_SECONDS``; tokens
    are limited by a running counter that only ``reset_daily`` clears.
    Checking and recording are separate calls: ``check_rate_limit`` never
    consumes quota, ``record_request`` does.
    """

    # Length of the sliding request window, in seconds.
    WINDOW_SECONDS = 60

    def __init__(self):
        """Start with no recorded usage and no tenant-specific limits."""
        self.request_counts: Dict[str, list] = defaultdict(list)
        self.token_counts: Dict[str, int] = defaultdict(int)
        self.limits: Dict[str, "RateLimit"] = {}

    def set_limits(self, tenant_id: str, limits: "RateLimit"):
        """Set (or replace) the limits enforced for *tenant_id*."""
        self.limits[tenant_id] = limits

    def check_rate_limit(self, tenant_id: str, tokens: int = 0) -> dict:
        """Report whether the tenant may make a request costing *tokens*.

        Tenants without explicit limits get 60 requests per minute and
        100000 tokens per day.

        Returns:
            A dict with ``allowed`` (bool), ``requests_remaining`` and
            ``tokens_remaining`` (never negative), and ``retry_after``:
            0 when the request is allowed, otherwise the seconds until the
            oldest request in the window expires (0 if the window is empty).
        """
        limits = self.limits.get(tenant_id)
        if limits is None:
            # Built only when needed rather than on every call.
            limits = RateLimit(60, 100000)
        now = time.time()
        window = self.WINDOW_SECONDS

        # Drop timestamps that have aged out of the window.
        recent = [t for t in self.request_counts[tenant_id] if now - t < window]
        self.request_counts[tenant_id] = recent
        tokens_used = self.token_counts[tenant_id]

        requests_ok = len(recent) < limits.requests_per_minute
        tokens_ok = tokens_used + tokens <= limits.tokens_per_day
        allowed = requests_ok and tokens_ok

        if allowed or not recent:
            retry_after = 0
        else:
            # The daily token reset time is unknown here, so the request
            # window is the only wait that can be estimated.
            retry_after = max(0, window - (now - recent[0]))

        return {
            "allowed": allowed,
            "requests_remaining": max(0, limits.requests_per_minute - len(recent)),
            "tokens_remaining": max(0, limits.tokens_per_day - tokens_used),
            "retry_after": retry_after
        }

    def record_request(self, tenant_id: str, tokens_used: int):
        """Record one request made now and add *tokens_used* to the tenant's total."""
        self.request_counts[tenant_id].append(time.time())
        self.token_counts[tenant_id] += tokens_used

    def reset_daily(self):
        """Clear every tenant's token total; request timestamps are kept."""
        self.token_counts.clear()

Tenant-Aware LLM Router

class TenantAwareLLMRouter:
    """Send each tenant's chat request to the model named in its config,
    subject to that tenant's rate limits."""

    # model_config keys that choose the client; they are not generation
    # parameters and must not be forwarded to the client's invoke().
    _ROUTING_KEYS = ("provider", "model")

    def __init__(self, tenant_manager, rate_limiter, data_isolator):
        """Keep the collaborators and start with an empty client cache.

        Args:
            tenant_manager: Provides ``get_config(tenant_id)``.
            rate_limiter: Provides ``check_rate_limit`` and ``record_request``.
            data_isolator: Stored on the instance; not used by the methods here.
        """
        self.tenant_manager = tenant_manager
        self.rate_limiter = rate_limiter
        self.data_isolator = data_isolator
        self.model_clients = {}

    def _get_client(self, tenant_id: str):
        """Return the chat client for the tenant's provider and model.

        Clients are cached under ``"<provider>:<model>"`` and therefore shared
        by every tenant configured with the same pair.

        Raises:
            ValueError: If the configured provider is neither ``openai`` nor
                ``anthropic``.
        """
        config = self.tenant_manager.get_config(tenant_id)
        provider = config.model_config.get("provider", "openai")
        model = config.model_config.get("model", "gpt-4")

        key = f"{provider}:{model}"
        if key not in self.model_clients:
            if provider == "openai":
                from langchain_openai import ChatOpenAI
                self.model_clients[key] = ChatOpenAI(model=model)
            elif provider == "anthropic":
                from langchain_anthropic import ChatAnthropic
                self.model_clients[key] = ChatAnthropic(model=model)
            else:
                # Fail with a clear message instead of a KeyError on the cache key.
                raise ValueError(
                    f"Unsupported model provider {provider!r} for tenant {tenant_id!r}"
                )

        return self.model_clients[key]

    def route_request(self, tenant_id: str, messages: list, **kwargs) -> dict:
        """Run *messages* through the tenant's model if the rate limit allows.

        Args:
            tenant_id: Tenant making the request.
            messages: Passed unchanged to the client's ``invoke``.
            **kwargs: Per-call overrides, layered on top of the tenant's
                model settings.

        Returns:
            ``{"error", "retry_after"}`` when the tenant is rate limited,
            otherwise ``{"response", "model", "tokens_used"}``.

        Raises:
            ValueError: If the tenant's configured provider is unsupported.
        """
        config = self.tenant_manager.get_config(tenant_id)

        rate_check = self.rate_limiter.check_rate_limit(tenant_id)
        if not rate_check["allowed"]:
            return {"error": "Rate limit exceeded", "retry_after": rate_check["retry_after"]}

        client = self._get_client(tenant_id)
        # The client is already bound to provider and model; forward only the
        # remaining settings (for example temperature and max_tokens).
        invoke_params = {
            name: value
            for name, value in config.model_config.items()
            if name not in self._ROUTING_KEYS
        }
        invoke_params.update(kwargs)

        response = client.invoke(messages, **invoke_params)
        # Rough estimate from the response text only (about 1.3 tokens per
        # whitespace-separated word); prompt tokens are not counted.
        tokens_used = int(len(response.content.split()) * 1.3)
        self.rate_limiter.record_request(tenant_id, tokens_used)

        return {
            "response": response.content,
            # Same default as _get_client, so a config without "model" works.
            "model": config.model_config.get("model", "gpt-4"),
            "tokens_used": tokens_used
        }

Key Takeaways

Tenant isolation ensures data privacy and security
Per-tenant configs enable customization without code changes
Rate limiting prevents abuse and ensures fair usage
Model routing selects each tenant's provider and model from its configuration, so cost can be tuned per tenant
Centralized monitoring provides visibility across all tenants

Multi-Tenancy AI

Multi-Tenancy AI

Tenant Configuration Manager

Data Isolation Layer

Per-Tenant Rate Limiter

Tenant-Aware LLM Router

Key Takeaways

Premium Content

Need Expert Generative AI Help?