Multi-Tenancy AI
Multi-tenant AI architectures serve multiple customers from shared infrastructure while maintaining data isolation, custom configurations, and per-tenant billing.
Tenant Configuration Manager
from dataclasses import dataclass, field
from typing import Dict, Optional
import json
from pathlib import Path
@dataclass
class TenantConfig:
    """Settings record for a single tenant.

    Only ``tenant_id`` and ``name`` are required. Every dictionary field
    defaults to its own fresh empty dict, and ``billing_plan`` defaults to
    ``"standard"``.
    """

    # Identity (required, positional).
    tenant_id: str
    name: str

    # Option groups; default_factory gives each instance a separate dict so
    # instances never share mutable state.
    model_config: Dict = field(default_factory=dict)
    rag_config: Dict = field(default_factory=dict)
    limits: Dict = field(default_factory=dict)

    # Plan name used when none is given.
    billing_plan: str = "standard"

    # Anything tenant-specific that does not fit the groups above.
    custom_settings: Dict = field(default_factory=dict)
class TenantManager:
    """Load, cache, and persist one JSON configuration file per tenant.

    Each tenant's settings are stored in ``<config_path>/<tenant_id>.json``.
    A config is cached in memory after its first load, so later calls for
    the same tenant return the same object without touching the disk.
    """

    def __init__(self, config_path: str = "tenants"):
        """Create the manager and make sure the config directory exists.

        Args:
            config_path: Directory that holds the per-tenant JSON files.
        """
        self.config_path = Path(config_path)
        # parents=True lets a nested path such as "data/tenants" work on the
        # first run instead of failing because "data" does not exist yet.
        self.config_path.mkdir(parents=True, exist_ok=True)
        self.cache = {}

    def _config_file(self, tenant_id: str) -> Path:
        """Return the JSON path for *tenant_id*, rejecting unsafe IDs.

        The ID becomes part of a file name, so an empty ID or one containing
        a path separator or NUL byte is refused; otherwise a value such as
        ``"../other"`` would read or write outside the config directory.

        Raises:
            ValueError: If the ID is empty or contains ``/``, ``\\`` or NUL.
        """
        tenant_key = str(tenant_id)
        if not tenant_key or any(ch in tenant_key for ch in ("/", "\\", "\0")):
            raise ValueError(f"Invalid tenant_id: {tenant_id!r}")
        return self.config_path / f"{tenant_key}.json"

    def get_config(self, tenant_id: str) -> "TenantConfig":
        """Return the config for *tenant_id*, creating a default if needed.

        Lookup order: in-memory cache, then the tenant's JSON file, then a
        freshly built default config (which is written to disk immediately).

        Args:
            tenant_id: Tenant identifier; also used as the file stem.

        Returns:
            The cached, loaded, or newly created ``TenantConfig``.

        Raises:
            ValueError: If *tenant_id* is not safe to use as a file name.
        """
        if tenant_id in self.cache:
            return self.cache[tenant_id]

        config_file = self._config_file(tenant_id)
        if config_file.exists():
            with open(config_file, encoding="utf-8") as f:
                data = json.load(f)
            config = TenantConfig(**data)
        else:
            config = self._default_config(tenant_id)
            self.save_config(config)

        self.cache[tenant_id] = config
        return config

    def _default_config(self, tenant_id: str) -> "TenantConfig":
        """Build the baseline config given to a tenant with no saved file."""
        return TenantConfig(
            tenant_id=tenant_id,
            name=f"Tenant {tenant_id}",
            model_config={
                "provider": "openai",
                "model": "gpt-4",
                "temperature": 0.7,
                "max_tokens": 2000,
            },
            rag_config={
                "top_k": 5,
                "similarity_threshold": 0.7,
                "chunk_size": 512,
            },
            limits={
                "requests_per_minute": 60,
                "tokens_per_day": 100000,
                "max_documents": 1000,
            },
            billing_plan="standard",
        )

    def save_config(self, config: "TenantConfig"):
        """Write *config* to its tenant's JSON file.

        Args:
            config: The dataclass instance to persist.

        Raises:
            ValueError: If ``config.tenant_id`` is not a safe file name.
            TypeError: If a field value is not JSON-serializable.
        """
        from dataclasses import asdict

        config_file = self._config_file(config.tenant_id)
        # Serialize before opening the file: if a value cannot be encoded,
        # the existing file is left intact instead of being truncated.
        # asdict() emits only declared fields, so stray instance attributes
        # can never end up in a file that later fails to load.
        payload = json.dumps(asdict(config), indent=2)
        with open(config_file, "w", encoding="utf-8") as f:
            f.write(payload)

    def update_config(self, tenant_id: str, updates: Dict):
        """Apply *updates* to a tenant's config and persist the result.

        Only declared config fields are changed; unknown keys are ignored.
        ``tenant_id`` is also ignored, because changing it would detach the
        config from its file name and cache key.

        Args:
            tenant_id: Tenant whose config is modified.
            updates: Mapping of field name to new value.
        """
        from dataclasses import fields

        config = self.get_config(tenant_id)
        # hasattr() would also match dunder attributes such as __class__;
        # restrict updates to the dataclass's own fields instead.
        updatable = {f.name for f in fields(config)} - {"tenant_id"}
        for key, value in updates.items():
            if key in updatable:
                setattr(config, key, value)
        self.save_config(config)
        self.cache[tenant_id] = config
Data Isolation Layer
from typing import Any
import hashlib
class TenantDataIsolator:
    """Give each tenant its own SQLite database file and FAISS vector store.

    Database connections are opened lazily and kept in ``self.connections``
    keyed by tenant ID. Vector stores live under ``vector_stores/<tenant_id>``.
    """

    def __init__(self, base_db_url: str):
        """Store the base URL and start with no open connections.

        Args:
            base_db_url: Kept on the instance; the methods of this class do
                not read it.
        """
        self.base_db_url = base_db_url
        self.connections = {}

    def _get_tenant_db(self, tenant_id: str):
        """Return the tenant's connection, opening it on first use."""
        if tenant_id not in self.connections:
            # NOTE(review): only 8 hex digits (32 bits) of the digest are
            # kept, so two different tenant IDs can map to the same database
            # file. Left as is because changing the naming would orphan
            # existing databases; consider a longer digest plus a migration.
            db_name = f"tenant_{hashlib.md5(tenant_id.encode()).hexdigest()[:8]}"
            self.connections[tenant_id] = self._create_connection(db_name)
        return self.connections[tenant_id]

    def _create_connection(self, db_name: str):
        """Open the SQLite file ``<db_name>.db`` in the working directory."""
        import sqlite3

        return sqlite3.connect(f"{db_name}.db")

    @staticmethod
    def _quote_identifier(name: str) -> str:
        """Validate one SQL identifier and return it double-quoted.

        Identifiers cannot be bound as ``?`` parameters, so they are limited
        to ASCII letters, digits and underscores (not starting with a digit)
        before being placed into the statement text.

        Raises:
            ValueError: If *name* is not a plain identifier.
        """
        if not (isinstance(name, str) and name.isascii() and name.isidentifier()):
            raise ValueError(f"Invalid SQL identifier: {name!r}")
        return f'"{name}"'

    @staticmethod
    def _vector_store_path(tenant_id: str) -> str:
        """Return the vector-store directory for *tenant_id*.

        Raises:
            ValueError: If the ID is empty, is ``.``/``..``, or contains a
                path separator or NUL byte, any of which would place the
                store outside the tenant's own directory.
        """
        tenant_key = str(tenant_id)
        if (
            not tenant_key
            or tenant_key in (".", "..")
            or any(ch in tenant_key for ch in ("/", "\\", "\0"))
        ):
            raise ValueError(f"Invalid tenant_id: {tenant_id!r}")
        return f"vector_stores/{tenant_key}"

    def query(self, tenant_id: str, sql: str, params: tuple = ()) -> list:
        """Run *sql* on the tenant's database and return all rows.

        Args:
            tenant_id: Tenant whose database is queried.
            sql: Statement text; values should be passed via *params*.
            params: Values bound to the ``?`` placeholders in *sql*.

        Returns:
            The list produced by ``cursor.fetchall()``.
        """
        conn = self._get_tenant_db(tenant_id)
        cursor = conn.execute(sql, params)
        return cursor.fetchall()

    def insert(self, tenant_id: str, table: str, data: dict):
        """Insert one row into *table* in the tenant's database and commit.

        Args:
            tenant_id: Tenant whose database receives the row.
            table: Table name, optionally schema-qualified (``main.docs``).
            data: Mapping of column name to value; must not be empty.

        Raises:
            ValueError: If *data* is empty or a table/column name is not a
                plain identifier.
        """
        if not data:
            raise ValueError("insert() requires at least one column value")

        table_sql = ".".join(
            self._quote_identifier(part) for part in str(table).split(".")
        )
        columns = ", ".join(self._quote_identifier(col) for col in data)
        placeholders = ", ".join("?" for _ in data)
        sql = f"INSERT INTO {table_sql} ({columns}) VALUES ({placeholders})"

        conn = self._get_tenant_db(tenant_id)
        # The connection context manager commits on success and rolls back
        # if the statement raises.
        with conn:
            conn.execute(sql, tuple(data.values()))

    def get_vector_store(self, tenant_id: str):
        """Load the tenant's FAISS store, or build an empty one.

        Raises:
            ValueError: If *tenant_id* is not safe to use in a path.
        """
        # Validate before the third-party imports so a bad ID fails fast.
        store_path = self._vector_store_path(tenant_id)

        from langchain_community.vectorstores import FAISS
        from langchain_openai import OpenAIEmbeddings

        embeddings = OpenAIEmbeddings()
        try:
            return FAISS.load_local(store_path, embeddings)
        except Exception:
            # Best-effort fallback when no store can be loaded. Narrowed from
            # a bare ``except`` so KeyboardInterrupt/SystemExit propagate.
            # NOTE(review): confirm the installed langchain version accepts
            # an empty document list here.
            return FAISS.from_documents([], embeddings)

    def add_documents(self, tenant_id: str, documents: list):
        """Add *documents* to the tenant's FAISS store and save it.

        Raises:
            ValueError: If *tenant_id* is not safe to use in a path.
        """
        store_path = self._vector_store_path(tenant_id)

        from langchain_community.vectorstores import FAISS
        from langchain_openai import OpenAIEmbeddings

        embeddings = OpenAIEmbeddings()
        try:
            vector_store = FAISS.load_local(store_path, embeddings)
        except Exception:
            # No loadable store: start a new one from these documents.
            # NOTE(review): a store that exists but fails to load is replaced
            # on save below; consider raising when the path already exists.
            vector_store = FAISS.from_documents(documents, embeddings)
        else:
            # Kept outside the ``try`` so a failure while adding documents
            # is raised instead of silently rebuilding the store from only
            # the new documents and overwriting the saved index.
            vector_store.add_documents(documents)
        vector_store.save_local(store_path)
Per-Tenant Rate Limiter
import time
from collections import defaultdict
from dataclasses import dataclass
from typing import Dict
@dataclass
class RateLimit:
    """Quota values applied to one tenant."""

    # Maximum number of requests allowed per minute.
    requests_per_minute: int

    # Maximum number of tokens allowed per day.
    tokens_per_day: int

    # Defaults to 10. NOTE(review): nothing visible in this file reads this
    # value; confirm where it is enforced.
    burst_limit: int = 10
class TenantRateLimiter:
    """In-memory per-tenant limiter for requests per minute and tokens per day.

    Request timestamps are kept per tenant and trimmed to the last 60
    seconds on every check. Token totals accumulate until ``reset_daily()``
    is called; this class does not reset them on its own.
    """

    _WINDOW_SECONDS = 60
    _DEFAULT_REQUESTS_PER_MINUTE = 60
    _DEFAULT_TOKENS_PER_DAY = 100000

    def __init__(self):
        """Start with no recorded usage and no tenant-specific limits."""
        self.request_counts: Dict[str, list] = defaultdict(list)
        self.token_counts: Dict[str, int] = defaultdict(int)
        self.limits: Dict[str, RateLimit] = {}

    def set_limits(self, tenant_id: str, limits: "RateLimit"):
        """Set the quota used for *tenant_id* from now on."""
        self.limits[tenant_id] = limits

    def check_rate_limit(self, tenant_id: str, tokens: int = 0) -> dict:
        """Report whether *tenant_id* may make a request right now.

        This only inspects usage; call ``record_request()`` afterwards to
        count the request.

        Args:
            tenant_id: Tenant being checked.
            tokens: Tokens the upcoming request is expected to consume.

        Returns:
            A dict with:
              * ``allowed`` - True when both quotas have room.
              * ``requests_remaining`` - requests left in the current
                window, never below 0.
              * ``tokens_remaining`` - tokens left for the day, never
                below 0.
              * ``retry_after`` - 0 when allowed; otherwise the seconds
                until the oldest request in the window expires (0 if the
                window is empty).
        """
        limits = self.limits.get(tenant_id)
        if limits is None:
            # Build the fallback only when the tenant has no explicit limits.
            limits = RateLimit(
                self._DEFAULT_REQUESTS_PER_MINUTE, self._DEFAULT_TOKENS_PER_DAY
            )

        now = time.time()
        window = [
            stamp
            for stamp in self.request_counts[tenant_id]
            if now - stamp < self._WINDOW_SECONDS
        ]
        self.request_counts[tenant_id] = window
        tokens_spent = self.token_counts[tenant_id]

        requests_ok = len(window) < limits.requests_per_minute
        tokens_ok = tokens_spent + tokens <= limits.tokens_per_day
        allowed = requests_ok and tokens_ok

        if allowed or not window:
            # An allowed request never needs to wait.
            retry_after = 0
        else:
            retry_after = max(0.0, self._WINDOW_SECONDS - (now - window[0]))

        return {
            "allowed": allowed,
            # Clamp at zero: usage recorded past the quota must not show up
            # as a negative remainder.
            "requests_remaining": max(0, limits.requests_per_minute - len(window)),
            "tokens_remaining": max(0, limits.tokens_per_day - tokens_spent),
            "retry_after": retry_after,
        }

    def record_request(self, tenant_id: str, tokens_used: int):
        """Count one request made now and add *tokens_used* to the total."""
        self.request_counts[tenant_id].append(time.time())
        self.token_counts[tenant_id] += tokens_used

    def reset_daily(self):
        """Clear every tenant's token total."""
        self.token_counts.clear()
Tenant-Aware LLM Router
class TenantAwareLLMRouter:
    """Send chat requests to the model configured for each tenant.

    Combines the tenant's stored config, the rate limiter, and a cache of
    model clients shared by every tenant using the same provider and model.
    """

    _DEFAULT_PROVIDER = "openai"
    _DEFAULT_MODEL = "gpt-4"

    def __init__(self, tenant_manager, rate_limiter, data_isolator):
        """Store the collaborators and start with an empty client cache.

        Args:
            tenant_manager: Object providing ``get_config(tenant_id)``.
            rate_limiter: Object providing ``check_rate_limit`` and
                ``record_request``.
            data_isolator: Kept on the instance; not used by the methods of
                this class.
        """
        self.tenant_manager = tenant_manager
        self.rate_limiter = rate_limiter
        self.data_isolator = data_isolator
        self.model_clients = {}

    def _get_client(self, tenant_id: str):
        """Return the chat client for the tenant's provider and model.

        Clients are created on first use and cached under
        ``"<provider>:<model>"``.

        Raises:
            ValueError: If the configured provider is not supported.
        """
        config = self.tenant_manager.get_config(tenant_id)
        provider = config.model_config.get("provider", self._DEFAULT_PROVIDER)
        model = config.model_config.get("model", self._DEFAULT_MODEL)
        key = f"{provider}:{model}"

        if key not in self.model_clients:
            if provider == "openai":
                from langchain_openai import ChatOpenAI

                self.model_clients[key] = ChatOpenAI(model=model)
            elif provider == "anthropic":
                from langchain_anthropic import ChatAnthropic

                self.model_clients[key] = ChatAnthropic(model=model)
            else:
                # Without this branch an unknown provider surfaced as a bare
                # KeyError on the cache key, hiding the real cause.
                raise ValueError(
                    f"Unsupported model provider {provider!r} for tenant {tenant_id!r}"
                )
        return self.model_clients[key]

    @staticmethod
    def _count_tokens(response) -> int:
        """Return the token usage to charge for *response*.

        Uses the ``total_tokens`` figure from the response's
        ``usage_metadata`` when present; otherwise estimates 1.3 tokens per
        whitespace-separated word of the response content.
        """
        usage = getattr(response, "usage_metadata", None)
        if isinstance(usage, dict) and usage.get("total_tokens") is not None:
            return int(usage["total_tokens"])

        content = response.content
        # Content may be a list of parts rather than a plain string.
        text = content if isinstance(content, str) else str(content)
        return int(len(text.split()) * 1.3)

    def route_request(self, tenant_id: str, messages: list, **kwargs) -> dict:
        """Run *messages* through the tenant's model, enforcing rate limits.

        Args:
            tenant_id: Tenant making the request.
            messages: Messages passed to the client's ``invoke``.
            **kwargs: Per-call overrides merged over the tenant's
                ``model_config``.

        Returns:
            ``{"error", "retry_after"}`` when the tenant is rate limited,
            otherwise ``{"response", "model", "tokens_used"}``.

        Raises:
            ValueError: If the tenant's provider is not supported.
        """
        config = self.tenant_manager.get_config(tenant_id)

        rate_check = self.rate_limiter.check_rate_limit(tenant_id)
        if not rate_check["allowed"]:
            return {
                "error": "Rate limit exceeded",
                "retry_after": rate_check["retry_after"],
            }

        client = self._get_client(tenant_id)

        call_params = {**config.model_config, **kwargs}
        # "provider" only selects the client class; it is not a parameter of
        # the model call and must not be forwarded to invoke().
        call_params.pop("provider", None)

        response = client.invoke(messages, **call_params)

        tokens_used = self._count_tokens(response)
        self.rate_limiter.record_request(tenant_id, tokens_used)

        return {
            "response": response.content,
            # Report the model actually requested, with the same default
            # that _get_client() applies when the config names none.
            "model": call_params.get("model", self._DEFAULT_MODEL),
            "tokens_used": tokens_used,
        }
Key Takeaways
- Tenant isolation ensures data privacy and security
- Per-tenant configs enable customization without code changes
- Rate limiting prevents abuse and ensures fair usage
- Model routing selects the provider and model from each tenant's configuration, so cost can be tuned per tenant or per plan
- Centralized monitoring provides visibility across all tenants