Prompt Management
Prompt management systems provide version control, testing, and deployment pipelines for LLM prompts, treating them as first-class citizens in your development workflow.
Prompt Registry
import json
import hashlib
from datetime import datetime
from dataclasses import dataclass, asdict
from typing import Dict, List, Optional
@dataclass
class PromptVersion:
    """A single registered version of a named prompt template.

    Instances are plain records: ``PromptRegistry`` stores them as dictionaries
    (via ``dataclasses.asdict``) and rebuilds them with ``PromptVersion(**record)``.
    """

    name: str  # registry key this version belongs to
    version: str  # version label, e.g. "v1"
    template: str  # prompt text with {placeholder} fields
    variables: List[str]  # placeholder names the template expects
    model: str  # identifier of the target model, e.g. "gpt-4"
    created_at: str  # creation timestamp as an ISO-8601 string
    # Both payloads are optional and default to None, so the annotations say so.
    metrics: Optional[Dict] = None
    metadata: Optional[Dict] = None
class PromptRegistry:
    """JSON-file-backed store of versioned prompt templates.

    The registry is held in memory as
    ``{name: {"versions": [version_record, ...]}}`` and rewritten to
    ``storage_path`` after every successful ``register`` call.
    """

    def __init__(self, storage_path: str = "prompts.json"):
        """Load the registry from ``storage_path`` (empty if the file is absent)."""
        self.storage_path = storage_path
        self.prompts = self._load()

    def _load(self) -> dict:
        """Return the stored registry, or an empty dict if no file exists yet."""
        try:
            # Read as UTF-8 explicitly instead of relying on the locale default.
            with open(self.storage_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            return {}

    def _save(self) -> None:
        """Write the in-memory registry to ``storage_path``.

        The data is serialized before the file is opened for writing, so a
        value that cannot be encoded as JSON raises ``TypeError`` without
        truncating the registry that is already on disk.
        """
        payload = json.dumps(self.prompts, indent=2)
        with open(self.storage_path, "w", encoding="utf-8") as f:
            f.write(payload)

    def register(self, name: str, template: str, variables: list,
                 model: str, metadata: Optional[dict] = None) -> "PromptVersion":
        """Append a new version of prompt ``name`` and persist the registry.

        Versions are labelled ``v1``, ``v2``, ... in registration order.

        Args:
            name: Registry key of the prompt.
            template: Prompt text with ``{placeholder}`` fields.
            variables: Placeholder names the template expects.
            model: Identifier of the target model.
            metadata: Optional extra data; stored as ``{}`` when omitted.

        Returns:
            The newly created ``PromptVersion``.

        Raises:
            Whatever ``_save`` raises; in that case the new version is removed
            from memory again so memory and disk stay in agreement.
        """
        is_new_prompt = name not in self.prompts
        if is_new_prompt:
            self.prompts[name] = {"versions": []}
        versions = self.prompts[name]["versions"]

        version = PromptVersion(
            name=name,
            version=f"v{len(versions) + 1}",
            template=template,
            variables=variables,
            model=model,
            created_at=datetime.now().isoformat(),
            metadata=metadata or {},
        )
        versions.append(asdict(version))
        try:
            self._save()
        except Exception:
            # Undo the in-memory change so a failed save leaves no phantom version.
            versions.pop()
            if is_new_prompt:
                del self.prompts[name]
            raise
        return version

    def get(self, name: str, version: str = "latest") -> "PromptVersion":
        """Return one stored version of prompt ``name``.

        Args:
            name: Registry key of the prompt.
            version: A label such as ``"v2"``, or ``"latest"`` for the most
                recently registered version.

        Raises:
            ValueError: If the prompt has no versions or the label is unknown.
        """
        versions = self.prompts.get(name, {}).get("versions", [])
        if not versions:
            raise ValueError(f"Prompt '{name}' not found")
        if version == "latest":
            return PromptVersion(**versions[-1])
        for record in versions:
            if record["version"] == version:
                return PromptVersion(**record)
        raise ValueError(f"Version '{version}' not found")

    def list_versions(self, name: str) -> List[str]:
        """Return the version labels of ``name`` in registration order (empty if unknown)."""
        return [v["version"] for v in self.prompts.get(name, {}).get("versions", [])]
# Usage: register the first version of a "summarizer" prompt.
# With the default storage path this writes prompts.json in the working directory.
registry = PromptRegistry()
registry.register(
    "summarizer",
    "Summarize this text in {style} style:\n{text}",
    ["text", "style"],
    "gpt-4",
)
Git-Style Prompt Versioning
import hashlib
from datetime import datetime
class GitPromptManager:
    """In-memory, git-like commit history for prompt templates.

    ``prompts`` maps each prompt name to its current content hash and its
    ordered list of committed versions; ``history`` records every commit
    across all names in the order they were made.
    """

    def __init__(self):
        self.prompts = {}
        self.history = []

    def commit(self, name: str, template: str, message: str, author: str = "system"):
        """Record ``template`` as the newest version of prompt ``name``.

        The version's hash is the first 8 hex characters of the template's
        SHA-256 digest, and its parent is the hash that was current before
        this commit (``None`` for the first commit of a name).

        Returns:
            The 8-character content hash of the committed template.
        """
        digest = hashlib.sha256(template.encode()).hexdigest()
        short_hash = digest[:8]
        previous = self.prompts.get(name, {})

        record = {
            "name": name,
            "template": template,
            "hash": short_hash,
            "message": message,
            "author": author,
            "timestamp": datetime.now().isoformat(),
            "parent": previous.get("current_hash"),
        }

        # Build a fresh list rather than extending the old one in place.
        earlier = previous.get("versions", [])
        self.prompts[name] = {
            "current_hash": short_hash,
            "versions": earlier + [record],
        }
        self.history.append(record)
        return short_hash

    def diff(self, name: str, hash1: str, hash2: str) -> dict:
        """Return the templates of two versions of ``name`` as ``{"from": ..., "to": ...}``."""
        older = self._find_version(name, hash1)
        newer = self._find_version(name, hash2)
        return {"from": older["template"], "to": newer["template"]}

    def _find_version(self, name: str, hash: str) -> dict:
        """Return the first version of ``name`` whose hash equals ``hash``.

        Raises:
            KeyError: If ``name`` has never been committed.
            ValueError: If no version of ``name`` carries that hash.
        """
        candidates = (v for v in self.prompts[name]["versions"] if v["hash"] == hash)
        found = next(candidates, None)
        if found is None:
            raise ValueError(f"Version {hash} not found")
        return found
# Usage: two commits to the same prompt; the second records the first as its parent.
git_prompts = GitPromptManager()
git_prompts.commit(
    name="summarizer",
    template="Summarize: {text}",
    message="Initial prompt",
)
git_prompts.commit(
    name="summarizer",
    template="Provide a concise summary of: {text}",
    message="Improved clarity",
)
Prompt Testing Framework
from dataclasses import dataclass
from typing import Callable
@dataclass
class TestCase:
    """Inputs and expectations for one prompt test.

    Only ``input_vars`` is required. Every expectation left as ``None`` is
    skipped when ``PromptTester`` evaluates the case.
    """

    # The "Test" name prefix makes pytest try to collect this class; opt out.
    __test__ = False

    input_vars: dict  # values substituted into the template's placeholders
    expected_output: Optional[str] = None  # exact text the output must equal
    should_contain: Optional[list] = None  # substrings that must all appear
    should_not_contain: Optional[list] = None  # substrings that must all be absent
    max_length: Optional[int] = None  # upper bound on len(output)
    custom_check: Optional[Callable] = None  # called with the output; truthy means pass
class PromptTester:
    """Runs prompt templates against test cases using an LLM client.

    ``llm`` must expose ``invoke(prompt)`` returning an object with a
    ``content`` attribute holding the generated text. One record per
    evaluated case is appended to ``results``, which accumulates across calls.
    """

    def __init__(self, llm):
        self.llm = llm
        self.results = []

    def test_prompt(self, template: str, test_cases: list, prompt_name: str = "unnamed"):
        """Evaluate ``template`` against every case and return pass/fail counts.

        Args:
            template: ``str.format`` template filled from each case's ``input_vars``.
            test_cases: Objects with the ``TestCase`` attributes.
            prompt_name: Label stored in each result record.

        Returns:
            ``{"passed": int, "failed": int, "total": int}``. Each case is
            counted exactly once; a case that raises (missing template
            variable, LLM error, failing custom check call) counts as failed
            and its record carries the error text instead of the checks.
        """
        passed = 0
        failed = 0

        for i, test in enumerate(test_cases):
            try:
                formatted = template.format(**test.input_vars)
                output = self.llm.invoke(formatted).content
                checks = self._run_checks(test, output)
                test_passed = all(ok for _, ok in checks)
                record = {
                    "prompt": prompt_name,
                    "test_id": i,
                    "passed": test_passed,
                    "checks": checks,
                    "input": test.input_vars,
                    "output": output[:200],
                }
            except Exception as e:
                # Harness boundary: one broken case must not abort the whole run.
                test_passed = False
                record = {
                    "prompt": prompt_name,
                    "test_id": i,
                    "passed": False,
                    "error": str(e),
                }

            # Count only after the record is built, so a late failure cannot
            # be tallied as both a pass and a failure.
            if test_passed:
                passed += 1
            else:
                failed += 1
            self.results.append(record)

        return {"passed": passed, "failed": failed, "total": len(test_cases)}

    @staticmethod
    def _run_checks(test, output) -> list:
        """Return ``(check_name, passed)`` pairs for each expectation set on ``test``."""
        checks = []
        # Compare scalars against None so "" and 0 are honoured as real expectations.
        if test.expected_output is not None:
            checks.append(("exact_match", test.expected_output == output))
        if test.should_contain:
            checks.append(("contains", all(w in output for w in test.should_contain)))
        if test.should_not_contain:
            checks.append(("not_contains", all(w not in output for w in test.should_not_contain)))
        if test.max_length is not None:
            checks.append(("length", len(output) <= test.max_length))
        if test.custom_check is not None:
            checks.append(("custom", test.custom_check(output)))
        return checks

    def compare_prompts(self, templates: dict, test_cases: list) -> dict:
        """Run the same cases against several templates.

        Args:
            templates: Mapping of prompt name to template string.
            test_cases: Cases passed unchanged to ``test_prompt``.

        Returns:
            Mapping of prompt name to that template's ``test_prompt`` summary.
        """
        return {
            name: self.test_prompt(template, test_cases, name)
            for name, template in templates.items()
        }
# Usage
# `llm` is not defined in this snippet: supply any client whose
# invoke(prompt) returns an object with a `.content` string.
tester = PromptTester(llm)
test_cases = [
    TestCase(
        {"text": "AI is transforming industries..."},
        should_contain=["AI", "transforming"],
        max_length=200,
    ),
]
results = tester.test_prompt(template="Summarize: {text}", test_cases=test_cases)
Dynamic Prompt Templates
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate
class DynamicPromptManager:
    """Keeps few-shot examples per prompt name and builds chat prompts from them."""

    def __init__(self):
        self.templates = {}
        self.examples = {}

    def create_few_shot_prompt(self, name: str, system_msg: str,
                               example_selector: Callable = None):
        """Build a chat prompt: system message, stored examples, then the user input.

        Args:
            name: Prompt name whose stored examples are embedded (none if unknown).
            system_msg: Text of the leading system message.
            example_selector: Accepted but not used by this implementation.

        Returns:
            A ``ChatPromptTemplate`` expecting an ``input`` variable.
        """
        # Each stored example is rendered as one human/ai exchange.
        turn_template = ChatPromptTemplate.from_messages(
            [("human", "{input}"), ("ai", "{output}")]
        )
        stored_examples = self.examples.get(name, [])
        few_shot_block = FewShotChatMessagePromptTemplate(
            example_prompt=turn_template,
            examples=stored_examples,
            input_variables=["input"],
        )
        messages = [
            ("system", system_msg),
            few_shot_block,
            ("human", "{input}"),
        ]
        return ChatPromptTemplate.from_messages(messages)

    def add_examples(self, prompt_name: str, examples: list):
        """Append ``examples`` to those stored for ``prompt_name``.

        A new list is stored each time; the previously stored list is not mutated.
        """
        existing = self.examples.get(prompt_name, [])
        self.examples[prompt_name] = existing + examples

    def auto_optimize(self, name: str, inputs: list, outputs: list):
        """Pair ``inputs`` with ``outputs`` and store the first five pairs as examples."""
        pairs = []
        for given, produced in zip(inputs, outputs):
            pairs.append({"input": given, "output": produced})
        self.add_examples(name, pairs[:5])
Key Takeaways
- Prompt registries centralize prompt storage and access
- Version control enables rollback and comparison of prompts
- Testing frameworks ensure prompt quality before deployment
- Few-shot examples can be dynamically managed and optimized
- Monitoring tracks prompt performance in production