Agentic RAG

Agentic RAG systems go beyond simple retrieval by using autonomous agents that plan multi-step retrieval strategies, reason over gathered information, and iteratively refine their approach.

ReAct Agent Pattern

from langchain_openai import ChatOpenAI
from langchain.agents import AgentExecutor, create_react_agent
from langchain.tools import Tool
from langchain.prompts import PromptTemplate

class AgenticRAG:
    """ReAct agent that exposes each supplied retriever as a search tool.

    Args:
        retrievers: Mapping of tool name to retriever. Every retriever must
            provide ``get_relevant_documents(query)``.
        llm: Language model passed to ``create_react_agent``.
    """

    def __init__(self, retrievers: dict, llm):
        self.llm = llm
        self.tools = self._create_tools(retrievers)
        self.agent = self._create_agent()

    @staticmethod
    def _format_docs(docs) -> str:
        """Render retrieved documents as plain text for the agent.

        The tool result is shown to the model as the Observation, so it
        should be readable text rather than the repr of a list of objects.

        Args:
            docs: Iterable of retrieved items. ``page_content`` is used when
                an item has it; otherwise the item itself is stringified.

        Returns:
            The document texts separated by blank lines, or a short notice
            when nothing was retrieved.
        """
        texts = [str(getattr(doc, "page_content", doc)) for doc in docs]
        if not texts:
            return "No relevant documents found."
        return "\n\n".join(texts)

    def _create_tools(self, retrievers: dict) -> list:
        """Build one ``Tool`` per retriever, named after its mapping key."""

        def make_search(retriever):
            # A factory gives every tool its own retriever binding; a bare
            # closure in the loop would see only the last retriever.
            def search(query: str) -> str:
                top_docs = retriever.get_relevant_documents(query)[:3]
                return self._format_docs(top_docs)

            return search

        return [
            Tool(
                name=name,
                func=make_search(retriever),
                description=f"Search {name} for relevant information",
            )
            for name, retriever in retrievers.items()
        ]

    def _create_agent(self):
        """Create the ReAct agent and wrap it in a 5-iteration executor."""
        prompt = PromptTemplate.from_template(
            """You are a helpful assistant that uses multiple sources.

            You have access to these tools:
            {tools}

            Tool names: {tool_names}

            Use this format:
            Question: the input question
            Thought: think about what to do
            Action: the action to take, should be one of [{tool_names}]
            Action Input: the input to the action
            Observation: the result of the action
            ... (this Thought/Action/Action Input/Observation can repeat N times)
            Thought: I now know the final answer
            Final Answer: the final answer

            Begin!

            Question: {input}
            Thought:{agent_scratchpad}"""
        )
        agent = create_react_agent(self.llm, self.tools, prompt)
        return AgentExecutor(agent=agent, tools=self.tools, verbose=True, max_iterations=5)

    def query(self, question: str) -> str:
        """Run the agent on ``question`` and return its final answer text.

        Args:
            question: The user's question.

        Returns:
            The value stored under ``"output"`` in the executor result. The
            executor returns a dict, so returning it unchanged would break
            the declared ``str`` return type.
        """
        result = self.agent.invoke({"input": question})
        if isinstance(result, dict):
            return str(result.get("output", ""))
        return str(result)

# Usage
# Each key becomes the name of a search tool offered to the agent.
retrievers = dict(
    knowledge_base=vector_retriever,
    database=sql_retriever,
    web=web_search_tool,
)
agent = AgenticRAG(retrievers=retrievers, llm=llm)
# One question that needs both internal figures and outside market data.
answer = agent.query(
    question="What are the Q3 revenue projections and how do they compare to market trends?"
)

Multi-Step Retrieval Pipeline

from dataclasses import dataclass
from typing import List, Optional

@dataclass
class RetrievalStep:
    """Record of one retrieval attempt made for a single sub-question."""

    query: str  # sub-question that was sent to the retriever
    source: str  # key of the retriever that was queried
    results: list  # documents returned (at most the first three)
    confidence: float  # LLM-rated relevance, clamped to [0, 1]


class MultiStepRetriever:
    """Answer a question by decomposing it and retrieving per sub-question.

    Args:
        llm: Model whose ``invoke(prompt)`` result exposes ``.content``.
        retrievers: Mapping of source name to retriever; each retriever must
            provide ``get_relevant_documents(query)``.

    Every executed step is also appended to ``self.history``.
    """

    def __init__(self, llm, retrievers):
        self.llm = llm
        self.retrievers = retrievers
        self.history = []

    def decompose_query(self, question: str) -> List[str]:
        """Ask the LLM to split ``question`` into sub-questions.

        List markers such as ``1.``, ``2)`` or ``-`` are removed so they do
        not pollute the retrieval query.

        Returns:
            The non-empty sub-questions, or ``[question]`` when the model
            produced none, so the pipeline always has something to retrieve.
        """
        import re

        prompt = f"""Break this question into 2-4 sub-questions for retrieval:
        Question: {question}
        Sub-questions (one per line):"""
        response = self.llm.invoke(prompt).content
        sub_questions = []
        for line in response.split("\n"):
            cleaned = re.sub(r"^\s*(?:[-*\u2022]|\d+[.)])\s+", "", line).strip()
            if cleaned:
                sub_questions.append(cleaned)
        return sub_questions or [question]

    def select_source(self, sub_query: str) -> str:
        """Ask the LLM which source fits ``sub_query`` best.

        Returns:
            A key of ``self.retrievers``; the raw reply is normalised by
            ``_resolve_source`` so a chatty answer cannot cause a KeyError.
        """
        prompt = f"""Which source is best for: {sub_query}
        Options: {list(self.retrievers.keys())}
        Answer with just the source name:"""
        answer = self.llm.invoke(prompt).content.strip()
        return self._resolve_source(answer)

    def _resolve_source(self, answer: str) -> str:
        """Map a free-form LLM reply onto one of the configured source names.

        Tries an exact match, then a match ignoring case and surrounding
        punctuation, then the longest source name mentioned anywhere in the
        reply, and finally falls back to the first configured source.

        Raises:
            ValueError: If no retrievers are configured.
        """
        names = list(self.retrievers)
        if not names:
            raise ValueError("MultiStepRetriever has no retrievers configured")
        if answer in self.retrievers:
            return answer

        cleaned = answer.strip(" \t\r\n\"'`.,:;[]()").casefold()
        for name in names:
            if str(name).casefold() == cleaned:
                return name

        lowered = answer.casefold()
        mentioned = [name for name in names if str(name).casefold() in lowered]
        if mentioned:
            # Longest wins so "web" does not shadow e.g. "web_archive".
            return max(mentioned, key=lambda name: len(str(name)))
        return names[0]

    @staticmethod
    def _parse_confidence(reply) -> float:
        """Extract the first number from ``reply`` and clamp it to [0, 1].

        Returns 0.0 when the reply contains no number at all.
        """
        import re

        match = re.search(r"-?(?:\d+(?:\.\d+)?|\.\d+)", str(reply))
        if match is None:
            return 0.0
        return min(1.0, max(0.0, float(match.group())))

    def retrieve_with_confidence(self, query: str, source: str) -> RetrievalStep:
        """Retrieve up to three documents and have the LLM rate them.

        Args:
            query: Sub-question to retrieve for.
            source: Key into ``self.retrievers``.

        Returns:
            A ``RetrievalStep``. When nothing was retrieved the confidence is
            0.0 and no scoring call is made.
        """
        docs = self.retrievers[source].get_relevant_documents(query)[:3]
        if not docs:
            return RetrievalStep(query, source, docs, 0.0)

        score_prompt = f"""Rate confidence 0-1 that these docs answer: {query}
        Docs: {[d.page_content[:100] for d in docs]}
        Score:"""
        reply = self.llm.invoke(score_prompt).content
        return RetrievalStep(query, source, docs, self._parse_confidence(reply))

    def execute(self, question: str) -> str:
        """Run decomposition, per-sub-question retrieval and final answering.

        Steps rated above 0.7 form the context; if none qualifies, all steps
        are used. Only the first document of each step (first 500 characters)
        is quoted, and steps without documents are skipped.

        Returns:
            The LLM's answer text.
        """
        all_results = []
        for sq in self.decompose_query(question):
            source = self.select_source(sq)
            step = self.retrieve_with_confidence(sq, source)
            all_results.append(step)
            self.history.append(step)

        high_conf = [r for r in all_results if r.confidence > 0.7] or all_results

        context = "\n".join(
            f"[{r.source}] {r.query}: {r.results[0].page_content[:500]}"
            for r in high_conf
            if r.results  # an empty retrieval has no first document to quote
        )

        prompt = f"""Answer using this context:
        {context}
        Question: {question}
        Answer:"""
        return self.llm.invoke(prompt).content

Self-Reflecting Agent

class SelfReflectingAgent:
    """Retrieve, answer, self-evaluate and retry with an improved query.

    Args:
        llm: Model whose ``invoke(prompt)`` result exposes ``.content``.
        retriever: Object providing ``get_relevant_documents(query)``.

    ``max_reflections`` (default 3) caps the number of answer attempts.
    """

    def __init__(self, llm, retriever):
        self.llm = llm
        self.retriever = retriever
        self.max_reflections = 3

    def evaluate_answer(self, question: str, answer: str, context: str) -> dict:
        """Have the LLM grade ``answer`` and return the parsed verdict.

        Returns:
            A dict that always has ``"score"`` (float in [0, 1]),
            ``"missing"`` and ``"improvements"``; see ``_parse_evaluation``.
        """
        prompt = f"""Evaluate this answer:
        Question: {question}
        Answer: {answer}
        Context: {context}

        Return JSON: {{"score": 0-1, "missing": "what's missing", "improvements": ["list"]}}"""
        return self._parse_evaluation(self.llm.invoke(prompt).content)

    @staticmethod
    def _parse_evaluation(raw) -> dict:
        """Parse the evaluator reply, tolerating text around the JSON object.

        Models often wrap JSON in code fences or add a sentence before it, so
        only the span from the first ``{`` to the last ``}`` is decoded. An
        unparseable reply yields a score of 0.0 rather than an exception, so
        one malformed reply does not abort the whole reflection loop.
        """
        import json

        text = str(raw)
        start, end = text.find("{"), text.rfind("}")
        parsed = None
        if start != -1 and end > start:
            try:
                parsed = json.loads(text[start:end + 1])
            except ValueError:  # json.JSONDecodeError subclasses ValueError
                parsed = None
        if not isinstance(parsed, dict):
            parsed = {}

        try:
            score = float(parsed.get("score", 0.0))
        except (TypeError, ValueError):
            score = 0.0
        parsed["score"] = min(1.0, max(0.0, score))
        parsed.setdefault("missing", "")
        parsed.setdefault("improvements", [])
        return parsed

    def improve_query(self, original_query: str, feedback: dict) -> str:
        """Ask the LLM for a better search query based on ``feedback``.

        Returns:
            The stripped suggestion, or ``original_query`` when the model
            returned only whitespace.
        """
        missing = feedback.get("missing", "")
        prompt = f"""Improve this search query based on feedback:
        Original: {original_query}
        Missing: {missing}
        Improved query:"""
        improved = self.llm.invoke(prompt).content.strip()
        return improved or original_query

    def query(self, question: str) -> str:
        """Answer ``question``, retrying retrieval until the answer scores well.

        Each attempt retrieves with the current query, answers from the first
        three documents, and self-evaluates. The loop stops early once a
        score exceeds 0.8.

        Returns:
            The highest-scoring answer seen. ``None`` is returned only when
            ``max_reflections`` is less than 1, i.e. no attempt was made.
        """
        best_answer = None
        best_score = 0

        current_query = question
        for i in range(self.max_reflections):
            docs = self.retriever.get_relevant_documents(current_query)
            context = "\n".join([d.page_content for d in docs[:3]])

            prompt = f"""Context: {context}\nQuestion: {question}\nAnswer:"""
            answer = self.llm.invoke(prompt).content

            eval_result = self.evaluate_answer(question, answer, context)
            score = eval_result["score"]
            print(f"Reflection {i+1}: Score = {score}")

            # Always keep the first answer: a score of 0 must not leave the
            # caller with None.
            if best_answer is None or score > best_score:
                best_score = score
                best_answer = answer

            if score > 0.8:
                break

            # Rewriting the query after the final attempt would spend an LLM
            # call on a query that is never used.
            if i + 1 < self.max_reflections:
                current_query = self.improve_query(current_query, eval_result)

        return best_answer

Tool-Use Agent with Custom Tools

from langchain_core.tools import tool

@tool
def search_knowledge_base(query: str) -> str:
    """Search internal knowledge base for company information."""
    # Only the first three documents the retriever returns are used.
    leading_docs = kb_retriever.get_relevant_documents(query)[:3]
    passages = (doc.page_content for doc in leading_docs)
    # One passage per line in the text handed back to the caller.
    return "\n".join(passages)

@tool
def query_database(sql_query: str) -> str:
    """Execute SQL query on the database."""
    # Returns str() of the list of row tuples fetched from "data.db".
    import sqlite3

    # NOTE(security): sql_query is executed verbatim. When this tool is driven
    # by model output, consider a read-only connection and a statement
    # allow-list before exposing it.
    conn = sqlite3.connect("data.db")
    try:
        rows = conn.execute(sql_query).fetchall()
    finally:
        # The original never closed the connection, leaking one handle per
        # call; close it even when the query raises.
        conn.close()
    return str(rows)

@tool
def calculate(expression: str) -> str:
    """Calculate mathematical expressions safely."""
    # Accepts digits, + - * / . ( ) and spaces (so // and ** are possible too).
    # Returns str(result), or "Invalid expression" for anything that is not a
    # well-formed arithmetic expression: disallowed characters, empty input,
    # syntax errors, division by zero, or an over-large integer power.
    import ast
    import operator

    allowed = set("0123456789+-*/.() ")
    text = expression.strip()
    if not text or not all(c in allowed for c in text):
        return "Invalid expression"

    max_bits = 4096  # cap on the size of an integer power result

    def bounded_pow(base, exponent):
        # "9**9**9**9" passes the character filter but would exhaust CPU and
        # memory; refuse integer powers whose result would be enormous.
        if (
            isinstance(base, int)
            and isinstance(exponent, int)
            and exponent > 0
            and abs(base) > 1
            and base.bit_length() * exponent > max_bits
        ):
            raise OverflowError("power result too large")
        return base ** exponent

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Pow: bounded_pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def evaluate(node):
        # Walk the parsed tree instead of calling eval(), so only numeric
        # literals and the operators listed above can ever execute.
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        raise ValueError("unsupported syntax")

    try:
        return str(evaluate(ast.parse(text, mode="eval")))
    except (SyntaxError, ValueError, TypeError, ArithmeticError, RecursionError, MemoryError):
        return "Invalid expression"

@tool
def web_search(query: str) -> str:
    """Search the web for current information."""
    # Returns the "content" field of up to three Tavily results, one per line.
    import os

    from tavily import TavilyClient

    # Take the key from the TAVILY_API_KEY environment variable so a real
    # credential never has to be written into the source; the original
    # placeholder remains the fallback when the variable is unset or empty.
    api_key = os.environ.get("TAVILY_API_KEY") or "your-api-key"
    client = TavilyClient(api_key=api_key)
    results = client.search(query, max_results=3)
    return "\n".join([r["content"] for r in results["results"]])

# Create agent with tools
# The four decorated functions above are passed to the agent as its tools.
tools = [
    search_knowledge_base,
    query_database,
    calculate,
    web_search,
]
# NOTE(review): `llm` and `prompt` must already be bound at module level; no
# module-level `prompt` assignment is visible in this file.
agent = create_react_agent(llm=llm, tools=tools, prompt=prompt)

Key Takeaways

Agentic RAG enables multi-step, adaptive retrieval strategies
ReAct pattern combines reasoning with tool use for transparency
Self-reflection iteratively improves answer quality
Multi-source retrieval leverages diverse data repositories
Plan-then-execute reduces unnecessary retrievals

Agentic RAG

Agentic RAG

ReAct Agent Pattern

Multi-Step Retrieval Pipeline

Self-Reflecting Agent

Tool-Use Agent with Custom Tools

Key Takeaways

Premium Content

Need Expert Generative AI Help?