🎉 75% of content is free forever — Unlock Premium from $10/mo →
CW
Search courses…
💼 Services · ℹ️ About · ✉️ Contact · View Pricing Plans (from $10)

Advanced RAG Techniques

🟒 Free Lesson

Advertisement

Advanced RAG Techniques

Diagram: User Query · HyDE (Hypothetical Doc) · Self-RAG (Reflect + Critique) · CRAG (Corrective RAG) · Vector Store (Embeddings + Index, Semantic Search) · Retriever (Hybrid Search, Reranking) · LLM Generator (Answer Synthesis, Citation Tracking) · Answer · Self-Reflection Loop

Advanced RAG techniques go beyond basic retrieval-augmented generation by incorporating self-reflection, corrective mechanisms, and adaptive strategies to significantly improve answer quality.

HyDE (Hypothetical Document Embeddings)

HyDE generates a hypothetical answer first, then uses its embedding to find real documents:

from langchain_openai import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import ChatPromptTemplate

class HyDERetriever:
    """Retrieve documents by embedding an LLM-written hypothetical answer.

    Instead of embedding the user query directly, the LLM is first asked to
    write a passage answering it; that passage's embedding is then used for
    the vector-store similarity search.
    """

    def __init__(self, vector_store, llm, embeddings):
        """Store the collaborators used by the retrieval steps.

        Args:
            vector_store: Object exposing ``similarity_search_by_vector``.
            llm: Model that can be piped after a ``ChatPromptTemplate``.
            embeddings: Object exposing ``embed_query``.
        """
        self.embeddings = embeddings
        self.llm = llm
        self.vector_store = vector_store

    def generate_hypothetical_doc(self, query: str) -> str:
        """Ask the LLM for a passage answering *query* and return its text."""
        template = ChatPromptTemplate.from_template(
            "Please write a passage that answers this question:\n"
            "            Question: {query}\n"
            "            Passage:"
        )
        pipeline = template | self.llm
        # The chain result is expected to carry its text in ``.content``.
        return pipeline.invoke({"query": query}).content

    def retrieve(self, query: str, k: int = 5):
        """Return the *k* stored documents nearest to the hypothetical answer."""
        passage = self.generate_hypothetical_doc(query)
        vector = self.embeddings.embed_query(passage)
        return self.vector_store.similarity_search_by_vector(vector, k=k)

# Usage: `vector_store`, `llm` and `embeddings` must already exist in scope;
# this snippet does not create them.
retriever = HyDERetriever(
    vector_store=vector_store,
    llm=llm,
    embeddings=embeddings,
)
docs = retriever.retrieve("How does quantum computing work?")

Self-RAG

Self-RAG uses reflection tokens to decide when to retrieve and evaluate relevance:

from enum import Enum
from dataclasses import dataclass

class ReflectionToken(Enum):
    """Marker strings used to tag Self-RAG reflection steps."""

    RETRIEVE = "[Retrieval]"  # the marker `SelfRAG.should_retrieve` looks for
    REL = "[Relevance]"
    SUP = "[Support]"
    USE = "[Utility]"


@dataclass
class SelfRAGOutput:
    """Result bundle returned by ``SelfRAG.generate``."""

    answer: str  # text of the LLM's final reply
    retrieved_docs: list  # documents that passed the relevance filter
    reflection_tokens: list  # always [] in the current ``generate``
    is_useful: bool  # always True in the current ``generate``


class SelfRAG:
    """Answer queries, retrieving and filtering documents only when needed.

    The LLM is asked whether retrieval is needed; if so, each retrieved
    document is scored by the LLM and only those at or above
    ``threshold_rel`` are used as context for the final answer.
    """

    def __init__(self, llm, retriever, threshold_rel=0.5):
        """Store collaborators.

        Args:
            llm: Object whose ``invoke(prompt)`` returns something with a
                ``.content`` attribute.
            retriever: Object exposing ``get_relevant_documents(query)``
                returning items with a ``page_content`` attribute.
            threshold_rel: Minimum relevance score for a document to be kept.
        """
        self.llm = llm
        self.retriever = retriever
        self.threshold_rel = threshold_rel

    def should_retrieve(self, query: str) -> bool:
        """Return True when the LLM's reply contains the retrieval marker."""
        prompt = f"""Given the query, determine if retrieval is needed.
        Query: {query}
        Answer with [Retrieval] if needed, [No Retrieval] if not."""
        response = self.llm.invoke(prompt)
        # Use the enum value so the marker is defined in exactly one place.
        # "[No Retrieval]" does not contain "[Retrieval]", so it is not a match.
        return ReflectionToken.RETRIEVE.value in response.content

    def assess_relevance(self, query: str, doc: str) -> float:
        """Ask the LLM to score how relevant *doc* is to *query*.

        Returns:
            The score parsed from the reply. A bare number is parsed as-is;
            otherwise the first number found in the reply is used (e.g.
            "Score: 0.8" -> 0.8). If the reply contains no number at all the
            document is treated as irrelevant and 0.0 is returned, rather
            than raising ``ValueError`` and aborting the whole query.
        """
        import re  # local import: only this method needs it

        prompt = f"""Rate relevance of document to query (0-1).
        Query: {query}
        Document: {doc}
        Score:"""
        reply = self.llm.invoke(prompt).content
        try:
            return float(reply)
        except (TypeError, ValueError):
            # LLMs often wrap the number in extra words; fall through and
            # pull the first numeric token out of the text instead.
            pass
        found = re.search(r"\d*\.?\d+", str(reply))
        return float(found.group()) if found else 0.0

    def generate(self, query: str) -> SelfRAGOutput:
        """Produce an answer for *query*, with retrieval when the LLM asks for it.

        Without retrieval the query is sent to the LLM directly. With
        retrieval, every retrieved document is scored, those meeting
        ``threshold_rel`` are kept, and the first three kept documents form
        the context of the final prompt.
        """
        if not self.should_retrieve(query):
            answer = self.llm.invoke(query).content
            return SelfRAGOutput(answer, [], [], True)

        docs = self.retriever.get_relevant_documents(query)
        filtered = [
            d for d in docs
            if self.assess_relevance(query, d.page_content) >= self.threshold_rel
        ]
        # Only the top three kept documents go into the prompt, but the full
        # filtered list is returned to the caller.
        context = "\n".join([d.page_content for d in filtered[:3]])
        answer = self.llm.invoke(
            f"Context: {context}\nQuestion: {query}"
        ).content
        return SelfRAGOutput(answer, filtered, [], True)

CRAG (Corrective RAG)

CRAG evaluates retrieval quality and takes corrective actions:

class CRAG:
    """Corrective RAG: grade the retrieval, then pick a recovery strategy.

    The LLM grades the retrieved documents as Correct, Ambiguous or
    Incorrect. Correct keeps the documents, Ambiguous re-searches with
    decomposed sub-queries, and anything else falls back to
    ``knowledge_store.search_web``.
    """

    def __init__(self, llm, retriever, knowledge_store):
        """Store collaborators.

        Args:
            llm: Object whose ``invoke(prompt)`` returns something with a
                ``.content`` string.
            retriever: Object exposing ``get_relevant_documents(query)``.
            knowledge_store: Object exposing ``search_web(query)``; its
                results are read through ``page_content`` like the
                retriever's documents.
        """
        self.llm = llm
        self.retriever = retriever
        self.knowledge_store = knowledge_store

    def evaluate_retrieval(self, query: str, docs: list) -> str:
        """Return the LLM's raw assessment text for *docs* against *query*.

        Only the first 200 characters of each document are shown to the LLM.
        """
        prompt = f"""Evaluate if documents are relevant (Correct/Ambiguous/Incorrect).
        Query: {query}
        Documents: {[d.page_content[:200] for d in docs]}
        Assessment:"""
        return self.llm.invoke(prompt).content

    def decompose_and_search(self, query: str) -> list:
        """Split *query* into sub-queries via the LLM and retrieve for each.

        At most three sub-queries are used. Blank lines in the LLM reply are
        skipped so they neither reach the retriever as empty queries nor use
        up one of the three slots.
        """
        prompt = f"Decompose into sub-queries: {query}"
        reply_lines = self.llm.invoke(prompt).content.split("\n")
        sub_queries = [line.strip() for line in reply_lines if line.strip()]
        all_docs = []
        for sq in sub_queries[:3]:
            all_docs.extend(self.retriever.get_relevant_documents(sq))
        return all_docs

    def _is_relevant(self, doc) -> bool:
        """Return True when *doc* has non-blank ``page_content``.

        This helper was called but never defined, so the "Correct" branch
        raised ``AttributeError``. The batch has already been graded as
        correct by the LLM at that point, so only empty documents are
        dropped here.
        """
        return bool((doc.page_content or "").strip())

    def retrieve_and_correct(self, query: str) -> str:
        """Retrieve for *query*, correct the retrieval if needed, and answer.

        Returns:
            The LLM's answer text, generated from at most five documents.
        """
        import re  # local import: only the verdict parsing needs it

        docs = self.retriever.get_relevant_documents(query)
        assessment = self.evaluate_retrieval(query, docs)

        # Case-insensitive matching, because the LLM does not reliably echo
        # the capitalised labels. The look-behind stops "correct" from
        # matching inside "incorrect".
        if re.search(r"(?<![a-z])correct", assessment, re.IGNORECASE):
            filtered = [d for d in docs if self._is_relevant(d)]
        elif "ambiguous" in assessment.lower():
            filtered = self.decompose_and_search(query)
        else:
            # "Incorrect" or an unrecognised verdict: fall back to web search.
            filtered = self.knowledge_store.search_web(query)

        context = "\n".join([d.page_content for d in filtered[:5]])
        prompt = f"""Answer using only this context:
        {context}
        Question: {query}"""
        return self.llm.invoke(prompt).content

Adaptive Query Expansion

class AdaptiveQueryExpansion:
    """Expand a query into several related queries using an LLM."""

    def __init__(self, llm):
        """Store the LLM; ``llm.invoke(prompt)`` must return an object with ``.content``."""
        self.llm = llm

    @staticmethod
    def _nonblank_lines(text: str) -> list:
        """Split *text* on newlines, strip each line, and drop empty ones."""
        return [line.strip() for line in text.split("\n") if line.strip()]

    def expand_query(self, query: str, strategy: str = "hyde") -> list:
        """Return a list of queries derived from *query*.

        Args:
            query: The original user query.
            strategy: One of ``"multi_query"`` (original plus up to three
                rephrasings), ``"step_back"`` (original plus one broader
                question) or ``"sub_query"`` (up to four sub-questions,
                without the original). Any other value, including the
                default ``"hyde"``, has no dedicated branch and returns
                ``[query]`` unchanged.

        Returns:
            The expanded queries. Blank lines in the LLM reply are discarded
            so they neither appear as queries nor use up the result limit.
        """
        if strategy == "multi_query":
            prompt = f"""Generate 3 different versions of this query:
            {query}
            Return each on a new line."""
            expansions = self._nonblank_lines(self.llm.invoke(prompt).content)
            return [query] + expansions[:3]
        elif strategy == "step_back":
            prompt = f"""Generate a more general, broader question:
            {query}"""
            return [query, self.llm.invoke(prompt).content]
        elif strategy == "sub_query":
            prompt = f"""Break into 2-3 sub-questions:
            {query}"""
            return self._nonblank_lines(self.llm.invoke(prompt).content)[:4]
        return [query]

Reranking with Cross-Encoders

from sentence_transformers import CrossEncoder

class Reranker:
    """Reorder retrieved documents by cross-encoder relevance to a query."""

    def __init__(self, model_name="cross-encoder/ms-marco-MiniLM-L-6-v2"):
        """Load the cross-encoder identified by *model_name*."""
        self.model = CrossEncoder(model_name)

    def rerank(self, query: str, documents: list, top_k: int = 3) -> list:
        """Return the *top_k* documents with the highest score for *query*.

        Args:
            query: The user query each document is scored against.
            documents: Items with a ``page_content`` attribute.
            top_k: Maximum number of documents to return.

        Returns:
            Documents ordered from highest to lowest score; documents with
            equal scores keep their input order. An empty input yields an
            empty list without calling the model.
        """
        # Materialise once so any iterable works and can be traversed twice.
        candidates = list(documents)
        if not candidates:
            # Nothing to score; avoid calling predict() with an empty batch.
            return []
        pairs = [(query, doc.page_content) for doc in candidates]
        scores = self.model.predict(pairs)
        ranked = sorted(
            zip(candidates, scores), key=lambda item: item[1], reverse=True
        )
        return [doc for doc, _score in ranked[:top_k]]

Key Takeaways

  • HyDE improves retrieval by matching on hypothetical document embeddings
  • Self-RAG adds reflection tokens for adaptive retrieval decisions
  • CRAG handles poor retrievals with corrective web search fallback
  • Reranking significantly improves precision of retrieved results
  • Combine techniques for maximum effectiveness in production systems
⭐

Premium Content

Advanced RAG Techniques

Unlock this lesson and 900+ advanced tutorials with a Premium plan.

🎯 End-to-end Projects
💼 Interview Prep
📜 Certificates
🤝 Community Access

Already a member? Log in

Need Expert Generative AI Help?

Get personalized tutoring, project support, or professional consulting.

Advertisement