🎉 75% of content is free forever — Unlock Premium from $10/mo →
CW
Search courses…
💼 Services · ℹ️ About · ✉️ Contact · View Pricing Plans from $10

Graph RAG

🟢 Free Lesson

Advertisement

Graph RAG

Documents · PDF, Web, Text · Structured Data · Entity Extraction · NER + RE + Coref · LLM-based NER · Knowledge Graph · E1 · E2 · E3 · E4 · E5 · Communities · Leiden / Louvain · Hierarchical · Summaries · LLM Summarization · Map-Reduce · User Query · Natural Language · Graph Traversal · Graph Retrieval · Cypher Queries · Path Finding · LLM Synthesis · Context-Aware Answer

Graph RAG combines knowledge graphs with retrieval-augmented generation to leverage structured relationships between entities for more accurate and comprehensive answers.

Entity Extraction with LLMs

from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
import json

class EntityExtractor:
    """Extract entities and relationships from text with an LLM.

    The model is prompted to answer with JSON describing ``entities``
    (name, type) and ``relationships`` (source, target, type).
    """

    def __init__(self, llm):
        """Store the chat model and build the extraction prompt.

        Args:
            llm: Object that can be piped after the prompt template
                (``prompt | llm``) and whose invocation result exposes the
                generated text as ``.content``.
        """
        self.llm = llm
        self.prompt = ChatPromptTemplate.from_template(
            """Extract entities and relationships from this text.
            Return JSON with entities (name, type) and relationships (source, target, type).

            Text: {text}
            JSON:"""
        )

    @staticmethod
    def _parse_json(raw: str) -> dict:
        """Parse the model's reply as JSON, tolerating common wrappers.

        Chat models frequently wrap JSON in a markdown code fence or add a
        sentence before/after it, so a bare ``json.loads`` is too strict.

        Raises:
            json.JSONDecodeError: If no JSON value can be recovered.
        """
        cleaned = raw.strip()
        if cleaned.startswith("```"):
            # Drop the opening fence line (``` or ```json) and the closing fence.
            _, newline, remainder = cleaned.partition("\n")
            cleaned = remainder if newline else cleaned[3:]
            cleaned = cleaned.rsplit("```", 1)[0]
        try:
            return json.loads(cleaned)
        except json.JSONDecodeError:
            # Fall back to the outermost {...} span when prose surrounds the JSON.
            start = cleaned.find("{")
            end = cleaned.rfind("}")
            if start == -1 or end <= start:
                raise
            return json.loads(cleaned[start:end + 1])

    def extract(self, text: str) -> dict:
        """Run the extraction prompt on ``text`` and return the parsed JSON.

        Raises:
            json.JSONDecodeError: If the model's reply contains no valid JSON.
        """
        chain = self.prompt | self.llm
        result = chain.invoke({"text": text})
        return self._parse_json(result.content)

    def extract_batch(self, documents: list) -> list:
        """Extract from each document's ``page_content``, preserving order.

        Each document is processed with a separate ``extract`` call.
        """
        return [self.extract(doc.page_content) for doc in documents]

# Usage
# The extractor needs a chat model instance; temperature=0 is chosen to favour
# reproducible extraction output. NOTE(review): ChatOpenAI presumably reads the
# API key from the environment — confirm your credentials are configured.
llm = ChatOpenAI(temperature=0)
extractor = EntityExtractor(llm)
entities = extractor.extract("Apple was founded by Steve Jobs in Cupertino.")
# Returns: {"entities": [{"name": "Apple", "type": "Organization"},
#          {"name": "Steve Jobs", "type": "Person"},
#          {"name": "Cupertino", "type": "Location"}],
#          "relationships": [{"source": "Steve Jobs", "target": "Apple",
#          "type": "founded"}]}

Knowledge Graph Construction with Neo4j

from neo4j import GraphDatabase

class KnowledgeGraphBuilder:
    """Build and query an entity graph in Neo4j.

    Nodes are written with the ``Entity`` label and relationships with the
    ``RELATES_TO`` type; the relationship's semantic name is stored in its
    ``type`` property. Instances can be used as context managers so the
    driver is closed on exit.
    """

    def __init__(self, uri, user, password):
        """Create the Neo4j driver for ``uri`` using basic ``(user, password)`` auth."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def __enter__(self):
        """Return the builder itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver; exceptions from the ``with`` body propagate."""
        self.close()

    def create_entity(self, name: str, entity_type: str, properties: dict = None):
        """Merge an ``Entity`` node keyed by name and type, then add ``properties``.

        Args:
            name: Entity name.
            entity_type: Stored in the node's ``type`` property.
            properties: Extra node properties; ``None`` means no extras.
        """
        query = """
        MERGE (e:Entity {name: $name, type: $type})
        SET e += $properties
        RETURN e
        """
        with self.driver.session() as session:
            session.run(query, name=name, type=entity_type, properties=properties or {})

    def create_relationship(self, source: str, target: str, rel_type: str, properties: dict = None):
        """Merge a ``RELATES_TO`` relationship between two existing entities.

        Both endpoints are matched by name only; if either ``MATCH`` finds
        nothing the query creates no relationship.

        Args:
            source: Name of the start entity.
            target: Name of the end entity.
            rel_type: Stored in the relationship's ``type`` property.
            properties: Extra relationship properties; ``None`` means no extras.
        """
        query = """
        MATCH (a:Entity {name: $source})
        MATCH (b:Entity {name: $target})
        MERGE (a)-[r:RELATES_TO {type: $rel_type}]->(b)
        SET r += $properties
        """
        with self.driver.session() as session:
            session.run(query, source=source, target=target, rel_type=rel_type, properties=properties or {})

    def build_from_extraction(self, extraction: dict):
        """Write an extraction result to the graph.

        Entities are created before relationships so that the relationship
        ``MATCH`` clauses can find their endpoints.

        Args:
            extraction: Mapping with optional ``"entities"`` (items having
                ``name`` and ``type``) and ``"relationships"`` (items having
                ``source``, ``target`` and ``type``) lists.
        """
        for entity in extraction.get("entities", []):
            self.create_entity(entity["name"], entity["type"])
        for rel in extraction.get("relationships", []):
            self.create_relationship(rel["source"], rel["target"], rel["type"])

    def query_subgraph(self, entity_name: str, depth: int = 2) -> list:
        """Return the paths of 1..``depth`` hops starting at ``entity_name``.

        Args:
            entity_name: Name of the start entity (passed as a query parameter).
            depth: Maximum number of hops; must convert to an integer >= 1.

        Returns:
            A list with the ``path`` value of every result record.

        Raises:
            ValueError: If ``depth`` is not an integer value or is below 1.
        """
        # The hop bound is interpolated into the query text, so it is coerced
        # to int first to ensure nothing but a number can reach the Cypher string.
        max_hops = int(depth)
        if max_hops < 1:
            raise ValueError(f"depth must be >= 1, got {depth!r}")
        query = f"""
        MATCH path = (start:Entity {{name: $name}})-[*1..{max_hops}]-(related)
        RETURN path
        """
        with self.driver.session() as session:
            result = session.run(query, name=entity_name)
            # Materialize inside the session block, while the result is still open.
            return [record["path"] for record in result]

# Usage
kg = KnowledgeGraphBuilder("bolt://localhost:7687", "neo4j", "password")
try:
    # `entities` is the extraction result produced by EntityExtractor.extract.
    kg.build_from_extraction(entities)
    subgraph = kg.query_subgraph("Apple", depth=2)
finally:
    # Always release the driver, even if a query fails.
    kg.close()

Community Detection and Summarization

import networkx as nx
from cdlib import algorithms

class CommunitySummarizer:
    """Summarize graph communities with an LLM, map-reduce style."""

    def __init__(self, llm):
        """Keep the model whose ``invoke(prompt).content`` yields the summary text."""
        self.llm = llm

    def detect_communities(self, graph: nx.Graph) -> list:
        """Run ``algorithms.leiden`` on ``graph`` and return its ``communities``."""
        clustering = algorithms.leiden(graph)
        return clustering.communities

    def summarize_community(self, entities: list, relationships: list) -> str:
        """Ask the LLM for a concise summary of one community.

        Args:
            entities: Dicts with ``name`` and ``type`` keys.
            relationships: Dicts with ``source``, ``target`` and ``type`` keys.

        Returns:
            The ``content`` of the model's response.
        """
        entity_listing = ", ".join(
            f"{ent['name']} ({ent['type']})" for ent in entities
        )
        relation_listing = "; ".join(
            f"{rel['source']} -{rel['type']}- {rel['target']}" for rel in relationships
        )

        request = f"""Summarize this community of entities and their relationships:
        Entities: {entity_listing}
        Relationships: {relation_listing}
        Provide a concise summary:"""

        response = self.llm.invoke(request)
        return response.content

    def map_reduce_summarize(self, graph: nx.Graph) -> str:
        """Summarize every community, then synthesize the summaries into one answer.

        Map step: one ``summarize_community`` call per detected community,
        using the nodes' and edges' ``type`` attributes (empty string when
        absent). Reduce step: a final LLM call over the newline-joined summaries.
        """
        partial_summaries = []
        for members in self.detect_communities(graph):
            member_entities = []
            for node in members:
                node_type = graph.nodes[node].get("type", "")
                member_entities.append({"name": node, "type": node_type})

            # Only edges with both endpoints inside the community are described.
            member_relationships = []
            for src, dst in graph.subgraph(members).edges():
                edge_type = graph[src][dst].get("type", "")
                member_relationships.append({"source": src, "target": dst, "type": edge_type})

            partial_summaries.append(
                self.summarize_community(member_entities, member_relationships)
            )

        joined = "\n".join(partial_summaries)
        synthesis_request = f"""Synthesize these community summaries into one answer:
        {joined}
        Final answer:"""
        final_response = self.llm.invoke(synthesis_request)
        return final_response.content

Graph Query Engine

class GraphQueryEngine:
    """Answer natural-language questions from the knowledge graph.

    A question is translated to Cypher by the LLM, the query is run through
    the graph builder's driver, and the records are given back to the LLM as
    context for the final answer.
    """

    def __init__(self, graph_builder, llm, community_summarizer):
        """Store collaborators.

        Args:
            graph_builder: Object exposing a Neo4j ``driver`` attribute.
            llm: Model whose ``invoke(prompt).content`` holds the reply text.
            community_summarizer: Stored on the instance; not used by the
                methods of this class.
        """
        self.graph_builder = graph_builder
        self.llm = llm
        self.community_summarizer = community_summarizer

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Remove a surrounding markdown code fence and outer whitespace."""
        cleaned = text.strip()
        if cleaned.startswith("```"):
            # Drop the opening fence line (``` or ```cypher) and the closing fence.
            _, newline, remainder = cleaned.partition("\n")
            cleaned = remainder if newline else cleaned[3:]
            cleaned = cleaned.rsplit("```", 1)[0]
        return cleaned.strip()

    def natural_language_to_cypher(self, question: str) -> str:
        """Ask the LLM to translate ``question`` into a Cypher query.

        Chat models often reply with a fenced code block; the fence is
        stripped so the returned text can be sent to Neo4j as-is.
        """
        prompt = f"""Convert this question to a Cypher query for Neo4j.
        Schema: (Entity {{name, type}})-[:RELATES_TO {{type}}]->(Entity)
        Question: {question}
        Cypher:"""
        return self._strip_code_fence(self.llm.invoke(prompt).content)

    def retrieve(self, question: str) -> str:
        """Run the generated Cypher and return the records as a string.

        Returns:
            ``str()`` of the list of result records converted to dicts.
        """
        cypher = self.natural_language_to_cypher(question)
        # SECURITY NOTE: the Cypher text is generated by the LLM from user
        # input and executed verbatim, so it must be treated as untrusted.
        # Connect with a database user restricted to read-only access so a
        # manipulated prompt cannot modify or delete graph data.
        with self.graph_builder.driver.session() as session:
            result = session.run(cypher)
            # Convert inside the session block, while the result is still open.
            records = [dict(r) for r in result]
        return str(records)

    def answer(self, question: str) -> str:
        """Answer ``question`` using graph records retrieved for it as context."""
        graph_context = self.retrieve(question)
        prompt = f"""Answer based on this graph data:
        {graph_context}
        Question: {question}
        Answer:"""
        return self.llm.invoke(prompt).content

Key Takeaways

  • Graph RAG captures relationships that vector search misses
  • Entity extraction with LLMs enables automatic KG construction
  • Community detection enables hierarchical summarization
  • Hybrid approaches combine vector and graph search for best results
⭐

Premium Content

Graph RAG

Unlock this lesson and 900+ advanced tutorials with a Premium plan.

🎯 End-to-end Projects
💼 Interview Prep
📜 Certificates
🤝 Community Access

Already a member? Log in

Need Expert Generative AI Help?

Get personalized tutoring, project support, or professional consulting.

Advertisement