Graph RAG

Graph RAG combines knowledge graphs with retrieval-augmented generation to leverage structured relationships between entities for more accurate and comprehensive answers.

Entity Extraction with LLMs

from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
import json

class EntityExtractor:
    """Extract entities and relationships from text with a chat LLM.

    The model is prompted to reply with a JSON object holding ``entities``
    (name, type) and ``relationships`` (source, target, type).
    """

    def __init__(self, llm):
        """Store the chat model and build the extraction prompt.

        Args:
            llm: Chat model that can be piped after a prompt template and
                whose response exposes the reply text as ``.content``.
        """
        self.llm = llm
        self.prompt = ChatPromptTemplate.from_template(
            """Extract entities and relationships from this text.
            Return JSON with entities (name, type) and relationships (source, target, type).

            Text: {text}
            JSON:"""
        )

    def extract(self, text: str) -> dict:
        """Run the extraction prompt on ``text`` and parse the JSON reply.

        Args:
            text: Raw text to extract entities and relationships from.

        Returns:
            The decoded JSON payload produced by the model.

        Raises:
            json.JSONDecodeError: If the reply contains no parseable JSON.
        """
        chain = self.prompt | self.llm
        reply = chain.invoke({"text": text}).content
        try:
            return json.loads(reply)
        except json.JSONDecodeError:
            # Chat models often wrap the payload in a Markdown code fence
            # (```json ... ```) or add a sentence around it. Retry on the
            # outermost {...} span before giving up; replies that were
            # already valid JSON never reach this branch.
            start = reply.find("{")
            end = reply.rfind("}")
            if start == -1 or end < start:
                raise
            return json.loads(reply[start:end + 1])

    def extract_batch(self, documents: list) -> list:
        """Extract from every document, one LLM call per document.

        Args:
            documents: Objects exposing the text as ``.page_content``.

        Returns:
            One extraction result per document, in input order.
        """
        return [self.extract(doc.page_content) for doc in documents]

# Usage
# NOTE: `llm` is not defined in this snippet; a chat model instance must be
# created beforehand (presumably with the ChatOpenAI class imported above --
# confirm against your setup).
extractor = EntityExtractor(llm)
entities = extractor.extract("Apple was founded by Steve Jobs in Cupertino.")
# Example of the expected result (actual model output may differ):
# Returns: {"entities": [{"name": "Apple", "type": "Organization"},
#          {"name": "Steve Jobs", "type": "Person"},
#          {"name": "Cupertino", "type": "Location"}],
#          "relationships": [{"source": "Steve Jobs", "target": "Apple",
#          "type": "founded"}]}

Knowledge Graph Construction with Neo4j

from neo4j import GraphDatabase

class KnowledgeGraphBuilder:
    """Write extracted entities and relationships to Neo4j and read subgraphs back.

    Instances hold an open driver; call ``close()`` when done, or use the
    builder as a context manager so the driver is closed automatically.
    """

    def __init__(self, uri, user, password):
        """Open a Neo4j driver for ``uri`` using basic ``(user, password)`` auth."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def __enter__(self):
        """Return the builder itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on leaving the ``with`` block; never suppresses errors."""
        self.close()

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def create_entity(self, name: str, entity_type: str, properties: dict = None):
        """Create or update an ``Entity`` node.

        Args:
            name: Entity name.
            entity_type: Entity type label stored in the ``type`` property.
            properties: Extra properties merged onto the node.
        """
        # NOTE(review): the node is merged on (name, type), while
        # create_relationship matches on name only -- the same name with two
        # different types yields two nodes that both match. Confirm intended.
        query = """
        MERGE (e:Entity {name: $name, type: $type})
        SET e += $properties
        RETURN e
        """
        with self.driver.session() as session:
            session.run(query, name=name, type=entity_type, properties=properties or {})

    def create_relationship(self, source: str, target: str, rel_type: str, properties: dict = None):
        """Create or update a ``RELATES_TO`` edge between two existing entities.

        Args:
            source: Name of the start entity.
            target: Name of the end entity.
            rel_type: Relationship type stored in the edge's ``type`` property.
            properties: Extra properties merged onto the relationship.
        """
        # Both endpoints are MATCHed, so nothing is written when either
        # entity does not exist yet.
        query = """
        MATCH (a:Entity {name: $source})
        MATCH (b:Entity {name: $target})
        MERGE (a)-[r:RELATES_TO {type: $rel_type}]->(b)
        SET r += $properties
        """
        with self.driver.session() as session:
            session.run(query, source=source, target=target, rel_type=rel_type, properties=properties or {})

    def build_from_extraction(self, extraction: dict):
        """Persist one extraction result.

        Args:
            extraction: Mapping with optional ``entities`` and
                ``relationships`` lists; missing keys are treated as empty.
        """
        # Entities go first so the relationship MATCH clauses can find them.
        for entity in extraction.get("entities", []):
            self.create_entity(entity["name"], entity["type"])
        for rel in extraction.get("relationships", []):
            self.create_relationship(rel["source"], rel["target"], rel["type"])

    def query_subgraph(self, entity_name: str, depth: int = 2) -> list:
        """Return every path of 1..``depth`` hops starting at ``entity_name``.

        Args:
            entity_name: Name of the start entity.
            depth: Maximum number of hops; must convert to an integer >= 1.

        Returns:
            A list with the ``path`` value of each result record.

        Raises:
            ValueError: If ``depth`` is not an integer value or is below 1.
        """
        # depth is spliced into the query text rather than sent as a query
        # parameter, so coerce it to int first: arbitrary strings must never
        # reach the Cypher text.
        try:
            max_hops = int(depth)
        except (TypeError, ValueError, OverflowError) as exc:
            raise ValueError(f"depth must be an integer, got {depth!r}") from exc
        if max_hops < 1:
            raise ValueError(f"depth must be at least 1, got {depth!r}")

        query = f"""
        MATCH path = (start:Entity {{name: $name}})-[*1..{max_hops}]-(related)
        RETURN path
        """
        with self.driver.session() as session:
            result = session.run(query, name=entity_name)
            # Materialize inside the session: records are read while it is open.
            return [record["path"] for record in result]

# Usage
# `entities` is the extraction result produced by EntityExtractor.extract()
# in the entity-extraction usage snippet above.
kg = KnowledgeGraphBuilder("bolt://localhost:7687", "neo4j", "password")
kg.build_from_extraction(entities)
subgraph = kg.query_subgraph("Apple", depth=2)

Community Detection and Summarization

import networkx as nx
from cdlib import algorithms

class CommunitySummarizer:
    """Summarize a graph community by community, then merge the summaries."""

    def __init__(self, llm):
        """Keep the chat model used for every summarization call."""
        self.llm = llm

    def detect_communities(self, graph: nx.Graph) -> list:
        """Partition ``graph`` with Leiden and return the node groups."""
        return algorithms.leiden(graph).communities

    def summarize_community(self, entities: list, relationships: list) -> str:
        """Ask the model for a short summary of one community.

        Args:
            entities: Dicts with ``name`` and ``type`` keys.
            relationships: Dicts with ``source``, ``type`` and ``target`` keys.

        Returns:
            The model's reply text.
        """
        entity_listing = ", ".join(
            f"{item['name']} ({item['type']})" for item in entities
        )
        relation_listing = "; ".join(
            f"{link['source']} -{link['type']}- {link['target']}"
            for link in relationships
        )

        request = f"""Summarize this community of entities and their relationships:
        Entities: {entity_listing}
        Relationships: {relation_listing}
        Provide a concise summary:"""

        reply = self.llm.invoke(request)
        return reply.content

    def _describe_community(self, graph, members):
        """Collect the entity and relationship dicts for one community."""
        nodes = []
        for node in members:
            nodes.append({"name": node, "type": graph.nodes[node].get("type", "")})

        links = []
        # Only edges with both endpoints inside the community are reported.
        for left, right in graph.subgraph(members).edges():
            links.append(
                {"source": left, "target": right, "type": graph[left][right].get("type", "")}
            )
        return nodes, links

    def map_reduce_summarize(self, graph: nx.Graph) -> str:
        """Summarize each community (map), then synthesize one answer (reduce)."""
        partial_summaries = [
            self.summarize_community(*self._describe_community(graph, members))
            for members in self.detect_communities(graph)
        ]

        joined = "\n".join(partial_summaries)
        closing_request = f"""Synthesize these community summaries into one answer:
        {joined}
        Final answer:"""
        reply = self.llm.invoke(closing_request)
        return reply.content

Graph Query Engine

class GraphQueryEngine:
    """Answer questions by translating them to Cypher and reading the graph."""

    def __init__(self, graph_builder, llm, community_summarizer):
        """Store the collaborators.

        Args:
            graph_builder: Object exposing an open Neo4j ``driver`` attribute.
            llm: Chat model whose ``invoke(prompt)`` reply exposes ``.content``.
            community_summarizer: Kept on the instance; not used by the
                methods of this class.
        """
        self.graph_builder = graph_builder
        self.llm = llm
        self.community_summarizer = community_summarizer

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Return ``text`` without surrounding whitespace or a Markdown code fence."""
        body = text.strip()
        if body.startswith("```"):
            # Drop the opening fence line, which may carry a language tag
            # such as ```cypher.
            first_line, newline, rest = body.partition("\n")
            body = (rest if newline else first_line[3:]).rstrip()
            if body.endswith("```"):
                body = body[:-3]
        return body.strip()

    def natural_language_to_cypher(self, question: str) -> str:
        """Ask the model to translate ``question`` into a Cypher query.

        Returns:
            The generated query text, with any Markdown code fence removed so
            it can be sent to Neo4j as-is.
        """
        prompt = f"""Convert this question to a Cypher query for Neo4j.
        Schema: (Entity {{name, type}})-[:RELATES_TO {{type}}]->(Entity)
        Question: {question}
        Cypher:"""
        return self._strip_code_fence(self.llm.invoke(prompt).content)

    def retrieve(self, question: str) -> str:
        """Run the generated Cypher and return the records as a string.

        Returns:
            ``str()`` of the list of result records converted to dicts.
        """
        cypher = self.natural_language_to_cypher(question)
        # SECURITY: this executes model-generated Cypher verbatim. A crafted
        # question can make the model emit write or delete statements, so
        # connect with a read-only database user, or validate the query,
        # before exposing this to untrusted input.
        with self.graph_builder.driver.session() as session:
            result = session.run(cypher)
            records = [dict(r) for r in result]
        return str(records)

    def answer(self, question: str) -> str:
        """Answer ``question`` using the retrieved graph records as context."""
        graph_context = self.retrieve(question)
        prompt = f"""Answer based on this graph data:
        {graph_context}
        Question: {question}
        Answer:"""
        return self.llm.invoke(prompt).content

Key Takeaways

- Graph RAG captures relationships that vector search misses.
- Entity extraction with LLMs enables automatic knowledge-graph (KG) construction.
- Community detection enables hierarchical summarization.
- Hybrid approaches combine vector and graph search for the best results.

Graph RAG

Graph RAG

Entity Extraction with LLMs

Knowledge Graph Construction with Neo4j

Community Detection and Summarization

Graph Query Engine

Key Takeaways

Premium Content

Need Expert Generative AI Help?