Graph RAG
Graph RAG combines knowledge graphs with retrieval-augmented generation to leverage structured relationships between entities for more accurate and comprehensive answers.
Entity Extraction with LLMs
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
import json
class EntityExtractor:
    """Extract entities and relationships from text with an LLM.

    The LLM is prompted to reply with a JSON object holding ``entities``
    (name, type) and ``relationships`` (source, target, type).
    """

    def __init__(self, llm):
        """Store the LLM and build the extraction prompt.

        Args:
            llm: Model that can be piped after a ``ChatPromptTemplate`` and
                whose response exposes its text as ``.content``.
        """
        self.llm = llm
        self.prompt = ChatPromptTemplate.from_template(
            "Extract entities and relationships from this text.\n"
            "Return JSON with entities (name, type) and relationships (source, target, type).\n"
            "Text: {text}\n"
            "JSON:"
        )

    @staticmethod
    def _strip_code_fence(raw: str) -> str:
        """Return ``raw`` without a surrounding Markdown code fence, if any."""
        cleaned = raw.strip()
        if not cleaned.startswith("```"):
            return cleaned
        # Drop the opening fence line, which may carry a language tag
        # such as "json". A fence with no newline keeps the rest of the line.
        head, newline, rest = cleaned.partition("\n")
        cleaned = (rest if newline else head[3:]).rstrip()
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        return cleaned.strip()

    def extract(self, text: str) -> dict:
        """Run the extraction prompt on ``text`` and parse the JSON reply.

        Args:
            text: Passage to extract entities and relationships from.

        Returns:
            The parsed JSON reply of the LLM.

        Raises:
            json.JSONDecodeError: If the reply is not valid JSON even after
                a surrounding Markdown code fence has been removed.
        """
        chain = self.prompt | self.llm
        result = chain.invoke({"text": text})
        # Replies are frequently wrapped in ```json ... ``` fences, which
        # json.loads cannot parse, so unwrap them first.
        return json.loads(self._strip_code_fence(result.content))

    def extract_batch(self, documents: list) -> list:
        """Extract from each document's ``page_content``, preserving order.

        Args:
            documents: Objects exposing a ``page_content`` string.

        Returns:
            One extraction result per document.
        """
        return [self.extract(doc.page_content) for doc in documents]
# Example: extract entities and relationships from a single sentence.
extractor = EntityExtractor(llm)
entities = extractor.extract(text="Apple was founded by Steve Jobs in Cupertino.")
# Illustrative result (the exact output depends on the LLM):
# {
#     "entities": [
#         {"name": "Apple", "type": "Organization"},
#         {"name": "Steve Jobs", "type": "Person"},
#         {"name": "Cupertino", "type": "Location"},
#     ],
#     "relationships": [
#         {"source": "Steve Jobs", "target": "Apple", "type": "founded"},
#     ],
# }
Knowledge Graph Construction with Neo4j
from neo4j import GraphDatabase
class KnowledgeGraphBuilder:
    """Write extracted entities and relationships to Neo4j and query them back.

    Instances can be used as context managers so the driver is closed even
    when an error occurs::

        with KnowledgeGraphBuilder(uri, user, password) as kg:
            kg.build_from_extraction(extraction)
    """

    def __init__(self, uri, user, password):
        """Open a Neo4j driver for ``uri`` authenticated as ``user``."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def __enter__(self):
        """Return this builder for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver when the ``with`` block exits."""
        self.close()

    def close(self):
        """Close the underlying Neo4j driver."""
        self.driver.close()

    def create_entity(self, name: str, entity_type: str, properties: dict = None):
        """Merge an ``Entity`` node keyed on name and type, then set properties.

        Args:
            name: Entity name.
            entity_type: Entity type, stored in the node's ``type`` property.
            properties: Extra node properties; ``None`` means no extras.
        """
        # NOTE(review): nodes are merged on (name, type) while
        # create_relationship matches on name only, so one name with two
        # types yields two nodes that both receive relationships -- confirm
        # this is intended.
        query = """
            MERGE (e:Entity {name: $name, type: $type})
            SET e += $properties
            RETURN e
        """
        with self.driver.session() as session:
            session.run(query, name=name, type=entity_type, properties=properties or {})

    def create_relationship(self, source: str, target: str, rel_type: str,
                            properties: dict = None):
        """Merge a ``RELATES_TO`` edge between two existing entities.

        Both endpoints are looked up by name with ``MATCH``; when either one
        is missing the query matches nothing and no relationship is created.

        Args:
            source: Name of the start entity.
            target: Name of the end entity.
            rel_type: Stored in the relationship's ``type`` property.
            properties: Extra relationship properties; ``None`` means none.
        """
        query = """
            MATCH (a:Entity {name: $source})
            MATCH (b:Entity {name: $target})
            MERGE (a)-[r:RELATES_TO {type: $rel_type}]->(b)
            SET r += $properties
        """
        with self.driver.session() as session:
            session.run(query, source=source, target=target, rel_type=rel_type,
                        properties=properties or {})

    def build_from_extraction(self, extraction: dict):
        """Persist an extraction result: every entity first, then relationships.

        Args:
            extraction: Mapping with optional ``entities`` (items holding
                ``name`` and ``type``) and ``relationships`` (items holding
                ``source``, ``target`` and ``type``) lists.
        """
        # Entities go first so that relationship endpoints can be matched.
        for entity in extraction.get("entities", []):
            self.create_entity(entity["name"], entity["type"])
        for rel in extraction.get("relationships", []):
            self.create_relationship(rel["source"], rel["target"], rel["type"])

    def query_subgraph(self, entity_name: str, depth: int = 2) -> list:
        """Return all paths of 1..``depth`` hops starting at ``entity_name``.

        Args:
            entity_name: Name of the start entity.
            depth: Maximum number of hops; must be an integer >= 1.

        Returns:
            List of the ``path`` values of the result records.

        Raises:
            ValueError: If ``depth`` is not an integer value or is below 1.
        """
        # The hop bound is spliced into the query text rather than passed as
        # a parameter, so force it to a plain int to rule out injection.
        try:
            hops = int(depth)
        except (TypeError, ValueError):
            raise ValueError(f"depth must be an integer >= 1, got {depth!r}") from None
        if hops < 1:
            raise ValueError(f"depth must be an integer >= 1, got {depth!r}")

        query = f"""
            MATCH path = (start:Entity {{name: $name}})-[*1..{hops}]-(related)
            RETURN path
        """
        with self.driver.session() as session:
            result = session.run(query, name=entity_name)
            # Consume the result while the session is still open.
            return [record["path"] for record in result]
# Example: load the extraction result into Neo4j and read a subgraph back.
# The connection details are placeholders; replace them with real values.
kg = KnowledgeGraphBuilder("bolt://localhost:7687", "neo4j", "password")
try:
    # `entities` is the extraction result produced by EntityExtractor.extract.
    kg.build_from_extraction(entities)
    subgraph = kg.query_subgraph("Apple", depth=2)
finally:
    # Always release the driver's connections, even if a query fails.
    kg.close()
Community Detection and Summarization
import networkx as nx
from cdlib import algorithms
class CommunitySummarizer:
    """Detect communities in a graph and summarise them with an LLM."""

    def __init__(self, llm):
        """Keep the LLM used for every summarisation call."""
        self.llm = llm

    def detect_communities(self, graph: nx.Graph) -> list:
        """Return the communities that cdlib's Leiden algorithm finds in ``graph``."""
        clustering = algorithms.leiden(graph)
        return clustering.communities

    def summarize_community(self, entities: list, relationships: list) -> str:
        """Ask the LLM for a concise summary of one community.

        Args:
            entities: Dicts with ``name`` and ``type`` keys.
            relationships: Dicts with ``source``, ``type`` and ``target`` keys.

        Returns:
            The ``content`` of the LLM response.
        """
        entity_str = ", ".join(
            f"{entity['name']} ({entity['type']})" for entity in entities
        )
        rel_str = "; ".join(
            f"{rel['source']} -{rel['type']}- {rel['target']}" for rel in relationships
        )
        prompt = (
            "Summarize this community of entities and their relationships:\n"
            f"Entities: {entity_str}\n"
            f"Relationships: {rel_str}\n"
            "Provide a concise summary:"
        )
        response = self.llm.invoke(prompt)
        return response.content

    def map_reduce_summarize(self, graph: nx.Graph) -> str:
        """Summarise each community, then synthesise the summaries into one answer.

        Args:
            graph: Graph whose nodes and edges may carry a ``type`` attribute;
                a missing attribute is treated as an empty string.

        Returns:
            The ``content`` of the final synthesis response.
        """
        partial_summaries = []
        # Map step: one LLM summary per detected community.
        for members in self.detect_communities(graph):
            member_dicts = []
            for node in members:
                node_type = graph.nodes[node].get("type", "")
                member_dicts.append({"name": node, "type": node_type})

            edge_dicts = []
            for src, dst in graph.subgraph(members).edges():
                edge_type = graph[src][dst].get("type", "")
                edge_dicts.append({"source": src, "target": dst, "type": edge_type})

            partial_summaries.append(self.summarize_community(member_dicts, edge_dicts))

        # Reduce step: merge all community summaries in a single prompt.
        combined = "\n".join(partial_summaries)
        final_prompt = (
            "Synthesize these community summaries into one answer:\n"
            f"{combined}\n"
            "Final answer:"
        )
        return self.llm.invoke(final_prompt).content
Graph Query Engine
class GraphQueryEngine:
    """Answer questions by translating them to Cypher and querying the graph."""

    def __init__(self, graph_builder, llm, community_summarizer):
        """Store the collaborators.

        Args:
            graph_builder: Object exposing a Neo4j ``driver`` attribute.
            llm: Model whose ``invoke(prompt)`` result exposes ``.content``.
            community_summarizer: Stored for callers; not used by the
                methods of this class.
        """
        self.graph_builder = graph_builder
        self.llm = llm
        self.community_summarizer = community_summarizer

    @staticmethod
    def _strip_code_fence(raw: str) -> str:
        """Return ``raw`` without a surrounding Markdown code fence, if any."""
        cleaned = raw.strip()
        if not cleaned.startswith("```"):
            return cleaned
        # Drop the opening fence line, which may carry a language tag
        # such as "cypher". A fence with no newline keeps the rest of the line.
        head, newline, rest = cleaned.partition("\n")
        cleaned = (rest if newline else head[3:]).rstrip()
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        return cleaned.strip()

    def natural_language_to_cypher(self, question: str) -> str:
        """Ask the LLM to translate ``question`` into a Cypher query.

        Returns:
            The generated query with surrounding whitespace and any Markdown
            code fence removed, so it can be passed to ``session.run``.
        """
        prompt = (
            "Convert this question to a Cypher query for Neo4j.\n"
            "Schema: (Entity {name, type})-[:RELATES_TO {type}]->(Entity)\n"
            f"Question: {question}\n"
            "Cypher:"
        )
        return self._strip_code_fence(self.llm.invoke(prompt).content)

    def retrieve(self, question: str) -> str:
        """Run the generated Cypher and return the records as a string.

        Returns:
            ``str()`` of the list of result records, each converted to a dict.
        """
        cypher = self.natural_language_to_cypher(question)
        # SECURITY: this executes LLM-generated Cypher verbatim. A crafted
        # question could make the model emit a write or delete statement.
        # Use a database user restricted to read access, or validate the
        # query before running it on data you care about.
        with self.graph_builder.driver.session() as session:
            result = session.run(cypher)
            # Materialise the records while the session is still open.
            records = [dict(r) for r in result]
        return str(records)

    def answer(self, question: str) -> str:
        """Answer ``question`` from the graph data retrieved for it.

        Returns:
            The ``content`` of the LLM response.
        """
        graph_context = self.retrieve(question)
        prompt = (
            "Answer based on this graph data:\n"
            f"{graph_context}\n"
            f"Question: {question}\n"
            "Answer:"
        )
        return self.llm.invoke(prompt).content
Key Takeaways
- Graph RAG captures explicit relationships between entities that pure vector-similarity search can miss
- Entity extraction with LLMs enables automatic KG construction
- Community detection enables hierarchical summarization
- Hybrid approaches that combine vector search with graph search typically give the best results