From latestaiagents
Implement GraphRAG combining knowledge graphs with RAG for multi-hop reasoning. Use this skill when building knowledge graph RAG, implementing multi-hop queries, using Neo4j with RAG, or connecting entities across documents. Activate when: GraphRAG, knowledge graph, multi-hop reasoning, Neo4j RAG, entity extraction, relationship queries, graph database, connected data.
npx claudepluginhub latestaiagents/agent-skills --plugin skills-authoring

This skill uses the workspace's default tool permissions.
**Combine knowledge graphs with RAG for complex reasoning over connected data.**
Builds GraphRAG retrieval pipelines on Neo4j using neo4j-graphrag Python package. Covers retriever selection (VectorRetriever, HybridRetriever, Cypher variants), retrieval_query Cypher fragments, LLM wiring, embedder/index setup, LangChain/LlamaIndex integration.
Design and build knowledge graphs for modeling complex relationships, semantic search, and knowledge bases. Guides ontology design, entity relationships, and graph database selection.
Designs GraphRAG systems integrating graph DBs, vector stores, orchestration frameworks, LLMs. Guides pattern selection, tech stacks, pipelines, customizations for multi-hop retrieval.
Share bugs, ideas, or general feedback.
Combine knowledge graphs with RAG for complex reasoning over connected data.
| Use Case | Vector RAG | GraphRAG |
|---|---|---|
| Simple Q&A | ✅ | Overkill |
| Factual lookup | ✅ | ✅ |
| Multi-hop reasoning | ❌ | ✅ |
| "How is X related to Y?" | ❌ | ✅ |
| Entity relationships | ❌ | ✅ |
| Compliance/audit trails | ❌ | ✅ |
| Summarizing themes | ❌ | ✅ |
┌─────────────────────────────────────────────────────────────┐
│ User Query │
└─────────────────────────────────────────────────────────────┘
│
▼
┌─────────────────────────────────────────────────────────────┐
│ Query Analyzer │
│ (Determine: vector, graph, or hybrid?) │
└─────────────────────────────────────────────────────────────┘
│ │
┌────────┴────────┐ ┌────────┴────────┐
▼ ▼ ▼ ▼
┌─────────────────┐ ┌─────────────────┐
│ Vector Search │ │ Graph Traverse │
│ (Semantic) │ │ (Structured) │
└─────────────────┘ └─────────────────┘
│ │
└────────┬──────────┘
▼
┌─────────────────────────────────────────────────────────────┐
│ Context Fusion │
└─────────────────────────────────────────────────────────────┘
│
▼
┌─────────────────────────────────────────────────────────────┐
│ LLM Generation │
└─────────────────────────────────────────────────────────────┘
import json
import re

from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from neo4j import GraphDatabase
# Step 1: Extract entities and relationships from documents.
# Prompt template for LLM knowledge extraction. {text} is filled in by the
# chain; doubled braces ({{ }}) escape literal JSON braces from str.format.
EXTRACTION_PROMPT = """Extract entities and relationships from this text.
Text: {text}
Return JSON format:
{{
"entities": [
{{"name": "...", "type": "Person|Organization|Concept|Event|Location"}}
],
"relationships": [
{{"source": "...", "target": "...", "type": "..."}}
]
}}
"""
async def extract_knowledge(text: str, llm: ChatOpenAI) -> dict:
    """Extract entities and relationships from *text* using an LLM.

    Args:
        text: Raw document text to analyze.
        llm: Chat model used to run the extraction prompt.

    Returns:
        Parsed dict with "entities" and "relationships" keys.

    Raises:
        json.JSONDecodeError: If the model response is not valid JSON.
    """
    prompt = ChatPromptTemplate.from_template(EXTRACTION_PROMPT)
    chain = prompt | llm
    result = await chain.ainvoke({"text": text})
    # Fix: `json` was used here but never imported (NameError at runtime).
    # Also strip markdown code fences that chat models commonly wrap JSON in.
    content = result.content.strip()
    if content.startswith("```"):
        content = content.strip("`").removeprefix("json").strip()
    return json.loads(content)
# Step 2: Store in Neo4j
class KnowledgeGraph:
def __init__(self, uri: str, user: str, password: str):
self.driver = GraphDatabase.driver(uri, auth=(user, password))
def add_entity(self, name: str, entity_type: str, properties: dict = None):
with self.driver.session() as session:
session.run(
f"""
MERGE (e:{entity_type} {{name: $name}})
SET e += $properties
""",
name=name,
properties=properties or {}
)
def add_relationship(self, source: str, target: str, rel_type: str):
with self.driver.session() as session:
session.run(
"""
MATCH (a {name: $source})
MATCH (b {name: $target})
MERGE (a)-[r:""" + rel_type + """]->(b)
""",
source=source,
target=target
)
from llama_index.core import PropertyGraphIndex
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore


def create_property_graph_index(
    documents,
    url: str = "bolt://localhost:7687",
    username: str = "neo4j",
    password: str = "password",
):
    """Build a LlamaIndex property graph index backed by Neo4j.

    Args:
        documents: LlamaIndex Document objects to ingest.
        url: Bolt URL of the Neo4j instance.
        username: Neo4j user (default matches the original hard-coded value).
        password: Neo4j password — pass real credentials in production rather
            than relying on this demo default.

    Returns:
        A PropertyGraphIndex with entities/relationships auto-extracted.
    """
    # Connect to Neo4j as the persistent graph store.
    graph_store = Neo4jPropertyGraphStore(
        username=username,
        password=password,
        url=url,
    )
    # from_documents runs LLM-based entity/relationship extraction per chunk.
    index = PropertyGraphIndex.from_documents(
        documents,
        property_graph_store=graph_store,
        show_progress=True,
    )
    return index
def query_with_graph(index, query: str):
    """Retrieve nodes for *query* via the index's combined vector + graph retriever."""
    # include_text=True returns original chunk text alongside graph matches;
    # similarity_top_k bounds the vector-search candidates.
    hybrid_retriever = index.as_retriever(
        include_text=True,
        similarity_top_k=5,
    )
    return hybrid_retriever.retrieve(query)
from langchain_community.graphs import Neo4jGraph
from langchain.chains import GraphCypherQAChain


def create_text_to_cypher_chain(
    url: str = "bolt://localhost:7687",
    username: str = "neo4j",
    password: str = "password",
    model: str = "gpt-4",
):
    """Build a chain that converts natural language to Cypher and runs it.

    Args:
        url: Bolt URL of the Neo4j instance.
        username: Neo4j user (default matches the original hard-coded value).
        password: Neo4j password — use environment/secret storage in
            production instead of this demo default.
        model: OpenAI chat model used for Cypher generation.

    Returns:
        A GraphCypherQAChain; call .invoke({"query": ...}) with a question.
    """
    graph = Neo4jGraph(url=url, username=username, password=password)
    # Print schema for debugging — the chain grounds its prompt in this schema.
    print(graph.schema)
    chain = GraphCypherQAChain.from_llm(
        # temperature=0 keeps generated Cypher deterministic.
        llm=ChatOpenAI(model=model, temperature=0),
        graph=graph,
        verbose=True,
        validate_cypher=True,  # Validate generated Cypher before executing
        return_intermediate_steps=True,
    )
    return chain
# Example usage: build the chain once, then ask natural-language questions.
chain = create_text_to_cypher_chain()
result = chain.invoke({
    "query": "What companies has John Smith worked for?"
})
# Example generated Cypher:
# MATCH (p:Person {name: 'John Smith'})-[:WORKED_AT]->(c:Company) RETURN c.name
class HybridGraphRAG:
    """Combine vector similarity search with knowledge-graph traversal.

    Retrieval fuses two evidence sources:
      1. semantically similar text chunks from the vector store, and
      2. 1-hop graph neighborhoods of entities mentioned in the query.
    """

    def __init__(self, vector_store, graph_store):
        self.vector_store = vector_store
        self.graph_store = graph_store

    def retrieve(self, query: str, top_k: int = 5) -> dict:
        """Return fused retrieval context for *query*.

        Args:
            query: Natural-language question.
            top_k: Number of vector-search chunks to fetch.

        Returns:
            Dict with "vector_chunks", "graph_context", and "entities" keys.
            (The original annotation claimed ``list[dict]`` but a single dict
            is returned; the annotation is corrected here.)
        """
        # 1. Vector search for semantically relevant chunks.
        vector_results = self.vector_store.similarity_search(query, k=top_k)

        # 2. Entities mentioned in the query seed the graph traversal.
        query_entities = self._extract_entities(query)

        # 3. 1-hop neighborhood per entity, via a PARAMETERIZED query —
        #    never interpolate user text into Cypher (injection risk in the
        #    original f-string version).
        graph_context = []
        for entity in query_entities:
            neighbors = self.graph_store.query(
                """
                MATCH (e {name: $name})-[r]-(n)
                RETURN e.name, type(r), n.name, n.description
                LIMIT 10
                """,
                params={"name": entity},
            )
            graph_context.extend(neighbors)

        # 4. Fuse both evidence sources for the downstream LLM prompt.
        return {
            "vector_chunks": [r.page_content for r in vector_results],
            "graph_context": graph_context,
            "entities": query_entities,
        }

    def _extract_entities(self, text: str) -> list[str]:
        """Extract entity names from *text*.

        Placeholder for an NER model or LLM call. Returns an empty list —
        the original referenced an undefined ``entities`` variable, which
        raised NameError on every call.
        """
        # TODO: replace with real NER / LLM-based entity extraction.
        return []
# Microsoft's GraphRAG approach uses community detection
# for global summarization queries
from graphrag.index import run_indexing
from graphrag.query import LocalSearch, GlobalSearch


async def run_graphrag_demo() -> None:
    """Index ./documents with community detection, then run local and global search.

    NOTE(review): the original snippet used ``await`` at module level, which
    is a SyntaxError in a regular Python module. Wrapping it in an async
    function makes the example runnable via ``asyncio.run(run_graphrag_demo())``.
    """
    # Index documents (builds the entity graph and Leiden communities).
    await run_indexing(
        input_dir="./documents",
        output_dir="./index",
        config={
            "llm": {"model": "gpt-4"},
            "embeddings": {"model": "text-embedding-3-small"},
            "chunks": {"size": 300, "overlap": 100},
            "community_detection": {
                "algorithm": "leiden",
                "resolution": 1.0,
            },
        },
    )

    # Local search: answers questions about specific entities.
    local = LocalSearch(index_dir="./index")
    result = local.search("What is Company X's main product?")

    # Global search: summarization across community reports.
    global_search = GlobalSearch(index_dir="./index")
    result = global_search.search("What are the main themes in these documents?")
| Pattern | Use When |
|---|---|
| Entity Extraction → KG | Building from scratch, custom schema |
| Property Graph Index | Quick setup, LlamaIndex ecosystem |
| Text-to-Cypher | Existing graph, complex queries |
| Hybrid Vector + Graph | Need both semantic + structural |
| Microsoft GraphRAG | Large corpus, summarization queries |