Help us improve
Share bugs, ideas, or general feedback.
From dspy-api-skills
Guides building a multi-turn conversational AI chatbot with memory, state, and intent routing using DSPy and LangGraph. Use for customer support, helpdesk, onboarding, FAQ, or sales qualification bots.
npx claudepluginhub lebsral/dspy-programming-not-prompting-lms-skills --plugin dspy-build-skills
How this skill is triggered — by the user, by Claude, or both
Slash command
/dspy-api-skills:ai-building-chatbots
The summary Claude sees in its skill listing — used to decide when to auto-load this skill
Guide the user through building a multi-turn chatbot that remembers context, follows conversation flows, and produces high-quality responses. Uses DSPy for optimizable response generation and LangGraph for conversation state, memory, and flow control.
Guides technical evaluation of code review feedback: read fully, restate for understanding, verify against codebase, respond with reasoning or pushback before implementing.
Share bugs, ideas, or general feedback.
Guide the user through building a multi-turn chatbot that remembers context, follows conversation flows, and produces high-quality responses. Uses DSPy for optimizable response generation and LangGraph for conversation state, memory, and flow control.
Ask the user:
The core of your chatbot is a DSPy module that generates responses given conversation history and context.
# The import must come before the first use of `dspy`; configuring the LM
# ahead of the import would raise NameError.
import dspy

# Pick the language model that DSPy modules will call by default.
lm = dspy.LM("openai/gpt-4o-mini")  # or "anthropic/claude-sonnet-4-5-20250929", etc.
dspy.configure(lm=lm)
class ChatResponse(dspy.Signature):
    """Generate a helpful, on-brand response to the user's message."""

    # NOTE: the docstring above is part of the signature definition (DSPy
    # uses it as the task instructions), so rewording it changes the prompt.

    # Inputs supplied on every turn.
    conversation_history: str = dspy.InputField(desc="Previous messages in the conversation")
    context: str = dspy.InputField(desc="Relevant information from docs or database")
    user_message: str = dspy.InputField(desc="The user's latest message")

    # Output produced by the model.
    response: str = dspy.OutputField(desc="Helpful response to the user")
class ChatBot(dspy.Module):
    """Single-step chatbot: one ChainOfThought predictor over ChatResponse."""

    def __init__(self):
        self.respond = dspy.ChainOfThought(ChatResponse)

    def forward(self, conversation_history, context, user_message):
        """Return the predictor's result for one conversation turn.

        The three arguments are forwarded unchanged, by keyword, to the
        ChatResponse predictor.
        """
        turn_inputs = {
            "conversation_history": conversation_history,
            "context": context,
            "user_message": user_message,
        }
        return self.respond(**turn_inputs)
Route different intents to specialized handlers:
from typing import Literal
class ClassifyIntent(dspy.Signature):
    """Classify the user's intent from their message."""

    # Inputs: prior turns plus the message being classified.
    conversation_history: str = dspy.InputField()
    user_message: str = dspy.InputField()

    # Output is constrained to this closed set of labels.
    intent: Literal["question", "complaint", "request", "greeting", "goodbye"] = dspy.OutputField()
class ChatBotWithRouting(dspy.Module):
    """Classify the user's intent, then delegate to an intent-specific handler.

    The handler signatures (AnswerQuestion, HandleComplaint, HandleRequest,
    Greeting) are not defined in this snippet and must be provided elsewhere.
    """

    def __init__(self):
        self.classify = dspy.Predict(ClassifyIntent)
        self.respond_question = dspy.ChainOfThought(AnswerQuestion)
        self.respond_complaint = dspy.ChainOfThought(HandleComplaint)
        self.respond_request = dspy.ChainOfThought(HandleRequest)
        self.respond_greeting = dspy.Predict(Greeting)

    def forward(self, conversation_history, context, user_message):
        """Route one turn to the handler matching the classified intent."""
        classification = self.classify(
            conversation_history=conversation_history,
            user_message=user_message,
        )
        intent = classification.intent

        if intent == "complaint":
            handler = self.respond_complaint
        elif intent == "request":
            handler = self.respond_request
        elif intent == "greeting":
            handler = self.respond_greeting
        else:
            # "question" and every label without a dedicated handler
            # (including "goodbye") use the question handler.
            handler = self.respond_question

        return handler(
            conversation_history=conversation_history,
            context=context,
            user_message=user_message,
        )
LangGraph manages the conversation flow — what state the bot is in, when to transition, and when to escalate.
from langgraph.graph import StateGraph, START, END
from typing import TypedDict, Annotated
import operator
class ConversationState(TypedDict):
    # State dictionary shared by the graph nodes in this guide.
    # `messages` is annotated with operator.add as its reducer, so a list
    # returned by a node is presumably concatenated onto the stored history
    # rather than replacing it — confirm against the LangGraph reducer docs.
    messages: Annotated[list[dict], operator.add] # full message history
    current_intent: str  # written by classify_node
    context: str # retrieved docs/data for current turn
    escalate: bool # whether to hand off to a human
    resolved: bool # whether the issue is resolved
    turn_count: int  # incremented by respond_node after each reply
import dspy
# Initialize DSPy modules
# Created once at module level and reused by the node functions below:
# `classifier` is called from classify_node, `responder` from respond_node.
classifier = dspy.Predict(ClassifyIntent)
responder = dspy.ChainOfThought(ChatResponse)
def classify_node(state: ConversationState) -> dict:
    """Label the newest message with an intent.

    The last entry of ``state["messages"]`` is the message to classify;
    everything before it is passed along as formatted history. Returns a
    state update containing only ``current_intent``.
    """
    all_messages = state["messages"]
    prior_turns = format_history(all_messages[:-1])
    latest_text = all_messages[-1]["content"]
    prediction = classifier(
        conversation_history=prior_turns,
        user_message=latest_text,
    )
    return {"current_intent": prediction.intent}
def retrieve_node(state: ConversationState) -> dict:
    """Look up documents for the newest message.

    Returns a state update whose ``context`` is the retrieved passages
    joined with newlines.
    """
    latest_text = state["messages"][-1]["content"]
    # Your retrieval logic here (see /ai-searching-docs)
    passages = retrieve_relevant_docs(latest_text)
    joined_passages = "\n".join(passages)
    return {"context": joined_passages}
def respond_node(state: ConversationState) -> dict:
    """Produce the assistant's reply for the current turn.

    Calls the module-level ``responder`` with the formatted prior history,
    the retrieved context, and the newest message. Returns a state update
    with the reply as a one-element ``messages`` list and ``turn_count``
    increased by one.
    """
    all_messages = state["messages"]
    prior_turns = format_history(all_messages[:-1])
    latest_text = all_messages[-1]["content"]

    prediction = responder(
        conversation_history=prior_turns,
        context=state["context"],
        user_message=latest_text,
    )

    assistant_message = {"role": "assistant", "content": prediction.response}
    next_turn_count = state["turn_count"] + 1
    return {"messages": [assistant_message], "turn_count": next_turn_count}
def check_escalation(state: ConversationState) -> dict:
    """Flag the conversation for human handoff.

    Escalation is requested only when the current intent is "complaint"
    and more than three turns have been counted. Returns a state update
    containing only ``escalate``.
    """
    if state["current_intent"] != "complaint":
        # Not a complaint: never escalate, and turn_count is not consulted.
        return {"escalate": False}
    return {"escalate": state["turn_count"] > 3}
def format_history(messages: list[dict], max_messages: int = 10) -> str:
    """Render the most recent messages as "role: content" lines.

    Args:
        messages: Message dicts, each with "role" and "content" keys.
        max_messages: How many trailing messages to keep. Defaults to 10,
            the window that was previously hard-coded.

    Returns:
        One line per kept message, joined with newlines; an empty string
        when there is nothing to render.
    """
    # A non-positive window must yield nothing: messages[-0:] would
    # otherwise return the entire list.
    recent = messages[-max_messages:] if max_messages > 0 else []
    return "\n".join(f"{m['role']}: {m['content']}" for m in recent)
# Build the graph
graph = StateGraph(ConversationState)

# Register one node per step of a turn.
graph.add_node("classify", classify_node)
graph.add_node("retrieve", retrieve_node)
graph.add_node("respond", respond_node)
graph.add_node("check_escalation", check_escalation)

# Fixed linear flow: classify -> retrieve -> respond -> check_escalation.
graph.add_edge(START, "classify")
graph.add_edge("classify", "retrieve")
graph.add_edge("retrieve", "respond")
graph.add_edge("respond", "check_escalation")
def route_after_escalation_check(state: ConversationState) -> str:
    """Pick the branch label to follow after the escalation check.

    Returns "escalate" when ``state["escalate"]`` is truthy, otherwise
    "done".
    """
    return "escalate" if state["escalate"] else "done"
# Branch on the label returned by route_after_escalation_check.
# NOTE(review): both labels currently map to END, so escalation does not yet
# change the flow; point "escalate" at a handoff node once one exists.
graph.add_conditional_edges(
    "check_escalation",
    route_after_escalation_check,
    {"escalate": END, "done": END},
)
app = graph.compile()

# Run a single turn, supplying an initial value for every state key.
result = app.invoke({
    "messages": [{"role": "user", "content": "How do I reset my password?"}],
    "current_intent": "",
    "context": "",
    "escalate": False,
    "resolved": False,
    "turn_count": 0,
})
# The last message in the returned state is the assistant reply appended by
# respond_node.
print(result["messages"][-1]["content"])
LangGraph's checkpointer persists conversation state across requests:
from langgraph.checkpoint.memory import MemorySaver

# Compile the same graph with a checkpointer so state is kept between calls.
checkpointer = MemorySaver()
app = graph.compile(checkpointer=checkpointer)
# Each user session gets a unique thread_id
config = {"configurable": {"thread_id": "user-abc-123"}}
# Turn 1
# The first call for a thread supplies initial values for every state key.
result = app.invoke(
    {"messages": [{"role": "user", "content": "Hi, I need help with billing"}],
     "current_intent": "", "context": "", "escalate": False, "resolved": False, "turn_count": 0},
    config=config,
)
# Turn 2 — state is preserved, the bot remembers the conversation
# Only the new user message is passed; the same config selects the thread.
result = app.invoke(
    {"messages": [{"role": "user", "content": "I was charged twice last month"}]},
    config=config,
)
For production, use a persistent backend:
from langgraph.checkpoint.postgres import PostgresSaver

# PostgresSaver is built from a connection string with the
# `from_conn_string` context manager; its constructor expects an open
# connection object and has no `conn_string` keyword.
with PostgresSaver.from_conn_string("postgresql://user:pass@localhost/chatbot") as checkpointer:
    # Create the checkpoint tables; required the first time the saver is used.
    checkpointer.setup()
    app = graph.compile(checkpointer=checkpointer)
    # Serve conversations (app.invoke(...)) inside this block: the database
    # connection is closed when the block exits.
When conversations get long, summarize older messages to stay within token limits:
class SummarizeConversation(dspy.Signature):
    """Summarize the conversation so far, preserving key details."""

    # Input: the conversation text to condense.
    conversation: str = dspy.InputField()
    # Output: the condensed version.
    summary: str = dspy.OutputField(desc="Concise summary of the conversation so far")


# Module-level predictor used by maybe_summarize.
summarizer = dspy.Predict(SummarizeConversation)
def maybe_summarize(state: ConversationState) -> dict:
    """Condense older turns into a summary once the history grows long.

    When more than 20 messages are stored, every message except the five
    most recent is summarized. The returned state update holds one system
    message carrying that summary, followed by those five messages.
    Otherwise an empty update is returned.

    NOTE(review): ``ConversationState.messages`` is declared with an
    ``operator.add`` reducer, so this update is presumably appended to the
    stored history rather than replacing it. Trimming the stored list needs
    a reducer that supports replacement — confirm before relying on this
    node to shrink state.
    """
    messages = state["messages"]
    if len(messages) <= 20:
        return {}

    older, recent = messages[:-5], messages[-5:]
    # Format every older message here. format_history() keeps only the last
    # 10 messages, which would silently leave the earliest turns out of the
    # summary.
    transcript = "\n".join(f"{m['role']}: {m['content']}" for m in older)
    summary = summarizer(conversation=transcript).summary

    # Keep summary + last 5 messages
    return {
        "messages": [
            {"role": "system", "content": f"Summary of earlier conversation: {summary}"},
            *recent,
        ]
    }
Retrieve relevant documents each turn to keep responses factual.
class DocGroundedResponse(dspy.Signature):
    """Answer the user's question based on the provided documentation.
    Only use information from the docs. If the docs don't cover it, say so."""

    # Inputs: prior turns, the retrieved passages, and the newest message.
    conversation_history: str = dspy.InputField()
    docs: list[str] = dspy.InputField(desc="Relevant documentation passages")
    user_message: str = dspy.InputField()

    # Output: the reply text.
    response: str = dspy.OutputField()
class GroundedChatBot(dspy.Module):
    """Chatbot that retrieves passages each turn and answers from them."""

    def __init__(self, retriever):
        self.retriever = retriever
        self.respond = dspy.ChainOfThought(DocGroundedResponse)

    def forward(self, conversation_history, user_message):
        """Retrieve passages for ``user_message`` and generate a reply.

        The retriever is called with the message alone; its ``passages``
        attribute is passed to the predictor as ``docs``.
        """
        # Retrieve docs relevant to the current message
        retrieved = self.retriever(user_message)
        return self.respond(
            conversation_history=conversation_history,
            docs=retrieved.passages,
            user_message=user_message,
        )
See /ai-searching-docs for setting up retrievers and vector stores, including loading data from PDFs, Notion, and other sources with LangChain document loaders.
Use dspy.Refine with a reward function to enforce guardrails on chatbot responses:
class GroundedChatBotInner(dspy.Module):
    """Retrieval-grounded responder, used as the module wrapped by dspy.Refine."""

    def __init__(self, retriever):
        self.retriever = retriever
        self.respond = dspy.ChainOfThought(DocGroundedResponse)

    def forward(self, conversation_history, user_message):
        """Fetch passages for the message, then answer from them."""
        passages = self.retriever(user_message).passages
        predictor_inputs = {
            "conversation_history": conversation_history,
            "docs": passages,
            "user_message": user_message,
        }
        return self.respond(**predictor_inputs)
def chatbot_response_reward(args, pred):
    """Score a chatbot reply between 0.0 and 1.0.

    Args:
        args: Unused by this function.
        pred: Object whose ``response`` attribute is the reply text.

    Returns:
        0.0 if the reply contains "I am an AI"; otherwise 1.0 minus 0.2 for
        replies of 200 or more words and minus 0.1 if any flagged word
        appears, never below 0.0.
    """
    reply = pred.response

    # Hard constraint -- don't break character
    if "I am an AI" in reply:
        return 0.0

    reward = 1.0

    # Soft penalties
    word_count = len(reply.split())
    if word_count >= 200:
        reward -= 0.2  # prefer concise responses

    lowered_reply = reply.lower()
    for flagged_word in ("obviously", "clearly", "simply"):
        if flagged_word in lowered_reply:
            reward -= 0.1  # avoid condescending language
            break  # penalize once, however many flagged words appear

    return max(reward, 0.0)
def make_guarded_chatbot(retriever):
    """Wrap a retrieval-grounded chatbot in dspy.Refine with the reward above.

    Refine is configured with N=3, ``chatbot_response_reward`` as the reward
    function, and a threshold of 0.8.
    """
    inner_bot = GroundedChatBotInner(retriever)
    refine_options = {
        "N": 3,
        "reward_fn": chatbot_response_reward,
        "threshold": 0.8,
    }
    return dspy.Refine(module=inner_bot, **refine_options)
Use LangGraph's interrupt to pause before the bot takes real actions:
# NOTE(review): "execute_refund" and "cancel_account" are not among the nodes
# added to the graph in the earlier snippets; they presumably belong to an
# action-taking graph defined elsewhere — confirm the node names match.
app = graph.compile(
    checkpointer=checkpointer,
    interrupt_before=["execute_refund", "cancel_account"], # pause here
)
# Bot runs until it reaches a sensitive action
# `input_state` and `config` are expected to be defined by the caller.
result = app.invoke(input_state, config)
# Human agent reviews the proposed action
# If approved, resume:
result = app.invoke(None, config) # continues from checkpoint
def chatbot_metric(example, prediction, trace=None):
    """Judge one conversation turn with an LM-based grader.

    Args:
        example: Reference turn providing ``user_message``, ``response``
            and ``conversation_history``.
        prediction: Model output providing ``response``.
        trace: Accepted for the metric signature; not used here.

    Returns:
        The judge's ``is_good`` verdict.
    """
    judge_inputs = {
        "user_message": example.user_message,
        "expected_response": example.response,
        "actual_response": prediction.response,
        "conversation_history": example.conversation_history,
    }
    verdict = dspy.Predict(JudgeTurn)(**judge_inputs)
    return verdict.is_good
class JudgeTurn(dspy.Signature):
    """Judge if the chatbot response is helpful, accurate, and on-topic."""

    # Inputs: the turn being judged, the reference reply, and prior context.
    user_message: str = dspy.InputField()
    expected_response: str = dspy.InputField()
    actual_response: str = dspy.InputField()
    conversation_history: str = dspy.InputField()

    # Output: the verdict returned by chatbot_metric.
    is_good: bool = dspy.OutputField()
# One training example per turn of every recorded conversation. The three
# fields named in with_inputs() are marked as inputs; `response` is the label.
trainset = [
    dspy.Example(
        conversation_history=turn["history"],
        user_message=turn["user_message"],
        context=turn["context"],
        response=turn["response"],
    ).with_inputs("conversation_history", "user_message", "context")
    for convo in real_conversations
    for turn in convo["turns"]
]
# Optimize the chatbot against the training set using chatbot_metric.
# NOTE(review): `chatbot` is not defined in the snippets shown; it is
# presumably an instance of one of the chatbot modules above — confirm.
optimizer = dspy.MIPROv2(metric=chatbot_metric, auto="medium")
optimized_bot = optimizer.compile(chatbot, trainset=trainset)
# Save optimized prompts
optimized_bot.save("chatbot_optimized.json")
- Gate sensitive actions with interrupt_before so humans approve refunds, cancellations, etc.
- If you are writing if/else chains inside forward() to manage conversation state, move that logic to LangGraph.
- For long conversations, use the maybe_summarize pattern from Step 4.
- Do not forget .with_inputs() when building conversation training data. Every dspy.Example for chatbot training needs .with_inputs("conversation_history", "user_message", "context") — without it, the optimizer treats all fields as outputs and optimization silently produces garbage.
- Do not use a single ChatResponse signature for all intents. Different intents need different handling — a complaint needs empathy and escalation logic, a question needs retrieval accuracy, a greeting needs brevity. Use ClassifyIntent + separate handler modules per intent rather than one signature trying to do everything.

Install any skill:
npx skills add lebsral/DSPy-Programming-not-prompting-LMs-skills --skill <name>
Related skills: /ai-searching-docs, /ai-taking-actions, /ai-coordinating-agents, /ai-improving-accuracy, /ai-building-pipelines, /dspy-modules, /dspy-refine, /dspy-react.
Install /ai-do if you do not have it — it routes any AI problem to the right skill and is the fastest way to work: npx skills add lebsral/DSPy-Programming-not-prompting-LMs-skills --skill ai-do