Production observability for LangChain: LangSmith tracing (zero-code), custom Prometheus metrics, OpenTelemetry integration, structured logging, and Grafana dashboards.
LangSmith automatically traces all LangChain calls once the environment variables below are set.

```bash
# Add to .env. That's it; no code changes needed.
LANGSMITH_TRACING=true
LANGSMITH_API_KEY=lsv2_pt_...
LANGSMITH_PROJECT=my-app-production

# Optional: background callbacks for lower latency (non-serverless only)
LANGCHAIN_CALLBACKS_BACKGROUND=true
```
Every chain, LLM call, tool invocation, and retriever query is then traced automatically, including inputs, outputs, latency, token usage, and errors. Traces can also be queried programmatically with the LangSmith client:

```typescript
import { Client } from "langsmith";

const client = new Client();

// Fetch the ten most recent failed runs from the production project
const failedRuns = client.listRuns({
  projectName: "my-app-production",
  error: true,
  limit: 10,
});

for await (const run of failedRuns) {
  console.log(`${run.name}: ${run.error} (${run.total_tokens} tokens)`);
}
```
For custom in-process metrics, implement a callback handler that counts requests, errors, and tokens, and tracks latency percentiles:

```typescript
import { BaseCallbackHandler } from "@langchain/core/callbacks/base";
import { ChatOpenAI } from "@langchain/openai";

interface Metrics {
  totalRequests: number;
  totalErrors: number;
  totalTokens: number;
  latencies: number[];
}

class MetricsCallback extends BaseCallbackHandler {
  name = "MetricsCallback";
  metrics: Metrics = { totalRequests: 0, totalErrors: 0, totalTokens: 0, latencies: [] };
  private startTimes = new Map<string, number>();

  handleLLMStart(_llm: any, _prompts: string[], runId: string) {
    this.metrics.totalRequests++;
    this.startTimes.set(runId, Date.now());
  }

  handleLLMEnd(output: any, runId: string) {
    const start = this.startTimes.get(runId);
    if (start) {
      this.metrics.latencies.push(Date.now() - start);
      this.startTimes.delete(runId);
    }
    const usage = output.llmOutput?.tokenUsage;
    if (usage) {
      this.metrics.totalTokens += usage.totalTokens ?? 0;
    }
  }

  handleLLMError(_error: Error, runId: string) {
    this.metrics.totalErrors++;
    this.startTimes.delete(runId);
  }

  getReport() {
    // Sort a copy of the latencies so percentiles are simple index lookups
    const sorted = [...this.metrics.latencies].sort((a, b) => a - b);
    return {
      requests: this.metrics.totalRequests,
      errors: this.metrics.totalErrors,
      errorRate: this.metrics.totalRequests > 0
        ? (this.metrics.totalErrors / this.metrics.totalRequests * 100).toFixed(1) + "%"
        : "0%",
      totalTokens: this.metrics.totalTokens,
      p50Latency: sorted[Math.floor(sorted.length * 0.5)] ?? 0,
      p95Latency: sorted[Math.floor(sorted.length * 0.95)] ?? 0,
      p99Latency: sorted[Math.floor(sorted.length * 0.99)] ?? 0,
    };
  }
}

// Usage: attach the handler to the model so every call is recorded
const metrics = new MetricsCallback();
const model = new ChatOpenAI({
  model: "gpt-4o-mini",
  callbacks: [metrics],
});

// After some operations:
console.table(metrics.getReport());
```
The same pattern can export Prometheus metrics from a Python service:

```python
import time

from langchain_core.callbacks import BaseCallbackHandler
from prometheus_client import Counter, Histogram, start_http_server

# Define metrics
llm_requests = Counter("langchain_llm_requests_total", "LLM requests", ["model", "status"])
llm_latency = Histogram("langchain_llm_latency_seconds", "LLM latency", ["model"])
llm_tokens = Counter("langchain_llm_tokens_total", "Tokens used", ["model", "type"])

class PrometheusCallback(BaseCallbackHandler):
    def __init__(self):
        self._start_times = {}

    def on_llm_start(self, serialized, prompts, run_id, **kwargs):
        self._start_times[str(run_id)] = time.time()

    def on_llm_end(self, response, run_id, **kwargs):
        # The OpenAI integrations populate model_name in llm_output
        model = (response.llm_output or {}).get("model_name", "unknown")
        elapsed = time.time() - self._start_times.pop(str(run_id), time.time())
        llm_requests.labels(model=model, status="success").inc()
        llm_latency.labels(model=model).observe(elapsed)
        if response.llm_output and "token_usage" in response.llm_output:
            usage = response.llm_output["token_usage"]
            llm_tokens.labels(model=model, type="input").inc(usage.get("prompt_tokens", 0))
            llm_tokens.labels(model=model, type="output").inc(usage.get("completion_tokens", 0))

    def on_llm_error(self, error, run_id, **kwargs):
        self._start_times.pop(str(run_id), None)
        llm_requests.labels(model="unknown", status="error").inc()

# Expose metrics for Prometheus to scrape on :9090
start_http_server(9090)
```
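A minimal wiring sketch, assuming the `langchain-openai` package and a `gpt-4o-mini` model (swap in whatever model the app actually uses):

```python
from langchain_openai import ChatOpenAI

# Attach the callback so every LLM call updates the Prometheus metrics
llm = ChatOpenAI(model="gpt-4o-mini", callbacks=[PrometheusCallback()])
llm.invoke("Say hello")  # counters visible afterwards at http://localhost:9090/metrics
```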
With the metrics exported, a few PromQL queries cover the basics:

```promql
# Request rate
rate(langchain_llm_requests_total[5m])

# P95 latency
histogram_quantile(0.95, rate(langchain_llm_latency_seconds_bucket[5m]))

# Error rate percentage
sum(rate(langchain_llm_requests_total{status="error"}[5m]))
  / sum(rate(langchain_llm_requests_total[5m])) * 100

# Token usage per hour
increase(langchain_llm_tokens_total[1h])
```
```yaml
# prometheus/rules/langchain.yml
groups:
  - name: langchain
    rules:
      - alert: HighErrorRate
        expr: |
          sum(rate(langchain_llm_requests_total{status="error"}[5m]))
            / sum(rate(langchain_llm_requests_total[5m])) > 0.05
        for: 5m
        labels: { severity: critical }
        annotations:
          summary: "LangChain error rate above 5%"
      - alert: HighLatency
        expr: |
          histogram_quantile(0.95, rate(langchain_llm_latency_seconds_bucket[5m])) > 5
        for: 5m
        labels: { severity: warning }
        annotations:
          summary: "LangChain P95 latency above 5 seconds"
      - alert: TokenBudgetExceeded
        expr: increase(langchain_llm_tokens_total[1d]) > 1000000
        labels: { severity: warning }
        annotations:
          summary: "Daily token usage exceeded 1M"
```
Common issues and fixes:

| Issue | Cause | Fix |
|---|---|---|
| Missing traces in LangSmith | Env vars not set | Verify `LANGSMITH_TRACING=true` is set |
| Callback not firing | Not passed to model | Add `callbacks: [handler]` in the constructor |
| Metrics missing in Prometheus | Server not started | Call `start_http_server(9090)` |
| Alert storms | Thresholds too sensitive | Raise the `for:` duration and the thresholds |
Use `langchain-incident-runbook` for incident response procedures.