From mistral-pack
Instruments Mistral AI API with TypeScript wrapper for metrics on usage, latency, tokens, errors, costs. Sets up Prometheus metrics, Grafana dashboards, and alerting rules.
Install with: `npx claudepluginhub jeremylongshore/claude-code-plugins-plus-skills --plugin mistral-pack`. This skill is limited to using the following tools:
Monitor Mistral AI API usage, latency, token consumption, error rates, and costs. Covers instrumented client wrapper, Prometheus metrics, Grafana dashboard panels, alerting rules, and structured logging.
Executes Mistral AI incident triage, mitigation, and postmortem using bash scripts to check API status, rate limits, auth errors, and service health via curl and kubectl.
Sets up Langfuse observability with Prometheus metrics, Grafana dashboards, alerts, and Metrics API for monitoring LLM traces, costs, and latency.
Sets up LangSmith tracing, Prometheus metrics callbacks, OpenTelemetry, structured logging, and Grafana dashboards for LangChain apps.
Share bugs, ideas, or general feedback.
Monitor Mistral AI API usage, latency, token consumption, error rates, and costs. Covers instrumented client wrapper, Prometheus metrics, Grafana dashboard panels, alerting rules, and structured logging.
import { Mistral } from '@mistralai/mistralai';
// USD price per 1M tokens, keyed by model ID.
// NOTE(review): rates drift over time — keep this map in sync with the
// published Mistral pricing page (see the "Cost drift from bill" row below).
// Unknown models fall back to 'mistral-small-latest' in instrumentedChat.
const PRICING: Record<string, { input: number; output: number }> = {
  'mistral-small-latest': { input: 0.10, output: 0.30 },
  'mistral-large-latest': { input: 0.50, output: 1.50 },
  'codestral-latest': { input: 0.30, output: 0.90 },
  // Embeddings produce no completion tokens, hence output cost 0.
  'mistral-embed': { input: 0.10, output: 0 },
};
// One measurement for a single Mistral API call, emitted on success and on error.
interface MetricsEvent {
  // Model ID exactly as passed to the API (also the PRICING key).
  model: string;
  // Logical API surface, e.g. 'chat.complete'.
  endpoint: string;
  // Wall-clock duration of the call, rounded to whole milliseconds.
  durationMs: number;
  status: 'success' | 'error';
  // HTTP status on failure, when the thrown error exposes one.
  statusCode?: number;
  // Token counts and cost are only populated on success.
  inputTokens?: number;
  outputTokens?: number;
  // Estimated spend for this call in USD, derived from PRICING.
  costUsd?: number;
}
/**
 * Emit a single metric event as one structured JSON line on stdout,
 * to be picked up by the metrics backend (Prometheus, Datadog, etc.).
 */
function emitMetrics(event: MetricsEvent): void {
  const payload = { type: 'mistral_metric', ...event };
  console.log(JSON.stringify(payload));
}
/**
 * Wraps `client.chat.complete` with latency, token, and cost instrumentation.
 * Emits one MetricsEvent per call (success or error) and re-throws failures
 * unchanged so callers keep their existing error handling.
 *
 * @param client  - Mistral SDK client.
 * @param model   - Model ID; also used to look up PRICING for cost estimation.
 * @param messages - Chat messages, forwarded verbatim to the SDK.
 * @param options  - Extra chat.complete options, spread into the request.
 * @returns The SDK response, untouched.
 */
async function instrumentedChat(
  client: Mistral,
  model: string,
  messages: any[],
  options?: any,
) {
  const start = performance.now();
  try {
    const response = await client.chat.complete({ model, messages, ...options });
    const duration = Math.round(performance.now() - start);
    // Unknown models fall back to the small tier so cost is still a number
    // (possibly underestimated) rather than NaN.
    const pricing = PRICING[model] ?? PRICING['mistral-small-latest'];
    const inputTokens = response.usage?.promptTokens ?? 0;
    const outputTokens = response.usage?.completionTokens ?? 0;
    emitMetrics({
      model,
      endpoint: 'chat.complete',
      durationMs: duration,
      status: 'success',
      inputTokens,
      outputTokens,
      // PRICING is expressed in USD per 1M tokens.
      costUsd: (inputTokens / 1e6) * pricing.input + (outputTokens / 1e6) * pricing.output,
    });
    return response;
  } catch (error: unknown) {
    // Narrow instead of `catch (error: any)`: only report a status code when
    // the thrown value actually carries a numeric `status` field.
    const statusCode =
      typeof error === 'object' &&
      error !== null &&
      'status' in error &&
      typeof (error as { status: unknown }).status === 'number'
        ? (error as { status: number }).status
        : undefined;
    emitMetrics({
      model,
      endpoint: 'chat.complete',
      durationMs: Math.round(performance.now() - start),
      status: 'error',
      statusCode,
    });
    throw error;
  }
}
// Using prom-client
import { Counter, Histogram, Gauge } from 'prom-client';
// Metric names and label sets below are an external contract: the alert rules
// and Grafana queries further down reference them verbatim. Keep label
// cardinality bounded (model/endpoint/status only — never request or user IDs).
// Total calls by model/endpoint/outcome; drives traffic and error-rate panels.
const mistralRequests = new Counter({
  name: 'mistral_requests_total',
  help: 'Total Mistral API requests',
  labelNames: ['model', 'endpoint', 'status'],
});
// Latency histogram in ms; buckets chosen for LLM-scale latencies (0.1s–10s).
const mistralDuration = new Histogram({
  name: 'mistral_request_duration_ms',
  help: 'Mistral request duration in milliseconds',
  labelNames: ['model', 'endpoint'],
  buckets: [100, 250, 500, 1000, 2500, 5000, 10000],
});
const mistralTokens = new Counter({
  name: 'mistral_tokens_total',
  help: 'Total tokens consumed',
  labelNames: ['model', 'direction'], // direction: input | output
});
// Estimated spend; monotonic counter so increase()/rate() work in PromQL.
const mistralCost = new Counter({
  name: 'mistral_cost_usd_total',
  help: 'Estimated cost in USD',
  labelNames: ['model'],
});
// Errors keyed by HTTP status code string ('429', '401', or 'unknown').
const mistralErrors = new Counter({
  name: 'mistral_errors_total',
  help: 'Total Mistral errors',
  labelNames: ['model', 'status_code'],
});
// Record metrics from instrumented wrapper
/**
 * Translate one MetricsEvent into prom-client updates.
 * Every event bumps the request counter and latency histogram; successes also
 * record tokens and cost, failures record an error keyed by status code.
 */
function recordPrometheusMetrics(event: MetricsEvent): void {
  const { model, endpoint, status } = event;
  mistralRequests.inc({ model, endpoint, status });
  mistralDuration.observe({ model, endpoint }, event.durationMs);
  if (status !== 'success') {
    mistralErrors.inc({ model, status_code: String(event.statusCode ?? 'unknown') });
    return;
  }
  if (event.inputTokens) {
    mistralTokens.inc({ model, direction: 'input' }, event.inputTokens);
  }
  if (event.outputTokens) {
    mistralTokens.inc({ model, direction: 'output' }, event.outputTokens);
  }
  if (event.costUsd) {
    mistralCost.inc({ model }, event.costUsd);
  }
}
# prometheus/mistral-alerts.yaml
# NOTE(review): indentation restored — the snippet had been flattened and was
# not parseable as YAML. Values are unchanged.
groups:
  - name: mistral
    rules:
      # Page when more than 5% of requests fail, sustained for 5 minutes.
      - alert: MistralHighErrorRate
        expr: rate(mistral_errors_total[5m]) / rate(mistral_requests_total[5m]) > 0.05
        for: 5m
        labels: { severity: critical }
        annotations:
          summary: "Mistral error rate exceeds 5%"
          runbook: "See mistral-incident-runbook skill"
      # P95 latency from the histogram buckets defined in the client code.
      - alert: MistralHighLatency
        expr: histogram_quantile(0.95, rate(mistral_request_duration_ms_bucket[5m])) > 5000
        for: 5m
        labels: { severity: warning }
        annotations:
          summary: "Mistral P95 latency exceeds 5 seconds"
      # Any sustained 429s — tune threshold if bursts cause alert storms.
      - alert: MistralRateLimited
        expr: rate(mistral_errors_total{status_code="429"}[5m]) > 0
        for: 2m
        labels: { severity: warning }
        annotations:
          summary: "Mistral rate limiting detected"
      # Spend guard; fires immediately (no `for:`) once the hourly budget is hit.
      - alert: MistralCostSpike
        expr: increase(mistral_cost_usd_total[1h]) > 10
        labels: { severity: warning }
        annotations:
          summary: "Mistral spend exceeds $10/hour"
      # Any 401 is treated as critical — likely a revoked or rotated key.
      - alert: MistralAuthFailure
        expr: increase(mistral_errors_total{status_code="401"}[5m]) > 0
        labels: { severity: critical }
        annotations:
          summary: "Mistral authentication failing — API key may be revoked"
Key panels to create:
| Panel | Query | Type |
|---|---|---|
| Request Rate | rate(mistral_requests_total[5m]) | Time series |
| P50/P95/P99 Latency | histogram_quantile(0.95, rate(mistral_request_duration_ms_bucket[5m])) (repeat at 0.50 and 0.99) | Time series |
| Token Velocity | rate(mistral_tokens_total{direction="output"}[5m]) | Time series |
| Hourly Cost | increase(mistral_cost_usd_total[1h]) | Stat |
| Error Rate | rate(mistral_errors_total[5m]) by status_code | Time series |
| Model Distribution | sum by (model) (rate(mistral_requests_total[5m])) | Pie chart |
// One structured log record per Mistral API request.
// Deliberately contains no prompt/completion content (PII risk — see logMistralRequest).
interface MistralLogEntry {
  // Timestamp; format not enforced here — presumably ISO-8601, confirm at call sites.
  ts: string;
  level: 'info' | 'warn' | 'error';
  model: string;
  endpoint: string;
  durationMs: number;
  inputTokens?: number;
  outputTokens?: number;
  costUsd?: number;
  status: string;
  statusCode?: number;
  // Provider-assigned request ID, useful when correlating with Mistral support.
  requestId?: string;
}
/**
 * Serialize one request log entry as a single JSON line for the log pipeline
 * (SIEM, CloudWatch, or another aggregator).
 *
 * Never include message content in the entry — PII risk.
 */
function logMistralRequest(entry: MistralLogEntry): void {
  const line = JSON.stringify(entry);
  console.log(line);
}
| Issue | Cause | Solution |
|---|---|---|
| Missing token counts | Streaming not aggregated | Sum tokens from stream chunks |
| Cost drift from bill | Pricing table outdated | Update PRICING map when rates change |
| Alert storm on 429s | Rate limit burst | Tune alert threshold, add request queue |
| High cardinality | Per-request labels | Never label by request ID or user ID |