From openrouter-pack
Configure automatic model fallbacks for high availability on OpenRouter. Use when building resilient systems that need to survive provider outages. Triggers: 'openrouter fallback', 'model fallback', 'openrouter failover', 'openrouter backup model'.
npx claudepluginhub flight505/skill-forge --plugin openrouter-pack

This skill is limited to using the following tools:
OpenRouter supports native model fallbacks: pass multiple model IDs and OpenRouter tries each in order until one succeeds. You can also use `provider.order` to control which provider serves a specific model. This skill covers native fallbacks, provider routing, client-side fallback chains, and timeout configuration.
references/basic-fallback-pattern.md
references/configuration-examples.md
references/error-specific-fallback-logic.md
references/errors.md
references/examples.md
references/fallback-health-tracking.md
references/provider-based-fallback.md
references/smart-fallback-configuration.md
references/task-specific-fallbacks.md

Guides Next.js Cache Components and Partial Prerendering (PPR): 'use cache' directives, cacheLife(), cacheTag(), revalidateTag() for caching, invalidation, static/dynamic optimization. Auto-activates on cacheComponents: true.
Guides building MCP servers enabling LLMs to interact with external services via tools. Covers best practices, TypeScript/Node (MCP SDK), Python (FastMCP).
Share bugs, ideas, or general feedback.
OpenRouter supports native model fallbacks: pass multiple model IDs and OpenRouter tries each in order until one succeeds. You can also use provider.order to control which provider serves a specific model. This skill covers native fallbacks, provider routing, client-side fallback chains, and timeout configuration.
import os
from openai import OpenAI

# Shared client for the examples below. OpenRouter exposes an
# OpenAI-compatible API, so the stock OpenAI SDK works with only a
# base_url change. HTTP-Referer / X-Title are optional headers OpenRouter
# uses to attribute traffic to your app.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ["OPENROUTER_API_KEY"],  # raises KeyError if unset
    default_headers={"HTTP-Referer": "https://my-app.com", "X-Title": "my-app"},
)
# Native OpenRouter fallback: list every acceptable model in extra_body
# and the router walks the list top-to-bottom until one succeeds.
fallback_models = [
    "anthropic/claude-3.5-sonnet",  # primary -- also used for param validation
    "openai/gpt-4o",
    "google/gemini-2.0-flash-001",
]
response = client.chat.completions.create(
    model=fallback_models[0],
    messages=[{"role": "user", "content": "Explain recursion"}],
    max_tokens=500,
    extra_body={"models": fallback_models, "route": "fallback"},
)
# response.model reports the model that actually served the request.
print(f"Served by: {response.model}")
# Provider routing: request a single model but pin which upstream
# providers may serve it, in priority order.
provider_prefs = {
    "order": ["Anthropic", "AWS Bedrock", "GCP Vertex"],
    "allow_fallbacks": True,  # move down the list when a provider fails
}
response = client.chat.completions.create(
    model="anthropic/claude-3.5-sonnet",
    messages=[{"role": "user", "content": "Hello"}],
    max_tokens=200,
    extra_body={"provider": provider_prefs},
)
import logging
from openai import OpenAI, APIError, APITimeoutError

log = logging.getLogger("openrouter.fallback")

# Client-side fallback chain consumed by resilient_completion(), tried in
# order. Each entry carries its own request timeout (seconds) and a label
# used only for logging. Timeouts shrink down the chain so a full cascade
# through every model stays time-bounded.
FALLBACK_CHAIN = [
    {"model": "anthropic/claude-3.5-sonnet", "timeout": 30.0, "label": "primary"},
    {"model": "openai/gpt-4o", "timeout": 25.0, "label": "secondary"},
    {"model": "openai/gpt-4o-mini", "timeout": 15.0, "label": "budget-fallback"},
    {"model": "google/gemini-2.0-flash-001", "timeout": 15.0, "label": "last-resort"},
]
def resilient_completion(
    messages: list[dict],
    max_tokens: int = 1024,
    *,
    chain: list[dict] | None = None,
    **kwargs,
):
    """Try each model in a fallback chain until one succeeds.

    Args:
        messages: Chat messages in OpenAI format.
        max_tokens: Completion budget forwarded to every attempt.
        chain: Optional list of {"model", "timeout", "label"} dicts tried
            in order; defaults to FALLBACK_CHAIN.
        **kwargs: Extra keyword arguments forwarded to
            chat.completions.create().

    Returns:
        The first successful chat completion response.

    Raises:
        RuntimeError: When every model in the chain fails; the last API
            error is attached as the cause.
    """
    # One client for all attempts; the per-entry timeout is passed on each
    # request instead of rebuilding a client every iteration.
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=os.environ["OPENROUTER_API_KEY"],
        default_headers={"HTTP-Referer": "https://my-app.com", "X-Title": "my-app"},
    )
    last_error = None
    for config in chain if chain is not None else FALLBACK_CHAIN:
        try:
            response = client.chat.completions.create(
                model=config["model"],
                messages=messages,
                max_tokens=max_tokens,
                timeout=config["timeout"],  # per-request timeout override
                **kwargs,
            )
        except (APIError, APITimeoutError) as e:
            last_error = e
            # Lazy %-args keep the failure path cheap (no eager f-string).
            log.warning("%s failed (%s): %s", config["label"], config["model"], e)
            continue
        log.info("Served by %s: %s", config["label"], response.model)
        return response
    # Chain the last API error so callers see the underlying failure.
    raise RuntimeError(f"All fallbacks exhausted. Last error: {last_error}") from last_error
# Different models support different features. Match capabilities.
# Maps a required capability to an ordered list of OpenRouter model IDs;
# every model in a list should support that capability so a fallback never
# silently degrades the feature set.
CAPABILITY_CHAINS = {
    "tool_calling": [
        "anthropic/claude-3.5-sonnet",
        "openai/gpt-4o",
        "openai/gpt-4o-mini",
    ],
    "vision": [
        "openai/gpt-4o",
        "anthropic/claude-3.5-sonnet",
        "google/gemini-2.0-flash-001",
    ],
    "long_context": [
        "google/gemini-2.0-flash-001",  # 1M context
        "anthropic/claude-3.5-sonnet",  # 200K context
        "openai/gpt-4o",  # 128K context
    ],
    "budget": [
        "openai/gpt-4o-mini",
        "meta-llama/llama-3.1-8b-instruct",
        "google/gemma-2-9b-it:free",
    ],
}
def capability_fallback(messages, capability="tool_calling", **kwargs):
    """Route a request through the fallback chain matching a capability.

    The chain for ``capability`` (falling back to "tool_calling" for
    unknown keys) is handed to OpenRouter's native fallback routing, so
    models are tried in order until one succeeds. The original version
    computed ``chain`` but never used it, always running the generic
    FALLBACK_CHAIN instead.

    Args:
        messages: Chat messages in OpenAI format.
        capability: Key into CAPABILITY_CHAINS.
        **kwargs: Extra keyword arguments forwarded to
            chat.completions.create() (e.g. max_tokens).

    Returns:
        The chat completion response from the first model that succeeds.
    """
    chain = CAPABILITY_CHAINS.get(capability, CAPABILITY_CHAINS["tool_calling"])
    return client.chat.completions.create(
        model=chain[0],  # primary; used for parameter validation
        messages=messages,
        extra_body={"models": chain, "route": "fallback"},
        **kwargs,
    )
# Test with an invalid model to trigger fallback.
# The primary model ID is deliberately invalid, so OpenRouter's native
# fallback ("models" list + "route": "fallback") must fail over to
# openai/gpt-4o-mini. jq prints which model served and the reply content.
curl -s https://openrouter.ai/api/v1/chat/completions \
  -H "Authorization: Bearer $OPENROUTER_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "invalid/model-name",
    "messages": [{"role": "user", "content": "test"}],
    "max_tokens": 10,
    "models": ["invalid/model-name", "openai/gpt-4o-mini"],
    "route": "fallback"
  }' | jq '{model: .model, content: .choices[0].message.content}'
# Should succeed with openai/gpt-4o-mini
| Error | Cause | Fix |
|---|---|---|
| All fallbacks exhausted | Every model in chain failed | Add more diverse providers; alert on full chain failure |
| Slow cascade | Each model timing out sequentially | Reduce per-model timeout to 10-15s |
| Inconsistent responses | Different models have different capabilities | Ensure all fallback models support features your prompt uses |
| Wrong model served | Fallback triggered unexpectedly | Log which model served each request; check primary model health |
Prefer native fallbacks (`models` + `route: "fallback"`) for simplicity; use client-side chains for fine-grained control. Use `provider.order` when you need the same model from a different provider (e.g., Claude via Anthropic direct vs AWS Bedrock).