From openrouter-pack
Configures OpenRouter model fallbacks for high availability in Python apps using OpenAI client. Covers native server-side, provider routing, client-side chains, and timeouts to survive outages.
Install: `npx claudepluginhub jeremylongshore/claude-code-plugins-plus-skills --plugin openrouter-pack`. This skill is limited to using the following tools:
OpenRouter supports native model fallbacks: pass multiple model IDs and OpenRouter tries each in order until one succeeds. You can also use `provider.order` to control which provider serves a specific model. This skill covers native fallbacks, provider routing, client-side fallback chains, and timeout configuration.
References: references/basic-fallback-pattern.md, references/configuration-examples.md, references/error-specific-fallback-logic.md, references/errors.md, references/examples.md, references/fallback-health-tracking.md, references/provider-based-fallback.md, references/smart-fallback-configuration.md, references/task-specific-fallbacks.md. Fixes common OpenRouter API pitfalls: model provider prefixes, max_tokens limits, model validation, fallback providers. Use for integrations or code reviews.
Deploys self-hosted OpenAI-compatible proxy aggregating 14 free-tier LLM providers (Groq, Gemini, Cerebras) with automatic failover, per-key rate tracking, sticky sessions, and React dashboard for key management.
Integrate LLM providers like OpenAI, Anthropic, Groq; configure fallbacks, streaming, and settings in PydanticAI for resilient model usage.
Share bugs, ideas, or general feedback.
OpenRouter supports native model fallbacks: pass multiple model IDs and OpenRouter tries each in order until one succeeds. You can also use provider.order to control which provider serves a specific model. This skill covers native fallbacks, provider routing, client-side fallback chains, and timeout configuration.
import os
from openai import OpenAI
# OpenRouter speaks the OpenAI wire protocol, so the stock OpenAI client
# works once it is pointed at the OpenRouter base URL. The attribution
# headers identify the calling app on OpenRouter's dashboard.
_APP_HEADERS = {"HTTP-Referer": "https://my-app.com", "X-Title": "my-app"}
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ["OPENROUTER_API_KEY"],
    default_headers=_APP_HEADERS,
)
# Native fallback: list the whole chain in extra_body["models"]; OpenRouter
# walks it in order and serves the request from the first model that succeeds.
fallback_models = [
    "anthropic/claude-3.5-sonnet",
    "openai/gpt-4o",
    "google/gemini-2.0-flash-001",
]
response = client.chat.completions.create(
    model=fallback_models[0],  # Primary (used for param validation)
    messages=[{"role": "user", "content": "Explain recursion"}],
    max_tokens=500,
    extra_body={"models": fallback_models, "route": "fallback"},
)
# The response reports which model actually answered.
print(f"Served by: {response.model}")
# Provider routing: pin a single model but rank which infrastructure
# providers may serve it, in priority order.
provider_prefs = {
    "order": ["Anthropic", "AWS Bedrock", "GCP Vertex"],
    "allow_fallbacks": True,  # Fall to next provider if first fails
}
response = client.chat.completions.create(
    model="anthropic/claude-3.5-sonnet",
    messages=[{"role": "user", "content": "Hello"}],
    max_tokens=200,
    extra_body={"provider": provider_prefs},
)
import logging
from openai import OpenAI, APIError, APITimeoutError
log = logging.getLogger("openrouter.fallback")

# Fallback chain, tried top-to-bottom. Timeouts shrink toward the cheaper
# options so a full cascade stays time-bounded.
_CHAIN_SPEC = [
    ("anthropic/claude-3.5-sonnet", 30.0, "primary"),
    ("openai/gpt-4o", 25.0, "secondary"),
    ("openai/gpt-4o-mini", 15.0, "budget-fallback"),
    ("google/gemini-2.0-flash-001", 15.0, "last-resort"),
]
FALLBACK_CHAIN = [
    {"model": model, "timeout": timeout, "label": label}
    for model, timeout, label in _CHAIN_SPEC
]
def resilient_completion(messages: list[dict], max_tokens: int = 1024, **kwargs):
    """Try each model in FALLBACK_CHAIN until one succeeds.

    Args:
        messages: Chat messages in OpenAI format ({"role": ..., "content": ...}).
        max_tokens: Completion budget forwarded to every attempt.
        **kwargs: Extra arguments forwarded to chat.completions.create();
            a caller-supplied ``timeout`` overrides the per-model default.

    Returns:
        The first successful chat completion response.

    Raises:
        RuntimeError: When every model in the chain fails; chained to the
            last underlying API error via ``from``.
    """
    # One client for all attempts: the per-model timeout is applied as a
    # per-request option instead of rebuilding the client each iteration.
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=os.environ["OPENROUTER_API_KEY"],
        default_headers={"HTTP-Referer": "https://my-app.com", "X-Title": "my-app"},
    )
    last_error = None
    for config in FALLBACK_CHAIN:
        # Caller-supplied kwargs win over the chain's default timeout.
        request_kwargs = {"timeout": config["timeout"], **kwargs}
        try:
            response = client.chat.completions.create(
                model=config["model"],
                messages=messages,
                max_tokens=max_tokens,
                **request_kwargs,
            )
        except (APIError, APITimeoutError) as e:
            last_error = e
            # Lazy %-args: no string formatting unless the record is emitted.
            log.warning("%s failed (%s): %s", config["label"], config["model"], e)
            continue
        log.info("Served by %s: %s", config["label"], response.model)
        return response
    # Preserve the causal chain for debuggability.
    raise RuntimeError(f"All fallbacks exhausted. Last error: {last_error}") from last_error
# Different models support different features; each chain lists only models
# that share the capability, so a fallback never downgrades features.
_CLAUDE_SONNET = "anthropic/claude-3.5-sonnet"
_GPT_4O = "openai/gpt-4o"
_GPT_4O_MINI = "openai/gpt-4o-mini"
_GEMINI_FLASH = "google/gemini-2.0-flash-001"

CAPABILITY_CHAINS = {
    "tool_calling": [_CLAUDE_SONNET, _GPT_4O, _GPT_4O_MINI],
    "vision": [_GPT_4O, _CLAUDE_SONNET, _GEMINI_FLASH],
    # Ordered by context window: 1M, 200K, 128K tokens.
    "long_context": [_GEMINI_FLASH, _CLAUDE_SONNET, _GPT_4O],
    "budget": [
        _GPT_4O_MINI,
        "meta-llama/llama-3.1-8b-instruct",
        "google/gemma-2-9b-it:free",
    ],
}
def capability_fallback(messages, capability="tool_calling", max_tokens: int = 1024, **kwargs):
    """Route a request through the fallback chain matching *capability*.

    Fixes a bug where the selected chain was computed and then discarded
    (the call always used the generic FALLBACK_CHAIN regardless of
    capability). The chain is now sent via OpenRouter's native fallback
    routing, so OpenRouter tries each capable model in order.

    Args:
        messages: Chat messages in OpenAI format.
        capability: Key into CAPABILITY_CHAINS; unknown keys fall back to
            "tool_calling".
        max_tokens: Completion budget for the request.
        **kwargs: Extra arguments forwarded to chat.completions.create().

    Returns:
        The chat completion response from the first capable model to succeed.
    """
    chain = CAPABILITY_CHAINS.get(capability, CAPABILITY_CHAINS["tool_calling"])
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=os.environ["OPENROUTER_API_KEY"],
        default_headers={"HTTP-Referer": "https://my-app.com", "X-Title": "my-app"},
    )
    response = client.chat.completions.create(
        model=chain[0],  # Primary (used for param validation)
        messages=messages,
        max_tokens=max_tokens,
        extra_body={"models": chain, "route": "fallback"},
        **kwargs,
    )
    log.info("Capability %s served by: %s", capability, response.model)
    return response
# Test with an invalid model to trigger fallback
# Smoke test for native fallback: the primary model ID is bogus, so with
# "route": "fallback" OpenRouter must serve the request from the next
# entry in "models" (openai/gpt-4o-mini). Requires OPENROUTER_API_KEY in
# the environment and jq on PATH.
curl -s https://openrouter.ai/api/v1/chat/completions \
-H "Authorization: Bearer $OPENROUTER_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "invalid/model-name",
"messages": [{"role": "user", "content": "test"}],
"max_tokens": 10,
"models": ["invalid/model-name", "openai/gpt-4o-mini"],
"route": "fallback"
}' | jq '{model: .model, content: .choices[0].message.content}'
# Should succeed with openai/gpt-4o-mini
| Error | Cause | Fix |
|---|---|---|
| All fallbacks exhausted | Every model in chain failed | Add more diverse providers; alert on full chain failure |
| Slow cascade | Each model timing out sequentially | Reduce per-model timeout to 10-15s |
| Inconsistent responses | Different models have different capabilities | Ensure all fallback models support features your prompt uses |
| Wrong model served | Fallback triggered unexpectedly | Log which model served each request; check primary model health |
Prefer native fallbacks (`models` + `route: "fallback"`) for simplicity, and client-side chains for fine-grained control. Use `provider.order` when you need the same model from a different provider (e.g., Claude via Anthropic direct vs AWS Bedrock).