From ftitos-claude-code
Cost optimization patterns for LLM API usage -- model routing by task complexity, budget tracking, retry logic, and prompt caching.
npx claudepluginhub nassimbf/ftitos-claude-codeThis skill uses the workspace's default tool permissions.
Patterns for controlling LLM API costs while maintaining quality.
Guides Next.js Cache Components and Partial Prerendering (PPR) with cacheComponents enabled. Implements 'use cache', cacheLife(), cacheTag(), revalidateTag(), static/dynamic optimization, and cache debugging.
Guides building MCP servers enabling LLMs to interact with external services via tools. Covers best practices, TypeScript/Node (MCP SDK), Python (FastMCP).
Share bugs, ideas, or general feedback.
Patterns for controlling LLM API costs while maintaining quality.
MODEL_SONNET = "claude-sonnet-4-6"
MODEL_HAIKU = "claude-haiku-4-5-20251001"
_SONNET_TEXT_THRESHOLD = 10_000 # chars
_SONNET_ITEM_THRESHOLD = 30 # items
def select_model(
text_length: int,
item_count: int,
force_model: str | None = None,
) -> str:
if force_model is not None:
return force_model
if text_length >= _SONNET_TEXT_THRESHOLD or item_count >= _SONNET_ITEM_THRESHOLD:
return MODEL_SONNET
return MODEL_HAIKU # 3-4x cheaper
from dataclasses import dataclass
@dataclass(frozen=True, slots=True)
class CostRecord:
model: str
input_tokens: int
output_tokens: int
cost_usd: float
@dataclass(frozen=True, slots=True)
class CostTracker:
budget_limit: float = 1.00
records: tuple[CostRecord, ...] = ()
def add(self, record: CostRecord) -> "CostTracker":
return CostTracker(
budget_limit=self.budget_limit,
records=(*self.records, record),
)
@property
def total_cost(self) -> float:
return sum(r.cost_usd for r in self.records)
@property
def over_budget(self) -> bool:
return self.total_cost > self.budget_limit
from anthropic import APIConnectionError, InternalServerError, RateLimitError
_RETRYABLE_ERRORS = (APIConnectionError, RateLimitError, InternalServerError)
def call_with_retry(func, *, max_retries: int = 3):
for attempt in range(max_retries):
try:
return func()
except _RETRYABLE_ERRORS:
if attempt == max_retries - 1:
raise
time.sleep(2 ** attempt)
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": system_prompt, "cache_control": {"type": "ephemeral"}},
{"type": "text", "text": user_input},
],
}
]