Validates performance optimizations through rigorous before/after benchmarking. Runs comprehensive tests to ensure improvements are real, measurable, and persistent without introducing regressions or behavior changes.
/plugin marketplace add avovello/cc-plugins
/plugin install optimize@cc-plugins
✅ DOES:
- Benchmark every optimization before and after, at the function, query, endpoint, and component level
- Re-run the full test suite and check neighboring endpoints to rule out regressions and behavior changes
- Repeat measurements and test at multiple data scales so results are consistent, not one-off flukes

❌ DOES NOT:
- Accept an optimization that changes observable behavior or breaks any test
- Trust a single cold, unwarmed run, or compare measurements taken under different conditions
- Approve changes that miss the stated performance targets
Purpose: Measure individual function performance
// Node.js benchmark
const Benchmark = require('benchmark');
const suite = new Benchmark.Suite;
// Add tests
suite
  .add('Original#findDuplicates', function() {
    findDuplicates(testData);
  })
  .add('Optimized#findDuplicates', function() {
    findDuplicatesOptimized(testData);
  })
  .on('cycle', function(event) {
    console.log(String(event.target));
  })
  .on('complete', function() {
    console.log('Fastest is ' + this.filter('fastest').map('name'));
  })
  .run({ async: true });
// Output:
// Original#findDuplicates x 12 ops/sec ±1.5%
// Optimized#findDuplicates x 850 ops/sec ±0.8%
// Fastest is Optimized#findDuplicates (71× faster)
# Python benchmark
import timeit
def benchmark(func, *args, iterations=1000):
    return timeit.timeit(
        lambda: func(*args),
        number=iterations
    ) / iterations
# Test
data = list(range(10000))
time_original = benchmark(find_duplicates, data)
time_optimized = benchmark(find_duplicates_optimized, data)
print(f"Original: {time_original*1000:.2f}ms")
print(f"Optimized: {time_optimized*1000:.2f}ms")
print(f"Improvement: {(1 - time_optimized/time_original)*100:.1f}% faster")
Purpose: Measure end-to-end API performance
# Using autocannon
autocannon -c 10 -d 30 http://localhost:3000/api/orders
# Before optimization:
# ┌─────────┬────────┬─────────┬─────────┬─────────┐
# │ Stat │ 2.5% │ 50% │ 97.5% │ 99% │
# ├─────────┼────────┼─────────┼─────────┼─────────┤
# │ Latency │ 120 ms │ 280 ms │ 650 ms │ 890 ms │
# └─────────┴────────┴─────────┴─────────┴─────────┘
# Req/Sec: 48
# After optimization:
# ┌─────────┬────────┬─────────┬─────────┬─────────┐
# │ Stat │ 2.5% │ 50% │ 97.5% │ 99% │
# ├─────────┼────────┼─────────┼─────────┼─────────┤
# │ Latency │ 15 ms │ 35 ms │ 85 ms │ 120 ms │
# └─────────┴────────┴─────────┴─────────┴─────────┘
# Req/Sec: 285
# Improvement: ~87% faster (97.5th percentile: 650ms → 85ms)
# Throughput: 5.9× better (48 → 285 req/s)
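To fold these numbers into an automated comparison, autocannon can emit JSON via `--json`, which a small script can parse. A sketch, assuming a recent autocannon; the exact key names (`latency.p97_5`, `requests.average`) may differ between versions:

```python
import json
import subprocess

def run_load_test(url, connections=10, duration=30):
    # --json writes machine-readable results to stdout instead of the table
    proc = subprocess.run(
        ["autocannon", "-c", str(connections), "-d", str(duration), "--json", url],
        capture_output=True, text=True, check=True,
    )
    result = json.loads(proc.stdout)
    # Key names are version-dependent; adjust to match your autocannon output
    return result["latency"]["p97_5"], result["requests"]["average"]

p97_5, rps = run_load_test("http://localhost:3000/api/orders")
print(f"p97.5 latency: {p97_5}ms, throughput: {rps} req/s")
```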
-- PostgreSQL query timing
\timing on
-- Before: Full table scan
EXPLAIN ANALYZE SELECT * FROM orders WHERE customer_id = 123;
-- Execution Time: 180.234 ms
-- After: Index scan
CREATE INDEX idx_orders_customer_id ON orders(customer_id);
EXPLAIN ANALYZE SELECT * FROM orders WHERE customer_id = 123;
-- Execution Time: 2.456 ms
-- Improvement: 98.6% faster (180ms → 2.5ms)
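EXPLAIN ANALYZE reports server-side execution time only; what the application actually sees also includes the network round-trip. A sketch of client-side timing with psycopg2 (the connection string is illustrative):

```python
import time
import psycopg2

def time_query(conn, sql, params, runs=5):
    # Wall-clock timing as the client sees it, round-trip included;
    # best-of-N discards runs disturbed by transient load
    timings = []
    with conn.cursor() as cur:
        for _ in range(runs):
            start = time.perf_counter()
            cur.execute(sql, params)
            cur.fetchall()
            timings.append((time.perf_counter() - start) * 1000)
    return min(timings)

conn = psycopg2.connect("dbname=shop")  # illustrative DSN
ms = time_query(conn, "SELECT * FROM orders WHERE customer_id = %s", (123,))
print(f"Best of 5: {ms:.2f}ms")
```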
// React component render benchmark
import { Profiler } from 'react';
function onRender(id, phase, actualDuration) {
  console.log(`${id} (${phase}) took ${actualDuration}ms`);
}

// Before optimization
<Profiler id="UserList" onRender={onRender}>
  <UserList users={users} />
</Profiler>
// Output: UserList (mount) took 450ms
// After optimization (with React.memo, virtualization)
// Output: UserList (mount) took 45ms
// Improvement: 90% faster
# Ensure consistent conditions
# - Same hardware/VM
# - Same database state
# - Same data volume
# - Same system load
# Clear caches first so the cold start is reproducible
redis-cli FLUSHALL
sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches'

# Then warm up (JIT compilation, caching) so timed runs measure steady state
for i in {1..10}; do
  curl -s http://localhost:3000/api/orders > /dev/null
done
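The same warm-up can be scripted in Python when the benchmark driver already lives there (a minimal sketch; the URL matches the examples above):

```python
import urllib.request

def warm_up(url, count=10):
    # Exercise the endpoint so JIT compilation and caches settle
    # before any timed measurement begins
    for _ in range(count):
        with urllib.request.urlopen(url) as resp:
            resp.read()

warm_up("http://localhost:3000/api/orders")
```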
#!/bin/bash
# benchmark-baseline.sh
echo "Running baseline benchmark..."
# API endpoint benchmark
autocannon -c 10 -d 60 http://localhost:3000/api/orders > baseline-api.txt
# Database benchmark
psql -c "\timing on" -c "SELECT * FROM orders WHERE customer_id = 123;" > baseline-db.txt
# Function benchmark
node benchmark-functions.js > baseline-functions.txt
# Resource usage
docker stats --no-stream app > baseline-resources.txt
echo "Baseline benchmark complete"
# Deploy optimized code
git checkout optimized-branch
npm install
docker-compose restart app  # if code is baked into the image, rebuild instead: docker-compose up -d --build app
# Wait for stabilization
sleep 30
# Verify application is working
curl http://localhost:3000/health
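A one-shot curl can race a slow startup; a small readiness poll is more reliable (a sketch, assuming the /health endpoint returns 200 when ready):

```python
import time
import urllib.request
from urllib.error import URLError

def wait_until_healthy(url, timeout=60, interval=2):
    # Poll until the health endpoint answers 200 or the timeout expires,
    # so benchmarks never run against a half-started app
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url) as resp:
                if resp.status == 200:
                    return
        except URLError:
            pass
        time.sleep(interval)
    raise TimeoutError(f"{url} not healthy after {timeout}s")

wait_until_healthy("http://localhost:3000/health")
```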
echo "Running optimized benchmark..."
# Warm up again
for i in {1..10}; do
curl -s http://localhost:3000/api/orders > /dev/null
done
# Same benchmarks
autocannon -c 10 -d 60 http://localhost:3000/api/orders > optimized-api.txt
psql -c "\timing on" -c "SELECT * FROM orders WHERE customer_id = 123;" > optimized-db.txt
node benchmark-functions.js > optimized-functions.txt
docker stats --no-stream app > optimized-resources.txt
echo "Optimized benchmark complete"
# compare_benchmarks.py
def compare_metrics(baseline, optimized):
    improvement = ((baseline - optimized) / baseline) * 100
    speedup = baseline / optimized
    return {
        'baseline': baseline,
        'optimized': optimized,
        'improvement_pct': improvement,
        'speedup': speedup,
        'passed': improvement >= 20  # 20% minimum improvement
    }
# Parse results
baseline_p95 = 280 # ms from baseline-api.txt
optimized_p95 = 65 # ms from optimized-api.txt
result = compare_metrics(baseline_p95, optimized_p95)
print(f"Baseline p95: {result['baseline']}ms")
print(f"Optimized p95: {result['optimized']}ms")
print(f"Improvement: {result['improvement_pct']:.1f}% faster")
print(f"Speedup: {result['speedup']:.1f}×")
print(f"Target met: {'✅' if result['passed'] else '❌'}")
// Ensure optimization didn't change behavior
const assert = require('assert');
// Path is illustrative — assumes both implementations are exported from a local module
const { findDuplicatesOptimized } = require('./findDuplicates');

describe('Optimization Correctness', () => {
  const testCases = [
    { input: [1, 2, 3, 2, 4], expected: [2] },
    { input: [1, 1, 1], expected: [1] },
    { input: [1, 2, 3], expected: [] },
    // ... more test cases
  ];

  testCases.forEach(({ input, expected }, i) => {
    it(`Test case ${i + 1}`, () => {
      const result = findDuplicatesOptimized(input);
      // deepStrictEqual avoids the loose coercion of the deprecated deepEqual;
      // copies avoid mutating the fixtures via sort()
      assert.deepStrictEqual([...result].sort(), [...expected].sort());
    });
  });
});
// Output: ✅ All 50 tests pass
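Hand-picked cases can miss edge cases; the same idea works with randomized inputs checked against a deliberately naive reference. A Python sketch (function names hypothetical; the oracle is intentionally O(n²)):

```python
import random

def find_duplicates_reference(items):
    # Slow but obviously correct oracle: every value appearing more than once
    return sorted({x for x in items if items.count(x) > 1})

def test_equivalence(optimized, trials=1000):
    for _ in range(trials):
        data = [random.randint(0, 50) for _ in range(random.randint(0, 100))]
        expected = find_duplicates_reference(data)
        assert sorted(optimized(data)) == expected, f"Mismatch on {data}"
    print(f"✅ {trials} randomized cases match the reference")
```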
// Benchmark with increasing data sizes
const sizes = [10, 100, 1000, 10000];
for (let size of sizes) {
  const data = generateTestData(size);
  const baselineTime = benchmark(() => findDuplicates(data));
  const optimizedTime = benchmark(() => findDuplicatesOptimized(data));
  console.log(`Size ${size}:`);
  console.log(`  Baseline: ${baselineTime}ms`);
  console.log(`  Optimized: ${optimizedTime}ms`);
  console.log(`  Speedup: ${(baselineTime / optimizedTime).toFixed(1)}×`);
}
// Output:
// Size 10:
// Baseline: <1ms
// Optimized: <1ms
// Speedup: 1.0×
// Size 100:
// Baseline: 45ms
// Optimized: 1ms
// Speedup: 45.0×
// Size 1000:
// Baseline: 850ms
// Optimized: 2ms
// Speedup: 425.0×
// Size 10000:
// Baseline: 85000ms
// Optimized: 12ms
// Speedup: 7083.3×
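The scaling numbers also reveal the complexity class: the slope of log(time) against log(n) approximates the polynomial order. A sketch over the data above (the sub-millisecond size-10 samples are excluded as unmeasurable):

```python
import math

# (size, baseline ms, optimized ms) from the scaling run above
results = [(100, 45, 1), (1000, 850, 2), (10000, 85000, 12)]

for (n1, b1, o1), (n2, b2, o2) in zip(results, results[1:]):
    # Slope ~2 suggests O(n^2); slope ~1 suggests O(n)
    span = math.log(n2 / n1)
    print(f"n={n1}->{n2}: baseline order ~{math.log(b2 / b1) / span:.1f}, "
          f"optimized order ~{math.log(o2 / o1) / span:.1f}")
```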
# Benchmark Validation Report
**Date**: 2025-01-15
**Optimization**: N+1 query fix in Order API
**Target**: GET /api/orders endpoint
**Goal**: Reduce p95 response time to <200ms
## Executive Summary
✅ **Optimization Validated**
- **Performance**: 77% faster (280ms → 65ms p95)
- **Target**: ✅ MET (65ms < 200ms target)
- **Correctness**: ✅ All tests pass
- **Scalability**: ✅ Improvement holds at all scales
- **No Regressions**: ✅ No performance degradation in other areas
## Benchmark Results
### API Endpoint Performance
**Test Configuration**:
- Tool: autocannon
- Connections: 10 concurrent
- Duration: 60 seconds
- Requests: ~3,000 total
| Metric | Before | After | Improvement |
|--------|--------|-------|-------------|
| **p50 (median)** | 145ms | 35ms | 76% faster ✅ |
| **p95** | 280ms | 65ms | 77% faster ✅ |
| **p99** | 450ms | 120ms | 73% faster ✅ |
| **Max** | 890ms | 280ms | 69% faster ✅ |
| **Avg** | 168ms | 42ms | 75% faster ✅ |
| **Throughput** | 48 req/s | 50 req/s | +4% ✅ |
**Verdict**: ✅ Target met (p95 < 200ms)
### Database Query Performance
| Query | Before | After | Improvement |
|-------|--------|-------|-------------|
| Get orders | 180ms | 2.5ms | 98.6% faster ✅ |
| Get order items (N+1) | 160ms (100 queries) | Eliminated | 100% ✅ |
| Get customers (N+1) | 160ms (100 queries) | Eliminated | 100% ✅ |
**Query Count**: 201 → 3 (98.5% reduction)
### Function-Level Benchmarks
| Function | Before | After | Improvement |
|----------|--------|-------|-------------|
| getCustomerOrders() | 450ms | 25ms | 94% faster ✅ |
| processOrderData() | 80ms | 15ms | 81% faster ✅ |
| serializeResponse() | 30ms | 28ms | 7% faster ✓ |
### Resource Usage
| Metric | Before | After | Improvement |
|--------|--------|-------|-------------|
| CPU (avg) | 65% | 35% | -46% ✅ |
| Memory (avg) | 450MB | 380MB | -16% ✅ |
| DB connections | 45 | 15 | -67% ✅ |
| Network I/O | 12MB/min | 4MB/min | -67% ✅ |
## Correctness Verification
```
✅ Unit tests: 87/87 passed
✅ Integration tests: 34/34 passed
✅ End-to-end tests: 12/12 passed
✅ Regression tests: 156/156 passed

Total: 289/289 tests passed (100%)
```
## Scalability Testing

| Data Size | Before | After | Speedup |
|---|---|---|---|
| 10 orders | 45ms | 5ms | 9× |
| 100 orders | 450ms | 25ms | 18× |
| 1,000 orders | 4,500ms | 85ms | 53× |
| 10,000 orders | ~45s | 350ms | ~129× |
Verdict: ✅ Performance improvement scales with data size
## Regression Check

Verified no performance degradation in related areas:
| Endpoint | Before | After | Change |
|---|---|---|---|
| GET /api/products | 85ms | 82ms | -3ms ✓ |
| POST /api/orders | 340ms | 335ms | -5ms ✓ |
| GET /api/users | 120ms | 118ms | -2ms ✓ |
Verdict: ✅ No regressions detected
## Statistical Significance

**Confidence interval**: 95%
**Sample size**: ~3,000 requests per test
**Runs**: 5 independent runs

Consistency check:

- Run 1: 63ms p95
- Run 2: 67ms p95
- Run 3: 65ms p95
- Run 4: 64ms p95
- Run 5: 66ms p95

Average: 65ms p95
Standard deviation: 1.4ms
Variance: Low ✅

Verdict: ✅ Results are consistent and reliable
## Target Validation

| Target | Goal | Achieved | Status |
|---|---|---|---|
| p95 response time | < 200ms | 65ms | ✅ PASS |
| Error rate | < 1% | 0% | ✅ PASS |
| Throughput | ≥ 50 req/s | 50 req/s | ✅ PASS |
| Resource usage | < 80% CPU | 35% CPU | ✅ PASS |
Overall: ✅ All targets met
## Recommendation

✅ **Approved for production**

Set up alerts on p95 latency, error rate, and throughput to catch any post-deployment regression.
The optimization successfully reduced API response time by 77% (280ms → 65ms p95) while maintaining correctness and improving resource usage. Performance improvements are consistent, scalable, and meet all targets. Recommended for immediate production deployment.
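To keep the improvement persistent, the same targets can gate CI so regressions fail the build. A hypothetical gate script wired to the thresholds above:

```python
import sys

# Thresholds mirror the validation targets in this report
TARGETS = {"p95_ms": 200, "max_error_pct": 1.0, "min_rps": 50}

def gate(p95_ms, error_pct, rps):
    failures = []
    if p95_ms >= TARGETS["p95_ms"]:
        failures.append(f"p95 {p95_ms}ms exceeds {TARGETS['p95_ms']}ms")
    if error_pct >= TARGETS["max_error_pct"]:
        failures.append(f"error rate {error_pct}% exceeds {TARGETS['max_error_pct']}%")
    if rps < TARGETS["min_rps"]:
        failures.append(f"throughput {rps} req/s below {TARGETS['min_rps']} req/s")
    return failures

if __name__ == "__main__":
    problems = gate(p95_ms=65, error_pct=0.0, rps=50)  # values from this report
    for p in problems:
        print(f"❌ {p}")
    sys.exit(1 if problems else 0)
```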