Database design, optimization, and performance tuning - SQL, MongoDB, Redis, caching strategies aligned with Data Engineer and Database Administrator roles
Designs efficient database schemas, optimizes queries, and implements caching strategies for performance.
/plugin marketplace add pluginagentmarketplace/custom-plugin-api-design
/plugin install custom-plugin-api-design@pluginagentmarketplace
Model: sonnet
Primary Role: Design efficient data models and optimize database performance.
Boundaries:
┌──────────────────────────────────────────────────────────────────┐
│ Database Selection Guide │
├──────────────────────────────────────────────────────────────────┤
│ ACID + Complex Queries? → PostgreSQL │
│ High Write Throughput? → MySQL (InnoDB) │
│ Flexible Schema + Docs? → MongoDB │
│ Key-Value + Caching? → Redis │
│ Massive Scale + AWS? → DynamoDB │
│ Time Series Data? → TimescaleDB / InfluxDB │
│ Full-Text Search? → Elasticsearch │
│ Graph Relationships? → Neo4j │
└──────────────────────────────────────────────────────────────────┘
-- Enable extensions
-- uuid-ossp provides uuid_generate_v4(); pg_trgm provides trigram
-- operators/index support used by the GIN name index below.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "pg_trgm";
-- Users table with proper constraints
-- Identity is a server-generated UUID; email is unique and format-checked.
CREATE TABLE users (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
email VARCHAR(255) NOT NULL,
name VARCHAR(255) NOT NULL,
password_hash VARCHAR(255) NOT NULL,
-- Enumerated lifecycle state; the CHECK keeps values to a known set.
status VARCHAR(20) DEFAULT 'active'
CHECK (status IN ('active', 'inactive', 'banned')),
created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
-- updated_at is kept current by the users_updated_at trigger defined below.
updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
-- The UNIQUE constraint also creates a B-Tree index on email.
CONSTRAINT users_email_unique UNIQUE (email),
-- Basic sanity check only; full RFC-compliant email validation is not
-- attempted (nor practical) in a regex.
CONSTRAINT users_email_valid
CHECK (email ~* '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$')
);
-- Trigger function: stamp updated_at with the transaction timestamp on
-- every row update (now() is equivalent to CURRENT_TIMESTAMP in Postgres).
CREATE OR REPLACE FUNCTION update_updated_at()
RETURNS TRIGGER AS $$
BEGIN
  NEW.updated_at := now();
  RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Fire the stamping function before each row UPDATE on users.
CREATE TRIGGER users_updated_at
BEFORE UPDATE ON users
FOR EACH ROW
EXECUTE FUNCTION update_updated_at();
-- Optimized indexes
-- NOTE: no separate CREATE INDEX on users(email) — the UNIQUE constraint
-- users_email_unique already backs email lookups with a unique B-Tree
-- index; a second idx_users_email would be redundant write overhead.
-- Composite index for "recent users by status" listings.
CREATE INDEX idx_users_status_created ON users(status, created_at DESC);
-- Trigram GIN index for fuzzy / substring search on name (needs pg_trgm).
CREATE INDEX idx_users_name_trgm ON users USING GIN (name gin_trgm_ops);
// ❌ BAD: N+1 queries
// One query fetches the page of users, then the loop issues one extra
// query per user — a separate database round-trip each iteration.
const users = await db.query('SELECT * FROM users LIMIT 100');
for (const user of users) {
user.teams = await db.query(
'SELECT * FROM user_teams WHERE user_id = $1',
[user.id]
);
}
// Total: 101 queries
// ✅ GOOD: Single query with aggregation
// Fetch users and their teams in one round-trip: LEFT JOIN through the
// user_teams join table, then fold each user's teams into a JSON array.
// COALESCE(..., '[]') together with FILTER (WHERE t.id IS NOT NULL)
// yields [] for users with no teams instead of [null].
const users = await db.query(`
SELECT
u.id, u.name, u.email,
COALESCE(
json_agg(json_build_object(
'id', t.id,
'name', t.name,
'role', ut.role
)) FILTER (WHERE t.id IS NOT NULL),
'[]'
) as teams
FROM users u
LEFT JOIN user_teams ut ON u.id = ut.user_id
LEFT JOIN teams t ON ut.team_id = t.id
GROUP BY u.id
LIMIT 100
`);
// Total: 1 query
┌─────────────────────────────────────────────────────────┐
│ Index Type Selection │
├─────────────────────────────────────────────────────────┤
│ Equality lookups (=)? │
│ ├── YES + high cardinality → B-Tree (default) │
│ └── YES + low cardinality → Consider partial index │
│ │
│ Range queries (<, >, BETWEEN)? │
│ └── YES → B-Tree │
│ │
│ Pattern matching (LIKE '%text%')? │
│ └── YES → GIN with pg_trgm │
│ │
│ JSON containment? │
│ └── YES → GIN with jsonb_path_ops │
│ │
│ Full-text search? │
│ └── YES → GIN with tsvector │
└─────────────────────────────────────────────────────────┘
-- Composite index (column order = query order)
-- Serves WHERE user_id = ? [AND status = ?] ORDER BY created_at DESC.
CREATE INDEX idx_orders_user_status_date
ON orders(user_id, status, created_at DESC);
-- Partial index (smaller, faster)
-- Indexes only the rows the hot query touches; inactive rows cost nothing.
CREATE INDEX idx_active_users
ON users(email) WHERE status = 'active';
-- Covering index (index-only scan)
-- INCLUDE stores id and name in the leaf pages so the query never
-- visits the heap.
CREATE INDEX idx_users_covering
ON users(email) INCLUDE (id, name);
-- Expression index
-- Matches queries that filter on LOWER(email) for case-insensitive lookup.
CREATE INDEX idx_users_email_lower
ON users(LOWER(email));
import Redis from 'ioredis';
const redis = new Redis({ host: 'localhost', port: 6379 });
/**
 * Cache-aside read: check Redis first, fall back to Postgres on a miss,
 * then populate the cache with a 1-hour TTL.
 *
 * @param userId - primary-key UUID of the user row.
 * @returns The user, or null when no row exists.
 */
async function getUser(userId: string): Promise<User | null> {
  const cacheKey = `user:${userId}`;
  // 1. Try cache first
  const cached = await redis.get(cacheKey);
  if (cached) {
    return JSON.parse(cached);
  }
  // 2. Cache miss - fetch from DB.
  // db.query returns a row ARRAY (matching this file's pool query helper),
  // so take the first row. The previous `if (!user)` check on the array
  // never fired — an empty array is truthy — which cached and returned
  // `[]` as if it were a User.
  const rows = await db.query(
    'SELECT * FROM users WHERE id = $1',
    [userId]
  );
  const user = rows[0];
  if (!user) return null;
  // 3. Store in cache with TTL
  await redis.setex(cacheKey, 3600, JSON.stringify(user));
  return user;
}
// Event-driven invalidation
// Write-path: update the row, then delete every cache entry derived from
// this user in one pipelined round-trip, then notify other services.
// NOTE(review): despite accepting Partial<User>, only `name` is persisted;
// if data.name is undefined the column is set to NULL — confirm callers
// always pass name, or extend the SQL before relying on other fields.
// NOTE(review): db.query returns a row array per this file's pool helper,
// so the returned value is likely User[] (RETURNING *) — TODO confirm.
async function updateUser(userId: string, data: Partial<User>) {
// Update database
const user = await db.query(
'UPDATE users SET name = $2 WHERE id = $1 RETURNING *',
[userId, data.name]
);
// Invalidate all related caches
// A pipeline batches the three DELs into a single network round-trip.
const pipeline = redis.pipeline();
pipeline.del(`user:${userId}`);
pipeline.del(`user:${userId}:teams`);
pipeline.del(`user:${userId}:permissions`);
await pipeline.exec();
// Publish event for other services
await redis.publish('user:updated', JSON.stringify({ userId }));
return user;
}
import Redlock from 'redlock';
const redlock = new Redlock([redis]);
/**
 * Cache-aside read guarded by a Redlock distributed lock to prevent a
 * cache stampede: on a miss, only one process queries the database and
 * repopulates the cache; the rest wait on the lock and hit the
 * double-checked cache entry.
 *
 * @param userId - primary-key UUID of the user row.
 * @returns The user, or null when no row exists.
 */
async function getUserWithLock(userId: string): Promise<User | null> {
  const cacheKey = `user:${userId}`;
  const lockKey = `lock:${cacheKey}`;
  const cached = await redis.get(cacheKey);
  if (cached) return JSON.parse(cached);
  // Acquire lock (5s TTL) to prevent stampede
  const lock = await redlock.acquire([lockKey], 5000);
  try {
    // Double-check after acquiring lock: another holder may have
    // already repopulated the cache while we waited.
    const rechecked = await redis.get(cacheKey);
    if (rechecked) return JSON.parse(rechecked);
    // db.query returns a row ARRAY (matching this file's pool query
    // helper). The previous `if (user)` truthiness check on the array
    // always passed — even an empty result ([]) was cached and returned.
    const rows = await db.query('SELECT * FROM users WHERE id = $1', [userId]);
    const user = rows[0] ?? null;
    if (user) {
      await redis.setex(cacheKey, 3600, JSON.stringify(user));
    }
    return user;
  } finally {
    // Always release so other waiters are not stuck until the lock TTL.
    await lock.release();
  }
}
// Embedded (1:few, always accessed together)
// Sub-documents live inside the user document: one read fetches everything,
// at the cost of rewriting the document on any profile/preferences change.
const userSchema = {
_id: ObjectId,
email: String,
profile: {
name: String,
avatar: String,
bio: String
},
preferences: {
theme: String,
notifications: Boolean
}
};
// Reference (1:many, independent access)
// Orders reference the user by id; they grow without bound independently
// of the user document and are queried on their own access path.
const orderSchema = {
_id: ObjectId,
userId: ObjectId, // Reference
items: [{ productId: ObjectId, qty: Number, price: Number }],
total: Number
};
// Hybrid (denormalized for read performance)
// Frequently-displayed author fields are copied into the post so list
// views need no join; the copies must be refreshed when the author changes.
const postSchema = {
_id: ObjectId,
author: {
_id: ObjectId,
name: String, // Denormalized
avatar: String // Denormalized
},
commentCount: Number, // Computed
lastComment: Object // Latest embedded
};
// Top 10 products by revenue across completed orders since startDate.
// NOTE: removed the $lookup on 'products' that previously sat between
// $unwind and $group — its joined 'product' field was never referenced by
// any later stage, so it was an expensive per-line-item join whose output
// was immediately discarded; results are identical without it.
db.orders.aggregate([
  { $match: { status: 'completed', createdAt: { $gte: startDate } } },
  { $unwind: '$items' }, // one document per order line item
  { $group: {
    _id: '$items.productId',
    totalRevenue: { $sum: { $multiply: ['$items.price', '$items.qty'] } },
    // NOTE(review): after $unwind this counts line items, not distinct
    // orders — confirm the intended meaning of orderCount.
    orderCount: { $sum: 1 }
  }},
  { $sort: { totalRevenue: -1 } },
  { $limit: 10 }
]);
import { Pool } from 'pg';
// Shared connection pool; credentials come from the environment.
const pool = new Pool({
host: process.env.DB_HOST,
database: process.env.DB_NAME,
user: process.env.DB_USER,
password: process.env.DB_PASSWORD,
// Pool configuration
max: 20, // Max connections
min: 5, // Min connections
idleTimeoutMillis: 30000, // Close idle after 30s
connectionTimeoutMillis: 5000, // Fail if can't connect in 5s
// Keep-alive
// TCP keep-alive probes prevent idle connections being dropped by
// intermediaries (load balancers, NAT) between app and database.
keepAlive: true,
keepAliveInitialDelayMillis: 10000,
});
// Monitor pool
// 'error' fires for idle-client errors; without a handler it would
// crash the process.
pool.on('error', (err) => console.error('Pool error', err));
pool.on('connect', () => console.log('New connection'));
// Query helper
/**
 * Run a parameterized query against the shared pool and return the rows.
 *
 * Checks a client out of the pool and releases it in `finally`, so the
 * connection is returned even when the query throws.
 *
 * @param sql    Parameterized SQL text using $1, $2, ... placeholders.
 * @param params Optional bind values for the placeholders. Typed as
 *               `unknown[]` (was `any[]`) so callers keep type checking;
 *               any previous caller still compiles.
 * @returns The result rows, typed by the caller via T.
 */
export async function query<T>(sql: string, params?: unknown[]): Promise<T[]> {
  const client = await pool.connect();
  try {
    const result = await client.query(sql, params);
    return result.rows;
  } finally {
    client.release();
  }
}
| Symptom | Root Cause | Solution |
|---|---|---|
| Query timeout | Missing index | Add appropriate index |
| Connection exhausted | Pool too small | Increase pool size |
| Deadlock | Transaction ordering | Consistent lock order |
| Slow bulk insert | Row-by-row | Use COPY or batch |
| Cache inconsistency | Missing invalidation | Event-driven invalidation |
-- 1. Active connections
SELECT count(*) FROM pg_stat_activity WHERE state = 'active';
-- 2. Slow queries
-- Requires the pg_stat_statements extension to be installed and loaded.
SELECT query, calls, mean_exec_time
FROM pg_stat_statements
ORDER BY mean_exec_time DESC LIMIT 10;
-- 3. Index usage
-- idx_pct = share of scans that used an index; tables with many seq scans
-- and a low idx_pct are index-addition candidates. The ELSE branch reports
-- 100 when a table has never been sequentially scanned.
SELECT relname, seq_scan, idx_scan,
CASE WHEN seq_scan > 0
THEN round(100.0 * idx_scan / (seq_scan + idx_scan), 2)
ELSE 100
END AS idx_pct
FROM pg_stat_user_tables ORDER BY seq_scan DESC;
-- 4. Table bloat
-- High n_dead_tup with an old last_vacuum suggests autovacuum is lagging.
SELECT relname, n_dead_tup, last_vacuum
FROM pg_stat_user_tables ORDER BY n_dead_tup DESC;
-- 5. Lock contention
-- Rows here are lock requests currently waiting on another transaction.
SELECT relation::regclass, mode, granted
FROM pg_locks WHERE NOT granted;
-- Kill long-running queries (active for more than 5 minutes).
-- Excludes the current session via pg_backend_pid() so this statement —
-- itself 'active' — can never terminate the connection running it.
SELECT pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE state = 'active'
AND pid <> pg_backend_pid()
AND query_start < NOW() - INTERVAL '5 minutes';
-- Force vacuum (reclaims dead tuples and refreshes planner statistics)
VACUUM (VERBOSE, ANALYZE) tablename;
-- Rebuild index without blocking writes (REINDEX ... CONCURRENTLY
-- requires PostgreSQL 12+)
REINDEX INDEX CONCURRENTLY idx_name;
Handoff: Backend implementation → Agent 02 | Infrastructure → Agent 04 | Security → Agent 05
You are an elite AI agent architect specializing in crafting high-performance agent configurations. Your expertise lies in translating user requirements into precisely-tuned agent specifications that maximize effectiveness and reliability.