From clickhouse-pack
Provides production reference architecture for ClickHouse apps: project layout, data flows, multi-tenancy, operational topology in TypeScript/Node.js. For designing/reviewing systems.
npx claudepluginhub jeremylongshore/claude-code-plugins-plus-skills --plugin clickhouse-packThis skill is limited to using the following tools:
Production-grade architecture for ClickHouse analytics platforms covering
Designs and optimizes ClickHouse schemas: ORDER BY keys, compression codecs, MergeTree partitioning, projections, indexes for v24.4+ Cloud and self-hosted.
Designs ClickHouse schemas with MergeTree engines, ORDER BY sort keys, partitioning, and codecs for analytical workloads and query optimization.
Advises on ClickHouse architectures for workloads like observability, analytics, and IoT, selecting ingestion/modeling patterns, partitioning, joins, upserts via decision rules with provenance labels.
Share bugs, ideas, or general feedback.
Production-grade architecture for ClickHouse analytics platforms covering project layout, data flow, multi-tenancy, and operational patterns.
my-analytics-platform/
├── src/
│ ├── clickhouse/
│ │ ├── client.ts # Singleton client with health checks
│ │ ├── schemas/ # SQL DDL files (source of truth)
│ │ │ ├── 001-events.sql
│ │ │ ├── 002-users.sql
│ │ │ └── 003-materialized-views.sql
│ │ ├── queries/ # Named query functions
│ │ │ ├── events.ts
│ │ │ ├── users.ts
│ │ │ └── dashboards.ts
│ │ └── migrations/ # Schema migrations
│ │ ├── runner.ts
│ │ └── 001-add-country.sql
│ ├── ingestion/
│ │ ├── webhook-receiver.ts # HTTP webhook endpoint
│ │ ├── kafka-consumer.ts # Kafka consumer (if applicable)
│ │ └── buffer.ts # Insert batching buffer
│ ├── api/
│ │ ├── routes.ts # API endpoints
│ │ └── middleware.ts # Auth, rate limiting
│ └── jobs/
│ ├── daily-rollup.ts # Scheduled aggregations
│ └── cleanup.ts # TTL enforcement
├── tests/
│ ├── unit/
│ └── integration/
├── docker-compose.yml # Local ClickHouse
├── init-db/ # Docker init scripts
└── config/
├── development.env
├── staging.env
└── production.env
┌─────────────────┐
│ Data Sources │
│ (Webhooks, API, │
│ Kafka, S3) │
└────────┬────────┘
│
┌────────▼────────┐
│ Ingestion Layer │
│ (Buffer + Batch │
│ 10K+ rows/ins) │
└────────┬────────┘
│
┌──────────────▼──────────────┐
│ ClickHouse Server │
│ │
│ ┌────────────────────────┐ │
│ │ Raw Event Tables │ │
│ │ (MergeTree, append) │ │
│ └───────────┬────────────┘ │
│ │ │
│ ┌───────────▼────────────┐ │
│ │ Materialized Views │ │
│ │ (Auto-aggregate on │ │
│ │ INSERT — hourly, │ │
│ │ daily, tenant-level) │ │
│ └───────────┬────────────┘ │
│ │ │
│ ┌───────────▼────────────┐ │
│ │ Aggregate Tables │ │
│ │ (AggregatingMergeTree)│ │
│ └────────────────────────┘ │
└──────────────┬──────────────┘
│
┌────────▼────────┐
│ API Layer │
│ (Query aggregate│
│ tables, not │
│ raw events) │
└────────┬────────┘
│
┌────────▼────────┐
│ Dashboards / │
│ Client Apps │
└─────────────────┘
-- Layer 1: Raw events (append-only, full fidelity)
CREATE TABLE analytics.events_raw (
event_id UUID DEFAULT generateUUIDv4(),
tenant_id UInt32,
event_type LowCardinality(String),
user_id UInt64,
properties String CODEC(ZSTD(3)),
created_at DateTime64(3) DEFAULT now64(3)
)
ENGINE = MergeTree()
ORDER BY (tenant_id, event_type, toDate(created_at), user_id)
PARTITION BY toYYYYMM(created_at)
TTL created_at + INTERVAL 90 DAY;
-- Layer 2: Hourly aggregation (auto-populated via materialized view)
CREATE TABLE analytics.events_hourly (
hour DateTime,
tenant_id UInt32,
event_type LowCardinality(String),
cnt UInt64,
users AggregateFunction(uniq, UInt64)
)
ENGINE = AggregatingMergeTree()
ORDER BY (tenant_id, event_type, hour);
CREATE MATERIALIZED VIEW analytics.events_hourly_mv TO analytics.events_hourly AS
SELECT toStartOfHour(created_at) AS hour, tenant_id, event_type,
count() AS cnt, uniqState(user_id) AS users
FROM analytics.events_raw GROUP BY hour, tenant_id, event_type;
-- Layer 3: Daily rollup for dashboards
CREATE TABLE analytics.events_daily (
date Date,
tenant_id UInt32,
total UInt64,
users AggregateFunction(uniq, UInt64)
)
ENGINE = AggregatingMergeTree()
ORDER BY (tenant_id, date);
CREATE MATERIALIZED VIEW analytics.events_daily_mv TO analytics.events_daily AS
SELECT toDate(created_at) AS date, tenant_id,
count() AS total, uniqState(user_id) AS users
FROM analytics.events_raw GROUP BY date, tenant_id;
Approach A: Shared table with tenant_id in ORDER BY (recommended)
-- Tenant_id first in ORDER BY = queries filter on tenant efficiently
ORDER BY (tenant_id, event_type, created_at)
-- Query: only scans data for this tenant
SELECT count() FROM events_raw WHERE tenant_id = 42;
Approach B: Database per tenant (for strict isolation)
CREATE DATABASE tenant_42;
CREATE TABLE tenant_42.events (...) ENGINE = MergeTree() ...;
-- Pros: Full isolation, easy to drop tenant
-- Cons: Schema changes need per-tenant DDL, more operational overhead
Approach C: Row-level security (ClickHouse RBAC)
CREATE ROW POLICY tenant_isolation ON analytics.events_raw
FOR SELECT USING tenant_id = getSetting('custom_tenant_id')
TO app_user;
// src/clickhouse/client.ts
import { createClient, ClickHouseClient } from '@clickhouse/client';
let instance: ClickHouseClient | null = null;
export function getClient(): ClickHouseClient {
if (!instance) {
instance = createClient({
url: process.env.CLICKHOUSE_HOST!,
username: process.env.CLICKHOUSE_USER!,
password: process.env.CLICKHOUSE_PASSWORD!,
database: process.env.CLICKHOUSE_DATABASE ?? 'analytics',
max_open_connections: Number(process.env.CH_MAX_CONNECTIONS ?? 10),
request_timeout: 30_000,
compression: { request: true, response: true },
});
}
return instance;
}
// src/clickhouse/queries/dashboards.ts
export async function getTenantDashboard(tenantId: number, days = 30) {
const client = getClient();
const rs = await client.query({
query: `
SELECT date, sum(total) AS events, uniqMerge(users) AS unique_users
FROM analytics.events_daily
WHERE tenant_id = {tid:UInt32} AND date >= today() - {days:UInt32}
GROUP BY date ORDER BY date
`,
query_params: { tid: tenantId, days },
format: 'JSONEachRow',
});
return rs.json<{ date: string; events: string; unique_users: string }>();
}
| Decision | Choice | Why |
|---|---|---|
| Engine | MergeTree (raw) + AggregatingMergeTree (rollups) | Best for append + pre-agg |
| Multi-tenant | Shared table + tenant_id in ORDER BY | Scales to 10K+ tenants |
| Ingestion | Buffer + batch INSERT | Avoids "too many parts" |
| Aggregation | Materialized views (not cron) | Real-time, zero-lag |
| Format | JSONEachRow | Client support, debugging |
| Compression | ZSTD(3) for strings, Delta for ints | 10-20x compression |
| Issue | Cause | Solution |
|---|---|---|
| Cross-tenant data leak | Missing WHERE tenant_id | Use row policies or middleware |
| Stale dashboard data | MV not created | Verify MV exists and is attached |
| Schema drift | Manual DDL changes | Use migration runner |
| Slow dashboard queries | Querying raw table | Query aggregate tables instead |
For multi-environment configuration, see clickhouse-multi-env-setup.