**Trigger**: Use when implementing data validation for new features, APIs, or database models
Implements systematic data validation using Pydantic v2, SQLModel, and Great Expectations for APIs and databases.
/plugin marketplace add greyhaven-ai/claude-code-config
/plugin install data-quality@grey-haven-plugins

Trigger: Use when implementing data validation for new features, APIs, or database models
Purpose: Systematically implement Pydantic v2 validation, ensure database schema alignment with PostgreSQL, and set up data quality monitoring
Prerequisites:
- Pydantic v2 (`pip install "pydantic[email]>=2.0"`)
- SQLModel (`pip install sqlmodel`)
- Great Expectations (`pip install great-expectations`)
- pytest (`pip install pytest`)

8-Phase systematic approach (90-180 minutes total):
Define what needs validation:
Identify data fields:
Define validation rules:
Database considerations:
Example Requirements:
User Registration:
- email: Valid format, unique
- username: 3-30 chars, alphanumeric + hyphens/underscores, unique
- password: Min 12 chars, complexity rules
- age: 13-120, integer
- role: One of [user, admin, moderator], default 'user'
- Business rule: Admins must be 18+
Use TDD methodology with tdd-python-implementer agent.
Process:
Test Structure:
# tests/validation/test_user_schema.py
import pytest
from pydantic import ValidationError
from app.schemas.user import UserCreateSchema
class TestUserCreateSchema:
    """Behavioral tests for the ``UserCreateSchema`` validation contract."""

    def test_valid_user_data(self):
        """A complete, well-formed payload is accepted."""
        payload = dict(
            email="test@example.com",
            username="test_user",
            password="SecurePass123!",
            age=25,
            role="user",
        )
        created = UserCreateSchema(**payload)
        assert created.email == "test@example.com"

    def test_invalid_email_format(self):
        """A malformed email address is rejected with ValidationError."""
        # Everything except the email is valid, so the email is the only
        # possible cause of the failure. ``role`` is omitted on purpose.
        bad_payload = {
            "email": "invalid-email",
            "username": "test",
            "password": "SecurePass123!",
            "age": 25,
        }
        with pytest.raises(ValidationError):
            UserCreateSchema(**bad_payload)
Create production-ready Pydantic v2 models:
# app/schemas/user.py
from pydantic import (
BaseModel,
Field,
field_validator,
model_validator,
EmailStr,
constr,
conint
)
from typing import Literal
from datetime import datetime
from uuid import UUID
class UserCreateSchema(BaseModel):
    """User creation data contract.

    Rules enforced by this model:

    - ``email``: must be a valid email address.
    - ``username``: 3-30 characters drawn from letters, digits, ``_``, ``-``.
    - ``password``: at least 12 characters, and must contain an uppercase
      letter, a lowercase letter, a digit and one of ``!@#$%^&*()_+-=``.
    - ``age``: integer between 13 and 120 inclusive.
    - ``role``: one of ``user``, ``admin``, ``moderator`` (default ``user``).
    - Cross-field rule: a user with role ``admin`` must be 18 or older.
    """

    # Field definitions with constraints.
    # Constraints are declared through Field() on plain ``str`` / ``int``
    # annotations rather than through ``constr()`` / ``conint()`` call
    # expressions, which static type checkers cannot interpret as types.
    email: EmailStr = Field(
        ...,
        description="User email address",
        examples=["user@greyhaven.io"],
    )
    username: str = Field(
        ...,
        min_length=3,
        max_length=30,
        pattern=r'^[a-zA-Z0-9_-]+$',
        description="Username (alphanumeric, hyphens, underscores)",
    )
    password: str = Field(
        ...,
        min_length=12,
        description="Password (min 12 characters)",
    )
    age: int = Field(
        ...,
        ge=13,
        le=120,
        description="User age (13-120)",
    )
    role: Literal['user', 'admin', 'moderator'] = Field(
        default='user',
        description="User role",
    )

    # Field-level validators
    @field_validator('password')
    @classmethod
    def password_complexity(cls, v: str) -> str:
        """Validate password complexity.

        Raises:
            ValueError: If the password lacks an uppercase letter, a
                lowercase letter, a digit, or a special character.
        """
        if not any(c.isupper() for c in v):
            raise ValueError('Password must contain uppercase letter')
        if not any(c.islower() for c in v):
            raise ValueError('Password must contain lowercase letter')
        if not any(c.isdigit() for c in v):
            raise ValueError('Password must contain digit')
        if not any(c in '!@#$%^&*()_+-=' for c in v):
            raise ValueError('Password must contain special character')
        return v

    # Model-level validators
    @model_validator(mode='after')
    def validate_admin_age(self) -> 'UserCreateSchema':
        """Admins must be 18+.

        Runs after field validation, so ``role`` and ``age`` are already
        validated values when compared here.

        Raises:
            ValueError: If ``role`` is ``admin`` and ``age`` is below 18.
        """
        if self.role == 'admin' and self.age < 18:
            raise ValueError('Admin users must be 18 or older')
        return self

    # Pydantic v2 configuration
    model_config = {
        'str_strip_whitespace': True,
        'validate_assignment': True,
        'json_schema_extra': {
            'title': 'User Creation Schema v1.0',
            'examples': [{
                'email': 'alice@greyhaven.io',
                'username': 'alice_dev',
                'password': 'SecurePass123!',
                'age': 25,
                'role': 'user',
            }],
        },
    }
class UserResponseSchema(BaseModel):
    """Shape of a user record as returned by the API.

    Carries no password field, unlike the creation schema.
    """

    # Allow instances to be built from attribute-bearing objects
    # (for ORM compatibility), not only from dicts.
    model_config = {'from_attributes': True}

    id: UUID
    email: EmailStr
    username: str
    role: str
    created_at: datetime
    updated_at: datetime
Key Pydantic v2 Patterns:
- `Field()` for metadata and constraints
- `@field_validator` for single-field validation
- `@model_validator` for cross-field validation
- `model_config` dict (not `class Config`)
- `model_validate()` method (not `parse_obj`)
- `model_dump()` method (not `dict()`)

Ensure Pydantic models match SQLAlchemy/SQLModel schema:
# app/models/user.py
from sqlmodel import SQLModel, Field
from datetime import datetime
from uuid import UUID, uuid4
from enum import Enum
class UserRole(str, Enum):
    """Closed set of user roles; each member is also a plain ``str``."""

    USER = 'user'
    ADMIN = 'admin'
    MODERATOR = 'moderator'
def _utcnow() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Drop-in replacement for ``datetime.utcnow()``, which is deprecated
    since Python 3.12. The tzinfo is stripped so the stored values keep
    the same naive-UTC form the original defaults produced.
    """
    # Local import keeps this block self-contained.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


class User(SQLModel, table=True):
    """User model for PostgreSQL."""

    __tablename__ = 'users'

    id: UUID = Field(default_factory=uuid4, primary_key=True)
    email: str = Field(max_length=255, unique=True, index=True)
    username: str = Field(max_length=30, unique=True, index=True)
    # Only the hash is stored; the API schema's ``password`` has no column.
    password_hash: str = Field(max_length=255)
    age: int
    role: UserRole = Field(default=UserRole.USER)
    created_at: datetime = Field(default_factory=_utcnow)
    # NOTE(review): only a creation-time default is set here; nothing in
    # this block refreshes ``updated_at`` on UPDATE - confirm that is
    # handled elsewhere.
    updated_at: datetime = Field(default_factory=_utcnow)
Schema Alignment Test:
# tests/validation/test_schema_alignment.py
def test_pydantic_sqlalchemy_alignment():
    """Ensure Pydantic fields match SQLAlchemy columns.

    Checks both sides: the shared fields must exist on the Pydantic
    schema and as table columns, and every schema field other than
    ``password`` must map to a column of the same name.
    """
    from app.schemas.user import UserCreateSchema
    from app.models.user import User

    pydantic_fields = set(UserCreateSchema.model_fields.keys())
    sqlalchemy_columns = set(User.__table__.columns.keys())
    expected_columns = {'email', 'username', 'age', 'role'}

    missing_in_schema = expected_columns - pydantic_fields
    assert not missing_in_schema, (
        f'Missing from UserCreateSchema: {sorted(missing_in_schema)}'
    )

    missing_in_table = expected_columns - sqlalchemy_columns
    assert not missing_in_table, (
        f'Missing from users table: {sorted(missing_in_table)}'
    )

    # ``password`` is accepted by the API but persisted as ``password_hash``,
    # so it is the one schema field allowed to have no same-named column.
    unmapped = pydantic_fields - {'password'} - sqlalchemy_columns
    assert not unmapped, (
        f'Schema fields without a column: {sorted(unmapped)}'
    )
    assert 'password_hash' in sqlalchemy_columns
Integrate validation into API endpoints:
# API handler with validation
from pydantic import ValidationError
from app.schemas.user import UserCreateSchema, UserResponseSchema
class ValidationErrorFormatter:
    """Format Pydantic errors for API responses."""

    @staticmethod
    def format_for_api(e: 'ValidationError') -> dict:
        """Format validation errors.

        Groups messages by field path. A path is the error's ``loc``
        items joined with ``.`` (for example ``items.0.name``). Errors
        with an empty ``loc``, such as those raised by model-level
        validators, are grouped under ``__root__`` instead of an
        empty-string key.

        Args:
            e: Exception exposing ``errors()``, which returns a list of
                dicts with ``loc`` and ``msg`` entries.

        Returns:
            A dict with ``success``, ``error``, ``message`` and
            ``errors``, where ``errors`` maps each field path to its
            list of messages.
        """
        errors = {}
        for error in e.errors():
            # An empty loc joins to '', so fall back to a named root key.
            field = '.'.join(str(loc) for loc in error['loc']) or '__root__'
            errors.setdefault(field, []).append(error['msg'])
        return {
            'success': False,
            'error': 'validation_error',
            'message': 'Request validation failed',
            'errors': errors,
        }
async def create_user_handler(request):
    """Handle POST /api/users.

    Validates the JSON request body against ``UserCreateSchema``. On a
    validation failure it returns the formatted error payload together
    with status code 400.

    The ``try`` covers only request validation. A ``ValidationError``
    raised while building the response is a server-side bug, so it is
    not reported to the client as a 400.
    """
    # 1. Parse and validate request body
    data = await request.json()
    try:
        user_data = UserCreateSchema.model_validate(data)
    except ValidationError as e:
        return ValidationErrorFormatter.format_for_api(e), 400

    # 2. Create user (hash password, save to DB)
    # ... implementation

    # 3. Return validated response
    return UserResponseSchema(...)
Set up Great Expectations for continuous validation:
# data_quality/user_expectations.py
import great_expectations as ge
def create_user_expectations():
    """Build and return the ``user_data_quality`` expectation suite.

    Registers expectations for non-null, unique and well-formed
    ``email`` values, and for ``age`` values between 13 and 120.
    """
    # NOTE(review): the calls below assume the suite object exposes
    # ``expect_*`` methods directly - confirm against the installed
    # Great Expectations version.
    user_suite = ge.get_context().add_expectation_suite("user_data_quality")

    # Email: present, unique, and roughly address-shaped.
    user_suite.expect_column_values_to_not_be_null("email")
    user_suite.expect_column_values_to_be_unique("email")
    user_suite.expect_column_values_to_match_regex(
        "email", r'^[^@]+@[^@]+\.[^@]+$'
    )

    # Age: same bounds as the API schema.
    user_suite.expect_column_values_to_be_between(
        "age", min_value=13, max_value=120
    )
    return user_suite
def validate_batch(df):
    """Validate ``df`` against the ``user_data_quality`` suite.

    Returns:
        A ``(success, statistics)`` tuple taken from the validation result.
    """
    # NOTE(review): ``get_batch(df, suite_name)`` is assumed to match the
    # installed Great Expectations API - confirm.
    outcome = ge.get_context().get_batch(df, "user_data_quality").validate()
    return outcome.success, outcome.statistics
Add validation metrics:
# Prometheus metrics
from prometheus_client import Counter, Histogram
# Count of validation failures, labelled by field and error type.
validation_errors = Counter(
    'validation_errors_total',
    'Total validation errors',
    labelnames=['field', 'error_type'],
)

# Distribution of time spent validating a request.
validation_duration = Histogram(
    'validation_duration_seconds',
    'Time spent validating requests',
)

# Inside the handler: time only the validation call itself.
with validation_duration.time():
    user_data = UserCreateSchema.model_validate(data)
Generate OpenAPI specs from Pydantic models:
# Generate OpenAPI specification
# OpenAPI document for the user-creation endpoint. The request and
# response schemas come straight from the Pydantic models, so the spec
# cannot drift from the validation rules.
spec = {
    'openapi': '3.1.0',
    'info': {'title': 'Grey Haven API', 'version': '1.0.0'},
    'paths': {
        '/api/users': {
            'post': {
                'summary': 'Create new user',
                'requestBody': {
                    # The handler always parses a JSON body, so mark it required.
                    'required': True,
                    'content': {
                        'application/json': {
                            'schema': UserCreateSchema.model_json_schema()
                        }
                    }
                },
                'responses': {
                    '200': {
                        # 'description' is a required field of an OpenAPI
                        # Response Object.
                        'description': 'User created successfully',
                        'content': {
                            'application/json': {
                                'schema': UserResponseSchema.model_json_schema()
                            }
                        }
                    }
                }
            }
        }
    }
}
All supporting files are under 500 lines per Anthropic best practices:
examples/ - Complete validation workflow examples
reference/ - Pydantic v2 and validation references
templates/ - Copy-paste ready templates
checklists/ - Validation implementation checklist
Use this workflow when:
model_config dict, not class Config