Skill

test-fixture-generator

Install
1. Install the plugin
$ npx claudepluginhub majesticlabs-dev/majestic-marketplace --plugin majestic-data

Want just this skill?

Add to a custom plugin, then install with one command.

Description

Generate synthetic test data with edge cases for ETL pipeline testing.

Tool Access

This skill uses the workspace's default tool permissions.

Skill Content

Test Fixture Generator

Generate test fixtures matching schema specifications with automatic edge case injection.

Core Generator

def generate_fixtures(
    schema: dict,
    count: int = 100,
    edge_cases: bool = True
) -> pd.DataFrame:
    """Generate test data matching schema.

    Args:
        schema: Mapping of column name to a spec dict. Each spec must carry a
            'type' key naming one of: integer, string, date, float, boolean,
            enum. Enum specs must also carry a 'values' list.
        count: Number of random rows to generate per column.
        edge_cases: When true, append boundary/null rows via add_edge_cases.

    Returns:
        A DataFrame with one column per schema entry and ``count`` random
        rows, followed by the edge-case rows when requested.

    Raises:
        ValueError: If a column declares a type that is not supported.
        KeyError: If a spec has no 'type' key (or an enum has no 'values').
    """
    data = {}

    for col, spec in schema.items():
        col_type = spec['type']

        if col_type == 'integer':
            data[col] = generate_integers(count, spec)
        elif col_type == 'string':
            data[col] = generate_strings(count, spec)
        elif col_type == 'date':
            data[col] = generate_dates(count, spec)
        elif col_type == 'float':
            data[col] = generate_floats(count, spec)
        elif col_type == 'boolean':
            data[col] = generate_booleans(count)
        elif col_type == 'enum':
            data[col] = generate_enums(count, spec['values'])
        else:
            # Silently skipping would drop the column from the fixture and
            # hide a schema typo until a downstream test fails.
            raise ValueError(
                f"Unsupported type {col_type!r} for column {col!r}; expected one of "
                "integer, string, date, float, boolean, enum"
            )

    df = pd.DataFrame(data)

    # Edge rows are templated from the first generated row, so there is
    # nothing to build them from when no rows were generated.
    if edge_cases and not df.empty:
        df = add_edge_cases(df, schema)

    return df

Edge Case Injection

def add_edge_cases(df: pd.DataFrame, schema: dict) -> pd.DataFrame:
    """Add rows with boundary and edge case values.

    One "null row" is appended (None in every column whose spec does not set
    ``nullable: false``), followed by per-column boundary rows: min/max for
    integers, empty and max-length strings, 0.0/min/max for floats, and the
    Unix epoch and current time for dates. All other cells in a boundary row
    are copied from the first row of ``df``.

    Args:
        df: Generated fixture rows; the first row is used as the template.
        schema: Mapping of column name to spec dict (same shape as the one
            given to generate_fixtures).

    Returns:
        A new DataFrame: the original rows followed by the edge-case rows.
        An empty ``df`` is returned as an unchanged copy, because there is no
        row to template the edge cases from.
    """
    if df.empty:
        return df.copy()

    # Build the template per column rather than via df.iloc[0].to_dict():
    # a row Series has a single dtype, so ints would be upcast to float when
    # the frame also has float columns. Built once, since it is loop-invariant.
    base_row = {col: df[col].iloc[0] for col in df.columns}

    edge_rows = []

    # Null row (where nullable); non-nullable columns keep a valid value.
    null_row = {
        col: None if spec.get('nullable', True) else base_row[col]
        for col, spec in schema.items()
    }
    edge_rows.append(null_row)

    # Boundary values per column
    for col, spec in schema.items():
        if spec['type'] == 'integer':
            edge_rows.append({**base_row, col: spec.get('min', 0)})
            edge_rows.append({**base_row, col: spec.get('max', 2147483647)})

        elif spec['type'] == 'string':
            edge_rows.append({**base_row, col: ''})  # Empty string
            edge_rows.append({**base_row, col: 'a' * spec.get('max_length', 255)})  # Max length

        elif spec['type'] == 'float':
            edge_rows.append({**base_row, col: 0.0})
            edge_rows.append({**base_row, col: spec.get('min', -1e9)})
            edge_rows.append({**base_row, col: spec.get('max', 1e9)})

        elif spec['type'] == 'date':
            edge_rows.append({**base_row, col: datetime(1970, 1, 1)})
            edge_rows.append({**base_row, col: datetime.now()})

    return pd.concat([df, pd.DataFrame(edge_rows)], ignore_index=True)

Type Generators

import random
import string
from datetime import datetime, timedelta

def generate_integers(count: int, spec: dict) -> list:
    """Return ``count`` random integers within the spec's inclusive bounds.

    The lower bound comes from ``spec['min']`` (default 0) and the upper
    bound from ``spec['max']`` (default 1000000).
    """
    lower = spec.get('min', 0)
    upper = spec.get('max', 1000000)

    values = []
    for _ in range(count):
        values.append(random.randint(lower, upper))
    return values

def generate_floats(count: int, spec: dict) -> list:
    """Return ``count`` random floats drawn uniformly between the spec's bounds.

    Bounds come from ``spec['min']`` (default 0.0) and ``spec['max']``
    (default 1000000.0); each value is rounded to ``spec['precision']``
    decimal places (default 2).
    """
    bounds = (spec.get('min', 0.0), spec.get('max', 1000000.0))
    digits = spec.get('precision', 2)

    # The generator is consumed in order, so draws happen one per output value.
    samples = (random.uniform(*bounds) for _ in range(count))
    return [round(sample, digits) for sample in samples]

def generate_strings(count: int, spec: dict) -> list:
    """Return ``count`` strings shaped by the spec's optional ``pattern``.

    * ``pattern == 'email'``: sequential addresses ``user0@example.com``, ...
    * ``pattern == 'phone'``: ``+1`` followed by a random 10-digit number.
    * otherwise: random ASCII letters with a length between
      ``spec['min_length']`` (default 1) and ``spec['max_length']``
      (default 50), inclusive.
    """
    shortest = spec.get('min_length', 1)
    longest = spec.get('max_length', 50)
    kind = spec.get('pattern')

    if kind == 'email':
        return [f"user{i}@example.com" for i in range(count)]

    if kind == 'phone':
        numbers = []
        for _ in range(count):
            numbers.append(f"+1{random.randint(1000000000, 9999999999)}")
        return numbers

    words = []
    for _ in range(count):
        # Pick the length first, then the letters, one string at a time.
        length = random.randint(shortest, longest)
        words.append(''.join(random.choices(string.ascii_letters, k=length)))
    return words

def _coerce_to_datetime(value):
    """Return ``value`` as a datetime; accepts a datetime, a date or an ISO-8601 string."""
    # Local import: the file-level import only brings in datetime and timedelta.
    from datetime import date

    # datetime is a subclass of date, so it must be checked first.
    if isinstance(value, datetime):
        return value
    if isinstance(value, date):
        return datetime(value.year, value.month, value.day)
    if isinstance(value, str):
        return datetime.fromisoformat(value)
    raise TypeError(
        f"date bound must be a datetime, date or ISO string, got {type(value).__name__}"
    )


def generate_dates(count: int, spec: dict) -> list:
    """Return ``count`` random datetimes between the spec's bounds, in whole-day steps.

    Args:
        count: Number of values to generate.
        spec: Column spec. ``spec['min']`` defaults to 2020-01-01 and
            ``spec['max']`` to the current time. Each bound may be a
            ``datetime``, a ``date`` (what YAML loaders produce for an
            unquoted ``2023-01-01``) or an ISO-8601 string.

    Returns:
        A list of ``datetime`` values, each ``min`` plus a random whole number
        of days that does not go past ``max``.

    Raises:
        ValueError: If ``min`` is later than ``max``.
        TypeError: If a bound is of an unsupported type.
    """
    # Normalise first: subtracting a date from a datetime raises TypeError.
    start = _coerce_to_datetime(spec.get('min', datetime(2020, 1, 1)))
    end = _coerce_to_datetime(spec.get('max', datetime.now()))

    delta = (end - start).days
    if delta < 0:
        raise ValueError(f"date 'min' ({start}) is after 'max' ({end})")

    return [start + timedelta(days=random.randint(0, delta)) for _ in range(count)]

def generate_booleans(count: int) -> list:
    """Return ``count`` values, each chosen at random from True and False."""
    options = [True, False]
    flags = []
    for _ in range(count):
        flags.append(random.choice(options))
    return flags

def generate_enums(count: int, values: list) -> list:
    """Return ``count`` items, each picked at random from ``values``."""
    return list(map(lambda _: random.choice(values), range(count)))

Schema Definition Format

# fixtures/orders_schema.yml
# Each entry under `columns` is one fixture column; `type` selects the generator.
columns:
  # Integer column: `min` is the lower bound; no `max` is given, so the
  # generator's default upper bound applies.
  # `nullable: false` keeps a real value in the injected null edge-case row.
  order_id:
    type: integer
    min: 1
    nullable: false

  # `pattern: email` produces sequential addresses (user0@example.com, ...).
  customer_email:
    type: string
    pattern: email
    nullable: false

  # `precision` is the number of decimal places values are rounded to.
  # `nullable` is omitted, so the column is treated as nullable.
  total_amount:
    type: float
    min: 0.01
    max: 100000.00
    precision: 2

  # Enum values are sampled at random from this list.
  status:
    type: enum
    values: [pending, confirmed, shipped, delivered, cancelled]

  # NOTE(review): YAML loaders such as PyYAML read an unquoted ISO date as a
  # date object rather than a datetime - confirm the date generator accepts it.
  created_at:
    type: date
    min: 2023-01-01
    nullable: false

Usage

import yaml

# Load schema: the column specs live under the file's top-level 'columns' key.
# An explicit encoding keeps parsing independent of the platform's default locale.
with open('fixtures/orders_schema.yml', encoding='utf-8') as f:
    schema = yaml.safe_load(f)['columns']

# Generate fixtures: 100 random rows plus the injected edge-case rows
df = generate_fixtures(schema, count=100, edge_cases=True)

# Save for test use (index=False keeps the row index out of the CSV)
df.to_csv('tests/fixtures/orders_fixture.csv', index=False)
Stats
Stars: 30
Forks: 6
Last Commit: Jan 19, 2026
Actions

Similar Skills