Automatically manage regression test baseline lifecycle with triggers for releases, deployments, and quality gates.
Automatically manages regression test baselines triggered by releases, merges, deployments, and schedules.
/plugin marketplace add jmagly/aiwg
/plugin install sdlc@aiwg

This skill inherits all available tools. When active, it can use any tool Claude has access to.
Automatically manage regression test baseline lifecycle with triggers for releases, deployments, and quality gates.
This skill automates regression baseline management by:
When triggered or activated automatically, this skill:
Detects trigger conditions:
Validates pre-conditions:
Captures new baseline:
Validates baseline quality:
Activates baseline:
Manages lifecycle:
trigger:
type: git_tag
pattern: "^v[0-9]+\\.[0-9]+\\.[0-9]+$"
description: "Baseline on semantic version tags"
behavior:
on_tag_create:
- validate_tag_format
- check_if_tests_passing
- create_baseline:
name: "release-{tag}"
auto_activate: true
- tag_baseline_with_git_tag
example:
tag: v2.1.0
baseline_id: release-v2.1.0
status: auto-created
trigger:
type: git_merge
branches:
- main
- master
description: "Baseline after merge to protected branch"
behavior:
on_merge_complete:
- wait_for_ci_success
- check_test_pass_rate:
minimum: 95%
- create_baseline:
name: "main-{commit_short}"
auto_activate: false # Require approval
- request_approval:
approvers: [tech-lead, test-lead]
example:
merge_commit: abc123def
baseline_id: main-abc123d
approval_required: true
trigger:
type: deployment
environments:
- production
- staging
description: "Baseline after successful deployment"
behavior:
on_deployment_success:
- wait_for_smoke_tests
- verify_health_checks
- create_baseline:
name: "{environment}-{deploy_id}"
auto_activate: true
- link_to_deployment:
id: deploy_id
timestamp: deployment_timestamp
example:
environment: production
deploy_id: prod-2026-01-28-01
baseline_id: production-prod-2026-01-28-01
trigger:
type: manual
description: "Baseline after explicit human approval"
behavior:
on_approval_received:
- validate_approver_permissions
- capture_approval_metadata
- create_baseline:
name: "approved-{timestamp}"
auto_activate: true
- record_approver:
name: approver_name
reason: approval_reason
example:
approver: tech-lead
reason: "Post-release validation complete"
baseline_id: approved-2026-01-28T15-30
trigger:
type: schedule
description: "Baseline on fixed schedule"
schedules:
nightly:
cron: "0 2 * * *"
description: "Nightly baseline capture"
auto_activate: false
weekly:
cron: "0 3 * * 0"
description: "Weekly baseline for long-term tracking"
auto_activate: false
monthly:
cron: "0 4 1 * *"
description: "Monthly baseline archive"
auto_activate: false
behavior:
on_schedule_trigger:
- check_if_tests_healthy
- create_baseline:
name: "{schedule}-{date}"
auto_activate: false
- generate_drift_report
storage:
type: local
location: .aiwg/testing/baselines/
structure:
- functional/
- visual/
- performance/
- api/
retention:
active: 1 # Keep 1 active baseline per type
archive: 10 # Keep 10 archived versions
compress: true # Compress archives with gzip
cleanup:
prune_after_days: 90
keep_tagged: true # Never prune tagged baselines
storage:
type: git_lfs
description: "Use Git LFS for large baselines (screenshots, large JSON)"
configuration:
track_patterns:
- "*.png"
- "*.jpg"
- "baselines/**/*.json"
max_file_size: 10MB
behavior:
on_baseline_create:
- compress_if_needed
- add_to_git_lfs
- commit_with_metadata
storage:
type: cloud
provider: s3 # or gcs, azure_blob
configuration:
bucket: my-project-baselines
path_prefix: baselines/{environment}/
encryption: AES256
behavior:
on_baseline_create:
- upload_to_cloud
- generate_signed_url:
expiry: 7 days
- store_url_in_manifest
- keep_local_copy: false # Optional: delete after upload
retention:
lifecycle_policy:
- after_30_days: transition_to_infrequent_access
- after_90_days: transition_to_glacier
- after_365_days: delete
storage:
type: artifact_registry
description: "Integrate with artifact registries (npm, Maven, Docker)"
configuration:
registry_url: https://registry.example.com
repository: baseline-artifacts
versioning: semver # Follow package versioning
behavior:
on_baseline_create:
- package_baseline:
format: tarball
name: "{project}-baseline"
version: "{tag_version}"
- publish_to_registry
- tag_as_latest: true
validation:
before_activation:
- test_pass_rate:
minimum: 95%
description: "At least 95% of tests must pass"
- quality_gates:
all_must_pass: true
description: "All quality gates must be green"
- stability_check:
runs: 3
consistency: 100%
description: "Baseline must be 100% consistent across 3 runs"
- critical_issues:
max_open: 0
description: "No critical issues can be open"
- human_approval:
required_for:
- production_baselines
- breaking_changes
approvers: [tech-lead, qa-lead]
validation:
diff_detection:
compare_to_previous: true
flag_changes:
- new_failures
- performance_degradation
- unexpected_schema_changes
- visual_differences
severity_classification:
critical:
- new_test_failures
- security_regressions
high:
- performance_degradation_gt_20_percent
- breaking_api_changes
medium:
- new_warnings
- minor_visual_differences
low:
- timestamp_changes
- non_functional_metadata
action_on_critical:
- block_activation
- notify_stakeholders
- require_manual_review
environments:
development:
auto_baseline: true
trigger: merge_to_develop
retention: 5
approval_required: false
staging:
auto_baseline: true
trigger: deployment_success
retention: 10
approval_required: true
approvers: [qa-lead]
production:
auto_baseline: true
trigger: semantic_version_tag
retention: 20
approval_required: true
approvers: [tech-lead, release-manager]
validation_level: strict
selective_updates:
description: "Update only specific baseline types"
update_policies:
functional:
trigger: every_merge
auto_activate: true
visual:
trigger: design_system_update
auto_activate: false
require_approval: true
performance:
trigger: weekly
auto_activate: false
threshold_change: 10% # Only update if >10% improvement
api:
trigger: version_tag
auto_activate: true
breaking_changes_require_approval: true
# .github/workflows/auto-baseline.yml
name: Auto-Baseline Management
on:
push:
tags:
- 'v*.*.*'
workflow_dispatch:
inputs:
baseline_type:
description: 'Baseline type to update'
required: true
type: choice
options:
- all
- functional
- visual
- performance
- api
jobs:
auto_baseline:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Run tests and capture baseline
run: |
npm test -- --coverage
npm run test:integration
npm run test:e2e
- name: Create baseline
run: |
npx aiwg baseline create auto-${GITHUB_REF_NAME} \
--type all \
--tag ${GITHUB_REF_NAME} \
--auto-activate
- name: Upload to S3
run: |
aws s3 sync .aiwg/testing/baselines/ \
s3://my-baselines/baselines/${GITHUB_REF_NAME}/
- name: Update manifest
run: |
git add .aiwg/testing/baselines/manifest.yaml
git commit -m "chore: update baseline for ${GITHUB_REF_NAME}"
git push
# .gitlab-ci.yml
baseline_on_release:
stage: deploy
only:
- tags
script:
- npm test
- npx aiwg baseline create release-${CI_COMMIT_TAG}
- npx aiwg baseline upload --storage s3
artifacts:
paths:
- .aiwg/testing/baselines/
expire_in: 30 days
concurrency_control:
mode: lock_file
lock_file: .aiwg/testing/baselines/.lock
on_conflict:
- check_lock_file
- if_locked:
wait_timeout: 300 # 5 minutes
action_on_timeout: fail_with_message
distributed_lock:
enabled: true
backend: redis
key: "baseline_update_lock"
ttl: 600 # 10 minutes
merge_strategy:
on_baseline_conflict:
- prefer: newer_timestamp
- fallback: manual_resolution
resolution_workflow:
- detect_conflict
- preserve_both_baselines
- notify_team_for_manual_merge
- provide_diff_comparison
rollback:
enabled: true
preserve_rollback_history: true
rollback_triggers:
- manual_command
- quality_regression_detected
- deployment_failure
rollback_process:
- identify_previous_baseline
- validate_previous_baseline_integrity
- deactivate_current_baseline
- activate_previous_baseline
- update_manifest
- notify_stakeholders
- record_rollback_reason
example:
current: baseline-v2.1.0
rollback_to: baseline-v2.0.5
reason: "Critical regression in v2.1.0"
triggered_by: tech-lead
# Baseline Diff Report
**Date**: 2026-01-28
**Previous Baseline**: functional-auth-v2
**New Baseline**: functional-auth-v3
**Trigger**: semantic_version_tag v2.1.0
## Executive Summary
**Changes Detected**: 12 differences
**Severity**: Medium
**Recommendation**: Approve with review of API changes
| Category | Previous | New | Change |
|----------|----------|-----|--------|
| Tests Passing | 45/45 | 47/47 | +2 new tests |
| Functional Outputs | 100% match | 98% match | 2 API changes |
| Performance | Baseline | +5% faster | Improvement |
| Visual | Baseline | 100% match | No change |
## Detailed Changes
### Functional Changes
#### Change 1: New field in login response
**Type**: API Schema Change
**Severity**: Medium - Breaking for strict clients
**Previous** (baseline-v2):
```json
{
  "token": "jwt.header.payload",
  "user": {
    "id": "uuid",
    "email": "user@example.com"
  }
}
```

**New** (baseline-v3):

```json
{
  "token": "jwt.header.payload",
  "user": {
    "id": "uuid",
    "email": "user@example.com",
    "display_name": "John Doe"
  }
}
```

**Impact**: Clients using strict schema validation may fail.
**Recommendation**: Document as a breaking change in the CHANGELOG.
**Type**: Error Message Update
**Severity**: Low - Informational

**Previous**:

```
Error: Invalid input
```

**New**:

```
ValidationError: Email format is invalid. Expected format: user@example.com
```

**Impact**: Better user experience, non-breaking.
**Recommendation**: Approve.
| Metric | Previous | New | Change | Status |
|---|---|---|---|---|
| Login p50 | 45ms | 42ms | -7% | ✅ Improved |
| Login p95 | 120ms | 115ms | -4% | ✅ Improved |
| Registration p50 | 85ms | 80ms | -6% | ✅ Improved |
test/auth/display-name-validation.test.ts
test/auth/error-message-quality.test.ts
To approve this baseline:
aiwg baseline approve functional-auth-v3 \
--reviewed-by "tech-lead" \
--reason "API schema change documented and approved"
To reject and rollback:
aiwg baseline rollback functional-auth-v3 \
--to functional-auth-v2 \
--reason "Breaking change not acceptable"
## Usage Examples
### Auto-Baseline on Release Tag
Event: Git tag v2.1.0 created
Skill activates automatically:
Output: "Auto-Baseline Created: release-v2.1.0
Trigger: Git tag v2.1.0
Previous: release-v2.0.5
Changes: 3 API fields added, 2 tests added
Quality: 100% tests passing
Stability: 100% consistent (3/3 runs)
Status: Pending approval
Approvers: tech-lead, release-manager
Diff report: .aiwg/testing/baseline-diff-v2.1.0.md
Approve with: aiwg baseline approve release-v2.1.0"
### Auto-Baseline on Merge to Main
Event: PR #456 merged to main
Skill activates:
Output: "Auto-Baseline Blocked: main-abc123d
Trigger: Merge to main (commit abc123d)
Test pass rate: 98% (44/45)
Reason: 1 test flaky, below 100% threshold
Action required:
Or override with:
aiwg baseline create main-abc123d \
  --force \
  --approved-by tech-lead \
  --reason 'Known flaky test acceptable'"
### Scheduled Weekly Baseline
Event: Weekly schedule (Sunday 3 AM)
Skill activates:
Output: "Weekly Baseline Created: weekly-2026-01-28
Previous: weekly-2026-01-21
Drift: Minimal (0.2% performance improvement)
New tests: 3 added
Test health: 100% passing
Auto-activated: No (scheduled baselines require approval)
Drift report: .aiwg/testing/weekly-drift-2026-01-28.md
Review and approve: aiwg baseline review weekly-2026-01-28"
## Integration
This skill uses:
- `regression-baseline`: Core baseline creation and comparison
- `regression-bisect`: Find commits that changed baseline
- `regression-metrics`: Track baseline drift over time
- `test-coverage`: Ensure baseline captures critical paths
- `issue-auto-sync`: Create issues for baseline approval
- `project-awareness`: Detect CI/CD configuration
## Agent Orchestration
```yaml
agents:
baseline_creation:
agent: test-engineer
focus: Capture and validate baseline
diff_analysis:
agent: test-architect
focus: Analyze baseline changes
approval:
agent: tech-lead
focus: Review and approve baselines
automation:
agent: devops-engineer
focus: CI/CD integration
```

# aiwg.yml
regression:
auto_baseline:
enabled: true
triggers:
semantic_version_tag:
enabled: true
pattern: "^v[0-9]+\\.[0-9]+\\.[0-9]+$"
auto_activate: false
approval_required: true
merge_to_main:
enabled: true
branches: [main, master]
auto_activate: false
test_pass_threshold: 100%
deployment_success:
enabled: true
environments: [production, staging]
auto_activate: true
scheduled:
enabled: true
nightly:
cron: "0 2 * * *"
auto_activate: false
weekly:
cron: "0 3 * * 0"
auto_activate: false
validation:
minimum_test_pass_rate: 95%
stability_runs: 3
require_human_approval_for:
- production_baselines
- breaking_changes
- critical_severity_changes
storage:
primary: local
backup: s3
retention:
active: 1
archive: 10
compress: true
notifications:
on_baseline_created: [slack, email]
on_approval_needed: [slack, issue_comment]
on_baseline_activated: [slack]
retention:
by_tag:
release_tags: keep_forever
development_tags: 30_days
untagged: 7_days
by_environment:
production: 365_days
staging: 90_days
development: 30_days
by_type:
functional: 90_days
visual: 60_days
performance: 120_days
api: 90_days
compression:
after_days: 7
algorithm: gzip
keep_uncompressed: false
📸 Auto-Baseline Created
Baseline: {baseline_id}
Trigger: {trigger_type}
Environment: {environment}
Status: {status}
Changes from previous:
• {change_count} differences detected
• Severity: {max_severity}
Diff report: {diff_report_url}
{approval_required ? "Approval required from: {approvers}" : "Auto-activated"}
⚠️ Baseline Approval Required
Baseline: {baseline_id}
Reason: {approval_reason}
Changes:
{change_summary}
Review:
{diff_report_url}
Approve:
aiwg baseline approve {baseline_id} --reason "..."
Reject:
aiwg baseline reject {baseline_id} --reason "..."
Expires: {approval_timeout}
✅ Baseline Activated
Baseline: {baseline_id}
Activated at: {activation_timestamp}
Replaces: {previous_baseline_id}
This baseline is now active for regression testing.
All future test runs will be compared against this baseline.
.aiwg/testing/baselines/{type}/
.aiwg/testing/baselines/manifest.yaml
.aiwg/testing/baseline-diffs/
.aiwg/testing/baseline-approvals/
.aiwg/testing/baselines/archive/
.aiwg/testing/baselines/.lock

Activates when the user asks about AI prompts, needs prompt templates, wants to search for prompts, or mentions prompts.chat. Use for discovering, retrieving, and improving prompts.
Search, retrieve, and install Agent Skills from the prompts.chat registry using MCP tools. Use when the user asks to find skills, browse skill catalogs, install a skill for Claude, or extend Claude's capabilities with reusable AI agent components.
Creating algorithmic art using p5.js with seeded randomness and interactive parameter exploration. Use this when users request creating art using code, generative art, algorithmic art, flow fields, or particle systems. Create original algorithmic art rather than copying existing artists' work to avoid copyright violations.