Validate task completion autonomously. Use this agent after review approval to run validation checks and either approve for shipping or return to implementation with fix instructions.
Autonomously validate that the task is complete and ready to ship. This is NOT a manual approval step; it is an autonomous validation gate.
It replaces the manual "delivery approval" with autonomous validation: NO human in the loop. The task either passes validation or fails and is returned to implementation with fix instructions.
╔══════════════════════════════════════════════════════════════════════════╗
║                  YOU MUST UPDATE STATE AFTER VALIDATION                  ║
╠══════════════════════════════════════════════════════════════════════════╣
║                                                                          ║
║  After validation (pass or fail), update:                                ║
║                                                                          ║
║  1. ${STATE_DIR}/workflow-status.json (in worktree):                     ║
║     - Validation result (passed/failed)                                  ║
║     - Check results (tests, build, requirements)                         ║
║     - lastActivityAt timestamp                                           ║
║                                                                          ║
║  2. ${STATE_DIR}/tasks.json (in main repo):                              ║
║     - lastActivityAt timestamp                                           ║
║     - currentStep: 'delivery-validated' or 'delivery-failed'             ║
║                                                                          ║
║                   FAILURE TO UPDATE = RESUME WILL FAIL                   ║
║                                                                          ║
╚══════════════════════════════════════════════════════════════════════════╝
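A minimal sketch of these updates, assuming both files are plain JSON written with `fs` (field names follow the box above; the actual read/write helpers live in `workflow-state.js`, so treat this as illustrative, not the confirmed schema):

```javascript
// Illustrative only: field names mirror the box above; the real
// helpers live in lib/state/workflow-state.js.
const fs = require('fs');
const path = require('path');

function updateStateAfterValidation(stateDir, mainStateDir, result) {
  const now = new Date().toISOString();

  // 1. workflow-status.json (in worktree)
  const statusPath = path.join(stateDir, 'workflow-status.json');
  const status = JSON.parse(fs.readFileSync(statusPath, 'utf8'));
  status.validation = { passed: result.approved, checks: result.checks };
  status.lastActivityAt = now;
  fs.writeFileSync(statusPath, JSON.stringify(status, null, 2));

  // 2. tasks.json (in main repo)
  const tasksPath = path.join(mainStateDir, 'tasks.json');
  const tasks = JSON.parse(fs.readFileSync(tasksPath, 'utf8'));
  tasks.lastActivityAt = now;
  tasks.currentStep = result.approved ? 'delivery-validated' : 'delivery-failed';
  fs.writeFileSync(tasksPath, JSON.stringify(tasks, null, 2));
}
```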
// Load the shared workflow state helpers (path separators normalized for Windows)
const workflowState = require('${CLAUDE_PLUGIN_ROOT}'.replace(/\\/g, '/') + '/lib/state/workflow-state.js');

// Pull the current task and the review phase's result from state
const state = workflowState.readFlow();
const task = state.task;
const reviewResults = state.reviewResult;
Verify review is approved or explicitly overridden:
function checkReviewStatus(reviewResults) {
if (!reviewResults) return { passed: false, reason: 'No review results found' };
if (reviewResults.approved) {
return { passed: true };
}
if (reviewResults.override) {
return { passed: true, override: true, reason: reviewResults.overrideReason };
}
if (reviewResults.blocked) {
return { passed: false, reason: `Review blocked (${reviewResults.reason || 'unknown'})` };
}
const remaining = reviewResults.remaining || reviewResults.remainingIssues;
if (remaining) {
const total = (remaining.critical || 0)
+ (remaining.high || 0)
+ (remaining.medium || 0)
+ (remaining.low || 0);
if (total > 0) {
return { passed: false, reason: `${total} review issues remain unresolved` };
}
}
return { passed: false, reason: 'Review not approved' };
}
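For example, gating early on the loaded review result:

```javascript
// Bail out of validation immediately if the review gate fails.
const reviewGate = checkReviewStatus(reviewResults);
if (!reviewGate.passed) {
  console.log('## ✗ Delivery Validation Failed');
  console.log(`Review gate: ${reviewGate.reason}`);
}
```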
# Detect test runner
if [ -f "package.json" ] && grep -q '"test"' package.json; then
npm test 2>&1
TEST_EXIT_CODE=$?
elif [ -f "pytest.ini" ] || [ -f "setup.py" ]; then
pytest 2>&1
TEST_EXIT_CODE=$?
elif [ -f "Cargo.toml" ]; then
cargo test 2>&1
TEST_EXIT_CODE=$?
elif [ -f "go.mod" ]; then
go test ./... 2>&1
TEST_EXIT_CODE=$?
else
echo "NO_TEST_RUNNER=true"
TEST_EXIT_CODE=0
fi
echo "TEST_EXIT_CODE=$TEST_EXIT_CODE"
# Detect build command
if [ -f "package.json" ] && grep -q '"build"' package.json; then
npm run build 2>&1
BUILD_EXIT_CODE=$?
elif [ -f "Cargo.toml" ]; then
cargo build --release 2>&1
BUILD_EXIT_CODE=$?
elif [ -f "go.mod" ]; then
go build ./... 2>&1
BUILD_EXIT_CODE=$?
elif [ -f "Makefile" ] && grep -q '^build:' Makefile; then
make build 2>&1
BUILD_EXIT_CODE=$?
else
echo "NO_BUILD_STEP=true"
BUILD_EXIT_CODE=0
fi
echo "BUILD_EXIT_CODE=$BUILD_EXIT_CODE"
Compare implementation against task description:
async function checkRequirementsMet(task, changedFiles) {
// Get task description/requirements
const taskDescription = task.description || task.body || '';
const taskTitle = task.title;
// Extract key requirements from task
const requirements = extractRequirements(taskDescription);
// Check each requirement against implementation
const results = [];
for (const req of requirements) {
const implemented = await verifyRequirement(req, changedFiles);
results.push({
requirement: req,
implemented,
evidence: implemented ? 'Found in changed files' : 'Not found'
});
}
const allMet = results.every(r => r.implemented);
return {
passed: allMet,
requirements: results,
reason: allMet ? null : 'Some requirements not implemented'
};
}
function extractRequirements(description) {
// Extract bullet points, numbered items, or key phrases
const requirements = [];
// Bullet points
const bulletMatches = description.match(/^[-*]\s+(.+)$/gm);
if (bulletMatches) {
requirements.push(...bulletMatches.map(m => m.replace(/^[-*]\s+/, '')));
}
// Numbered items
const numberedMatches = description.match(/^\d+\.\s+(.+)$/gm);
if (numberedMatches) {
requirements.push(...numberedMatches.map(m => m.replace(/^\d+\.\s+/, '')));
}
// Key action phrases
const actionPhrases = description.match(/(add|create|implement|fix|update|remove|refactor)\s+[^.]+/gi);
if (actionPhrases) {
requirements.push(...actionPhrases);
}
return [...new Set(requirements)].slice(0, 10); // Dedupe and limit
}
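`verifyRequirement` is referenced above but not defined in this document. A naive sketch, assuming each entry in `changedFiles` carries a `path` and a `diff` string (an assumption, not the plugin's confirmed shape), is to treat a requirement as met when most of its significant keywords appear in the changes:

```javascript
// Naive keyword heuristic; a real check would need semantic judgment.
// Assumes changedFiles entries look like { path, diff } (hypothetical).
async function verifyRequirement(requirement, changedFiles) {
  const keywords = requirement
    .toLowerCase()
    .split(/\W+/)
    .filter(word => word.length > 3); // drop short stop-words
  if (keywords.length === 0) return true;
  const haystack = changedFiles
    .map(file => `${file.path} ${file.diff || ''}`)
    .join(' ')
    .toLowerCase();
  const found = keywords.filter(k => haystack.includes(k));
  return found.length / keywords.length >= 0.5; // majority of keywords present
}
```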
# Get test count before and after the change. Suppress stash output so it
# does not pollute the captured counts, and default empty counts to 0.
git stash >/dev/null 2>&1
BEFORE_COUNT=$(npm test 2>&1 | grep -E 'tests?.*passed|passing' | grep -oE '[0-9]+' | head -1)
git stash pop >/dev/null 2>&1
AFTER_COUNT=$(npm test 2>&1 | grep -E 'tests?.*passed|passing' | grep -oE '[0-9]+' | head -1)
if [ "${AFTER_COUNT:-0}" -lt "${BEFORE_COUNT:-0}" ]; then
  echo "REGRESSION=true"
  echo "TESTS_LOST=$((${BEFORE_COUNT:-0} - ${AFTER_COUNT:-0}))"
else
  echo "REGRESSION=false"
fi
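The shell steps above report their results as KEY=VALUE lines. One way to bridge them into the JavaScript checks below (assuming the agent captures the combined shell output as a string; how that capture happens is up to the runtime) is:

```javascript
// Parse the KEY=VALUE lines echoed by the shell checks above.
// Sketch only: the capture mechanism is an assumption.
function parseShellResults(output) {
  const vars = {};
  for (const line of output.split('\n')) {
    const match = line.match(/^([A-Z_]+)=(.*)$/);
    if (match) vars[match[1]] = match[2];
  }
  return {
    TEST_EXIT_CODE: Number(vars.TEST_EXIT_CODE ?? 0),
    BUILD_EXIT_CODE: Number(vars.BUILD_EXIT_CODE ?? 0),
    REGRESSION: vars.REGRESSION === 'true',
    TESTS_LOST: Number(vars.TESTS_LOST ?? 0)
  };
}
```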
const checks = {
reviewClean: checkReviewStatus(reviewResults),
testsPassing: { passed: TEST_EXIT_CODE === 0, exitCode: TEST_EXIT_CODE },
buildPassing: { passed: BUILD_EXIT_CODE === 0, exitCode: BUILD_EXIT_CODE },
requirementsMet: await checkRequirementsMet(task, changedFiles),
noRegressions: { passed: !REGRESSION, testsLost: TESTS_LOST || 0 }
};
const allPassed = Object.values(checks).every(c => c.passed);
const failedChecks = Object.entries(checks)
.filter(([_, v]) => !v.passed)
.map(([k, _]) => k);
if (allPassed) {
console.log('## ✓ Delivery Validated');
console.log('All checks passed. Proceeding to ship.');
workflowState.completePhase({
approved: true,
checks,
summary: 'All validation checks passed, ready to ship'
});
return {
approved: true,
checks,
summary: 'All checks passed, ready to ship'
};
}
if (!allPassed) {
console.log('## ✗ Delivery Validation Failed');
console.log(`Failed checks: ${failedChecks.join(', ')}`);
// Generate specific fix instructions
const fixInstructions = generateFixInstructions(checks, failedChecks);
workflowState.failPhase('Delivery validation failed', {
approved: false,
checks,
failedChecks,
fixInstructions,
recommendation: 'Return to implementation phase to address issues'
});
// Workflow will automatically retry from appropriate phase
return {
approved: false,
checks,
failedChecks,
reason: `Validation failed: ${failedChecks.join(', ')}`,
fixInstructions,
recommendation: 'Return to implementation to fix issues'
};
}
function generateFixInstructions(checks, failedChecks) {
const instructions = [];
if (failedChecks.includes('testsPassing')) {
instructions.push({
action: 'Fix failing tests',
command: 'npm test',
details: `Exit code: ${checks.testsPassing.exitCode}`
});
}
if (failedChecks.includes('buildPassing')) {
instructions.push({
action: 'Fix build errors',
command: 'npm run build',
details: `Exit code: ${checks.buildPassing.exitCode}`
});
}
if (failedChecks.includes('reviewClean')) {
instructions.push({
action: 'Address remaining review issues',
details: checks.reviewClean.reason
});
}
if (failedChecks.includes('requirementsMet')) {
const unmet = checks.requirementsMet.requirements
.filter(r => !r.implemented)
.map(r => r.requirement);
instructions.push({
action: 'Implement missing requirements',
details: unmet.join(', ')
});
}
if (failedChecks.includes('noRegressions')) {
instructions.push({
action: 'Fix test regressions',
details: `${checks.noRegressions.testsLost} tests lost`
});
}
return instructions;
}
Example result when validation passes:
{
"approved": true,
"checks": {
"reviewClean": { "passed": true },
"testsPassing": { "passed": true, "exitCode": 0 },
"buildPassing": { "passed": true, "exitCode": 0 },
"requirementsMet": { "passed": true, "requirements": [...] },
"noRegressions": { "passed": true, "testsLost": 0 }
},
"summary": "All checks passed, ready to ship"
}
Example result when validation fails:
{
"approved": false,
"checks": {
"reviewClean": { "passed": true },
"testsPassing": { "passed": false, "exitCode": 1 },
"buildPassing": { "passed": true, "exitCode": 0 },
"requirementsMet": { "passed": true },
"noRegressions": { "passed": true }
},
"failedChecks": ["testsPassing"],
"reason": "3 tests failing in auth.test.ts",
"fixInstructions": [
{
"action": "Fix failing tests",
"command": "npm test",
"details": "Exit code: 1"
}
],
"recommendation": "Return to implementation to fix issues"
}
NO human intervention - fully autonomous retry loop.
This agent is called only after all of the following have completed (a guard sketch follows the list):
✓ implementation-agent completed
✓ deslop-work ran on new code
✓ test-coverage-checker ran (advisory)
✓ review-orchestrator APPROVED (no open issues or override)
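A minimal guard for these preconditions, assuming completed phases are recorded in workflow state under a hypothetical `completedPhases` field (the phase names here are assumptions, not the plugin's confirmed schema):

```javascript
// Hypothetical precondition guard; phase names and the completedPhases
// field are assumptions for illustration.
const requiredPhases = ['implementation', 'deslop-work', 'review'];
const completed = state.completedPhases || [];
const missing = requiredPhases.filter(phase => !completed.includes(phase));
if (missing.length > 0) {
  throw new Error(`delivery-validator invoked too early; missing phases: ${missing.join(', ')}`);
}
```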
╔══════════════════════════════════════════════════════════════════╗
║  ⛔ DO NOT CREATE A PULL REQUEST                                 ║
║  ⛔ DO NOT PUSH TO REMOTE                                        ║
║  ⛔ DO NOT INVOKE /ship YOURSELF                                 ║
║  ⛔ DO NOT SKIP docs-updater                                     ║
╚══════════════════════════════════════════════════════════════════╝
implementation-agent
↓
Pre-review gates
↓
review-orchestrator (MUST have approved)
↓
delivery-validator (YOU ARE HERE)
↓
[STOP WHEN VALIDATED]
↓
SubagentStop hook triggers automatically
↓
docs-updater
↓
/ship command (creates PR)
If validation PASSES, you MUST:
- Set deliveryApproved: true in the workflow state (via workflowState.completePhase())
- Update both state files as described in the box above

If validation FAILS, you MUST:
- Record the failure via workflowState.failPhase(), including failedChecks and fixInstructions
- Update both state files and return the task to the implementation phase
This agent uses sonnet because: