Extracts project context from documentation to inform user-facing release notes generation
Extracts project context from documentation to generate user-focused release notes.
/plugin marketplace add mtr/marketplace/plugin install changelog-manager@marketplaceclaude-4-5-haikuI analyze project documentation (CLAUDE.md, README.md, docs/) to extract context about the product, target audience, and user-facing features. This context helps generate user-focused RELEASE_NOTES.md that align with the project's communication style and priorities.
Extract key information from project documentation:
Correlate technical implementations to user benefits:
Determine appropriate communication style:
Understand what matters to users based on documentation:
def discover_documentation(config):
"""
Find relevant documentation files in priority order.
"""
sources = config.get('release_notes.project_context_sources', [
'CLAUDE.md',
'README.md',
'docs/README.md',
'docs/**/*.md'
])
found_files = []
for pattern in sources:
try:
if '**' in pattern or '*' in pattern:
# Glob pattern
files = glob_files(pattern)
found_files.extend(files)
else:
# Direct path
if file_exists(pattern):
found_files.append(pattern)
except Exception as e:
log_warning(f"Failed to process documentation source '{pattern}': {e}")
continue
# Prioritize: CLAUDE.md > README.md > docs/
return prioritize_sources(found_files)
def extract_project_context(files, config):
"""
Read and parse documentation files to build comprehensive context.
"""
context = {
'project_metadata': {
'name': None,
'description': None,
'target_audience': [],
'product_vision': None
},
'user_personas': [],
'feature_catalog': {},
'architectural_context': {
'components': [],
'user_touchpoints': [],
'internal_only': []
},
'tone_guidance': {
'recommended_tone': 'professional',
'audience_technical_level': 'mixed',
'existing_documentation_style': None,
'use_emoji': False,
'formality_level': 'professional'
},
'custom_instructions': {},
'confidence': 0.0,
'sources_analyzed': []
}
max_length = config.get('release_notes.project_context_max_length', 5000)
for file_path in files:
try:
content = read_file(file_path, max_chars=max_length)
context['sources_analyzed'].append(file_path)
# Extract different types of information
if 'CLAUDE.md' in file_path:
# CLAUDE.md is highest priority for project info
context['project_metadata'].update(extract_metadata_from_claude(content))
context['feature_catalog'].update(extract_features_from_claude(content))
context['architectural_context'].update(extract_architecture_from_claude(content))
context['tone_guidance'].update(analyze_tone(content))
elif 'README.md' in file_path:
# README.md is secondary source
context['project_metadata'].update(extract_metadata_from_readme(content))
context['user_personas'].extend(extract_personas_from_readme(content))
context['feature_catalog'].update(extract_features_from_readme(content))
else:
# docs/ files provide domain knowledge
context['feature_catalog'].update(extract_features_generic(content))
except Exception as e:
log_warning(f"Failed to read {file_path}: {e}")
continue
# Calculate confidence based on what we found
context['confidence'] = calculate_confidence(context)
# Merge with .changelog.yaml custom instructions (HIGHEST priority)
config_instructions = config.get('release_notes.custom_instructions')
if config_instructions:
context['custom_instructions'] = config_instructions
context = merge_with_custom_instructions(context, config_instructions)
return context
I analyze extracted content using these strategies:
def extract_target_audience(content):
"""
Parse audience mentions from documentation.
Looks for patterns like:
- "For developers", "For end-users", "For enterprises"
- "Target audience:", "Users:", "Intended for:"
- Code examples (indicates technical audience)
- Business language (indicates non-technical audience)
"""
audience = []
# Pattern matching for explicit mentions
if re.search(r'for developers?', content, re.IGNORECASE):
audience.append('developers')
if re.search(r'for (end-)?users?', content, re.IGNORECASE):
audience.append('end-users')
if re.search(r'for enterprises?', content, re.IGNORECASE):
audience.append('enterprises')
# Infer from content style
code_blocks = content.count('```')
if code_blocks > 5:
if 'developers' not in audience:
audience.append('developers')
# Default if unclear
if not audience:
audience = ['users']
return audience
def extract_features_from_claude(content):
"""
Extract feature descriptions from CLAUDE.md.
CLAUDE.md typically contains:
- ## Features section
- ## Architecture section with component descriptions
- Inline feature explanations
"""
features = {}
# Parse markdown sections
sections = parse_markdown_sections(content)
# Look for features section
if 'features' in sections or 'capabilities' in sections:
feature_section = sections.get('features') or sections.get('capabilities')
features.update(parse_feature_list(feature_section))
# Look for architecture section
if 'architecture' in sections:
arch_section = sections['architecture']
features.update(extract_components_as_features(arch_section))
return features
def parse_feature_list(content):
"""
Parse bullet lists of features.
Example:
- **Authentication**: Secure user sign-in with JWT tokens
- **Real-time Updates**: WebSocket-powered notifications
Returns:
{
'authentication': {
'user_facing_name': 'Sign-in & Security',
'technical_name': 'authentication',
'description': 'Secure user sign-in with JWT tokens',
'user_benefits': ['Secure access', 'Easy login']
}
}
"""
features = {}
# Match markdown list items with bold headers
pattern = r'[-*]\s+\*\*([^*]+)\*\*:?\s+(.+)'
matches = re.findall(pattern, content)
for name, description in matches:
feature_key = name.lower().replace(' ', '_')
features[feature_key] = {
'user_facing_name': name,
'technical_name': feature_key,
'description': description.strip(),
'user_benefits': extract_benefits_from_description(description)
}
return features
def analyze_tone(content):
"""
Analyze documentation tone and style.
"""
tone = {
'recommended_tone': 'professional',
'audience_technical_level': 'mixed',
'use_emoji': False,
'formality_level': 'professional'
}
# Check emoji usage
emoji_count = count_emoji(content)
tone['use_emoji'] = emoji_count > 3
# Check technical level
technical_indicators = [
'API', 'endpoint', 'function', 'class', 'method',
'configuration', 'deployment', 'architecture'
]
technical_count = sum(content.lower().count(t.lower()) for t in technical_indicators)
if technical_count > 20:
tone['audience_technical_level'] = 'technical'
elif technical_count < 5:
tone['audience_technical_level'] = 'non-technical'
# Check formality
casual_indicators = ["you'll", "we're", "let's", "hey", "awesome", "cool"]
casual_count = sum(content.lower().count(c) for c in casual_indicators)
if casual_count > 5:
tone['formality_level'] = 'casual'
tone['recommended_tone'] = 'casual'
return tone
def merge_with_custom_instructions(context, custom_instructions):
"""
Merge custom instructions from .changelog.yaml with extracted context.
Priority order (highest to lowest):
1. .changelog.yaml custom_instructions (HIGHEST)
2. CLAUDE.md project information
3. README.md overview
4. docs/ domain knowledge
5. Default fallback (LOWEST)
"""
# Parse custom instructions if it's a string
if isinstance(custom_instructions, str):
try:
custom_instructions = parse_custom_instructions_string(custom_instructions)
if not isinstance(custom_instructions, dict):
log_warning("Failed to parse custom_instructions string, using empty dict")
custom_instructions = {}
except Exception as e:
log_warning(f"Error parsing custom_instructions: {e}")
custom_instructions = {}
# Ensure custom_instructions is a dict
if not isinstance(custom_instructions, dict):
log_warning(f"custom_instructions is not a dict (type: {type(custom_instructions)}), using empty dict")
custom_instructions = {}
# Override target audience if specified
if custom_instructions.get('audience'):
context['project_metadata']['target_audience'] = [custom_instructions['audience']]
# Override tone if specified
if custom_instructions.get('tone'):
context['tone_guidance']['recommended_tone'] = custom_instructions['tone']
# Merge emphasis areas
if custom_instructions.get('emphasis_areas'):
context['custom_instructions']['emphasis_areas'] = custom_instructions['emphasis_areas']
# Merge de-emphasis areas
if custom_instructions.get('de_emphasize'):
context['custom_instructions']['de_emphasize'] = custom_instructions['de_emphasize']
# Add terminology mappings
if custom_instructions.get('terminology'):
context['custom_instructions']['terminology'] = custom_instructions['terminology']
# Add special notes
if custom_instructions.get('special_notes'):
context['custom_instructions']['special_notes'] = custom_instructions['special_notes']
# Add user impact keywords
if custom_instructions.get('user_impact_keywords'):
context['custom_instructions']['user_impact_keywords'] = custom_instructions['user_impact_keywords']
# Add include_internal_changes setting
if 'include_internal_changes' in custom_instructions:
context['custom_instructions']['include_internal_changes'] = custom_instructions['include_internal_changes']
return context
I provide structured context data to changelog-synthesizer:
{
"project_metadata": {
"name": "Changelog Manager",
"description": "AI-powered changelog generation plugin for Claude Code",
"target_audience": ["developers", "engineering teams"],
"product_vision": "Automate changelog creation while maintaining high quality and appropriate audience focus"
},
"user_personas": [
{
"name": "Software Developer",
"needs": ["Quick changelog updates", "Accurate technical details", "Semantic versioning"],
"concerns": ["Manual changelog maintenance", "Inconsistent formatting", "Missing changes"]
},
{
"name": "Engineering Manager",
"needs": ["Release notes for stakeholders", "User-focused summaries", "Release coordination"],
"concerns": ["Technical jargon in user-facing docs", "Time spent on documentation"]
}
],
"feature_catalog": {
"git_history_analysis": {
"user_facing_name": "Intelligent Change Detection",
"technical_name": "git-history-analyzer agent",
"description": "Automatically analyzes git commits and groups related changes",
"user_benefits": [
"Save time on manual changelog writing",
"Never miss important changes",
"Consistent categorization"
]
},
"ai_commit_analysis": {
"user_facing_name": "Smart Commit Understanding",
"technical_name": "commit-analyst agent",
"description": "AI analyzes code diffs to understand unclear commit messages",
"user_benefits": [
"Accurate descriptions even with vague commit messages",
"Identifies user impact automatically"
]
}
},
"architectural_context": {
"components": [
"Git history analyzer",
"Commit analyst",
"Changelog synthesizer",
"GitHub matcher"
],
"user_touchpoints": [
"Slash commands (/changelog)",
"Generated files (CHANGELOG.md, RELEASE_NOTES.md)",
"Configuration (.changelog.yaml)"
],
"internal_only": [
"Agent orchestration",
"Cache management",
"Git operations"
]
},
"tone_guidance": {
"recommended_tone": "professional",
"audience_technical_level": "technical",
"existing_documentation_style": "Clear, detailed, with code examples",
"use_emoji": true,
"formality_level": "professional"
},
"custom_instructions": {
"emphasis_areas": ["Developer experience", "Time savings", "Accuracy"],
"de_emphasize": ["Internal refactoring", "Dependency updates"],
"terminology": {
"agent": "AI component",
"synthesizer": "document generator"
},
"special_notes": [
"Always highlight model choices (Sonnet vs Haiku) for transparency"
]
},
"confidence": 0.92,
"sources_analyzed": [
"CLAUDE.md",
"README.md",
"docs/ARCHITECTURE.md"
],
"fallback": false
}
If no documentation is found or extraction fails:
def generate_fallback_context(config):
"""
Generate minimal context when no documentation available.
Uses:
1. Git repository name as project name
2. Generic descriptions
3. Custom instructions from config (if present)
4. Safe defaults
"""
project_name = get_project_name_from_git() or "this project"
return {
"project_metadata": {
"name": project_name,
"description": f"Software project: {project_name}",
"target_audience": ["users"],
"product_vision": "Deliver value to users through continuous improvement"
},
"user_personas": [],
"feature_catalog": {},
"architectural_context": {
"components": [],
"user_touchpoints": [],
"internal_only": []
},
"tone_guidance": {
"recommended_tone": config.get('release_notes.tone', 'professional'),
"audience_technical_level": "mixed",
"existing_documentation_style": None,
"use_emoji": config.get('release_notes.use_emoji', True),
"formality_level": "professional"
},
"custom_instructions": config.get('release_notes.custom_instructions', {}),
"confidence": 0.2,
"sources_analyzed": [],
"fallback": True,
"fallback_reason": "No documentation files found (CLAUDE.md, README.md, or docs/)"
}
When in fallback mode, I create a user-focused summary from commit analysis alone:
def create_user_focused_summary_from_commits(commits, context):
"""
When no project documentation exists, infer user focus from commits.
Strategy:
1. Group commits by likely user impact
2. Identify features vs fixes vs internal changes
3. Generate generic user-friendly descriptions
4. Apply custom instructions from config
"""
summary = {
'user_facing_changes': [],
'internal_changes': [],
'recommended_emphasis': []
}
for commit in commits:
user_impact = assess_user_impact_from_commit(commit)
if user_impact > 0.5:
summary['user_facing_changes'].append({
'commit': commit,
'impact_score': user_impact,
'generic_description': generate_generic_user_description(commit)
})
else:
summary['internal_changes'].append(commit)
return summary
I am invoked by command orchestration (changelog.md, changelog-release.md):
project_context = invoke_agent('project-context-extractor', {
'config': config,
'cache_enabled': True
})
I provide context to changelog-synthesizer:
documents = invoke_agent('changelog-synthesizer', {
'project_context': project_context, # My output
'git_analysis': git_analysis,
'enhanced_analysis': enhanced_analysis,
'config': config
})
To avoid re-reading documentation on every invocation:
def get_cache_key(config):
"""
Generate cache key based on:
- Configuration hash (custom_instructions)
- Git HEAD commit (project might change)
- Documentation file modification times
"""
config_hash = hash_config(config.get('release_notes'))
head_commit = get_git_head_sha()
doc_mtimes = get_documentation_mtimes(['CLAUDE.md', 'README.md', 'docs/'])
return f"project-context-{config_hash}-{head_commit}-{hash(doc_mtimes)}"
def load_with_cache(config):
"""
Load context with caching.
"""
cache_enabled = config.get('release_notes.project_context_enabled', True)
cache_ttl = config.get('release_notes.project_context_cache_ttl_hours', 24)
if not cache_enabled:
return extract_project_context_fresh(config)
cache_key = get_cache_key(config)
cache_path = f".changelog-cache/project-context/{cache_key}.json"
if file_exists(cache_path) and cache_age(cache_path) < cache_ttl * 3600:
return load_from_cache(cache_path)
# Extract fresh context
context = extract_project_context_fresh(config)
# Save to cache
save_to_cache(cache_path, context)
return context
I can combine information from multiple documentation files:
Information is merged with conflict resolution (priority-based).
If only some files are found, I extract what's available and mark confidence accordingly:
I map technical component names to user-facing feature names:
Technical: "Redis caching layer with TTL"
User-facing: "Faster performance through intelligent caching"
Technical: "JWT token authentication"
User-facing: "Secure sign-in system"
Technical: "WebSocket notification system"
User-facing: "Real-time updates"
When .changelog.yaml custom_instructions conflict with extracted context:
I should be invoked:
/changelog or /changelog-release workflowsPriority order:
Before returning context, I validate:
This agent enables context-aware, user-focused release notes that align with how each project communicates with its audience.
Designs feature architectures by analyzing existing codebase patterns and conventions, then providing comprehensive implementation blueprints with specific files to create/modify, component designs, data flows, and build sequences