Files
ClaudeForge/skill/analyzer.py
T
Claude a45e6f5dbd feat(plugin): ship as Claude Code plugin with 150-line cap and sync command
Turns ClaudeForge into an installable Claude Code plugin and adds the
missing pieces for a clean lifecycle: hard line-cap enforcement, modular
chaining via @path imports, a sync/cleanup command, and Explore-agent
delegation for project discovery.

- .claude-plugin/plugin.json: plugin manifest registering both skills,
  both commands, and the guardian agent (installable via /plugin marketplace
  add alirezarezvani/ClaudeForge && /plugin install claudeforge)
- skill/validator.py: MAX_RECOMMENDED_LINES 300 -> 150 (warning at 120)
- skill/template_selector.py: target_lines capped at 150 across all
  team sizes (solo 75 / small 100 / medium 125 / large 150) so any
  single CLAUDE.md fits within the cap; bigger projects spread content
  across modular sub-files
- skill/analyzer.py: length thresholds, quality scoring, and recommendations
  rebased on the 150 cap (was 300/400)
- skill/generator.py: modular root now emits @path imports next to the
  human-readable links; every sub-CLAUDE.md gets a back-link header
  pointing to ../CLAUDE.md (markdown + @import) for bidirectional chaining
- command/sync-claude-md.md: new /sync-claude-md command that inventories
  every CLAUDE.md, prunes stale references, enforces the 150 cap by
  splitting into sub-files, and repairs the root <-> sub chain
- command/enhance-claude-md.md: discovery phase now delegates the deep
  codebase scan to the Explore subagent to keep context lean
- install.sh / install.ps1: each command in command/ installs as its own
  ~/.claude/commands/<name>.md (legacy bundle backed up on upgrade)
- skill/SKILL.md, CLAUDE.md, README.md, CHANGELOG.md: docs updated for
  the plugin install path, sync command, and new line cap

Verified via smoke test: validator constants, template targets, generator
output line counts across 5 presets (all <= 150), context files with
backlinks, @-import chain in modular root, idempotent merge_with_existing,
validator status transitions at the new cap, analyzer quality differential,
and plugin manifest JSON shape with all referenced paths existing on disk.
2026-05-19 02:03:59 +00:00

390 lines
13 KiB
Python

"""
CLAUDE.md File Analyzer
Analyzes existing CLAUDE.md files to identify structure, sections, and quality issues.
Provides detailed analysis reports with quality scores and actionable recommendations.
"""
from typing import Dict, List, Any, Tuple
import re
class CLAUDEMDAnalyzer:
    """Analyzes CLAUDE.md files for structure, completeness, and quality.

    The analyzer works on the raw Markdown text. Headings are recognised only
    outside fenced code blocks, so shell comments such as ``# install deps``
    inside a fenced block are not mistaken for sections.
    """

    # Standard sections that should be present in most CLAUDE.md files
    RECOMMENDED_SECTIONS = [
        "Quick Navigation",
        "Core Principles",
        "Tech Stack",
        "Workflow Instructions",
        "Quality Checklist",
        "File Organization",
        "Common Commands",
        "References"
    ]

    # Optional but valuable sections
    OPTIONAL_SECTIONS = [
        "Testing Requirements",
        "Error Handling Patterns",
        "Documentation Standards",
        "Performance Guidelines",
        "Security Checklist",
        "Deployment Process",
        "Troubleshooting"
    ]

    # Sections whose absence is reported as a high-severity issue
    CRITICAL_SECTIONS = ["Core Principles", "Tech Stack", "Workflow"]

    # Missing sections that are recommended first
    HIGH_PRIORITY_SECTIONS = ["Core Principles", "Tech Stack", "Workflow Instructions"]

    # Length thresholds, in lines
    MAX_LINES = 150        # hard cap for a single CLAUDE.md
    WARN_LINES = 120       # "approaching the cap" warning
    CRITICAL_LINES = 250   # far beyond the cap
    MIN_LINES = 30         # below this the file is considered too short

    # Markers that indicate unfinished content
    PLACEHOLDERS = ['TODO', 'TBD', 'FIXME', '[Insert', '[Add']

    # Technology names used to score content specificity (matched lower-case)
    TECH_KEYWORDS = [
        'typescript', 'python', 'react', 'vue', 'angular', 'node',
        'fastapi', 'django', 'postgresql', 'mongodb', 'docker'
    ]

    # Keywords hinting at a modular layout. All entries are lower-case because
    # they are matched against lower-cased content.
    MODULAR_STRUCTURE_KEYWORDS = [
        'backend/claude.md', 'frontend/claude.md', 'subdirectory', 'context-specific'
    ]
    MODULAR_SCORE_KEYWORDS = [
        'backend/claude.md', 'frontend/claude.md', 'context-specific',
        'subdirectory', 'modular'
    ]

    # ATX heading: 1-6 '#' characters at column 0 followed by whitespace
    _HEADING_RE = re.compile(r'^(#{1,6})[ \t]+(.*)$')
    _FENCE_MARKERS = ('```', '~~~')

    def __init__(self, content: str):
        """
        Initialize analyzer with CLAUDE.md file content.

        Args:
            content: Full text content of CLAUDE.md file
        """
        self.content = content
        # splitlines() does not produce a phantom empty line for the trailing
        # newline, so a 150-line file is counted as 150 lines, not 151.
        self.lines = content.splitlines()
        self.line_count = len(self.lines)
        self.char_count = len(content)
        self.sections: List[str] = []
        self.subsections: List[str] = []
        self._sections_detected = False

    def analyze_file(self) -> Dict[str, Any]:
        """
        Perform comprehensive analysis of CLAUDE.md file.

        Returns:
            Dictionary containing full analysis results
        """
        return {
            "file_metrics": self._get_file_metrics(),
            "sections_found": self.detect_sections(),
            "missing_sections": self._identify_missing_sections(),
            "structure_analysis": self._analyze_structure(),
            "issues": self._detect_issues(),
            "quality_score": self.calculate_quality_score(),
            "recommendations": self.generate_recommendations()
        }

    def _parse_headings(self) -> List[Tuple[int, str, int]]:
        """Return ``(level, title, line_index)`` for each heading outside code fences."""
        headings = []
        in_fence = False
        for index, line in enumerate(self.lines):
            if line.lstrip().startswith(self._FENCE_MARKERS):
                # Opening or closing fence: toggle and never treat it as a heading.
                in_fence = not in_fence
                continue
            if in_fence:
                continue
            match = self._HEADING_RE.match(line)
            if match:
                headings.append((len(match.group(1)), match.group(2).strip(), index))
        return headings

    def _ensure_sections(self) -> None:
        """Parse sections once so helpers work even when called directly."""
        if not getattr(self, "_sections_detected", False):
            self.detect_sections()

    def _section_present(self, name: str) -> bool:
        """Case-insensitive partial match of ``name`` against detected sections."""
        needle = name.lower()
        return any(needle in section.lower() for section in self.sections)

    def _has_main_title(self) -> bool:
        """True when the first non-blank line is a level-1 heading."""
        first_line = next((line for line in self.lines if line.strip()), '')
        return first_line.startswith('# ')

    def _has_links(self) -> bool:
        """True when the content appears to contain a Markdown link."""
        return '[' in self.content and '](' in self.content

    def _has_empty_sections(self) -> bool:
        """
        True when a level-2+ heading has no body before the next heading of
        the same or a shallower level (or before the end of the file).

        A section that contains only deeper subsections is not empty.
        """
        headings = self._parse_headings()
        followers = headings[1:] + [None]
        for (level, _title, index), following in zip(headings, followers):
            if level < 2:
                # The document title is routinely followed directly by a section.
                continue
            end = following[2] if following is not None else len(self.lines)
            if any(line.strip() for line in self.lines[index + 1:end]):
                continue
            if following is None or following[0] <= level:
                return True
        return False

    def _get_file_metrics(self) -> Dict[str, int]:
        """Calculate basic file metrics."""
        return {
            "char_count": self.char_count,
            "line_count": self.line_count,
            "word_count": len(self.content.split()),
            "heading_count": len(self._parse_headings()),
            # Each fenced block contributes an opening and a closing marker.
            "code_block_count": self.content.count('```') // 2
        }

    def detect_sections(self) -> List[str]:
        """
        Detect all sections (headings) in the file.

        Level-2 headings are stored in ``self.sections`` and level-3 headings
        in ``self.subsections``.

        Returns:
            List of section titles found
        """
        headings = self._parse_headings()
        self.sections = [title for level, title, _ in headings if level == 2]
        self.subsections = [title for level, title, _ in headings if level == 3]
        self._sections_detected = True
        return self.sections

    def _identify_missing_sections(self) -> List[str]:
        """
        Identify recommended sections that are missing.

        Returns:
            List of missing section names
        """
        self._ensure_sections()
        return [
            recommended for recommended in self.RECOMMENDED_SECTIONS
            if not self._section_present(recommended)
        ]

    def _analyze_structure(self) -> Dict[str, Any]:
        """
        Analyze the structural quality of the file.

        Returns:
            Dictionary with structure analysis
        """
        self._ensure_sections()
        content_lower = self.content.lower()
        mentions_modular = any(
            keyword in content_lower for keyword in self.MODULAR_STRUCTURE_KEYWORDS
        )
        return {
            "has_main_title": self._has_main_title(),
            "has_navigation_section": self._section_present('navigation'),
            "has_code_examples": '```' in self.content,
            "has_links": self._has_links(),
            "mentions_modular_architecture": mentions_modular,
            "section_count": len(self.sections),
            "subsection_count": len(self.subsections),
            "hierarchy_depth": self._calculate_hierarchy_depth()
        }

    def _calculate_hierarchy_depth(self) -> int:
        """Calculate maximum heading depth (1 when no heading is present)."""
        return max((level for level, _, _ in self._parse_headings()), default=1)

    def _detect_issues(self) -> List[Dict[str, str]]:
        """
        Detect potential issues with the file.

        Returns:
            List of issue dictionaries with type, severity, and message
        """
        self._ensure_sections()
        issues: List[Dict[str, str]] = []

        # Check file length
        if self.line_count > self.CRITICAL_LINES:
            issues.append({
                "type": "length_critical",
                "severity": "high",
                "message": f"File is too long ({self.line_count} lines). Hard cap is {self.MAX_LINES}; split into modular files."
            })
        elif self.line_count > self.MAX_LINES:
            issues.append({
                "type": "length_warning",
                "severity": "high",
                "message": f"File exceeds the {self.MAX_LINES}-line cap ({self.line_count} lines). Split now."
            })
        elif self.line_count > self.WARN_LINES:
            issues.append({
                "type": "length_warning",
                "severity": "medium",
                "message": f"File is approaching the {self.MAX_LINES}-line cap ({self.line_count} lines)."
            })

        # Check if file is too short
        if self.line_count < self.MIN_LINES:
            issues.append({
                "type": "too_short",
                "severity": "medium",
                "message": f"File is very short ({self.line_count} lines). May need more guidance."
            })

        # Check for missing critical sections
        missing_critical = [
            name for name in self.CRITICAL_SECTIONS
            if not self._section_present(name)
        ]
        if missing_critical:
            issues.append({
                "type": "missing_critical_sections",
                "severity": "high",
                "message": f"Missing critical sections: {', '.join(missing_critical)}"
            })

        # Check for placeholder text (only the first marker found is reported)
        placeholder = next(
            (marker for marker in self.PLACEHOLDERS if marker in self.content), None
        )
        if placeholder is not None:
            issues.append({
                "type": "placeholder_text",
                "severity": "medium",
                "message": f"Contains placeholder text: '{placeholder}'"
            })

        # Check for empty sections
        if self._has_empty_sections():
            issues.append({
                "type": "empty_sections",
                "severity": "low",
                "message": "Some sections appear to be empty"
            })

        return issues

    def calculate_quality_score(self) -> int:
        """
        Calculate overall quality score (0-100).

        Scoring breakdown:
        - Length appropriateness: 25 points
        - Section completeness: 25 points
        - Formatting quality: 20 points
        - Content specificity: 15 points
        - Modular organization: 15 points

        Returns:
            Quality score between 0 and 100
        """
        self._ensure_sections()
        score = 0

        # Length appropriateness (25 points). Anything above the hard cap
        # loses points sharply because it indicates context bloat.
        if 50 <= self.line_count <= self.MAX_LINES:
            score += 25
        elif self.MIN_LINES <= self.line_count < 50 or self.MAX_LINES < self.line_count <= 200:
            score += 15
        elif self.line_count > 200:
            score += 5
        else:
            score += 10

        # Section completeness (25 points)
        found_count = sum(
            1 for name in self.RECOMMENDED_SECTIONS if self._section_present(name)
        )
        score += int((found_count / len(self.RECOMMENDED_SECTIONS)) * 25)

        # Formatting quality (20 points): 5 points per satisfied check
        formatting_checks = (
            self._has_main_title(),
            '```' in self.content,
            self._has_links(),
            self._section_present('navigation'),
        )
        score += 5 * sum(1 for passed in formatting_checks if passed)

        # Content specificity (15 points): specific tech mentions, not generic
        content_lower = self.content.lower()
        tech_mentions = sum(1 for keyword in self.TECH_KEYWORDS if keyword in content_lower)
        if tech_mentions >= 3:
            score += 15
        elif tech_mentions >= 2:
            score += 10
        elif tech_mentions >= 1:
            score += 5

        # Modular organization (15 points)
        modular_mentions = sum(
            1 for keyword in self.MODULAR_SCORE_KEYWORDS if keyword in content_lower
        )
        if modular_mentions >= 2:
            score += 15
        elif modular_mentions >= 1:
            score += 10

        return min(score, 100)

    def generate_recommendations(self) -> List[str]:
        """
        Generate actionable recommendations for improvement.

        Returns:
            List of recommendation strings (at most 8, most critical first)
        """
        self._ensure_sections()
        recommendations: List[str] = []
        missing = self._identify_missing_sections()
        issues = self._detect_issues()

        # Critical issues first
        for issue in issues:
            if issue['severity'] != 'high':
                continue
            if issue['type'] == 'length_critical':
                recommendations.append(
                    "CRITICAL: Split into modular files - create backend/CLAUDE.md, "
                    "frontend/CLAUDE.md, etc."
                )
            elif issue['type'] == 'missing_critical_sections':
                recommendations.append(f"CRITICAL: {issue['message']}")

        # Length recommendations
        if self.line_count > self.MAX_LINES:
            recommendations.append(
                f"Reduce this CLAUDE.md to <={self.MAX_LINES} lines (hard cap) - move detail to context-specific files and chain them via @path imports"
            )
        elif self.line_count < self.MIN_LINES:
            recommendations.append(
                "Expand with essential sections: Core Principles, Tech Stack, Workflow Instructions"
            )

        # Missing sections
        if missing:
            missing_high_priority = [s for s in missing if s in self.HIGH_PRIORITY_SECTIONS]
            if missing_high_priority:
                recommendations.append(
                    f"Add essential sections: {', '.join(missing_high_priority)}"
                )
            missing_optional = [s for s in missing if s not in self.HIGH_PRIORITY_SECTIONS]
            # Skip when there is nothing to list, and when the list would be
            # too long to be a useful nudge.
            if missing_optional and len(missing_optional) <= 3:
                recommendations.append(
                    f"Consider adding: {', '.join(missing_optional)}"
                )

        # Structure recommendations
        structure = self._analyze_structure()
        if not structure['has_navigation_section'] and self.line_count > 100:
            recommendations.append(
                "Add Quick Navigation section with links to context-specific guides"
            )
        if not structure['has_code_examples']:
            recommendations.append(
                "Include code examples for complex patterns to improve clarity"
            )

        # Modular architecture
        if self.line_count > 200 and not structure['mentions_modular_architecture']:
            recommendations.append(
                "Consider implementing modular architecture - separate files for major components"
            )

        # Quality improvements
        quality_score = self.calculate_quality_score()
        if quality_score < 60:
            recommendations.append(
                f"Overall quality score is {quality_score}/100 - prioritize critical improvements"
            )

        return recommendations[:8]  # Limit to top 8 recommendations