Files
ClaudeForge/skill/analyzer.py
T

390 lines
13 KiB
Python

"""
CLAUDE.md File Analyzer
Analyzes existing CLAUDE.md files to identify structure, sections, and quality issues.
Provides detailed analysis reports with quality scores and actionable recommendations.
"""
from typing import Dict, List, Any, Tuple
import re
class CLAUDEMDAnalyzer:
"""Analyzes CLAUDE.md files for structure, completeness, and quality."""
# Standard sections that should be present in most CLAUDE.md files
RECOMMENDED_SECTIONS = [
"Quick Navigation",
"Core Principles",
"Tech Stack",
"Workflow Instructions",
"Quality Checklist",
"File Organization",
"Common Commands",
"References"
]
# Optional but valuable sections
OPTIONAL_SECTIONS = [
"Testing Requirements",
"Error Handling Patterns",
"Documentation Standards",
"Performance Guidelines",
"Security Checklist",
"Deployment Process",
"Troubleshooting"
]
def __init__(self, content: str):
"""
Initialize analyzer with CLAUDE.md file content.
Args:
content: Full text content of CLAUDE.md file
"""
self.content = content
self.lines = content.split('\n')
self.line_count = len(self.lines)
self.char_count = len(content)
self.sections = []
self.subsections = []
def analyze_file(self) -> Dict[str, Any]:
"""
Perform comprehensive analysis of CLAUDE.md file.
Returns:
Dictionary containing full analysis results
"""
return {
"file_metrics": self._get_file_metrics(),
"sections_found": self.detect_sections(),
"missing_sections": self._identify_missing_sections(),
"structure_analysis": self._analyze_structure(),
"issues": self._detect_issues(),
"quality_score": self.calculate_quality_score(),
"recommendations": self.generate_recommendations()
}
def _get_file_metrics(self) -> Dict[str, int]:
"""Calculate basic file metrics."""
return {
"char_count": self.char_count,
"line_count": self.line_count,
"word_count": len(self.content.split()),
"heading_count": len([line for line in self.lines if line.startswith('#')]),
"code_block_count": self.content.count('```') // 2
}
def detect_sections(self) -> List[str]:
"""
Detect all sections (headings) in the file.
Returns:
List of section titles found
"""
sections = []
subsections = []
for line in self.lines:
# Match markdown headings (## or ###)
if line.startswith('## '):
section_title = line[3:].strip()
sections.append(section_title)
elif line.startswith('### '):
subsection_title = line[4:].strip()
subsections.append(subsection_title)
self.sections = sections
self.subsections = subsections
return sections
def _identify_missing_sections(self) -> List[str]:
"""
Identify recommended sections that are missing.
Returns:
List of missing section names
"""
if not self.sections:
self.detect_sections()
missing = []
for recommended in self.RECOMMENDED_SECTIONS:
# Check if section exists (case-insensitive, partial match)
if not any(recommended.lower() in section.lower() for section in self.sections):
missing.append(recommended)
return missing
def _analyze_structure(self) -> Dict[str, Any]:
"""
Analyze the structural quality of the file.
Returns:
Dictionary with structure analysis
"""
has_title = self.content.startswith('# ')
has_navigation = any('navigation' in s.lower() for s in self.sections)
has_code_examples = '```' in self.content
has_links = '[' in self.content and '](' in self.content
# Check for modular architecture mentions
mentions_modular = any(
keyword in self.content.lower()
for keyword in ['backend/CLAUDE.md', 'frontend/CLAUDE.md', 'subdirectory', 'context-specific']
)
return {
"has_main_title": has_title,
"has_navigation_section": has_navigation,
"has_code_examples": has_code_examples,
"has_links": has_links,
"mentions_modular_architecture": mentions_modular,
"section_count": len(self.sections),
"subsection_count": len(self.subsections),
"hierarchy_depth": self._calculate_hierarchy_depth()
}
def _calculate_hierarchy_depth(self) -> int:
"""Calculate maximum heading depth."""
max_depth = 1 # Assumes at least # title
for line in self.lines:
if line.startswith('#'):
depth = len(line) - len(line.lstrip('#'))
max_depth = max(max_depth, depth)
return max_depth
def _detect_issues(self) -> List[Dict[str, str]]:
"""
Detect potential issues with the file.
Returns:
List of issue dictionaries with type, severity, and message
"""
issues = []
# Check file length
if self.line_count > 250:
issues.append({
"type": "length_critical",
"severity": "high",
"message": f"File is too long ({self.line_count} lines). Hard cap is 150; split into modular files."
})
elif self.line_count > 150:
issues.append({
"type": "length_warning",
"severity": "high",
"message": f"File exceeds the 150-line cap ({self.line_count} lines). Split now."
})
elif self.line_count > 120:
issues.append({
"type": "length_warning",
"severity": "medium",
"message": f"File is approaching the 150-line cap ({self.line_count} lines)."
})
# Check if file is too short
if self.line_count < 30:
issues.append({
"type": "too_short",
"severity": "medium",
"message": f"File is very short ({self.line_count} lines). May need more guidance."
})
# Check for missing critical sections
critical_sections = ["Core Principles", "Tech Stack", "Workflow"]
missing_critical = [
s for s in critical_sections
if not any(s.lower() in section.lower() for section in self.sections)
]
if missing_critical:
issues.append({
"type": "missing_critical_sections",
"severity": "high",
"message": f"Missing critical sections: {', '.join(missing_critical)}"
})
# Check for placeholder text
placeholders = ['TODO', 'TBD', 'FIXME', '[Insert', '[Add']
for placeholder in placeholders:
if placeholder in self.content:
issues.append({
"type": "placeholder_text",
"severity": "medium",
"message": f"Contains placeholder text: '{placeholder}'"
})
break
# Check for empty sections
empty_section_pattern = r'##\s+[^\n]+\n\s*\n\s*##'
if re.search(empty_section_pattern, self.content):
issues.append({
"type": "empty_sections",
"severity": "low",
"message": "Some sections appear to be empty"
})
return issues
def calculate_quality_score(self) -> int:
"""
Calculate overall quality score (0-100).
Scoring breakdown:
- Length appropriateness: 25 points
- Section completeness: 25 points
- Formatting quality: 20 points
- Content specificity: 15 points
- Modular organization: 15 points
Returns:
Quality score between 0 and 100
"""
score = 0
# Length appropriateness (25 points). Hard cap is 150 lines; anything
# above that loses points sharply because it indicates context bloat.
if 50 <= self.line_count <= 150:
score += 25
elif 30 <= self.line_count < 50 or 150 < self.line_count <= 200:
score += 15
elif self.line_count > 200:
score += 5
else:
score += 10
# Section completeness (25 points)
if not self.sections:
self.detect_sections()
found_count = len([
s for s in self.RECOMMENDED_SECTIONS
if any(s.lower() in section.lower() for section in self.sections)
])
section_score = (found_count / len(self.RECOMMENDED_SECTIONS)) * 25
score += int(section_score)
# Formatting quality (20 points)
formatting_score = 0
if self.content.startswith('# '):
formatting_score += 5
if '```' in self.content:
formatting_score += 5
if '[' in self.content and '](' in self.content:
formatting_score += 5
if any('navigation' in s.lower() for s in self.sections):
formatting_score += 5
score += formatting_score
# Content specificity (15 points)
# Check for specific tech mentions (not generic)
tech_keywords = [
'typescript', 'python', 'react', 'vue', 'angular', 'node',
'fastapi', 'django', 'postgresql', 'mongodb', 'docker'
]
content_lower = self.content.lower()
tech_mentions = sum(1 for keyword in tech_keywords if keyword in content_lower)
if tech_mentions >= 3:
score += 15
elif tech_mentions >= 2:
score += 10
elif tech_mentions >= 1:
score += 5
# Modular organization (15 points)
modular_keywords = [
'backend/CLAUDE.md', 'frontend/CLAUDE.md', 'context-specific',
'subdirectory', 'modular'
]
modular_mentions = sum(1 for keyword in modular_keywords if keyword.lower() in content_lower)
if modular_mentions >= 2:
score += 15
elif modular_mentions >= 1:
score += 10
return min(score, 100)
def generate_recommendations(self) -> List[str]:
"""
Generate actionable recommendations for improvement.
Returns:
List of recommendation strings
"""
recommendations = []
# Analyze first to ensure data is available
if not self.sections:
self.detect_sections()
missing = self._identify_missing_sections()
issues = self._detect_issues()
# Critical issues first
for issue in issues:
if issue['severity'] == 'high':
if issue['type'] == 'length_critical':
recommendations.append(
"CRITICAL: Split into modular files - create backend/CLAUDE.md, "
"frontend/CLAUDE.md, etc."
)
elif issue['type'] == 'missing_critical_sections':
recommendations.append(f"CRITICAL: {issue['message']}")
# Length recommendations
if self.line_count > 150:
recommendations.append(
"Reduce this CLAUDE.md to <=150 lines (hard cap) - move detail to context-specific files and chain them via @path imports"
)
elif self.line_count < 30:
recommendations.append(
"Expand with essential sections: Core Principles, Tech Stack, Workflow Instructions"
)
# Missing sections
if missing:
high_priority = ["Core Principles", "Tech Stack", "Workflow Instructions"]
missing_high_priority = [s for s in missing if s in high_priority]
if missing_high_priority:
recommendations.append(
f"Add essential sections: {', '.join(missing_high_priority)}"
)
missing_optional = [s for s in missing if s not in high_priority]
if len(missing_optional) <= 3:
recommendations.append(
f"Consider adding: {', '.join(missing_optional)}"
)
# Structure recommendations
structure = self._analyze_structure()
if not structure['has_navigation_section'] and self.line_count > 100:
recommendations.append(
"Add Quick Navigation section with links to context-specific guides"
)
if not structure['has_code_examples']:
recommendations.append(
"Include code examples for complex patterns to improve clarity"
)
# Modular architecture
if self.line_count > 200 and not structure['mentions_modular_architecture']:
recommendations.append(
"Consider implementing modular architecture - separate files for major components"
)
# Quality improvements
quality_score = self.calculate_quality_score()
if quality_score < 60:
recommendations.append(
f"Overall quality score is {quality_score}/100 - prioritize critical improvements"
)
return recommendations[:8] # Limit to top 8 recommendations