mirror of
https://github.com/alirezarezvani/ClaudeForge.git
synced 2026-07-03 02:13:15 -04:00
e33fa8326b
Wave 3 - adoption hardening. Patterns adapted (in original prose, with
attribution) from MIT-licensed shanraisshan/claude-code-best-practice.
Commands (command/enhance-claude-md.md, command/sync-claude-md.md):
- Add allowed-tools / disallowedTools / argument-hint / when_to_use so the
commands auto-suggest in the slash menu and avoid permission prompts.
- disallowedTools blocks WebFetch + WebSearch on both commands.
- Drop the previous broken hooks block (array-of-{matcher, commands} shape
did not match canonical schema; was never firing).
Skills:
- skill/karpathy-guidelines/SKILL.md: paths: glob over 23 code-file
extensions, so the guardrails auto-load only when editing source, not
markdown or data.
- skill/SKILL.md: model: haiku, effort: medium, paths: scoped to CLAUDE.md
+ AGENTS.md + .claude/rules/*.md so validator/generator passes run
cheaply without changing the user-facing model.
CLAUDE.local.md personal tier:
- skill/validator.py BestPracticesValidator now accepts filename=; any
*.local.md basename waives the 150-line cap.
- hooks/validate-claude-md.py reads the exempt suffix from hooks-config.
- .gitignore covers CLAUDE.local.md, **/CLAUDE.local.md,
.claude/settings.local.json, hooks/hooks-config.local.json.
Layered hook config:
- hooks/hooks-config.json: committed defaults
(validateClaudeMd.enabled/maxLines/exemptFilenameSuffix/exitCodeOnViolation,
stopAuditLine.enabled).
- hooks/validate-claude-md.py merges hooks-config.json +
hooks-config.local.json key-by-key; honours enabled=false (silent
exit 0), configurable cap, configurable exit code.
Stop audit hook:
- hooks/audit-claude-md.py walks the project tree, prints one stderr
line: total tracked / OVER cap / near cap (>=80%). Respects
stopAuditLine.enabled from config.
- hooks/hooks.json registers Stop event with matcher "".
Guardian fail-closed contract:
- agent/claude-md-guardian.md Safety & Validation section now explicitly
requires Skill-tool invocation (no inline paraphrase of SKILL.md),
abort on missing validated output, never auto-commit, and respect
local hook config.
Verified (8/8 smoke tests):
- Both commands parse with new fields and no broken hooks block.
- karpathy paths: 23 globs, includes .py/.ts/.go/.rs.
- skill model=haiku effort=medium with CLAUDE.md path scope.
- Validator: *.local.md (300 lines) -> pass; CLAUDE.md (300) -> fail;
legacy ctor without filename -> default behavior preserved.
- hooks-config.json valid; validateClaudeMd.enabled=true, maxLines=150.
- Hook validator: default rc=2 on bloated, rc=0 when local override
disables it, rc=0 on *.local.md (exempt).
- Stop hook entry present; audit script: rc=0 with "5 CLAUDE.md tracked".
- Regression: large-fullstack root still 52 lines with chain imports.
456 lines
16 KiB
Python
456 lines
16 KiB
Python
"""
|
|
CLAUDE.md Best Practices Validator
|
|
|
|
Validates CLAUDE.md files against Anthropic guidelines and community best practices.
|
|
Provides detailed validation reports with pass/fail status and improvement suggestions.
|
|
"""
|
|
|
|
from typing import Dict, List, Any, Tuple
|
|
import re
|
|
|
|
|
|
class BestPracticesValidator:
    """Validates CLAUDE.md files against best practices and guidelines.

    Five independent checks are available (length, structure, formatting,
    completeness, anti-patterns). Each returns a dict carrying at least
    ``check``, ``status`` (``"pass"``, ``"warning"`` or ``"fail"``),
    ``message`` and ``severity``. ``validate_all`` runs every check once and
    aggregates the results into a single report.
    """

    # Hard cap: every CLAUDE.md (root or modular) must stay under this.
    # Modular split is required when content would exceed this cap.
    MAX_RECOMMENDED_LINES = 150
    WARNING_THRESHOLD_LINES = 120

    # Minimum content requirements
    MIN_LINES = 20
    MIN_SECTIONS = 3

    # Required sections for a complete CLAUDE.md
    REQUIRED_SECTIONS = [
        "Core Principles",
        "Workflow"
    ]

    # Anti-patterns to detect
    ANTI_PATTERNS = [
        {
            "name": "hardcoded_secrets",
            "patterns": [
                r'api[_-]?key\s*=\s*["\'][a-zA-Z0-9]{20,}["\']',
                r'password\s*=\s*["\'][^"\']+["\']',
                r'secret\s*=\s*["\'][^"\']+["\']',
                r'token\s*=\s*["\'][a-zA-Z0-9]{20,}["\']'
            ],
            "message": "Potential hardcoded secrets detected (API keys, passwords, tokens)"
        },
        {
            "name": "generic_content",
            "patterns": [
                r'\[TODO\]',
                r'\[TBD\]',
                r'\[PLACEHOLDER\]',
                r'\[Insert.*?\]',
                r'\[Add.*?\]'
            ],
            "message": "Generic placeholder content found - replace with specific guidance"
        },
        {
            # No regexes: duplicates are detected from the extracted headings.
            "name": "duplicate_sections",
            "patterns": [],
            "message": "Duplicate section headings detected"
        },
        {
            "name": "broken_links",
            "patterns": [
                r'\[.*?\]\(\)',
                r'\[.*?\]\(#\)',
                r'\[.*?\]\(undefined\)'
            ],
            "message": "Broken or empty markdown links detected"
        }
    ]

    # Technology names looked for by the completeness check. They are matched
    # as whole words so that e.g. "go" does not match "going" or "good".
    _TECH_KEYWORDS = (
        'typescript', 'javascript', 'python', 'react', 'vue', 'angular',
        'node', 'django', 'fastapi', 'go', 'rust', 'java'
    )
    _TECH_STACK_RE = re.compile(r'\b(?:' + '|'.join(_TECH_KEYWORDS) + r')\b')

    # Workflow terms are matched as substrings on purpose ("tests", "testing").
    _WORKFLOW_KEYWORDS = ('test', 'commit', 'deploy', 'review', 'documentation')

    # Markers that open/close a fenced code block.
    _FENCE_MARKERS = ('```', '~~~')

    # List items are recognised only at the start of a line, and the marker
    # must be followed by whitespace, so "**bold**" or "a - b" do not count.
    _DASH_ITEM_RE = re.compile(r'^\s*-\s+\S')
    _STAR_ITEM_RE = re.compile(r'^\s*\*\s+\S')
    _LIST_ITEM_RE = re.compile(r'^\s*(?:[-*+]|\d+[.)])\s+\S')

    def __init__(self, content: str, project_context: Dict[str, Any] = None, filename: str = None):
        """
        Initialize validator with CLAUDE.md content.

        Args:
            content: Full text content of CLAUDE.md file
            project_context: Optional project context for advanced validation
            filename: Optional path or basename. When it ends with
                ``.local.md`` (e.g. ``CLAUDE.local.md``), the 150-line cap is
                relaxed because the file is a personal/gitignored override
                outside the chained team-shared tree.
        """
        self.content = content

        lines = content.split('\n')
        # A trailing newline terminates the last line; it does not start a new
        # one. Without this, a 150-line file ending in "\n" counts as 151.
        if lines[-1] == '':
            lines.pop()
        self.lines = lines
        self.line_count = len(lines)

        self.project_context = project_context or {}
        self.filename = filename or ""
        self.is_local_override = self.filename.endswith('.local.md')

    def validate_all(self) -> Dict[str, Any]:
        """
        Run all validation checks.

        Every check is executed exactly once and the same results feed the
        summary fields.

        Returns:
            Comprehensive validation report
        """
        results = self._run_all_checks()
        return {
            "valid": self._is_valid_overall(results),
            "validation_results": results,
            "errors": self._collect_errors(results),
            "warnings": self._collect_warnings(results),
            "pass_count": self._count_passes(results),
            "fail_count": self._count_failures(results)
        }

    def validate_length(self) -> Dict[str, Any]:
        """
        Validate file length against best practices.

        Returns:
            Validation result for length check
        """
        status = "pass"
        message = f"File length is appropriate ({self.line_count} lines)"
        severity = "info"

        # CLAUDE.local.md (and any *.local.md sibling) is a personal,
        # gitignored override outside the chained team-shared tree. Skip the
        # 150-line cap — only flag underuse.
        if self.is_local_override:
            if self.line_count < self.MIN_LINES:
                status = "fail"
                message = f"Personal override is too short ({self.line_count} lines, minimum {self.MIN_LINES})"
                severity = "low"
            else:
                message = f"Personal override ({self.line_count} lines, cap waived)"
            return {
                "check": "file_length",
                "status": status,
                "message": message,
                "severity": severity,
                "actual_value": self.line_count,
                "expected_range": f"{self.MIN_LINES}+ lines (cap waived for *.local.md)",
            }

        if self.line_count > self.MAX_RECOMMENDED_LINES:
            status = "fail"
            message = f"File exceeds maximum recommended length ({self.line_count} > {self.MAX_RECOMMENDED_LINES} lines)"
            severity = "high"
        elif self.line_count > self.WARNING_THRESHOLD_LINES:
            status = "warning"
            message = f"File is approaching maximum length ({self.line_count} lines, recommended < {self.WARNING_THRESHOLD_LINES})"
            severity = "medium"
        elif self.line_count < self.MIN_LINES:
            status = "fail"
            message = f"File is too short ({self.line_count} lines, minimum {self.MIN_LINES})"
            severity = "high"

        return {
            "check": "file_length",
            "status": status,
            "message": message,
            "severity": severity,
            "actual_value": self.line_count,
            "expected_range": f"{self.MIN_LINES}-{self.MAX_RECOMMENDED_LINES} lines"
        }

    def validate_structure(self) -> Dict[str, Any]:
        """
        Validate file structure and organization.

        Checks for a leading ``# `` title, the minimum number of ``## ``
        sections, the required section names (case-insensitive substring
        match) and duplicate section headings.

        Returns:
            Validation result for structure check
        """
        sections = self._extract_sections()
        errors = []
        warnings = []

        # Check for main title
        if not self.content.strip().startswith('# '):
            errors.append("Missing main title (# CLAUDE.md)")

        # Check for minimum sections
        if len(sections) < self.MIN_SECTIONS:
            errors.append(f"Too few sections ({len(sections)}, minimum {self.MIN_SECTIONS})")

        # Check for required sections
        lowered = [section.lower() for section in sections]
        for required in self.REQUIRED_SECTIONS:
            if not any(required.lower() in name for name in lowered):
                errors.append(f"Missing required section: '{required}'")

        # Check for duplicate sections
        duplicates = self._duplicate_section_names(sections)
        if duplicates:
            warnings.append(f"Duplicate sections found: {', '.join(duplicates)}")

        # Determine overall status
        status = "pass"
        if errors:
            status = "fail"
        elif warnings:
            status = "warning"

        return {
            "check": "file_structure",
            "status": status,
            "message": "Structure validation complete",
            "severity": "high" if errors else "medium" if warnings else "info",
            "errors": errors,
            "warnings": warnings,
            "sections_found": len(sections)
        }

    def validate_formatting(self) -> Dict[str, Any]:
        """
        Validate markdown formatting quality.

        Heading and list checks ignore lines inside fenced code blocks, so a
        shell comment such as ``# install deps`` is not read as a heading.

        Returns:
            Validation result for formatting check
        """
        errors = []
        warnings = []
        prose = self._prose_lines()

        # Check for balanced code blocks
        if self.content.count('```') % 2 != 0:
            errors.append("Unbalanced code blocks (unclosed ``` markers)")

        # Check for proper heading hierarchy
        heading_levels = [
            len(line) - len(line.lstrip('#'))
            for line in prose
            if line.startswith('#')
        ]

        if heading_levels and heading_levels[0] != 1:
            errors.append("First heading should be level 1 (# Title)")

        # Check for heading level skipping (e.g., # → ###); report the first only
        for current, following in zip(heading_levels, heading_levels[1:]):
            if following - current > 1:
                warnings.append(f"Heading level skips detected (h{current} → h{following})")
                break

        # Check for consistent list formatting (line-leading markers only)
        uses_dash = any(self._DASH_ITEM_RE.match(line) for line in prose)
        uses_star = any(self._STAR_ITEM_RE.match(line) for line in prose)
        if uses_dash and uses_star:
            warnings.append("Mixed list markers (- and *) - prefer consistent style")

        # Check for trailing whitespace (sample check)
        lines_with_trailing_ws = sum(
            1 for line in self.lines if line.endswith(' ') and line.strip()
        )
        if lines_with_trailing_ws > 5:
            warnings.append(f"Multiple lines with trailing whitespace ({lines_with_trailing_ws})")

        status = "pass"
        if errors:
            status = "fail"
        elif warnings:
            status = "warning"

        return {
            "check": "markdown_formatting",
            "status": status,
            "message": "Formatting validation complete",
            "severity": "medium" if errors else "low",
            "errors": errors,
            "warnings": warnings
        }

    def validate_completeness(self) -> Dict[str, Any]:
        """
        Validate content completeness and quality.

        The status only becomes ``"warning"`` once three or more warnings
        accumulate; an empty section is the only condition that fails.

        Returns:
            Validation result for completeness check
        """
        errors = []
        warnings = []

        # Check for essential content types
        has_code_examples = '```' in self.content
        has_links = '[' in self.content and '](' in self.content
        has_lists = any(self._LIST_ITEM_RE.match(line) for line in self._prose_lines())

        if not has_code_examples:
            warnings.append("No code examples found - consider adding examples for clarity")

        if not has_links:
            warnings.append("No links found - consider linking to external documentation")

        if not has_lists:
            warnings.append("No lists found - consider using lists for better readability")

        # Check for tech stack mention (whole-word match)
        content_lower = self.content.lower()
        tech_mentioned = bool(self._TECH_STACK_RE.search(content_lower))

        if not tech_mentioned:
            warnings.append("No specific technologies mentioned - add tech stack reference")

        # Check for workflow mentions
        workflow_mentioned = sum(
            1 for keyword in self._WORKFLOW_KEYWORDS if keyword in content_lower
        )

        if workflow_mentioned < 2:
            warnings.append("Limited workflow guidance - consider adding development workflow instructions")

        # Check for empty sections
        empty_section_pattern = r'##\s+[^\n]+\n\s*\n\s*##'
        if re.search(empty_section_pattern, self.content):
            errors.append("Empty sections detected - remove or populate with content")

        status = "pass"
        if errors:
            status = "fail"
        elif len(warnings) >= 3:
            status = "warning"

        return {
            "check": "content_completeness",
            "status": status,
            "message": "Completeness validation complete",
            "severity": "medium",
            "errors": errors,
            "warnings": warnings,
            "has_code_examples": has_code_examples,
            "has_links": has_links,
            "has_lists": has_lists,
            "tech_stack_mentioned": tech_mentioned
        }

    def _check_anti_patterns(self) -> Dict[str, Any]:
        """
        Check for anti-patterns and bad practices.

        Each entry of ``ANTI_PATTERNS`` is reported at most once. Regexes are
        applied case-insensitively to the whole content.

        Returns:
            Validation result for anti-pattern detection
        """
        detected = []

        for anti_pattern in self.ANTI_PATTERNS:
            if anti_pattern['name'] == 'duplicate_sections':
                # Handled from the extracted headings, not via regex
                hit = bool(self._duplicate_section_names(self._extract_sections()))
            else:
                hit = any(
                    re.search(pattern, self.content, re.IGNORECASE)
                    for pattern in anti_pattern['patterns']
                )

            if hit:
                detected.append({
                    "pattern": anti_pattern['name'],
                    "message": anti_pattern['message']
                })

        status = "pass" if not detected else "fail"
        severity = "high" if any(p['pattern'] == 'hardcoded_secrets' for p in detected) else "medium"

        return {
            "check": "anti_patterns",
            "status": status,
            "message": f"{len(detected)} anti-pattern(s) detected" if detected else "No anti-patterns detected",
            "severity": severity,
            "detected_patterns": detected
        }

    def _prose_lines(self) -> List[str]:
        """Return the lines that lie outside fenced code blocks.

        A fence opens on a line whose first non-blank characters are one of
        ``_FENCE_MARKERS`` and closes on the next line starting with the same
        marker. Fence lines themselves are excluded. If a fence is never
        closed, everything after it is treated as code.
        """
        kept = []
        open_fence = None
        for line in self.lines:
            stripped = line.lstrip()
            if open_fence is None:
                marker = next(
                    (m for m in self._FENCE_MARKERS if stripped.startswith(m)),
                    None,
                )
                if marker is None:
                    kept.append(line)
                else:
                    open_fence = marker
            elif stripped.startswith(open_fence):
                open_fence = None
        return kept

    @staticmethod
    def _duplicate_section_names(sections: List[str]) -> List[str]:
        """Return lower-cased headings occurring more than once, in first-seen order."""
        counts: Dict[str, int] = {}
        for name in sections:
            key = name.lower()
            counts[key] = counts.get(key, 0) + 1
        return [key for key, seen in counts.items() if seen > 1]

    def _extract_sections(self) -> List[str]:
        """Extract all ``## `` section headings that are outside code fences."""
        return [
            line[3:].strip()
            for line in self._prose_lines()
            if line.startswith('## ')
        ]

    def _run_all_checks(self) -> Dict[str, Dict[str, Any]]:
        """Run every check once and return the results keyed by check group."""
        return {
            "length": self.validate_length(),
            "structure": self.validate_structure(),
            "formatting": self.validate_formatting(),
            "completeness": self.validate_completeness(),
            "anti_patterns": self._check_anti_patterns()
        }

    def _is_valid_overall(self, results: Dict[str, Dict[str, Any]] = None) -> bool:
        """Determine if file passes overall validation.

        Args:
            results: Optional precomputed output of ``_run_all_checks``; the
                two required checks are run when omitted.
        """
        if results is None:
            length_result = self.validate_length()
            structure_result = self.validate_structure()
        else:
            length_result = results["length"]
            structure_result = results["structure"]

        # File is valid if length and structure pass (formatting and completeness can have warnings)
        return (
            length_result['status'] != 'fail' and
            structure_result['status'] != 'fail'
        )

    def _collect_errors(self, results: Dict[str, Dict[str, Any]] = None) -> List[str]:
        """Collect all errors from validation checks.

        A failed check contributes its ``errors`` list when it has one,
        otherwise its ``message``.
        """
        if results is None:
            results = self._run_all_checks()

        errors = []
        for result in results.values():
            if result['status'] != 'fail':
                continue
            if 'errors' in result:
                errors.extend(result['errors'])
            else:
                errors.append(result['message'])
        return errors

    def _collect_warnings(self, results: Dict[str, Dict[str, Any]] = None) -> List[str]:
        """Collect all warnings from validation checks.

        Anti-pattern results are not consulted here; they surface through
        ``_collect_errors``.
        """
        if results is None:
            results = self._run_all_checks()

        warnings = []
        for key in ("length", "structure", "formatting", "completeness"):
            result = results[key]
            if 'warnings' in result:
                warnings.extend(result['warnings'])
            elif result['status'] == 'warning':
                warnings.append(result['message'])
        return warnings

    def _count_passes(self, results: Dict[str, Dict[str, Any]] = None) -> int:
        """Count number of passed checks."""
        if results is None:
            results = self._run_all_checks()
        return sum(1 for result in results.values() if result['status'] == 'pass')

    def _count_failures(self, results: Dict[str, Dict[str, Any]] = None) -> int:
        """Count number of failed checks."""
        if results is None:
            results = self._run_all_checks()
        return sum(1 for result in results.values() if result['status'] == 'fail')