""" CLAUDE.md Best Practices Validator Validates CLAUDE.md files against Anthropic guidelines and community best practices. Provides detailed validation reports with pass/fail status and improvement suggestions. """ from typing import Dict, List, Any, Tuple import re class BestPracticesValidator: """Validates CLAUDE.md files against best practices and guidelines.""" # Hard cap: every CLAUDE.md (root or modular) must stay under this. # Modular split is required when content would exceed this cap. MAX_RECOMMENDED_LINES = 150 WARNING_THRESHOLD_LINES = 120 # Minimum content requirements MIN_LINES = 20 MIN_SECTIONS = 3 # Required sections for a complete CLAUDE.md REQUIRED_SECTIONS = [ "Core Principles", "Workflow" ] # Anti-patterns to detect ANTI_PATTERNS = [ { "name": "hardcoded_secrets", "patterns": [ r'api[_-]?key\s*=\s*["\'][a-zA-Z0-9]{20,}["\']', r'password\s*=\s*["\'][^"\']+["\']', r'secret\s*=\s*["\'][^"\']+["\']', r'token\s*=\s*["\'][a-zA-Z0-9]{20,}["\']' ], "message": "Potential hardcoded secrets detected (API keys, passwords, tokens)" }, { "name": "generic_content", "patterns": [ r'\[TODO\]', r'\[TBD\]', r'\[PLACEHOLDER\]', r'\[Insert.*?\]', r'\[Add.*?\]' ], "message": "Generic placeholder content found - replace with specific guidance" }, { "name": "duplicate_sections", "patterns": [], "message": "Duplicate section headings detected" }, { "name": "broken_links", "patterns": [ r'\[.*?\]\(\)', r'\[.*?\]\(#\)', r'\[.*?\]\(undefined\)' ], "message": "Broken or empty markdown links detected" } ] def __init__(self, content: str, project_context: Dict[str, Any] = None, filename: str = None): """ Initialize validator with CLAUDE.md content. Args: content: Full text content of CLAUDE.md file project_context: Optional project context for advanced validation filename: Optional path or basename. When the basename ends with ``.local.md`` (e.g. ``CLAUDE.local.md``), the 150-line cap is relaxed because the file is a personal/gitignored override outside the chained team-shared tree. """ self.content = content self.lines = content.split('\n') self.line_count = len(self.lines) self.project_context = project_context or {} self.filename = filename or "" self.is_local_override = self.filename.endswith('.local.md') def validate_all(self) -> Dict[str, Any]: """ Run all validation checks. Returns: Comprehensive validation report """ return { "valid": self._is_valid_overall(), "validation_results": { "length": self.validate_length(), "structure": self.validate_structure(), "formatting": self.validate_formatting(), "completeness": self.validate_completeness(), "anti_patterns": self._check_anti_patterns() }, "errors": self._collect_errors(), "warnings": self._collect_warnings(), "pass_count": self._count_passes(), "fail_count": self._count_failures() } def validate_length(self) -> Dict[str, Any]: """ Validate file length against best practices. Returns: Validation result for length check """ status = "pass" message = f"File length is appropriate ({self.line_count} lines)" severity = "info" # CLAUDE.local.md (and any *.local.md sibling) is a personal, # gitignored override outside the chained team-shared tree. Skip the # 150-line cap — only flag underuse. if self.is_local_override: if self.line_count < self.MIN_LINES: status = "fail" message = f"Personal override is too short ({self.line_count} lines, minimum {self.MIN_LINES})" severity = "low" else: message = f"Personal override ({self.line_count} lines, cap waived)" return { "check": "file_length", "status": status, "message": message, "severity": severity, "actual_value": self.line_count, "expected_range": f"{self.MIN_LINES}+ lines (cap waived for *.local.md)", } if self.line_count > self.MAX_RECOMMENDED_LINES: status = "fail" message = f"File exceeds maximum recommended length ({self.line_count} > {self.MAX_RECOMMENDED_LINES} lines)" severity = "high" elif self.line_count > self.WARNING_THRESHOLD_LINES: status = "warning" message = f"File is approaching maximum length ({self.line_count} lines, recommended < {self.WARNING_THRESHOLD_LINES})" severity = "medium" elif self.line_count < self.MIN_LINES: status = "fail" message = f"File is too short ({self.line_count} lines, minimum {self.MIN_LINES})" severity = "high" return { "check": "file_length", "status": status, "message": message, "severity": severity, "actual_value": self.line_count, "expected_range": f"{self.MIN_LINES}-{self.MAX_RECOMMENDED_LINES} lines" } def validate_structure(self) -> Dict[str, Any]: """ Validate file structure and organization. Returns: Validation result for structure check """ sections = self._extract_sections() errors = [] warnings = [] # Check for main title if not self.content.strip().startswith('# '): errors.append("Missing main title (# CLAUDE.md)") # Check for minimum sections if len(sections) < self.MIN_SECTIONS: errors.append(f"Too few sections ({len(sections)}, minimum {self.MIN_SECTIONS})") # Check for required sections for required in self.REQUIRED_SECTIONS: if not any(required.lower() in section.lower() for section in sections): errors.append(f"Missing required section: '{required}'") # Check for duplicate sections section_counts = {} for section in sections: section_lower = section.lower() section_counts[section_lower] = section_counts.get(section_lower, 0) + 1 duplicates = [s for s, count in section_counts.items() if count > 1] if duplicates: warnings.append(f"Duplicate sections found: {', '.join(duplicates)}") # Determine overall status status = "pass" if errors: status = "fail" elif warnings: status = "warning" return { "check": "file_structure", "status": status, "message": "Structure validation complete", "severity": "high" if errors else "medium" if warnings else "info", "errors": errors, "warnings": warnings, "sections_found": len(sections) } def validate_formatting(self) -> Dict[str, Any]: """ Validate markdown formatting quality. Returns: Validation result for formatting check """ errors = [] warnings = [] # Check for balanced code blocks code_block_count = self.content.count('```') if code_block_count % 2 != 0: errors.append("Unbalanced code blocks (unclosed ``` markers)") # Check for proper heading hierarchy heading_levels = [] for line in self.lines: if line.startswith('#'): level = len(line) - len(line.lstrip('#')) heading_levels.append(level) if heading_levels and heading_levels[0] != 1: errors.append("First heading should be level 1 (# Title)") # Check for heading level skipping (e.g., # → ###) for i in range(len(heading_levels) - 1): if heading_levels[i+1] - heading_levels[i] > 1: warnings.append(f"Heading level skips detected (h{heading_levels[i]} → h{heading_levels[i+1]})") break # Check for consistent list formatting if '- ' in self.content and '* ' in self.content: warnings.append("Mixed list markers (- and *) - prefer consistent style") # Check for trailing whitespace (sample check) lines_with_trailing_ws = sum(1 for line in self.lines if line.endswith(' ') and line.strip()) if lines_with_trailing_ws > 5: warnings.append(f"Multiple lines with trailing whitespace ({lines_with_trailing_ws})") status = "pass" if errors: status = "fail" elif warnings: status = "warning" return { "check": "markdown_formatting", "status": status, "message": "Formatting validation complete", "severity": "medium" if errors else "low", "errors": errors, "warnings": warnings } def validate_completeness(self) -> Dict[str, Any]: """ Validate content completeness and quality. Returns: Validation result for completeness check """ errors = [] warnings = [] # Check for essential content types has_code_examples = '```' in self.content has_links = '[' in self.content and '](' in self.content has_lists = any(line.strip().startswith(('-', '*', '1.')) for line in self.lines) if not has_code_examples: warnings.append("No code examples found - consider adding examples for clarity") if not has_links: warnings.append("No links found - consider linking to external documentation") if not has_lists: warnings.append("No lists found - consider using lists for better readability") # Check for tech stack mention tech_keywords = [ 'typescript', 'javascript', 'python', 'react', 'vue', 'angular', 'node', 'django', 'fastapi', 'go', 'rust', 'java' ] content_lower = self.content.lower() tech_mentioned = any(keyword in content_lower for keyword in tech_keywords) if not tech_mentioned: warnings.append("No specific technologies mentioned - add tech stack reference") # Check for workflow mentions workflow_keywords = ['test', 'commit', 'deploy', 'review', 'documentation'] workflow_mentioned = sum(1 for keyword in workflow_keywords if keyword in content_lower) if workflow_mentioned < 2: warnings.append("Limited workflow guidance - consider adding development workflow instructions") # Check for empty sections empty_section_pattern = r'##\s+[^\n]+\n\s*\n\s*##' if re.search(empty_section_pattern, self.content): errors.append("Empty sections detected - remove or populate with content") status = "pass" if errors: status = "fail" elif len(warnings) >= 3: status = "warning" return { "check": "content_completeness", "status": status, "message": "Completeness validation complete", "severity": "medium", "errors": errors, "warnings": warnings, "has_code_examples": has_code_examples, "has_links": has_links, "has_lists": has_lists, "tech_stack_mentioned": tech_mentioned } def _check_anti_patterns(self) -> Dict[str, Any]: """ Check for anti-patterns and bad practices. Returns: Validation result for anti-pattern detection """ detected = [] for anti_pattern in self.ANTI_PATTERNS: if anti_pattern['name'] == 'duplicate_sections': # Handle duplicate sections separately sections = self._extract_sections() section_counts = {} for section in sections: section_lower = section.lower() section_counts[section_lower] = section_counts.get(section_lower, 0) + 1 if any(count > 1 for count in section_counts.values()): detected.append({ "pattern": anti_pattern['name'], "message": anti_pattern['message'] }) else: # Check regex patterns for pattern in anti_pattern['patterns']: if re.search(pattern, self.content, re.IGNORECASE): detected.append({ "pattern": anti_pattern['name'], "message": anti_pattern['message'] }) break # Only report each anti-pattern once status = "pass" if not detected else "fail" severity = "high" if any(p['pattern'] == 'hardcoded_secrets' for p in detected) else "medium" return { "check": "anti_patterns", "status": status, "message": f"{len(detected)} anti-pattern(s) detected" if detected else "No anti-patterns detected", "severity": severity, "detected_patterns": detected } def _extract_sections(self) -> List[str]: """Extract all section headings from content.""" sections = [] for line in self.lines: if line.startswith('## '): sections.append(line[3:].strip()) return sections def _is_valid_overall(self) -> bool: """Determine if file passes overall validation.""" length_result = self.validate_length() structure_result = self.validate_structure() # File is valid if length and structure pass (formatting and completeness can have warnings) return ( length_result['status'] != 'fail' and structure_result['status'] != 'fail' ) def _collect_errors(self) -> List[str]: """Collect all errors from validation checks.""" errors = [] all_results = [ self.validate_length(), self.validate_structure(), self.validate_formatting(), self.validate_completeness(), self._check_anti_patterns() ] for result in all_results: if result['status'] == 'fail': if 'errors' in result: errors.extend(result['errors']) else: errors.append(result['message']) return errors def _collect_warnings(self) -> List[str]: """Collect all warnings from validation checks.""" warnings = [] all_results = [ self.validate_length(), self.validate_structure(), self.validate_formatting(), self.validate_completeness() ] for result in all_results: if 'warnings' in result: warnings.extend(result['warnings']) elif result['status'] == 'warning': warnings.append(result['message']) return warnings def _count_passes(self) -> int: """Count number of passed checks.""" all_results = [ self.validate_length(), self.validate_structure(), self.validate_formatting(), self.validate_completeness(), self._check_anti_patterns() ] return sum(1 for result in all_results if result['status'] == 'pass') def _count_failures(self) -> int: """Count number of failed checks.""" all_results = [ self.validate_length(), self.validate_structure(), self.validate_formatting(), self.validate_completeness(), self._check_anti_patterns() ] return sum(1 for result in all_results if result['status'] == 'fail')