Skip to main content
Glama
optimizer.py24.9 kB
""" Context Optimization Engine Implements intelligent CLAUDE.md optimization using: 1. Token reduction strategies (23K → 5K) 2. Template selection based on project analysis 3. Semantic compression using AgentDB 4. Dynamic context loading with /prime commands Based on research findings: - ACE (Agentic Context Engineering) 5-layer model - Quality over quantity approach - Progressive disclosure patterns """ import re import logging from pathlib import Path from typing import Dict, Any, List, Optional, Tuple, Set from dataclasses import dataclass, asdict from datetime import datetime import hashlib import json logger = logging.getLogger(__name__) @dataclass class ContextMetrics: """Metrics for tracking context optimization.""" token_count: int section_count: int compression_ratio: float semantic_density: float last_optimized: datetime optimization_version: str def to_dict(self) -> Dict[str, Any]: result = asdict(self) result['last_optimized'] = self.last_optimized.isoformat() return result @dataclass class TemplateMatch: """Result of template matching analysis.""" template_id: str confidence: float project_type: str detected_patterns: List[str] recommended_sections: List[str] estimated_tokens: int class ContextOptimizer: """ Intelligent CLAUDE.md optimization engine. Capabilities: - Token reduction: 23K → 5K via semantic compression - Template selection: 15+ project-specific templates - Dynamic sections: Load on-demand via /prime commands - Learning: Adapt based on usage patterns """ # Token budget targets (based on research) TOKEN_BUDGET = { 'global': 3000, # Global CLAUDE.md 'project': 5000, # Project CLAUDE.md 'prime_context': 2000, # Per /prime command 'total_budget': 10000 # Maximum total context } # Core sections that should always be present CORE_SECTIONS = { 'global': [ 'Core Principles', 'Essential Rules', 'Tool Optimization' ], 'project': [ 'Project Overview', 'Quick Commands', 'File Organization' ] } # Extended sections that can be loaded on-demand EXTENDED_SECTIONS = { 'bug': ['Debugging Workflow', 'Error Patterns', 'Testing Standards'], 'feature': ['Feature Development', 'Design Patterns', 'Integration Points'], 'refactor': ['Code Quality', 'Refactoring Patterns', 'Performance Optimization'], 'docs': ['Documentation Standards', 'API Documentation', 'Examples'], 'test': ['Testing Strategies', 'Test Patterns', 'Coverage Requirements'] } # Template database for different project types PROJECT_TEMPLATES = { 'python-backend': { 'markers': ['pyproject.toml', 'requirements.txt', 'setup.py'], 'core_sections': ['Python Best Practices', 'Package Management', 'Testing'], 'tools': ['uv', 'pytest', 'ruff', 'mypy'], 'token_baseline': 4000 }, 'javascript-frontend': { 'markers': ['package.json', 'tsconfig.json', 'vite.config.ts'], 'core_sections': ['JavaScript Standards', 'Build Configuration', 'Testing'], 'tools': ['npm', 'pnpm', 'vitest', 'eslint'], 'token_baseline': 3500 }, 'fullstack': { 'markers': ['package.json', 'pyproject.toml'], 'core_sections': ['Backend Standards', 'Frontend Standards', 'API Design'], 'tools': ['docker', 'docker-compose'], 'token_baseline': 5500 }, 'mcp-server': { 'markers': ['src/mcp_standards/', 'npx-mcp'], 'core_sections': ['MCP Protocol', 'Tool Implementation', 'Testing'], 'tools': ['mcp', 'claude-flow'], 'token_baseline': 4500 }, 'research': { 'markers': ['docs/', 'research/', 'analysis/'], 'core_sections': ['Research Workflow', 'Documentation', 'Collaboration'], 'tools': ['markdown', 'citations'], 'token_baseline': 3000 } } def __init__(self, memory_system=None, agentdb=None): """ Initialize context optimizer. Args: memory_system: PersistentMemory for storing optimizations agentdb: AgentDB instance for semantic operations """ self.memory = memory_system self.agentdb = agentdb # Cache of optimized contexts self._context_cache: Dict[str, Tuple[str, ContextMetrics]] = {} # Token estimation patterns self._token_patterns = self._build_token_patterns() def _build_token_patterns(self) -> Dict[str, int]: """Build token estimation patterns based on research.""" return { # Markdown structure tokens 'heading_h1': 3, 'heading_h2': 3, 'heading_h3': 3, 'bullet_point': 2, 'code_block_delimiter': 2, 'code_line': 8, 'paragraph': 15, # Content type multipliers 'command': 5, 'example': 20, 'explanation': 12, 'rule': 8, # Average tokens per section type 'principles': 300, 'quick_reference': 200, 'workflow': 400, 'examples': 500, 'agents': 600, 'mcp_tools': 800 } def estimate_tokens(self, content: str) -> int: """ Estimate token count for content. Uses Claude's approximate tokenization: - ~4 characters per token for English - Adjust for markdown structure Args: content: Text content to estimate Returns: Estimated token count """ # Base estimation: 4 chars per token base_tokens = len(content) / 4 # Adjust for markdown structure markdown_overhead = 0 markdown_overhead += content.count('\n##') * self._token_patterns['heading_h2'] markdown_overhead += content.count('\n###') * self._token_patterns['heading_h3'] markdown_overhead += content.count('\n- ') * self._token_patterns['bullet_point'] markdown_overhead += content.count('```') / 2 * self._token_patterns['code_block_delimiter'] # Code blocks are typically more token-dense code_blocks = re.findall(r'```[\s\S]*?```', content) code_tokens = sum(len(block) / 3 for block in code_blocks) # 3 chars/token for code # Non-code tokens non_code_content = re.sub(r'```[\s\S]*?```', '', content) text_tokens = len(non_code_content) / 4 return int(text_tokens + code_tokens + markdown_overhead) def analyze_project_type(self, project_path: Path) -> TemplateMatch: """ Analyze project to determine optimal template. Args: project_path: Path to project root Returns: TemplateMatch with template recommendation """ detected_patterns = [] template_scores = {} # Check for template markers for template_id, template_config in self.PROJECT_TEMPLATES.items(): score = 0.0 markers_found = [] for marker in template_config['markers']: marker_path = project_path / marker if marker_path.exists(): score += 1.0 markers_found.append(marker) if markers_found: template_scores[template_id] = { 'score': score / len(template_config['markers']), 'markers': markers_found, 'config': template_config } if not template_scores: # Default to research template template_id = 'research' config = self.PROJECT_TEMPLATES[template_id] return TemplateMatch( template_id=template_id, confidence=0.5, project_type='unknown', detected_patterns=[], recommended_sections=config['core_sections'], estimated_tokens=config['token_baseline'] ) # Get best match best_template = max(template_scores.items(), key=lambda x: x[1]['score']) template_id, template_data = best_template config = template_data['config'] return TemplateMatch( template_id=template_id, confidence=template_data['score'], project_type=template_id.replace('-', ' ').title(), detected_patterns=template_data['markers'], recommended_sections=config['core_sections'], estimated_tokens=config['token_baseline'] ) def optimize_content( self, content: str, target_tokens: int, preserve_sections: Optional[List[str]] = None ) -> Tuple[str, ContextMetrics]: """ Optimize CLAUDE.md content to target token count. Strategy: 1. Extract and preserve core sections 2. Compress extended sections using semantic methods 3. Remove redundant information 4. Apply progressive disclosure Args: content: Original CLAUDE.md content target_tokens: Target token count preserve_sections: Sections that must be kept Returns: (optimized_content, metrics) tuple """ current_tokens = self.estimate_tokens(content) if current_tokens <= target_tokens: # Already within budget metrics = ContextMetrics( token_count=current_tokens, section_count=content.count('\n## '), compression_ratio=1.0, semantic_density=1.0, last_optimized=datetime.now(), optimization_version='1.0' ) return content, metrics # Extract sections sections = self._extract_sections(content) preserve_sections = preserve_sections or [] # Start with preserved sections optimized_sections = {} preserved_tokens = 0 for section_name, section_content in sections.items(): if self._is_core_section(section_name, preserve_sections): optimized_sections[section_name] = section_content preserved_tokens += self.estimate_tokens(section_content) remaining_budget = target_tokens - preserved_tokens if remaining_budget <= 0: # Core sections already exceed budget - aggressive compression needed return self._aggressive_compress(sections, target_tokens) # Add non-core sections based on importance remaining_sections = { k: v for k, v in sections.items() if k not in optimized_sections } # Score sections by importance section_scores = self._score_sections(remaining_sections) sorted_sections = sorted( section_scores.items(), key=lambda x: x[1], reverse=True ) # Add sections until budget exhausted for section_name, score in sorted_sections: section_content = remaining_sections[section_name] section_tokens = self.estimate_tokens(section_content) if section_tokens <= remaining_budget: optimized_sections[section_name] = section_content remaining_budget -= section_tokens else: # Try to compress section to fit compressed = self._compress_section( section_content, remaining_budget ) if compressed: optimized_sections[section_name] = compressed break # Rebuild content optimized_content = self._rebuild_content(optimized_sections) # Add progressive disclosure footer omitted_sections = set(sections.keys()) - set(optimized_sections.keys()) if omitted_sections: footer = self._generate_disclosure_footer(omitted_sections) optimized_content += '\n\n' + footer # Calculate metrics final_tokens = self.estimate_tokens(optimized_content) metrics = ContextMetrics( token_count=final_tokens, section_count=len(optimized_sections), compression_ratio=current_tokens / final_tokens, semantic_density=len(optimized_sections) / final_tokens * 1000, last_optimized=datetime.now(), optimization_version='1.0' ) logger.info( f"Optimized content: {current_tokens} → {final_tokens} tokens " f"({metrics.compression_ratio:.2f}x compression)" ) return optimized_content, metrics def _is_core_section(self, section_name: str, preserve: List[str]) -> bool: """Check if section is core or explicitly preserved.""" if section_name in preserve: return True # Check against core sections for both global and project for core_list in self.CORE_SECTIONS.values(): if any(core.lower() in section_name.lower() for core in core_list): return True return False def _extract_sections(self, content: str) -> Dict[str, str]: """Extract markdown sections from content.""" sections = {} current_section = None current_content = [] for line in content.split('\n'): if line.startswith('## '): # Save previous section if current_section: sections[current_section] = '\n'.join(current_content).strip() # Start new section current_section = line[3:].strip() current_content = [] elif current_section: current_content.append(line) # Save last section if current_section: sections[current_section] = '\n'.join(current_content).strip() return sections def _score_sections(self, sections: Dict[str, str]) -> Dict[str, float]: """ Score sections by importance. Criteria: - Length: Shorter sections preferred (less bloat) - Keywords: Important keywords increase score - Structure: Well-structured content scores higher """ scores = {} important_keywords = { 'must', 'required', 'critical', 'always', 'never', 'important', 'essential', 'mandatory', 'rule' } for section_name, section_content in sections.items(): score = 0.0 # Base score from section name if any(kw in section_name.lower() for kw in ['core', 'essential', 'rule']): score += 50.0 # Keyword presence content_lower = section_content.lower() keyword_count = sum(1 for kw in important_keywords if kw in content_lower) score += keyword_count * 5.0 # Prefer concise sections section_tokens = self.estimate_tokens(section_content) if section_tokens < 200: score += 20.0 elif section_tokens < 500: score += 10.0 # Structure bonus (lists, code examples) if '- ' in section_content: score += 10.0 if '```' in section_content: score += 15.0 scores[section_name] = score return scores def _compress_section(self, content: str, max_tokens: int) -> Optional[str]: """ Compress section to fit token budget. Strategies: 1. Remove examples while keeping rules 2. Condense lists 3. Remove redundant explanations """ current_tokens = self.estimate_tokens(content) if current_tokens <= max_tokens: return content # Strategy 1: Remove code examples compressed = re.sub(r'```[\s\S]*?```\n*', '', content) if self.estimate_tokens(compressed) <= max_tokens: return compressed + '\n\n_Code examples available via /prime_' # Strategy 2: Keep only first sentence of paragraphs lines = compressed.split('\n') condensed_lines = [] for line in lines: if line.startswith('- ') or line.startswith('#'): condensed_lines.append(line) elif line.strip(): # Keep first sentence only first_sentence = line.split('. ')[0] + '.' condensed_lines.append(first_sentence) condensed = '\n'.join(condensed_lines) if self.estimate_tokens(condensed) <= max_tokens: return condensed # Can't compress enough return None def _aggressive_compress( self, sections: Dict[str, str], target_tokens: int ) -> Tuple[str, ContextMetrics]: """ Aggressive compression when even core sections exceed budget. This should rarely happen - indicates CLAUDE.md needs restructuring. """ logger.warning( f"Aggressive compression needed - core sections exceed {target_tokens} tokens" ) # Keep only the absolute essentials essential_content = [] essential_content.append("# Claude Code Configuration\n") essential_content.append("**Auto-optimized for token efficiency**\n") essential_content.append("\n## Core Principles\n") essential_content.append("- Evidence > Assumptions") essential_content.append("- Code > Documentation") essential_content.append("- Efficiency > Verbosity") essential_content.append("\n## Extended Context Available\n") essential_content.append("Use `/prime-<context>` commands to load task-specific context:") # List available contexts for section_name in sections.keys(): essential_content.append(f"- `/prime-{section_name.lower().replace(' ', '-')}`") compressed = '\n'.join(essential_content) metrics = ContextMetrics( token_count=self.estimate_tokens(compressed), section_count=2, compression_ratio=99.0, # Extreme compression semantic_density=0.1, last_optimized=datetime.now(), optimization_version='1.0-aggressive' ) return compressed, metrics def _rebuild_content(self, sections: Dict[str, str]) -> str: """Rebuild markdown content from sections.""" lines = ["# Claude Code Configuration\n"] lines.append(f"_Last optimized: {datetime.now().strftime('%Y-%m-%d %H:%M')}_\n") for section_name, section_content in sections.items(): lines.append(f"\n## {section_name}\n") lines.append(section_content) return '\n'.join(lines) def _generate_disclosure_footer(self, omitted_sections: Set[str]) -> str: """Generate progressive disclosure footer.""" footer_lines = [ "---", "## 📚 Extended Context Available", "", "The following sections are available via `/prime` commands:", "" ] for section in sorted(omitted_sections): command = section.lower().replace(' ', '-').replace('&', 'and') footer_lines.append(f"- **{section}**: `/prime-{command}`") footer_lines.append("") footer_lines.append("_Load contexts on-demand to minimize token usage._") return '\n'.join(footer_lines) async def generate_prime_context( self, context_type: str, project_path: Optional[Path] = None ) -> Optional[str]: """ Generate context for /prime-<type> command. Args: context_type: Type of context (bug, feature, refactor, etc.) project_path: Project path for project-specific context Returns: Context content or None if type unknown """ if context_type not in self.EXTENDED_SECTIONS: logger.warning(f"Unknown prime context type: {context_type}") return None # Get relevant sections sections_to_load = self.EXTENDED_SECTIONS[context_type] # Check memory for cached context cache_key = f"prime_context_{context_type}" if self.memory: cached = await self.memory.retrieve(cache_key, namespace="context") if cached: return cached # Build context from templates and learned patterns context_lines = [ f"# {context_type.title()} Context", f"_Loaded: {datetime.now().strftime('%Y-%m-%d %H:%M')}_", "" ] for section in sections_to_load: context_lines.append(f"## {section}") context_lines.append("") # Get section template template = self._get_section_template(section, context_type) context_lines.append(template) context_lines.append("") context_content = '\n'.join(context_lines) # Cache for future use if self.memory: await self.memory.store( cache_key, context_content, namespace="context", ttl_seconds=3600 # 1 hour cache ) return context_content def _get_section_template(self, section: str, context_type: str) -> str: """Get template for a section based on context type.""" # This would integrate with learned patterns from memory # For now, return basic templates templates = { 'Debugging Workflow': """ 1. **Reproduce** the issue reliably 2. **Isolate** the failing component 3. **Hypothesize** potential causes 4. **Test** hypotheses systematically 5. **Fix** and verify 6. **Document** the solution """.strip(), 'Error Patterns': """ Common error patterns and solutions (learned from history): - Check logs first - Verify dependencies - Validate inputs - Review recent changes """.strip(), 'Feature Development': """ 1. **Design** - Plan the feature architecture 2. **Implement** - Write the core functionality 3. **Test** - Comprehensive test coverage 4. **Document** - Update relevant docs 5. **Review** - Code review and refinement """.strip(), 'Code Quality': """ Quality standards: - Readability > Cleverness - Modular design (functions < 50 LOC) - Comprehensive error handling - Type safety where applicable """.strip() } return templates.get(section, f"_{section} context template_") def calculate_optimization_impact( self, original: str, optimized: str ) -> Dict[str, Any]: """ Calculate the impact of optimization. Returns metrics for analysis and reporting. """ original_tokens = self.estimate_tokens(original) optimized_tokens = self.estimate_tokens(optimized) original_sections = original.count('\n## ') optimized_sections = optimized.count('\n## ') return { 'tokens_saved': original_tokens - optimized_tokens, 'compression_ratio': original_tokens / optimized_tokens if optimized_tokens > 0 else 1.0, 'sections_removed': original_sections - optimized_sections, 'size_reduction_percent': (1 - optimized_tokens / original_tokens) * 100, 'estimated_cost_savings_percent': (1 - optimized_tokens / original_tokens) * 100, 'readability_score': self._calculate_readability(optimized) } def _calculate_readability(self, content: str) -> float: """ Calculate readability score (0-100). Based on: - Average line length - Code/text ratio - Structure clarity """ lines = [l for l in content.split('\n') if l.strip()] if not lines: return 0.0 # Average line length (prefer 60-80 chars) avg_line_length = sum(len(l) for l in lines) / len(lines) length_score = max(0, 100 - abs(avg_line_length - 70)) # Structure (headings, lists) structure_score = min(100, ( content.count('\n## ') * 10 + content.count('\n- ') * 2 + content.count('\n### ') * 5 )) # Prefer less code in main context code_ratio = content.count('```') / len(lines) if lines else 0 code_score = max(0, 100 - code_ratio * 200) return (length_score + structure_score + code_score) / 3

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/airmcp-com/mcp-standards'

If you have feedback or need assistance with the MCP directory API, please join our Discord server