optimizer.py•24.9 kB
"""
Context Optimization Engine
Implements intelligent CLAUDE.md optimization using:
1. Token reduction strategies (23K → 5K)
2. Template selection based on project analysis
3. Semantic compression using AgentDB
4. Dynamic context loading with /prime commands
Based on research findings:
- ACE (Agentic Context Engineering) 5-layer model
- Quality over quantity approach
- Progressive disclosure patterns
"""
import re
import logging
from pathlib import Path
from typing import Dict, Any, List, Optional, Tuple, Set
from dataclasses import dataclass, asdict
from datetime import datetime
import hashlib
import json
logger = logging.getLogger(__name__)
@dataclass
class ContextMetrics:
"""Metrics for tracking context optimization."""
token_count: int
section_count: int
compression_ratio: float
semantic_density: float
last_optimized: datetime
optimization_version: str
def to_dict(self) -> Dict[str, Any]:
result = asdict(self)
result['last_optimized'] = self.last_optimized.isoformat()
return result
@dataclass
class TemplateMatch:
"""Result of template matching analysis."""
template_id: str
confidence: float
project_type: str
detected_patterns: List[str]
recommended_sections: List[str]
estimated_tokens: int
class ContextOptimizer:
"""
Intelligent CLAUDE.md optimization engine.
Capabilities:
- Token reduction: 23K → 5K via semantic compression
- Template selection: 15+ project-specific templates
- Dynamic sections: Load on-demand via /prime commands
- Learning: Adapt based on usage patterns
"""
# Token budget targets (based on research)
TOKEN_BUDGET = {
'global': 3000, # Global CLAUDE.md
'project': 5000, # Project CLAUDE.md
'prime_context': 2000, # Per /prime command
'total_budget': 10000 # Maximum total context
}
# Core sections that should always be present
CORE_SECTIONS = {
'global': [
'Core Principles',
'Essential Rules',
'Tool Optimization'
],
'project': [
'Project Overview',
'Quick Commands',
'File Organization'
]
}
# Extended sections that can be loaded on-demand
EXTENDED_SECTIONS = {
'bug': ['Debugging Workflow', 'Error Patterns', 'Testing Standards'],
'feature': ['Feature Development', 'Design Patterns', 'Integration Points'],
'refactor': ['Code Quality', 'Refactoring Patterns', 'Performance Optimization'],
'docs': ['Documentation Standards', 'API Documentation', 'Examples'],
'test': ['Testing Strategies', 'Test Patterns', 'Coverage Requirements']
}
# Template database for different project types
PROJECT_TEMPLATES = {
'python-backend': {
'markers': ['pyproject.toml', 'requirements.txt', 'setup.py'],
'core_sections': ['Python Best Practices', 'Package Management', 'Testing'],
'tools': ['uv', 'pytest', 'ruff', 'mypy'],
'token_baseline': 4000
},
'javascript-frontend': {
'markers': ['package.json', 'tsconfig.json', 'vite.config.ts'],
'core_sections': ['JavaScript Standards', 'Build Configuration', 'Testing'],
'tools': ['npm', 'pnpm', 'vitest', 'eslint'],
'token_baseline': 3500
},
'fullstack': {
'markers': ['package.json', 'pyproject.toml'],
'core_sections': ['Backend Standards', 'Frontend Standards', 'API Design'],
'tools': ['docker', 'docker-compose'],
'token_baseline': 5500
},
'mcp-server': {
'markers': ['src/mcp_standards/', 'npx-mcp'],
'core_sections': ['MCP Protocol', 'Tool Implementation', 'Testing'],
'tools': ['mcp', 'claude-flow'],
'token_baseline': 4500
},
'research': {
'markers': ['docs/', 'research/', 'analysis/'],
'core_sections': ['Research Workflow', 'Documentation', 'Collaboration'],
'tools': ['markdown', 'citations'],
'token_baseline': 3000
}
}
def __init__(self, memory_system=None, agentdb=None):
"""
Initialize context optimizer.
Args:
memory_system: PersistentMemory for storing optimizations
agentdb: AgentDB instance for semantic operations
"""
self.memory = memory_system
self.agentdb = agentdb
# Cache of optimized contexts
self._context_cache: Dict[str, Tuple[str, ContextMetrics]] = {}
# Token estimation patterns
self._token_patterns = self._build_token_patterns()
def _build_token_patterns(self) -> Dict[str, int]:
"""Build token estimation patterns based on research."""
return {
# Markdown structure tokens
'heading_h1': 3,
'heading_h2': 3,
'heading_h3': 3,
'bullet_point': 2,
'code_block_delimiter': 2,
'code_line': 8,
'paragraph': 15,
# Content type multipliers
'command': 5,
'example': 20,
'explanation': 12,
'rule': 8,
# Average tokens per section type
'principles': 300,
'quick_reference': 200,
'workflow': 400,
'examples': 500,
'agents': 600,
'mcp_tools': 800
}
def estimate_tokens(self, content: str) -> int:
"""
Estimate token count for content.
Uses Claude's approximate tokenization:
- ~4 characters per token for English
- Adjust for markdown structure
Args:
content: Text content to estimate
Returns:
Estimated token count
"""
# Base estimation: 4 chars per token
base_tokens = len(content) / 4
# Adjust for markdown structure
markdown_overhead = 0
markdown_overhead += content.count('\n##') * self._token_patterns['heading_h2']
markdown_overhead += content.count('\n###') * self._token_patterns['heading_h3']
markdown_overhead += content.count('\n- ') * self._token_patterns['bullet_point']
markdown_overhead += content.count('```') / 2 * self._token_patterns['code_block_delimiter']
# Code blocks are typically more token-dense
code_blocks = re.findall(r'```[\s\S]*?```', content)
code_tokens = sum(len(block) / 3 for block in code_blocks) # 3 chars/token for code
# Non-code tokens
non_code_content = re.sub(r'```[\s\S]*?```', '', content)
text_tokens = len(non_code_content) / 4
return int(text_tokens + code_tokens + markdown_overhead)
def analyze_project_type(self, project_path: Path) -> TemplateMatch:
"""
Analyze project to determine optimal template.
Args:
project_path: Path to project root
Returns:
TemplateMatch with template recommendation
"""
detected_patterns = []
template_scores = {}
# Check for template markers
for template_id, template_config in self.PROJECT_TEMPLATES.items():
score = 0.0
markers_found = []
for marker in template_config['markers']:
marker_path = project_path / marker
if marker_path.exists():
score += 1.0
markers_found.append(marker)
if markers_found:
template_scores[template_id] = {
'score': score / len(template_config['markers']),
'markers': markers_found,
'config': template_config
}
if not template_scores:
# Default to research template
template_id = 'research'
config = self.PROJECT_TEMPLATES[template_id]
return TemplateMatch(
template_id=template_id,
confidence=0.5,
project_type='unknown',
detected_patterns=[],
recommended_sections=config['core_sections'],
estimated_tokens=config['token_baseline']
)
# Get best match
best_template = max(template_scores.items(), key=lambda x: x[1]['score'])
template_id, template_data = best_template
config = template_data['config']
return TemplateMatch(
template_id=template_id,
confidence=template_data['score'],
project_type=template_id.replace('-', ' ').title(),
detected_patterns=template_data['markers'],
recommended_sections=config['core_sections'],
estimated_tokens=config['token_baseline']
)
def optimize_content(
self,
content: str,
target_tokens: int,
preserve_sections: Optional[List[str]] = None
) -> Tuple[str, ContextMetrics]:
"""
Optimize CLAUDE.md content to target token count.
Strategy:
1. Extract and preserve core sections
2. Compress extended sections using semantic methods
3. Remove redundant information
4. Apply progressive disclosure
Args:
content: Original CLAUDE.md content
target_tokens: Target token count
preserve_sections: Sections that must be kept
Returns:
(optimized_content, metrics) tuple
"""
current_tokens = self.estimate_tokens(content)
if current_tokens <= target_tokens:
# Already within budget
metrics = ContextMetrics(
token_count=current_tokens,
section_count=content.count('\n## '),
compression_ratio=1.0,
semantic_density=1.0,
last_optimized=datetime.now(),
optimization_version='1.0'
)
return content, metrics
# Extract sections
sections = self._extract_sections(content)
preserve_sections = preserve_sections or []
# Start with preserved sections
optimized_sections = {}
preserved_tokens = 0
for section_name, section_content in sections.items():
if self._is_core_section(section_name, preserve_sections):
optimized_sections[section_name] = section_content
preserved_tokens += self.estimate_tokens(section_content)
remaining_budget = target_tokens - preserved_tokens
if remaining_budget <= 0:
# Core sections already exceed budget - aggressive compression needed
return self._aggressive_compress(sections, target_tokens)
# Add non-core sections based on importance
remaining_sections = {
k: v for k, v in sections.items()
if k not in optimized_sections
}
# Score sections by importance
section_scores = self._score_sections(remaining_sections)
sorted_sections = sorted(
section_scores.items(),
key=lambda x: x[1],
reverse=True
)
# Add sections until budget exhausted
for section_name, score in sorted_sections:
section_content = remaining_sections[section_name]
section_tokens = self.estimate_tokens(section_content)
if section_tokens <= remaining_budget:
optimized_sections[section_name] = section_content
remaining_budget -= section_tokens
else:
# Try to compress section to fit
compressed = self._compress_section(
section_content,
remaining_budget
)
if compressed:
optimized_sections[section_name] = compressed
break
# Rebuild content
optimized_content = self._rebuild_content(optimized_sections)
# Add progressive disclosure footer
omitted_sections = set(sections.keys()) - set(optimized_sections.keys())
if omitted_sections:
footer = self._generate_disclosure_footer(omitted_sections)
optimized_content += '\n\n' + footer
# Calculate metrics
final_tokens = self.estimate_tokens(optimized_content)
metrics = ContextMetrics(
token_count=final_tokens,
section_count=len(optimized_sections),
compression_ratio=current_tokens / final_tokens,
semantic_density=len(optimized_sections) / final_tokens * 1000,
last_optimized=datetime.now(),
optimization_version='1.0'
)
logger.info(
f"Optimized content: {current_tokens} → {final_tokens} tokens "
f"({metrics.compression_ratio:.2f}x compression)"
)
return optimized_content, metrics
def _is_core_section(self, section_name: str, preserve: List[str]) -> bool:
"""Check if section is core or explicitly preserved."""
if section_name in preserve:
return True
# Check against core sections for both global and project
for core_list in self.CORE_SECTIONS.values():
if any(core.lower() in section_name.lower() for core in core_list):
return True
return False
def _extract_sections(self, content: str) -> Dict[str, str]:
"""Extract markdown sections from content."""
sections = {}
current_section = None
current_content = []
for line in content.split('\n'):
if line.startswith('## '):
# Save previous section
if current_section:
sections[current_section] = '\n'.join(current_content).strip()
# Start new section
current_section = line[3:].strip()
current_content = []
elif current_section:
current_content.append(line)
# Save last section
if current_section:
sections[current_section] = '\n'.join(current_content).strip()
return sections
def _score_sections(self, sections: Dict[str, str]) -> Dict[str, float]:
"""
Score sections by importance.
Criteria:
- Length: Shorter sections preferred (less bloat)
- Keywords: Important keywords increase score
- Structure: Well-structured content scores higher
"""
scores = {}
important_keywords = {
'must', 'required', 'critical', 'always', 'never',
'important', 'essential', 'mandatory', 'rule'
}
for section_name, section_content in sections.items():
score = 0.0
# Base score from section name
if any(kw in section_name.lower() for kw in ['core', 'essential', 'rule']):
score += 50.0
# Keyword presence
content_lower = section_content.lower()
keyword_count = sum(1 for kw in important_keywords if kw in content_lower)
score += keyword_count * 5.0
# Prefer concise sections
section_tokens = self.estimate_tokens(section_content)
if section_tokens < 200:
score += 20.0
elif section_tokens < 500:
score += 10.0
# Structure bonus (lists, code examples)
if '- ' in section_content:
score += 10.0
if '```' in section_content:
score += 15.0
scores[section_name] = score
return scores
def _compress_section(self, content: str, max_tokens: int) -> Optional[str]:
"""
Compress section to fit token budget.
Strategies:
1. Remove examples while keeping rules
2. Condense lists
3. Remove redundant explanations
"""
current_tokens = self.estimate_tokens(content)
if current_tokens <= max_tokens:
return content
# Strategy 1: Remove code examples
compressed = re.sub(r'```[\s\S]*?```\n*', '', content)
if self.estimate_tokens(compressed) <= max_tokens:
return compressed + '\n\n_Code examples available via /prime_'
# Strategy 2: Keep only first sentence of paragraphs
lines = compressed.split('\n')
condensed_lines = []
for line in lines:
if line.startswith('- ') or line.startswith('#'):
condensed_lines.append(line)
elif line.strip():
# Keep first sentence only
first_sentence = line.split('. ')[0] + '.'
condensed_lines.append(first_sentence)
condensed = '\n'.join(condensed_lines)
if self.estimate_tokens(condensed) <= max_tokens:
return condensed
# Can't compress enough
return None
def _aggressive_compress(
self,
sections: Dict[str, str],
target_tokens: int
) -> Tuple[str, ContextMetrics]:
"""
Aggressive compression when even core sections exceed budget.
This should rarely happen - indicates CLAUDE.md needs restructuring.
"""
logger.warning(
f"Aggressive compression needed - core sections exceed {target_tokens} tokens"
)
# Keep only the absolute essentials
essential_content = []
essential_content.append("# Claude Code Configuration\n")
essential_content.append("**Auto-optimized for token efficiency**\n")
essential_content.append("\n## Core Principles\n")
essential_content.append("- Evidence > Assumptions")
essential_content.append("- Code > Documentation")
essential_content.append("- Efficiency > Verbosity")
essential_content.append("\n## Extended Context Available\n")
essential_content.append("Use `/prime-<context>` commands to load task-specific context:")
# List available contexts
for section_name in sections.keys():
essential_content.append(f"- `/prime-{section_name.lower().replace(' ', '-')}`")
compressed = '\n'.join(essential_content)
metrics = ContextMetrics(
token_count=self.estimate_tokens(compressed),
section_count=2,
compression_ratio=99.0, # Extreme compression
semantic_density=0.1,
last_optimized=datetime.now(),
optimization_version='1.0-aggressive'
)
return compressed, metrics
def _rebuild_content(self, sections: Dict[str, str]) -> str:
"""Rebuild markdown content from sections."""
lines = ["# Claude Code Configuration\n"]
lines.append(f"_Last optimized: {datetime.now().strftime('%Y-%m-%d %H:%M')}_\n")
for section_name, section_content in sections.items():
lines.append(f"\n## {section_name}\n")
lines.append(section_content)
return '\n'.join(lines)
def _generate_disclosure_footer(self, omitted_sections: Set[str]) -> str:
"""Generate progressive disclosure footer."""
footer_lines = [
"---",
"## 📚 Extended Context Available",
"",
"The following sections are available via `/prime` commands:",
""
]
for section in sorted(omitted_sections):
command = section.lower().replace(' ', '-').replace('&', 'and')
footer_lines.append(f"- **{section}**: `/prime-{command}`")
footer_lines.append("")
footer_lines.append("_Load contexts on-demand to minimize token usage._")
return '\n'.join(footer_lines)
async def generate_prime_context(
self,
context_type: str,
project_path: Optional[Path] = None
) -> Optional[str]:
"""
Generate context for /prime-<type> command.
Args:
context_type: Type of context (bug, feature, refactor, etc.)
project_path: Project path for project-specific context
Returns:
Context content or None if type unknown
"""
if context_type not in self.EXTENDED_SECTIONS:
logger.warning(f"Unknown prime context type: {context_type}")
return None
# Get relevant sections
sections_to_load = self.EXTENDED_SECTIONS[context_type]
# Check memory for cached context
cache_key = f"prime_context_{context_type}"
if self.memory:
cached = await self.memory.retrieve(cache_key, namespace="context")
if cached:
return cached
# Build context from templates and learned patterns
context_lines = [
f"# {context_type.title()} Context",
f"_Loaded: {datetime.now().strftime('%Y-%m-%d %H:%M')}_",
""
]
for section in sections_to_load:
context_lines.append(f"## {section}")
context_lines.append("")
# Get section template
template = self._get_section_template(section, context_type)
context_lines.append(template)
context_lines.append("")
context_content = '\n'.join(context_lines)
# Cache for future use
if self.memory:
await self.memory.store(
cache_key,
context_content,
namespace="context",
ttl_seconds=3600 # 1 hour cache
)
return context_content
def _get_section_template(self, section: str, context_type: str) -> str:
"""Get template for a section based on context type."""
# This would integrate with learned patterns from memory
# For now, return basic templates
templates = {
'Debugging Workflow': """
1. **Reproduce** the issue reliably
2. **Isolate** the failing component
3. **Hypothesize** potential causes
4. **Test** hypotheses systematically
5. **Fix** and verify
6. **Document** the solution
""".strip(),
'Error Patterns': """
Common error patterns and solutions (learned from history):
- Check logs first
- Verify dependencies
- Validate inputs
- Review recent changes
""".strip(),
'Feature Development': """
1. **Design** - Plan the feature architecture
2. **Implement** - Write the core functionality
3. **Test** - Comprehensive test coverage
4. **Document** - Update relevant docs
5. **Review** - Code review and refinement
""".strip(),
'Code Quality': """
Quality standards:
- Readability > Cleverness
- Modular design (functions < 50 LOC)
- Comprehensive error handling
- Type safety where applicable
""".strip()
}
return templates.get(section, f"_{section} context template_")
def calculate_optimization_impact(
self,
original: str,
optimized: str
) -> Dict[str, Any]:
"""
Calculate the impact of optimization.
Returns metrics for analysis and reporting.
"""
original_tokens = self.estimate_tokens(original)
optimized_tokens = self.estimate_tokens(optimized)
original_sections = original.count('\n## ')
optimized_sections = optimized.count('\n## ')
return {
'tokens_saved': original_tokens - optimized_tokens,
'compression_ratio': original_tokens / optimized_tokens if optimized_tokens > 0 else 1.0,
'sections_removed': original_sections - optimized_sections,
'size_reduction_percent': (1 - optimized_tokens / original_tokens) * 100,
'estimated_cost_savings_percent': (1 - optimized_tokens / original_tokens) * 100,
'readability_score': self._calculate_readability(optimized)
}
def _calculate_readability(self, content: str) -> float:
"""
Calculate readability score (0-100).
Based on:
- Average line length
- Code/text ratio
- Structure clarity
"""
lines = [l for l in content.split('\n') if l.strip()]
if not lines:
return 0.0
# Average line length (prefer 60-80 chars)
avg_line_length = sum(len(l) for l in lines) / len(lines)
length_score = max(0, 100 - abs(avg_line_length - 70))
# Structure (headings, lists)
structure_score = min(100, (
content.count('\n## ') * 10 +
content.count('\n- ') * 2 +
content.count('\n### ') * 5
))
# Prefer less code in main context
code_ratio = content.count('```') / len(lines) if lines else 0
code_score = max(0, 100 - code_ratio * 200)
return (length_score + structure_score + code_score) / 3