/**
* Token measurement engine for analyzing MCP tool schema token consumption
*/
export class TokenMeasurementEngine {
  /**
   * Create an engine and install the (approximate) tokenizer.
   */
  constructor() {
    this.tokenizer = null;
    this.initializeTokenizer();
  }

  /**
   * Initialize the tokenizer (placeholder for Anthropic's tokenizer).
   *
   * The installed object exposes `encode(text)`, which here returns an
   * estimated token COUNT (a number), not an array of token ids.
   */
  initializeTokenizer() {
    // In a real implementation, this would use @anthropic-ai/tokenizer.
    // For now, a simple character-based approximation is used.
    this.tokenizer = {
      encode: (text) => this.approximateTokenCount(text)
    };
  }

  /**
   * Approximate token count using character-based estimation.
   * Based on: ~1 token = 4 characters for English text, and ~3.5 characters
   * per token for text that looks like JSON/code (contains `{` or `[`).
   * @param {string} text Text to count tokens for
   * @returns {number} Estimated token count (0 for empty or non-string input)
   */
  approximateTokenCount(text) {
    if (!text || typeof text !== 'string') return 0;
    // JSON and code tend to be more token-dense than natural language,
    // so use a more conservative ratio for technical content.
    const looksTechnical = text.includes('{') || text.includes('[');
    const avgTokenLength = looksTechnical ? 3.5 : 4;
    return Math.ceil(text.length / avgTokenLength);
  }

  /**
   * Count tokens in a tool definition (handles both real MCP and simulated formats).
   *
   * Note: absent `inputSchema`/`outputSchema`/`annotations` are serialized as
   * `{}` and therefore still contribute one estimated token each.
   * @param {Object} tool Tool definition object
   * @returns {Object} Token breakdown for the tool with keys `name`, `title`,
   *   `description`, `inputSchema`, `outputSchema`, `annotations`,
   *   `additionalFields` and `total`
   */
  countToolTokens(tool) {
    const encodeJson = (value) => this.tokenizer.encode(JSON.stringify(value || {}, null, 2));
    const breakdown = {
      name: this.tokenizer.encode(tool.name || ''),
      title: this.tokenizer.encode(tool.title || ''), // MCP tools have optional title
      description: this.tokenizer.encode(tool.description || ''),
      inputSchema: encodeJson(tool.inputSchema),
      outputSchema: encodeJson(tool.outputSchema),
      annotations: encodeJson(tool.annotations),
      additionalFields: 0, // tokens of any non-standard fields found on the tool
      total: 0
    };

    // Count additional fields beyond the standard ones.
    const standardFields = new Set([
      'name', 'title', 'description', 'inputSchema', 'outputSchema', 'annotations'
    ]);
    for (const field of Object.keys(tool)) {
      if (standardFields.has(field)) continue;
      const fieldValue = tool[field];
      if (fieldValue === undefined || fieldValue === null) continue;
      const fieldContent = typeof fieldValue === 'object'
        ? JSON.stringify(fieldValue, null, 2)
        : String(fieldValue);
      breakdown.additionalFields += this.tokenizer.encode(fieldContent);
    }

    breakdown.total = breakdown.name + breakdown.title + breakdown.description +
      breakdown.inputSchema + breakdown.outputSchema + breakdown.annotations +
      breakdown.additionalFields;
    return breakdown;
  }

  /**
   * Count tokens for an entire server's tool collection.
   *
   * The returned `tools` array keeps the same order as `serverResult.tools`.
   * @param {Object} serverResult Server extraction result
   *   (`serverName`, `success`, `tools`, optional `error`)
   * @returns {Object} Server token analysis; a failure shape with
   *   `success: false` is returned when the extraction failed or `tools`
   *   is not an array
   */
  countServerTokens(serverResult) {
    if (!serverResult.success || !Array.isArray(serverResult.tools)) {
      return {
        serverName: serverResult.serverName,
        success: false,
        error: serverResult.error,
        totalTokens: 0,
        tools: []
      };
    }

    const toolAnalysis = serverResult.tools.map((tool) => ({
      name: tool.name,
      tokens: this.countToolTokens(tool),
      complexity: this.analyzeSchemaComplexity(tool.inputSchema)
    }));
    const totalTokens = toolAnalysis.reduce((sum, tool) => sum + tool.tokens.total, 0);
    const toolCount = toolAnalysis.length;

    return {
      serverName: serverResult.serverName,
      success: true,
      totalTokens,
      toolCount,
      // Guard against 0 / 0 => NaN when the server exposes no tools.
      averageTokensPerTool: toolCount > 0 ? Math.round(totalTokens / toolCount) : 0,
      tools: toolAnalysis,
      summary: this.generateServerTokenSummary(toolAnalysis)
    };
  }

  /**
   * Analyze schema complexity metrics.
   *
   * Classification: `complex` when depth > 3 or properties > 10, `moderate`
   * when depth > 2 or properties > 5, otherwise `simple`.
   * @param {Object} schema JSON schema object
   * @returns {Object} Complexity metrics (`depth`, `properties`, `complexity`,
   *   `hasArrays`, `hasNestedObjects`, `hasEnums`)
   */
  analyzeSchemaComplexity(schema) {
    if (!schema || typeof schema !== 'object') {
      // Same shape as the regular result so consumers can read every key.
      return {
        depth: 0,
        properties: 0,
        complexity: 'simple',
        hasArrays: false,
        hasNestedObjects: false,
        hasEnums: false
      };
    }

    const depth = this.calculateSchemaDepth(schema);
    const properties = this.countSchemaProperties(schema);

    let complexity = 'simple';
    if (depth > 3 || properties > 10) {
      complexity = 'complex';
    } else if (depth > 2 || properties > 5) {
      complexity = 'moderate';
    }

    return {
      depth,
      properties,
      complexity,
      hasArrays: this.hasArrayTypes(schema),
      hasNestedObjects: this.hasNestedObjects(schema),
      hasEnums: this.hasEnumTypes(schema)
    };
  }

  /**
   * Calculate the maximum depth of a JSON schema.
   * Each step into `properties` or `items` adds one level.
   * @param {Object} schema JSON schema object
   * @param {number} currentDepth Current depth level
   * @returns {number} Maximum depth
   */
  calculateSchemaDepth(schema, currentDepth = 0) {
    if (!schema || typeof schema !== 'object') return currentDepth;

    let maxDepth = currentDepth;
    if (schema.properties) {
      for (const prop of Object.values(schema.properties)) {
        maxDepth = Math.max(maxDepth, this.calculateSchemaDepth(prop, currentDepth + 1));
      }
    }
    if (schema.items) {
      maxDepth = Math.max(maxDepth, this.calculateSchemaDepth(schema.items, currentDepth + 1));
    }
    return maxDepth;
  }

  /**
   * Count total properties in a schema (including nested).
   * @param {Object} schema JSON schema object
   * @returns {number} Total property count
   */
  countSchemaProperties(schema) {
    if (!schema || typeof schema !== 'object') return 0;

    let count = 0;
    if (schema.properties) {
      count += Object.keys(schema.properties).length;
      for (const prop of Object.values(schema.properties)) {
        count += this.countSchemaProperties(prop);
      }
    }
    if (schema.items) {
      count += this.countSchemaProperties(schema.items);
    }
    return count;
  }

  /**
   * Collect the direct sub-schemas of a schema: every value of `properties`
   * plus `items` (a single schema, or each entry when `items` is an array).
   * Non-object entries (null, booleans, ...) are dropped.
   * @param {Object} schema JSON schema object
   * @returns {Array<Object>} Direct child schemas
   */
  getChildSchemas(schema) {
    if (!schema || typeof schema !== 'object') return [];

    const children = [];
    if (schema.properties && typeof schema.properties === 'object') {
      children.push(...Object.values(schema.properties));
    }
    if (schema.items) {
      if (Array.isArray(schema.items)) {
        children.push(...schema.items);
      } else {
        children.push(schema.items);
      }
    }
    return children.filter((child) => child !== null && typeof child === 'object');
  }

  /**
   * Check whether a schema declares the given type, either as a string
   * (`type: 'array'`) or inside a type list (`type: ['array', 'null']`).
   * @param {Object} schema JSON schema object
   * @param {string} typeName Type name to look for
   * @returns {boolean} Whether the schema declares that type
   */
  schemaHasType(schema, typeName) {
    if (!schema || typeof schema !== 'object') return false;
    if (Array.isArray(schema.type)) return schema.type.includes(typeName);
    return schema.type === typeName;
  }

  /**
   * Check if schema contains array types, at any depth (through both
   * `properties` and `items`).
   * @param {Object} schema JSON schema object
   * @returns {boolean} Whether schema has arrays
   */
  hasArrayTypes(schema) {
    if (!schema || typeof schema !== 'object') return false;
    if (this.schemaHasType(schema, 'array')) return true;
    return this.getChildSchemas(schema).some((child) => this.hasArrayTypes(child));
  }

  /**
   * Check if schema has nested objects, i.e. any sub-schema (reached through
   * `properties` or `items`) of type `object`. The root schema itself does
   * not count.
   * @param {Object} schema JSON schema object
   * @returns {boolean} Whether schema has nested objects
   */
  hasNestedObjects(schema) {
    if (!schema || typeof schema !== 'object') return false;
    return this.getChildSchemas(schema).some(
      (child) => this.schemaHasType(child, 'object') || this.hasNestedObjects(child)
    );
  }

  /**
   * Check if schema contains enum types, at any depth (through both
   * `properties` and `items`).
   * @param {Object} schema JSON schema object
   * @returns {boolean} Whether schema has enums
   */
  hasEnumTypes(schema) {
    if (!schema || typeof schema !== 'object') return false;
    if (schema.enum) return true;
    return this.getChildSchemas(schema).some((child) => this.hasEnumTypes(child));
  }

  /**
   * Generate summary statistics for server token usage.
   * The input array is not modified.
   * @param {Array} toolAnalysis Array of tool token analyses
   *   (`name`, `tokens.total`, `complexity.complexity`)
   * @returns {Object} Summary statistics (`heaviestTool`, `lightestTool`,
   *   `complexityDistribution`, `averageComplexity`)
   */
  generateServerTokenSummary(toolAnalysis) {
    if (toolAnalysis.length === 0) {
      return {
        heaviestTool: null,
        lightestTool: null,
        complexityDistribution: {},
        averageComplexity: 0
      };
    }

    // Sort a copy: Array.prototype.sort works in place and would otherwise
    // reorder the caller's array.
    const sortedByTokens = [...toolAnalysis].sort((a, b) => b.tokens.total - a.tokens.total);
    const heaviest = sortedByTokens[0];
    const lightest = sortedByTokens[sortedByTokens.length - 1];

    const complexityDistribution = toolAnalysis.reduce((acc, tool) => {
      const complexity = tool.complexity.complexity;
      acc[complexity] = (acc[complexity] || 0) + 1;
      return acc;
    }, {});

    return {
      heaviestTool: { name: heaviest.name, tokens: heaviest.tokens.total },
      lightestTool: { name: lightest.name, tokens: lightest.tokens.total },
      complexityDistribution,
      averageComplexity: this.calculateAverageComplexity(toolAnalysis)
    };
  }

  /**
   * Calculate average complexity score
   * (simple = 1, moderate = 2, complex = 3, anything else = 1).
   * @param {Array} toolAnalysis Array of tool analyses
   * @returns {number} Average complexity score, or 0 for an empty array
   */
  calculateAverageComplexity(toolAnalysis) {
    if (toolAnalysis.length === 0) return 0;

    const scoreByComplexity = { simple: 1, moderate: 2, complex: 3 };
    const totalScore = toolAnalysis.reduce((sum, tool) => {
      const label = tool.complexity.complexity;
      const score = Object.prototype.hasOwnProperty.call(scoreByComplexity, label)
        ? scoreByComplexity[label]
        : 1;
      return sum + score;
    }, 0);
    return totalScore / toolAnalysis.length;
  }

  /**
   * Measure baseline token usage (built-in Claude Code tools).
   * All figures are fixed estimates, not measurements.
   * @returns {Object} Baseline token measurements
   */
  measureBaselineTokens() {
    // Simulate built-in Claude Code tools
    const builtInTools = [
      'Task', 'Bash', 'Grep', 'Read', 'Edit', 'MultiEdit', 'Write',
      'WebFetch', 'WebSearch', 'TodoWrite', 'NotebookRead', 'NotebookEdit',
      'LS', 'Glob', 'ExitPlanMode'
    ];
    // Estimate tokens for built-in tools based on research.
    // Each built-in tool likely has moderate complexity.
    const estimatedTokensPerTool = 150; // Conservative estimate
    const totalBuiltInTokens = builtInTools.length * estimatedTokensPerTool;
    const contextWithoutMCP = 1000; // Research baseline

    return {
      builtInTools: builtInTools.length,
      estimatedTokensPerTool,
      totalBuiltInTokens,
      baseline: {
        contextWithoutMCP,
        contextWithBuiltIns: contextWithoutMCP + totalBuiltInTokens
      }
    };
  }

  /**
   * Calculate total MCP overhead.
   * @param {Array} serverResults Array of server token analyses; entries
   *   without a `totalTokens` value contribute 0
   * @param {number} [contextWindow=200000] Size of the context window in tokens
   * @returns {Object} Total overhead calculation. `remainingContext` may be
   *   negative when the overhead exceeds the window; `effectiveContext` is
   *   clamped at 0.
   */
  calculateTotalOverhead(serverResults, contextWindow = 200000) {
    const baseline = this.measureBaselineTokens();
    const servers = Array.isArray(serverResults) ? serverResults : [];
    const mcpTokens = servers.reduce(
      (sum, server) => sum + ((server && server.totalTokens) || 0),
      0
    );
    const totalOverhead = baseline.totalBuiltInTokens + mcpTokens;
    // Avoid Infinity/NaN when a non-positive window is supplied.
    const overheadPercentage = contextWindow > 0 ? (totalOverhead / contextWindow) * 100 : 0;

    return {
      baseline: baseline.baseline.contextWithoutMCP,
      builtInTokens: baseline.totalBuiltInTokens,
      mcpTokens,
      totalOverhead,
      contextWindow,
      overheadPercentage: Math.round(overheadPercentage * 100) / 100, // two decimals
      remainingContext: contextWindow - totalOverhead,
      effectiveContext: Math.max(0, contextWindow - totalOverhead)
    };
  }
}