Skip to main content
Glama
SemanticAugmenter.ts7.08 kB
import { CodeRepresentation } from '../types/index.js'; import OpenAI from 'openai'; /** * SemanticAugmenter implements Stage 1 of SACL framework * Enriches code representations with semantic information beyond surface-level features */ export class SemanticAugmenter { private openai: OpenAI; private codeEncoder: CodeEncoder; private semanticEmbedder: SemanticEmbedder; constructor(apiKey: string, model: string = 'gpt-4') { this.openai = new OpenAI({ apiKey }); this.codeEncoder = new CodeEncoder(this.openai, model); this.semanticEmbedder = new SemanticEmbedder(this.openai); } /** * Augment code representation with semantic information * Combines base embeddings with semantic understanding to reduce textual bias */ async augmentRepresentation(code: CodeRepresentation): Promise<CodeRepresentation> { // Generate base code embedding const baseEmbedding = await this.codeEncoder.encode(code.content); // Generate semantic embedding focusing on functionality const semanticEmbedding = await this.semanticEmbedder.embed(code); // Generate functional signature and behavior pattern const { functionalSignature, behaviorPattern } = await this.extractSemanticFeatures(code); // Combine embeddings to create augmented representation const augmentedEmbedding = this.combineEmbeddings(baseEmbedding, semanticEmbedding); return { ...code, semanticFeatures: { embedding: semanticEmbedding, functionalSignature, behaviorPattern }, augmentedEmbedding }; } /** * Extract high-level semantic features using LLM analysis */ private async extractSemanticFeatures(code: CodeRepresentation): Promise<{ functionalSignature: string; behaviorPattern: string; }> { const prompt = `Analyze this code and extract semantic features, ignoring variable names and comments: CODE: ${code.content} Provide: 1. Functional signature: What does this code DO functionally (inputs/outputs/transformations) 2. Behavior pattern: What computational pattern does it follow (iteration, recursion, filtering, etc.) Focus on FUNCTIONALITY, not naming or documentation.`; try { const response = await this.openai.chat.completions.create({ model: 'gpt-4', messages: [{ role: 'user', content: prompt }], max_tokens: 500, temperature: 0.1 }); const content = response.choices[0]?.message?.content || ''; const lines = content.split('\n'); const functionalSignature = lines.find(line => line.toLowerCase().includes('functional') || line.includes('1.') )?.replace(/^[1-9\.]?\s*/, '') || 'Unknown function signature'; const behaviorPattern = lines.find(line => line.toLowerCase().includes('behavior') || line.includes('2.') )?.replace(/^[1-9\.]?\s*/, '') || 'Unknown behavior pattern'; return { functionalSignature, behaviorPattern }; } catch (error) { console.error('Error extracting semantic features:', error); return { functionalSignature: 'Error analyzing function signature', behaviorPattern: 'Error analyzing behavior pattern' }; } } /** * Combine base and semantic embeddings using weighted combination * Reduces textual bias by emphasizing semantic over surface features */ private combineEmbeddings(baseEmbedding: number[], semanticEmbedding: number[]): number[] { const maxLength = Math.max(baseEmbedding.length, semanticEmbedding.length); const combined: number[] = []; // Weight semantic features higher to reduce textual bias const semanticWeight = 0.7; const baseWeight = 0.3; for (let i = 0; i < maxLength; i++) { const base = baseEmbedding[i] || 0; const semantic = semanticEmbedding[i] || 0; combined[i] = (base * baseWeight) + (semantic * semanticWeight); } return combined; } /** * Generate explanation for the semantic augmentation process */ async generateExplanation(original: CodeRepresentation, augmented: CodeRepresentation): Promise<string> { const prompt = `Explain how this code's semantic understanding has been enhanced: ORIGINAL FEATURES: - Docstrings: ${original.textualFeatures.docstrings.length} - Comments: ${original.textualFeatures.comments.length} - Identifiers: ${original.textualFeatures.identifierNames.length} SEMANTIC FEATURES: - Functional Signature: ${augmented.semanticFeatures.functionalSignature} - Behavior Pattern: ${augmented.semanticFeatures.behaviorPattern} - Bias Score: ${augmented.biasScore.toFixed(3)} Provide a concise explanation of how semantic augmentation improves code understanding beyond surface-level text.`; try { const response = await this.openai.chat.completions.create({ model: 'gpt-4', messages: [{ role: 'user', content: prompt }], max_tokens: 300, temperature: 0.2 }); return response.choices[0]?.message?.content || 'Semantic augmentation explanation unavailable'; } catch (error) { console.error('Error generating explanation:', error); return 'Error generating semantic augmentation explanation'; } } } /** * CodeEncoder handles basic code embedding generation */ class CodeEncoder { constructor(private openai: OpenAI, private model: string) {} async encode(codeContent: string): Promise<number[]> { try { const response = await this.openai.embeddings.create({ model: 'text-embedding-3-small', input: codeContent }); return response.data[0]?.embedding || []; } catch (error) { console.error('Error generating code embedding:', error); return []; } } } /** * SemanticEmbedder focuses on functional/semantic aspects */ class SemanticEmbedder { constructor(private openai: OpenAI) {} async embed(code: CodeRepresentation): Promise<number[]> { // Create semantic-focused representation by emphasizing structural features const semanticContent = this.createSemanticRepresentation(code); try { const response = await this.openai.embeddings.create({ model: 'text-embedding-3-small', input: semanticContent }); return response.data[0]?.embedding || []; } catch (error) { console.error('Error generating semantic embedding:', error); return []; } } /** * Create a textual representation emphasizing semantic/structural aspects */ private createSemanticRepresentation(code: CodeRepresentation): string { const struct = code.structuralFeatures; return ` Computational structure: ${struct.complexity} complexity, ${struct.nestingDepth} nesting levels Components: ${struct.functionCount} functions, ${struct.classCount} classes AST nodes: ${struct.astNodes} Code patterns: ${code.semanticFeatures?.behaviorPattern || 'unknown'} Functional signature: ${code.semanticFeatures?.functionalSignature || 'unknown'} `.trim(); } }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ulasbilgen/sacl'

If you have feedback or need assistance with the MCP directory API, please join our Discord server