Skip to main content
Glama

Obsidian Semantic MCP Server

semantic-chunk-index.ts (8.76 kB)
import type { Chunk, SemanticSegment, ExpandedContext, SearchOptions, ContextualFragment } from '../types/fragment.js';

// Line classifiers shared by the chunker and the segment-type detector.
const MARKDOWN_HEADING = /^#+\s/;
const LABEL_HEADING = /^[A-Z][^.!?]*:$/;
const BULLET_ITEM = /^[-*•]\s/;
const NUMBERED_ITEM = /^\d+\.\s/;
const CODE_FENCE = /^```/;
// Matches every character that is NOT a letter, combining mark, digit,
// underscore, whitespace or hyphen (Unicode-aware, so non-Latin words survive).
const NON_TERM_CHARS = /[^\p{L}\p{M}\p{N}_\s-]/gu;

/**
 * Semantic chunking with context preservation.
 *
 * Splits documents into chunks (headings, lists, paragraphs, ...), links each
 * chunk to its neighbours in the same document, and keeps an inverted
 * term -> chunk index used for term-overlap search.
 */
export class SemanticChunkIndex {
  private readonly chunks = new Map<string, Chunk>();
  /** Undirected adjacency between chunk ids (previous/next chunk of the same document). */
  private readonly chunkGraph = new Map<string, Set<string>>();
  /** Inverted index: term -> ids of the chunks containing it. */
  private readonly termChunkIndex = new Map<string, Set<string>>();
  private readonly filePathMap = new Map<string, string>();
  /** Original text per document; chunk offsets are resolved to line numbers against it. */
  private readonly docContents = new Map<string, string>();
  /** Chunk ids per document, so a document can be removed or re-indexed cleanly. */
  private readonly docChunkIds = new Map<string, string[]>();

  /**
   * Indexes (or re-indexes) a document.
   *
   * Any state left by a previous call with the same `docId` is discarded
   * first, so stale chunks can never be returned by a search.
   *
   * @param docId    Identifier of the document; chunk ids are `${docId}:${index}`.
   * @param filePath Path reported as `docPath` in search results.
   * @param content  Full text of the document.
   */
  indexDocument(docId: string, filePath: string, content: string): void {
    this.removeDocument(docId);

    const semanticChunks = this.createSemanticChunks(content);
    const chunkIds: string[] = [];

    semanticChunks.forEach((chunk, idx) => {
      const chunkId = `${docId}:${idx}`;
      chunkIds.push(chunkId);

      // Store chunk with context
      this.chunks.set(chunkId, {
        id: chunkId,
        docId,
        content: chunk.text,
        context: {
          before: chunk.before,
          after: chunk.after,
          type: chunk.type // paragraph, list, heading, etc
        },
        metadata: {
          start: chunk.start,
          end: chunk.end,
          depth: chunk.depth
        }
      });

      // Relations are symmetric, so linking each chunk to its predecessor
      // also creates the predecessor's link to its successor.
      if (idx > 0) {
        this.addChunkRelation(chunkId, `${docId}:${idx - 1}`);
      }

      // Index terms
      for (const term of this.extractTerms(chunk.text)) {
        let ids = this.termChunkIndex.get(term);
        if (!ids) {
          ids = new Set();
          this.termChunkIndex.set(term, ids);
        }
        ids.add(chunkId);
      }
    });

    this.docChunkIds.set(docId, chunkIds);
    this.docContents.set(docId, content);
    this.filePathMap.set(docId, filePath);
  }

  /**
   * Removes every chunk, relation, term posting and path mapping that belongs
   * to `docId`. Does nothing when the document was never indexed.
   */
  removeDocument(docId: string): void {
    const chunkIds = this.docChunkIds.get(docId) ?? [];

    for (const chunkId of chunkIds) {
      const chunk = this.chunks.get(chunkId);
      if (chunk) {
        // The chunk content is exactly the text its terms were extracted from.
        for (const term of this.extractTerms(chunk.content)) {
          const ids = this.termChunkIndex.get(term);
          if (ids) {
            ids.delete(chunkId);
            if (ids.size === 0) {
              this.termChunkIndex.delete(term);
            }
          }
        }
      }
      this.chunks.delete(chunkId);
      // Edges only ever connect chunks of the same document, so dropping the
      // adjacency entry of each chunk removes every edge of the document.
      this.chunkGraph.delete(chunkId);
    }

    this.docChunkIds.delete(docId);
    this.docContents.delete(docId);
    this.filePathMap.delete(docId);
  }

  /**
   * Returns the best-scoring chunks for `query`.
   *
   * A chunk scores one point per query term it contains; with
   * `expandNeighbors` each chunk also receives 10% of the score of every
   * adjacent chunk (so neighbours of a hit can appear in the results).
   *
   * @param query   Free-text query; an empty or blank query yields `[]`.
   * @param options `maxFragments` (default 5), `includeContext` (default true),
   *                `expandNeighbors` (default true).
   * @returns Fragments sorted by descending score, with 1-based line numbers
   *          relative to the originally indexed document text.
   */
  searchWithContext(query: string, options: SearchOptions = {}): ContextualFragment[] {
    const { maxFragments = 5, includeContext = true, expandNeighbors = true } = options;

    // Handle undefined or empty query
    if (!query || query.trim().length === 0) {
      return [];
    }

    // Score chunks based on term overlap
    const chunkScores = new Map<string, number>();
    for (const term of this.extractTerms(query)) {
      this.termChunkIndex.get(term)?.forEach(chunkId => {
        chunkScores.set(chunkId, (chunkScores.get(chunkId) ?? 0) + 1);
      });
    }

    // Boost scores based on chunk relationships
    if (expandNeighbors) {
      this.boostNeighborScores(chunkScores);
    }

    // Select top chunks with context
    const topChunks = Array.from(chunkScores.entries())
      .sort((a, b) => b[1] - a[1])
      .slice(0, maxFragments);

    const fragments: ContextualFragment[] = [];
    for (const [chunkId, score] of topChunks) {
      const chunk = this.chunks.get(chunkId);
      if (!chunk) {
        continue; // defensive: never surface an id without a backing chunk
      }
      const doc = this.getDocumentContent(chunk.docId);
      fragments.push({
        id: chunkId,
        docId: chunk.docId,
        docPath: this.filePathMap.get(chunk.docId) ?? '',
        content: chunk.content,
        score,
        lineStart: this.getLineNumber(doc, chunk.metadata.start),
        lineEnd: this.getLineNumber(doc, chunk.metadata.end),
        context: includeContext ? this.gatherContext(chunkId) : undefined,
        metadata: {
          ...chunk.metadata,
          chunkType: chunk.context.type
        }
      });
    }
    return fragments;
  }

  /**
   * Splits `content` into segments at headings, blank lines (outside lists),
   * code fences, and once the current segment exceeds 500 characters.
   *
   * `start` is the character offset of the segment's first non-blank line and
   * `end` the offset just past its last non-blank line, both relative to
   * `content`. `before`/`after` hold up to 100 characters of the adjacent
   * segments' text.
   */
  private createSemanticChunks(content: string): SemanticSegment[] {
    const segments: SemanticSegment[] = [];
    let currentSegment: string[] = [];
    let segmentStart = 0;
    let segmentEnd = 0;
    let charOffset = 0;

    // Emits the pending lines (if any) as a segment and resets the buffer.
    const flush = (): void => {
      if (currentSegment.length === 0) {
        return;
      }
      const previous = segments[segments.length - 1];
      segments.push({
        text: currentSegment.join('\n').trim(),
        type: this.detectSegmentType(currentSegment),
        start: segmentStart,
        end: segmentEnd,
        depth: this.calculateDepth(currentSegment),
        before: previous ? previous.text.slice(-100) : '',
        after: '' // Will be filled later
      });
      currentSegment = [];
    };

    for (const line of content.split('\n')) {
      const trimmed = line.trim();

      // Detect semantic boundaries
      const isHeading = MARKDOWN_HEADING.test(trimmed) || LABEL_HEADING.test(trimmed);
      const isListStart = BULLET_ITEM.test(trimmed) || NUMBERED_ITEM.test(trimmed);
      const isEmptyLine = trimmed.length === 0;
      const isLongParagraph = currentSegment.join(' ').length > 500;
      const isCodeBlock = CODE_FENCE.test(trimmed);
      const inList = currentSegment.length > 0 && this.detectSegmentType(currentSegment) === 'list';

      // A list item never opens a new segment by itself; blank lines inside a
      // list do not end the list.
      const isBoundary =
        (isHeading || (isEmptyLine && currentSegment.length > 0 && !inList) || isLongParagraph || isCodeBlock) &&
        !isListStart;
      if (isBoundary) {
        flush();
      }

      if (!isEmptyLine) {
        if (currentSegment.length === 0) {
          // Anchor the segment on its first real line, not on a preceding blank line.
          segmentStart = charOffset;
        }
        currentSegment.push(line);
        segmentEnd = charOffset + line.length;
      }

      charOffset += line.length + 1; // +1 for newline
    }

    // Add final segment
    flush();

    // Fill in 'after' context
    segments.forEach((segment, i) => {
      const next = segments[i + 1];
      if (next) {
        segment.after = next.text.slice(0, 100);
      }
    });

    return segments;
  }

  /**
   * Classifies a segment from its first line ('heading', 'code', 'list',
   * 'section'), or as 'quote' when every line starts with '>'; otherwise
   * 'paragraph'.
   */
  private detectSegmentType(lines: string[]): string {
    const firstLine = lines[0]?.trim() ?? '';

    if (MARKDOWN_HEADING.test(firstLine)) return 'heading';
    if (CODE_FENCE.test(firstLine)) return 'code';
    if (BULLET_ITEM.test(firstLine) || NUMBERED_ITEM.test(firstLine)) return 'list';
    if (LABEL_HEADING.test(firstLine)) return 'section';
    if (lines.every(l => l.trim().startsWith('>'))) return 'quote';
    return 'paragraph';
  }

  /**
   * Heuristic depth score in the range 1..4: starts at 1 and gains one point
   * each for an average line length above 50, any ASCII capital letter, and
   * any sentence punctuation (. ! ?).
   */
  private calculateDepth(lines: string[]): number {
    if (lines.length === 0) {
      return 1; // avoid 0/0 below
    }
    const avgLineLength = lines.reduce((sum, l) => sum + l.length, 0) / lines.length;
    const hasCapitals = lines.some(l => /[A-Z]/.test(l));
    const hasPunctuation = lines.some(l => /[.!?]/.test(l));

    let depth = 1;
    if (avgLineLength > 50) depth++;
    if (hasCapitals) depth++;
    if (hasPunctuation) depth++;
    return depth;
  }

  /**
   * Lower-cases `text`, replaces punctuation/symbols with spaces and returns
   * the whitespace-separated tokens longer than two characters. Letters and
   * digits of any script are kept.
   */
  private extractTerms(text: string): string[] {
    return text
      .toLowerCase()
      .replace(NON_TERM_CHARS, ' ')
      .split(/\s+/)
      .filter(t => t.length > 2);
  }

  /** Records an undirected edge between two chunk ids. */
  private addChunkRelation(chunk1: string, chunk2: string): void {
    this.neighborsOf(chunk1).add(chunk2);
    this.neighborsOf(chunk2).add(chunk1);
  }

  /** Returns the adjacency set of `chunkId`, creating it on first use. */
  private neighborsOf(chunkId: string): Set<string> {
    let neighbors = this.chunkGraph.get(chunkId);
    if (!neighbors) {
      neighbors = new Set();
      this.chunkGraph.set(chunkId, neighbors);
    }
    return neighbors;
  }

  /**
   * Adds 10% of every scored chunk's score to each of its neighbours.
   * Boosts are accumulated from the pre-boost scores and applied afterwards,
   * so the result does not depend on iteration order. Mutates `scores`.
   */
  private boostNeighborScores(scores: Map<string, number>): void {
    const boosts = new Map<string, number>();

    scores.forEach((score, chunkId) => {
      this.chunkGraph.get(chunkId)?.forEach(neighbor => {
        // Give a small boost to neighboring chunks
        boosts.set(neighbor, (boosts.get(neighbor) ?? 0) + score * 0.1);
      });
    });

    // Apply boosts
    boosts.forEach((boost, chunkId) => {
      scores.set(chunkId, (scores.get(chunkId) ?? 0) + boost);
    });
  }

  /**
   * Builds the expanded context of a chunk: its stored before/after snippets
   * plus a 100-character preview of every adjacent chunk.
   */
  private gatherContext(chunkId: string): ExpandedContext {
    const neighbors = this.chunkGraph.get(chunkId) ?? new Set<string>();
    const chunk = this.chunks.get(chunkId);

    return {
      before: chunk?.context.before ?? '',
      after: chunk?.context.after ?? '',
      related: Array.from(neighbors).map(id => ({
        id,
        preview: this.chunks.get(id)?.content.slice(0, 100) ?? ''
      }))
    };
  }

  /**
   * Returns the original text of an indexed document ('' when unknown).
   * Chunk offsets refer to this text, so it must be used for line lookups
   * rather than a reconstruction from trimmed chunk contents.
   */
  private getDocumentContent(docId: string): string {
    return this.docContents.get(docId) ?? '';
  }

  /** 1-based number of the line containing character offset `position`. */
  private getLineNumber(content: string, position: number): number {
    return content.substring(0, position).split('\n').length;
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/aaronsb/obsidian-semantic-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.