Skip to main content
Glama

Obsidian Semantic MCP Server

proximity-index.ts•7.52 kB
import { Fragment, TermPositions, TokenPosition, ProcessedDocument, PositionCluster } from '../types/fragment.js'; /** * Proximity-based fragment extraction * Finds fragments where query terms appear close together */ export class ProximityFragmentIndex { private documents = new Map<string, ProcessedDocument>(); private positionIndex = new Map<string, TermPositions[]>(); private filePathMap = new Map<string, string>(); indexDocument(docId: string, filePath: string, content: string) { const tokens = this.tokenizeWithPositions(content); // Store processed document this.documents.set(docId, { id: docId, content, tokens: tokens.map(t => t.token), positions: tokens }); // Store file path mapping this.filePathMap.set(docId, filePath); // Build position index tokens.forEach(({ token, start, end }) => { if (!this.positionIndex.has(token)) { this.positionIndex.set(token, []); } this.positionIndex.get(token)!.push({ docId, start, end }); }); } searchWithProximity(query: string, maxDistance: number = 50): Fragment[] { // Handle undefined or empty query if (!query || query.trim().length === 0) { return []; } const queryTokens = this.tokenize(query); const fragments: Fragment[] = []; // Find documents containing all query terms const docCandidates = this.findDocsWithAllTerms(queryTokens); for (const docId of docCandidates) { const doc = this.documents.get(docId)!; const filePath = this.filePathMap.get(docId)!; const termPositions = this.getTermPositionsInDoc(queryTokens, docId); // Find clusters where terms appear close together const clusters = this.findProximityClusters( termPositions, maxDistance ); // Extract fragments around clusters clusters.forEach((cluster, idx) => { const fragment = this.extractFragmentAroundCluster( doc.content, cluster, 300 // context size ); fragments.push({ id: `${docId}:prox${idx}`, docId, docPath: filePath, content: fragment.text, score: this.scoreCluster(cluster, queryTokens.length), lineStart: this.getLineNumber(doc.content, fragment.start), lineEnd: this.getLineNumber(doc.content, fragment.end), metadata: { clusterSize: cluster.terms.size, proximity: cluster.end - cluster.start } }); }); } return fragments.sort((a, b) => b.score - a.score); } private tokenizeWithPositions(content: string): TokenPosition[] { const tokens: TokenPosition[] = []; const regex = /\b\w+\b/g; let match; while ((match = regex.exec(content)) !== null) { const token = match[0].toLowerCase(); if (token.length > 2) { tokens.push({ token, start: match.index, end: match.index + match[0].length }); } } return tokens; } private tokenize(text: string): string[] { return text.toLowerCase() .match(/\b\w+\b/g) ?.filter(t => t.length > 2) || []; } private findDocsWithAllTerms(queryTokens: string[]): Set<string> { if (queryTokens.length === 0) return new Set(); // Start with documents containing the first term const firstTermDocs = new Set<string>(); const firstTermPositions = this.positionIndex.get(queryTokens[0]); if (firstTermPositions) { firstTermPositions.forEach(pos => firstTermDocs.add(pos.docId)); } // Intersect with documents containing other terms const candidates = new Set(firstTermDocs); for (let i = 1; i < queryTokens.length; i++) { const termDocs = new Set<string>(); const termPositions = this.positionIndex.get(queryTokens[i]); if (termPositions) { termPositions.forEach(pos => termDocs.add(pos.docId)); } // Keep only documents that have all terms so far candidates.forEach(doc => { if (!termDocs.has(doc)) candidates.delete(doc); }); } return candidates; } private getTermPositionsInDoc(terms: string[], docId: string): Map<string, number[]> { const positions = new Map<string, number[]>(); terms.forEach(term => { const termPositions = this.positionIndex.get(term); if (termPositions) { const docPositions = termPositions .filter(p => p.docId === docId) .map(p => p.start); if (docPositions.length > 0) { positions.set(term, docPositions); } } }); return positions; } private findProximityClusters( termPositions: Map<string, number[]>, maxDistance: number ): PositionCluster[] { // Find groups of terms that appear near each other const allPositions: Array<{pos: number, term: string}> = []; termPositions.forEach((positions, term) => { positions.forEach(pos => allPositions.push({ pos, term })); }); allPositions.sort((a, b) => a.pos - b.pos); const clusters: PositionCluster[] = []; let currentCluster: PositionCluster | null = null; for (const { pos, term } of allPositions) { if (!currentCluster || pos - currentCluster.end > maxDistance) { // Start new cluster currentCluster = { start: pos, end: pos, terms: new Set([term]), positions: [pos] }; clusters.push(currentCluster); } else { // Extend current cluster currentCluster.end = pos; currentCluster.terms.add(term); currentCluster.positions.push(pos); } } return clusters.filter(c => c.terms.size >= 2); // At least 2 query terms } private scoreCluster(cluster: PositionCluster, totalQueryTerms: number): number { // Score based on term coverage and proximity const coverage = cluster.terms.size / totalQueryTerms; const density = cluster.terms.size / (cluster.end - cluster.start + 1); const proximityScore = 1 / (1 + Math.log(cluster.end - cluster.start + 1)); return coverage * 0.5 + density * 0.3 + proximityScore * 0.2; } private extractFragmentAroundCluster( content: string, cluster: PositionCluster, contextSize: number ): { text: string; start: number; end: number } { // Find sentence boundaries around the cluster const start = Math.max(0, cluster.start - contextSize / 2); const end = Math.min(content.length, cluster.end + contextSize / 2); // Extend to sentence boundaries let fragmentStart = start; let fragmentEnd = end; // Look backward for sentence start for (let i = start; i >= 0; i--) { if (i === 0 || (i > 0 && '.!?'.includes(content[i - 1]))) { fragmentStart = i; break; } } // Look forward for sentence end for (let i = end; i < content.length; i++) { if ('.!?'.includes(content[i])) { fragmentEnd = i + 1; break; } } return { text: content.substring(fragmentStart, fragmentEnd).trim(), start: fragmentStart, end: fragmentEnd }; } private getLineNumber(content: string, position: number): number { const lines = content.substring(0, position).split('\n'); return lines.length; } }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/aaronsb/obsidian-semantic-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server