folder-mcp

Overview Schema Related Servers Score Discussions

key-phrase-selection.ts•5.46 KiB

/** * Key Phrase Selection Utilities * Phase 10 Sprint 1: Diverse key phrase selection for folder semantic previews */ import type { KeyPhrase } from '../rest/types.js'; /** * Aggregated phrase statistics */ interface PhraseStats { totalScore: number; count: number; maxScore: number; } /** * Scored phrase for selection */ interface ScoredPhrase { text: string; combinedScore: number; frequency: number; avgScore: number; } /** * Select diverse and representative key phrases from all documents in a folder * * Algorithm: "Diverse Top-K Selection" * 1. Aggregate all key phrases from all documents * 2. Score by combining: max relevance score × (1 + log(frequency) × 0.2) * 3. Diversify by skipping phrases with overlapping significant words * 4. Select 10-15 phrases that best represent the folder's content variety * * @param allDocumentKeyPhrases Array of key phrase arrays from each document * @param targetCount Target number of phrases to select (default: 15) * @returns Array of diverse, representative key phrases */ export function selectRepresentativeKeyPhrases( allDocumentKeyPhrases: KeyPhrase[][], targetCount: number = 15 ): KeyPhrase[] { // Step 1: Aggregate all key phrases from all documents const phraseMap = new Map<string, PhraseStats>(); for (const docPhrases of allDocumentKeyPhrases) { if (!docPhrases || !Array.isArray(docPhrases)) continue; for (const phrase of docPhrases) { if (!phrase || !phrase.text || typeof phrase.score !== 'number') continue; const normalizedText = phrase.text.toLowerCase().trim(); if (!normalizedText) continue; const existing = phraseMap.get(normalizedText) || { totalScore: 0, count: 0, maxScore: 0 }; phraseMap.set(normalizedText, { totalScore: existing.totalScore + phrase.score, count: existing.count + 1, maxScore: Math.max(existing.maxScore, phrase.score) }); } } // If no phrases found, return empty array if (phraseMap.size === 0) { return []; } // Step 2: Calculate combined score (frequency * relevance) const scoredPhrases: ScoredPhrase[] = Array.from(phraseMap.entries()).map(([text, stats]) => ({ text, // Combine: max score (quality) + log(count) (frequency boost for common phrases) combinedScore: stats.maxScore * (1 + Math.log(stats.count) * 0.2), frequency: stats.count, avgScore: stats.totalScore / stats.count })); // Step 3: Select diverse phrases const selected: KeyPhrase[] = []; const usedTerms = new Set<string>(); // Sort by combined score (highest first) scoredPhrases.sort((a, b) => b.combinedScore - a.combinedScore); for (const phrase of scoredPhrases) { // Skip very short phrases unless they're highly scored if (phrase.text.length < 3 && phrase.avgScore < 0.8) continue; // Split into significant words (ignore short words) const words = phrase.text .split(/\s+/) .filter(word => word.length > 3) .map(word => word.toLowerCase()); // Check for overlap with already selected phrases const hasSignificantOverlap = words.length > 0 && words.filter(word => usedTerms.has(word)).length > words.length / 2; // Take high-scoring phrases, or lower ones if they're diverse enough if (selected.length < 5 || !hasSignificantOverlap) { selected.push({ text: phrase.text, score: Math.round(phrase.avgScore * 1000) / 1000 // Round to 3 decimal places }); // Mark significant words as used words.forEach(word => { if (word.length > 4) { // Only track longer words to avoid over-filtering usedTerms.add(word); } }); } if (selected.length >= targetCount) break; } return selected; } /** * Calculate complexity indicator based on average readability score * * Flesch Reading Ease Score interpretation: * - 90-100: Very Easy * - 80-89: Easy * - 70-79: Fairly Easy * - 60-69: Standard * - 50-59: Fairly Difficult * - 30-49: Difficult (Technical) * - 0-29: Very Difficult (Highly Technical) * * @param avgReadability Average readability score * @returns Complexity indicator: 'simple', 'moderate', or 'technical' */ export function calculateComplexityIndicator(avgReadability: number | null | undefined): 'simple' | 'moderate' | 'technical' { if (avgReadability === null || avgReadability === undefined) { return 'moderate'; // Default when no data available } if (avgReadability >= 60) { return 'simple'; } else if (avgReadability >= 40) { return 'moderate'; } else { return 'technical'; } } /** * Get relative path from folder root * @param filePath Full file path * @param folderPath Folder root path * @returns Relative path from folder root */ export function getRelativePath(filePath: string, folderPath: string): string { // Normalize paths to use forward slashes const normalizedFilePath = filePath.replace(/\\/g, '/'); const normalizedFolderPath = folderPath.replace(/\\/g, '/'); // Remove trailing slash from folder path const folderBase = normalizedFolderPath.endsWith('/') ? normalizedFolderPath.slice(0, -1) : normalizedFolderPath; // If file path starts with folder path, remove it if (normalizedFilePath.startsWith(folderBase)) { const relative = normalizedFilePath.slice(folderBase.length); // Remove leading slash if present return relative.startsWith('/') ? relative.slice(1) : relative; } // If not a child of folder, return the full path return filePath; }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/okets/folder-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

key-phrase-selection.ts•5.46 KiB