folder-mcp

Overview Schema Related Servers Score Discussions

processing.ts•6.56 KiB

/** * Content Processing Utilities * * Minimal utilities for content transformation. The main semantic extraction * is now handled by SemanticExtractionService (Sprint 0 implementation). */ import { ParsedContent } from '../../types/index.js'; import { extractKeyPhraseCandidates } from '../semantic/algorithms/ngram-utils.js'; /** * Content transformation options */ export interface ContentTransformationOptions { normalizeWhitespace?: boolean; removeEmptyLines?: boolean; preserveFormatting?: boolean; maxLineLength?: number; } /** * Content enhancement metadata */ export interface ContentEnhancement { keyPhrases: string[]; summary?: string; readabilityScore?: number; } /** * Enhanced content with processing metadata */ export interface EnhancedContent extends ParsedContent { enhancement: ContentEnhancement; transformationApplied: ContentTransformationOptions; processingTimestamp: Date; } /** * Transform and normalize content * @deprecated Use SemanticExtractionService for new extraction workflows */ export function transformContent(content: ParsedContent, options: ContentTransformationOptions = {}): ParsedContent { const defaultOptions: ContentTransformationOptions = { normalizeWhitespace: true, removeEmptyLines: true, preserveFormatting: false }; const opts = { ...defaultOptions, ...options }; let transformedContent = content.content; if (opts.normalizeWhitespace) { // Normalize whitespace while preserving intentional formatting transformedContent = transformedContent.replace(/[ \t]+/g, ' '); } if (opts.removeEmptyLines) { // Remove excessive empty lines but preserve paragraph breaks transformedContent = transformedContent.replace(/\n\s*\n\s*\n+/g, '\n\n'); } if (opts.maxLineLength && !opts.preserveFormatting) { // Wrap long lines while preserving paragraph structure const paragraphs = transformedContent.split(/\n\s*\n/); const wrappedParagraphs = paragraphs.map(paragraph => wrapText(paragraph, opts.maxLineLength!) ); transformedContent = wrappedParagraphs.join('\n\n'); } return { ...content, content: transformedContent.trim() }; } /** * Enhance content with extracted metadata * @deprecated Use SemanticExtractionService for new extraction workflows */ export function enhanceContent(content: ParsedContent): EnhancedContent { const keyPhrases = extractKeyPhraseCandidates(content.content, 10); const summary = generateSummary(content.content); const readabilityScore = calculateReadabilityScore(content.content); return { ...content, enhancement: { keyPhrases, summary, readabilityScore }, transformationApplied: {}, processingTimestamp: new Date() }; } /** * Generate a simple extractive summary */ export function generateSummary(text: string, maxLength: number = 200): string { const sentences = extractSentences(text); if (sentences.length <= 2) { return text.substring(0, maxLength); } // Simple scoring based on sentence position and length const scoredSentences = sentences.map((sentence, index) => ({ sentence, score: scoreSentence(sentence, index, sentences.length) })); // Sort by score and take top sentences scoredSentences.sort((a, b) => b.score - a.score); let summary = ''; let currentLength = 0; for (const { sentence } of scoredSentences) { if (currentLength + sentence.length > maxLength) break; if (summary) summary += ' '; summary += sentence; currentLength += sentence.length; } return summary || (sentences[0] ? sentences[0].substring(0, maxLength) : ''); } /** * Calculate readability score (Flesch Reading Ease approximation) */ export function calculateReadabilityScore(text: string): number { // Input validation - return 0 for invalid or empty input if (!text || typeof text !== 'string') { return 0; } const sentences = extractSentences(text); const words = text.split(/\s+/).filter(w => w.length > 0); const syllables = words.reduce((total, word) => total + countSyllables(word), 0); if (sentences.length === 0 || words.length === 0) return 0; const avgWordsPerSentence = words.length / sentences.length; const avgSyllablesPerWord = syllables / words.length; // Flesch Reading Ease formula const score = 206.835 - (1.015 * avgWordsPerSentence) - (84.6 * avgSyllablesPerWord); // Clamp score between 0 and 100 return Math.max(0, Math.min(100, Math.round(score))); } /** * Helper functions */ function wrapText(text: string, maxLength: number): string { const words = text.split(' '); const lines: string[] = []; let currentLine = ''; for (const word of words) { if (currentLine.length + word.length + 1 <= maxLength) { currentLine += (currentLine ? ' ' : '') + word; } else { if (currentLine) lines.push(currentLine); currentLine = word; } } if (currentLine) lines.push(currentLine); return lines.join('\n'); } function extractSentences(text: string): string[] { return text.split(/[.!?]+/) .map(s => s.trim()) .filter(s => s.length > 0); } function scoreSentence(sentence: string, index: number, totalSentences: number): number { let score = 0; // Position bonus (first and early sentences are often important) if (index === 0) score += 3; else if (index < totalSentences * 0.2) score += 2; // Length bonus (medium-length sentences often contain key information) const words = sentence.split(/\s+/).length; if (words >= 10 && words <= 25) score += 2; // Keyword bonus (sentences with common important words) const importantWords = ['important', 'key', 'main', 'primary', 'significant', 'note', 'conclusion']; const lowerSentence = sentence.toLowerCase(); importantWords.forEach(word => { if (lowerSentence.includes(word)) score += 1; }); return score; } function countSyllables(word: string): number { word = word.toLowerCase(); if (word.length <= 3) return 1; const vowels = 'aeiouy'; let syllableCount = 0; let previousWasVowel = false; for (let i = 0; i < word.length; i++) { const currentChar = word[i]; if (currentChar) { const isVowel = vowels.includes(currentChar); if (isVowel && !previousWasVowel) { syllableCount++; } previousWasVowel = isVowel; } } // Adjust for silent 'e' if (word.endsWith('e')) syllableCount--; return Math.max(1, syllableCount); }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/okets/folder-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

processing.ts•6.56 KiB