/**
* Domain Layer - Content Module Tests
*
 * Unit tests for the content domain module, exercised against real business documents from the test knowledge base
*/
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import path from 'path';
import fs from 'fs/promises';
import { setupTestEnvironment, cleanupTestEnvironment, type TestEnvironment } from '../../helpers/setup.js';
import { ContentProcessor, type ChunkingOptions } from '../../../src/domain/content/index.js';
import type { ParsedContent } from '../../../src/types/index.js';
describe('Domain Layer - Content Module', () => {
let contentProcessor: ContentProcessor;
let testEnv: TestEnvironment;
let testKnowledgeBasePath: string;
beforeEach(async () => {
contentProcessor = new ContentProcessor();
testEnv = await setupTestEnvironment();
testKnowledgeBasePath = testEnv.folderPath;
});
afterEach(async () => {
await cleanupTestEnvironment(testEnv);
});
describe('Text Chunking', () => {
it('should create valid text chunks', () => {
const parsedContent: ParsedContent = {
content: 'This is a test document with some content for chunking.',
type: 'txt',
originalPath: 'test.txt'
};
const options: ChunkingOptions = {
maxTokens: 20,
overlapPercent: 0.25
};
const result = contentProcessor.chunkText(parsedContent, options);
expect(result).toHaveProperty('chunks');
expect(result).toHaveProperty('totalChunks');
expect(result.chunks).toBeInstanceOf(Array);
expect(result.totalChunks).toBeGreaterThan(0);
result.chunks.forEach((chunk, index) => {
expect(chunk).toHaveProperty('content');
expect(chunk).toHaveProperty('startPosition');
expect(chunk).toHaveProperty('endPosition');
expect(chunk).toHaveProperty('tokenCount');
expect(chunk).toHaveProperty('chunkIndex');
expect(chunk).toHaveProperty('metadata');
expect(chunk.chunkIndex).toBe(index);
expect(chunk.content.length).toBeGreaterThan(0);
});
});
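// Positional-consistency sketch: this assumes startPosition/endPosition are
// character offsets into the original content and that chunks are emitted in
// document order; adjust if the chunker uses a different coordinate scheme.
it('should emit chunks in document order with consistent positions', () => {
  const parsedContent: ParsedContent = {
    content: 'First sentence here. Second sentence here. Third sentence here.',
    type: 'txt',
    originalPath: 'ordered.txt'
  };
  const result = contentProcessor.chunkText(parsedContent, { maxTokens: 10, overlapPercent: 0 });
  result.chunks.forEach((chunk, index) => {
    expect(chunk.startPosition).toBeLessThanOrEqual(chunk.endPosition);
    if (index > 0) {
      // Assumed invariant: a later chunk never starts before an earlier one
      expect(chunk.startPosition).toBeGreaterThanOrEqual(result.chunks[index - 1].startPosition);
    }
  });
});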
it('should chunk real business document content', async () => {
// Chunk content loaded from a real business document in the test knowledge base
const realDocPath = path.join(testKnowledgeBasePath, 'Marketing', 'competitive_analysis.md');
const realContent = await fs.readFile(realDocPath, 'utf-8');
const parsedContent: ParsedContent = {
content: realContent,
type: 'md',
originalPath: realDocPath
};
const options: ChunkingOptions = {
maxTokens: 100, // Reasonable size for real content
overlapPercent: 0.25
};
const result = contentProcessor.chunkText(parsedContent, options);
// Test that chunking works with real business content
expect(result).toHaveProperty('chunks');
expect(result).toHaveProperty('totalChunks');
expect(result.chunks).toBeInstanceOf(Array);
expect(result.totalChunks).toBeGreaterThan(0);
// Real content should produce meaningful chunks
result.chunks.forEach((chunk, index) => {
expect(chunk).toHaveProperty('content');
expect(chunk).toHaveProperty('startPosition');
expect(chunk).toHaveProperty('endPosition');
expect(chunk).toHaveProperty('tokenCount');
expect(chunk).toHaveProperty('chunkIndex');
expect(chunk).toHaveProperty('metadata');
expect(chunk.chunkIndex).toBe(index);
expect(chunk.content.length).toBeGreaterThan(0);
// Chunk content should always be a plain string
expect(typeof chunk.content).toBe('string');
});
});
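// Bounds sketch against the same real document: assuming chunk positions
// index into the original string, every chunk should fall within the file.
it('should keep chunk positions within the real document bounds', async () => {
  const realDocPath = path.join(testKnowledgeBasePath, 'Marketing', 'competitive_analysis.md');
  const realContent = await fs.readFile(realDocPath, 'utf-8');
  const result = contentProcessor.chunkText(
    { content: realContent, type: 'md', originalPath: realDocPath },
    { maxTokens: 100, overlapPercent: 0.25 }
  );
  expect(result.totalChunks).toBeGreaterThan(0);
  result.chunks.forEach(chunk => {
    expect(chunk.startPosition).toBeGreaterThanOrEqual(0);
    expect(chunk.endPosition).toBeLessThanOrEqual(realContent.length);
  });
});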
it('should handle real empty file', async () => {
// Test with a real empty file from the test knowledge base
const emptyFilePath = path.join(testKnowledgeBasePath, 'test-edge-cases', 'empty.txt');
const emptyContent = await fs.readFile(emptyFilePath, 'utf-8');
const parsedContent: ParsedContent = {
content: emptyContent,
type: 'txt',
originalPath: emptyFilePath
};
const options: ChunkingOptions = {
maxTokens: 100,
overlapPercent: 0.1
};
const result = contentProcessor.chunkText(parsedContent, options);
expect(result.totalChunks).toBe(0);
expect(result.chunks).toHaveLength(0);
});
it('should respect chunking options', () => {
const parsedContent: ParsedContent = {
content: 'This is a much longer text that should definitely be chunked into multiple pieces based on the token limits we set.\n\n' +
'It contains multiple sentences and paragraphs.\n\n' +
'Each sentence has enough words to trigger chunking when we set a low token limit.\n\n' +
'This ensures that our chunking algorithm is working correctly with the specified parameters.\n\n' +
'Additional content to make sure we exceed the token limits and create multiple chunks.',
type: 'txt',
originalPath: 'test.txt'
};
const options: ChunkingOptions = {
maxTokens: 25,
minTokens: 10,
overlapPercent: 0.2
};
const result = contentProcessor.chunkText(parsedContent, options);
// The algorithm prioritizes sentence boundaries, so it may create fewer
// chunks than a strict token split; each chunk should still report a
// positive token count and a valid index
expect(result.totalChunks).toBeGreaterThan(0);
result.chunks.forEach(chunk => {
expect(chunk.tokenCount).toBeGreaterThan(0);
expect(chunk.content.length).toBeGreaterThan(0);
expect(chunk.chunkIndex).toBeGreaterThanOrEqual(0);
});
});
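// Loose size ceiling: sentence-boundary snapping can push a chunk past
// maxTokens when a single sentence is long, so this asserts a generous 2x
// bound rather than a hard limit; the 2x factor is an assumption, not a
// documented guarantee.
it('should keep chunk sizes near the configured maxTokens', () => {
  const parsedContent: ParsedContent = {
    content: 'Short sentence one. Short sentence two. Short sentence three. ' +
      'Short sentence four. Short sentence five. Short sentence six.',
    type: 'txt',
    originalPath: 'sizes.txt'
  };
  const maxTokens = 15;
  const result = contentProcessor.chunkText(parsedContent, { maxTokens, overlapPercent: 0.1 });
  result.chunks.forEach(chunk => {
    expect(chunk.tokenCount).toBeLessThanOrEqual(maxTokens * 2);
  });
});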
it('should handle empty content', () => {
const parsedContent: ParsedContent = {
content: '',
type: 'txt',
originalPath: 'empty.txt'
};
const options: ChunkingOptions = {
maxTokens: 100,
overlapPercent: 0.1
};
const result = contentProcessor.chunkText(parsedContent, options);
expect(result.totalChunks).toBe(0);
expect(result.chunks).toHaveLength(0);
});
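// Whitespace-only input is assumed to behave like empty input and yield
// zero chunks; if the chunker instead trims and emits a single chunk,
// this expectation should be relaxed accordingly.
it('should handle whitespace-only content', () => {
  const parsedContent: ParsedContent = {
    content: '   \n\t  \n',
    type: 'txt',
    originalPath: 'whitespace.txt'
  };
  const result = contentProcessor.chunkText(parsedContent, { maxTokens: 100, overlapPercent: 0.1 });
  expect(result.totalChunks).toBe(0);
  expect(result.chunks).toHaveLength(0);
});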
});
describe('Content Processing', () => {
it('should process content with metadata', () => {
const parsedContent: ParsedContent = {
content: 'Test document content with multiple words for processing.',
type: 'txt',
originalPath: 'test/document.txt'
};
const processed = contentProcessor.processContent(parsedContent);
expect(processed).toHaveProperty('processingMetadata');
expect(processed.processingMetadata).toHaveProperty('originalTokenCount');
expect(processed.processingMetadata).toHaveProperty('processingTime');
expect(processed.processingMetadata.contentType).toBe(parsedContent.type);
expect(processed.content).toBe(parsedContent.content);
});
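// Cross-check sketch: this assumes processContent records originalTokenCount
// using the same estimator exposed as estimateTokenCount; if the two code
// paths diverge, compare against the internal estimator instead.
it('should record a token count consistent with the public estimator', () => {
  const parsedContent: ParsedContent = {
    content: 'Consistency check content with several plain words.',
    type: 'txt',
    originalPath: 'consistency.txt'
  };
  const processed = contentProcessor.processContent(parsedContent);
  expect(processed.processingMetadata.originalTokenCount)
    .toBe(contentProcessor.estimateTokenCount(parsedContent.content));
});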
it('should extract metadata correctly', () => {
const parsedContent: ParsedContent = {
content: 'This is a test document with multiple words.',
type: 'txt',
originalPath: 'test/doc.txt'
};
const metadata = contentProcessor.extractMetadata(parsedContent);
expect(metadata.wordCount).toBe(8); // "This is a test document with multiple words"
expect(metadata.paragraphCount).toBeGreaterThan(0);
expect(metadata.estimatedReadingTime).toBeGreaterThan(0);
expect(metadata.contentType).toBe(parsedContent.type);
});
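// Monotonicity sketch: a document with strictly more words is assumed to
// report a larger wordCount, and estimatedReadingTime should not decrease.
it('should report larger word counts for longer content', () => {
  const shortDoc: ParsedContent = {
    content: 'One two three.',
    type: 'txt',
    originalPath: 'short.txt'
  };
  const longDoc: ParsedContent = {
    content: 'One two three four five six seven eight nine ten eleven twelve.',
    type: 'txt',
    originalPath: 'long.txt'
  };
  const shortMeta = contentProcessor.extractMetadata(shortDoc);
  const longMeta = contentProcessor.extractMetadata(longDoc);
  expect(longMeta.wordCount).toBeGreaterThan(shortMeta.wordCount);
  expect(longMeta.estimatedReadingTime).toBeGreaterThanOrEqual(shortMeta.estimatedReadingTime);
});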
});
describe('Token Estimation', () => {
it('should estimate token count correctly', () => {
const text = 'This is a sample text for token estimation testing.';
const tokenCount = contentProcessor.estimateTokenCount(text);
expect(tokenCount).toBeGreaterThan(0);
expect(typeof tokenCount).toBe('number');
});
it('should handle empty text', () => {
const tokenCount = contentProcessor.estimateTokenCount('');
expect(tokenCount).toBe(0);
});
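// Scaling sketch: word- and character-based estimators grow with input
// length, so doubling the text is assumed not to shrink the estimate.
it('should not shrink the estimate when text grows', () => {
  const base = 'Token estimation sample sentence for scaling checks.';
  const single = contentProcessor.estimateTokenCount(base);
  const doubled = contentProcessor.estimateTokenCount(base + ' ' + base);
  expect(doubled).toBeGreaterThanOrEqual(single);
});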
});
describe('Sentence Boundaries', () => {
it('should find sentence boundaries', () => {
const text = 'First sentence. Second sentence! Third sentence?';
const boundaries = contentProcessor.findSentenceBoundaries(text);
expect(boundaries).toBeInstanceOf(Array);
expect(boundaries.length).toBeGreaterThan(1);
expect(boundaries[0]).toBe(0); // Should start at 0
expect(boundaries[boundaries.length - 1]).toBe(text.length); // Should end at text length
});
it('should handle text without sentence endings', () => {
const text = 'No sentence endings here';
const boundaries = contentProcessor.findSentenceBoundaries(text);
expect(boundaries).toEqual([0, text.length]);
});
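// Ordering sketch: boundary offsets are assumed to be strictly increasing,
// since a well-formed boundary list never repeats or reverses a position.
it('should return strictly increasing boundaries', () => {
  const text = 'Alpha sentence. Beta sentence! Gamma sentence?';
  const boundaries = contentProcessor.findSentenceBoundaries(text);
  for (let i = 1; i < boundaries.length; i++) {
    expect(boundaries[i]).toBeGreaterThan(boundaries[i - 1]);
  }
});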
});
});