/**
* Simple Markdown Indexer
*
 * A simplified implementation for indexing Markdown documents into an in-memory vector store
 * using LangChain's RecursiveCharacterTextSplitter and MemoryVectorStore.
 * The chunked documents (not the embeddings) are persisted to a JSON file, so the
 * vector store can be rebuilt by re-embedding them across different script executions.
*/
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { Document } from 'langchain/document';
import { MemoryVectorStore } from 'langchain/vectorstores/memory';
import * as fs from 'fs';
import * as path from 'path';
import { fileURLToPath, pathToFileURL } from 'url';
// Embeddings are provided by local sentence-transformers models (no API key required)
import { SentenceTransformersEmbeddings } from './sentence-transformers-embeddings.js';
import log from '../../logger.js';
let embeddings: SentenceTransformersEmbeddings | null = null;
/**
* Initialize embeddings lazily when needed
* Uses sentence-transformers exclusively (no API key required)
*/
function getEmbeddings(): SentenceTransformersEmbeddings {
if (embeddings) {
return embeddings;
}
  try {
    // Use local sentence-transformers (no API key required)
    const modelName =
      process.env.SENTENCE_TRANSFORMERS_MODEL || 'Xenova/all-MiniLM-L6-v2';
    embeddings = new SentenceTransformersEmbeddings({ modelName });
    log.info(`Using sentence-transformers model: ${modelName}`);
} catch (error) {
throw new Error(
`Failed to initialize embeddings: ${
error instanceof Error ? error.message : String(error)
}`
);
}
return embeddings;
}
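// Example (illustrative, not executed): selecting a different local model via the
// environment before this module is loaded. The model name and entry-point file
// below are assumptions; any model supported by SentenceTransformersEmbeddings
// should work.
//
//   SENTENCE_TRANSFORMERS_MODEL=Xenova/all-mpnet-base-v2 node markdown-indexer.js ./docs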
// Path to store the documents
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const DOCUMENTS_PATH = path.join(__dirname, './uploads/documents.json');
// Global variable to store the in-memory vector store
let memoryVectorStore: MemoryVectorStore | null = null;
/**
* Save the documents to a file
* @param documents The documents to save
* @param append Whether to append to existing documents or overwrite
*/
async function saveDocuments(
documents: Document[],
append: boolean = false
): Promise<void> {
try {
// Create directory if it doesn't exist
const dir = path.dirname(DOCUMENTS_PATH);
if (!fs.existsSync(dir)) {
fs.mkdirSync(dir, { recursive: true });
}
// Serialize the new documents
const serializedNew = documents.map(doc => ({
pageContent: doc.pageContent,
metadata: doc.metadata,
}));
let allSerialized = serializedNew;
// If appending and file exists, read existing documents and combine
if (append && fs.existsSync(DOCUMENTS_PATH)) {
try {
const existingContent = fs.readFileSync(DOCUMENTS_PATH, 'utf-8');
if (existingContent) {
const existingSerialized = JSON.parse(existingContent);
allSerialized = [...existingSerialized, ...serializedNew];
log.info(
`Appending ${serializedNew.length} documents to existing ${existingSerialized.length} documents`
);
}
} catch (readError) {
log.warn(
'Error reading existing documents, overwriting instead:',
readError
);
}
}
// Write to file
fs.writeFileSync(DOCUMENTS_PATH, JSON.stringify(allSerialized));
log.info(
`${
append ? 'Appended to' : 'Saved'
} documents in ${DOCUMENTS_PATH} (total: ${allSerialized.length})`
);
} catch (error) {
log.error('Error saving documents:', error);
throw error;
}
}
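// For reference, the persisted file is a flat JSON array of serialized chunks.
// Shape only; the values below are illustrative:
//
//   [
//     {
//       "pageContent": "# Getting Started\n...",
//       "metadata": {
//         "source": "/abs/path/docs/intro.md",
//         "filename": "intro.md",
//         "relativePath": "intro.md"
//       }
//     }
//   ]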
/**
* Clear the documents file
*/
async function clearDocumentsFile(): Promise<void> {
try {
if (fs.existsSync(DOCUMENTS_PATH)) {
fs.writeFileSync(DOCUMENTS_PATH, JSON.stringify([]));
log.info(`Cleared documents file at ${DOCUMENTS_PATH}`);
}
} catch (error) {
log.error('Error clearing documents file:', error);
throw error;
}
}
/**
* Load the documents from a file
* @returns The loaded documents or null if the file doesn't exist
*/
async function loadDocuments(): Promise<Document[] | null> {
try {
if (!fs.existsSync(DOCUMENTS_PATH)) {
log.info('No saved documents found');
return null;
}
// Read from file
const serialized = JSON.parse(fs.readFileSync(DOCUMENTS_PATH, 'utf-8'));
// Convert to Document objects
const documents = serialized.map(
(doc: any) =>
new Document({
pageContent: doc.pageContent,
metadata: doc.metadata,
})
);
log.info(`${documents.length} documents loaded from ${DOCUMENTS_PATH}`);
return documents;
} catch (error) {
log.error('Error loading documents:', error);
return null;
}
}
/**
* Extract text from a Markdown file
* @param markdownPath Path to the Markdown file
* @returns Extracted text as a string
*/
async function extractTextFromMarkdown(markdownPath: string): Promise<string> {
try {
const text = fs.readFileSync(markdownPath, 'utf-8');
return text;
} catch (error) {
log.error('Error extracting text from Markdown:', error);
throw new Error(
`Failed to extract text from Markdown: ${
error instanceof Error ? error.message : String(error)
}`
);
}
}
/**
* Initialize the vector store with Markdown content
* @param markdownPath Path to the Markdown file
* @param chunkSize Size of each chunk in characters
* @param chunkOverlap Number of characters to overlap between chunks
* @returns The initialized MemoryVectorStore
*/
export async function initializeVectorStore(
markdownPath: string,
chunkSize: number = 1000,
chunkOverlap: number = 200
): Promise<MemoryVectorStore> {
try {
log.info(`Initializing vector store for Markdown: ${markdownPath}`);
log.info(`Using chunk size: ${chunkSize}, overlap: ${chunkOverlap}`);
// Extract text from Markdown
log.info('Extracting text from Markdown...');
const markdownText = await extractTextFromMarkdown(markdownPath);
log.info(`Extracted ${markdownText.length} characters from Markdown`);
// Create text splitter
const textSplitter = new RecursiveCharacterTextSplitter({
chunkSize,
chunkOverlap,
});
// Split text into documents
log.info('Splitting text into chunks...');
const documents = await textSplitter.createDocuments([markdownText]);
log.info(`Created ${documents.length} document chunks`);
// Store documents in memory vector store
log.info('Storing documents in memory vector store...');
const vectorStore = await MemoryVectorStore.fromDocuments(
documents,
getEmbeddings()
);
// Save the vector store in the global variable for later use
memoryVectorStore = vectorStore;
// Save documents to file for persistence
await saveDocuments(documents, false); // Don't append for single file indexing
log.info('Successfully stored documents in memory vector store');
return vectorStore;
} catch (error) {
log.error('Error initializing vector store:', error);
throw error;
}
}
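// Example (sketch, assuming an ES module with top-level await; the file path,
// query text, and chunking parameters are illustrative):
//
//   const store = await initializeVectorStore('./docs/guide.md', 500, 100);
//   const hits = await store.similaritySearch('installation steps', 3);
//   hits.forEach(hit => log.info(hit.pageContent.slice(0, 80)));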
/**
* Get all Markdown files in a directory recursively
* @param dirPath Path to the directory
* @returns Array of Markdown file paths
*/
export async function getMarkdownFilesInDirectory(
dirPath: string
): Promise<string[]> {
try {
    const { readdir, stat } = fs.promises;
// Check if directory exists
if (!fs.existsSync(dirPath)) {
log.error(`Directory does not exist: ${dirPath}`);
return [];
}
const markdownFiles: string[] = [];
async function scanDirectory(currentPath: string): Promise<void> {
const files = await readdir(currentPath);
for (const file of files) {
const filePath = path.join(currentPath, file);
const stats = await stat(filePath);
if (stats.isDirectory()) {
// Recursively scan subdirectories
await scanDirectory(filePath);
} else if (
stats.isFile() &&
path.extname(file).toLowerCase() === '.md'
) {
markdownFiles.push(filePath);
}
}
}
await scanDirectory(dirPath);
log.info(`Found ${markdownFiles.length} Markdown files in ${dirPath}`);
return markdownFiles;
} catch (error) {
log.error('Error getting Markdown files:', error);
return [];
}
}
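// Example (sketch): listing the files that would be indexed without actually
// indexing them. The directory path is illustrative.
//
//   const files = await getMarkdownFilesInDirectory('./docs');
//   files.forEach(file => log.info(`Would index: ${file}`));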
/**
* Index a Markdown file into the memory vector store
* @param markdownPath Path to the Markdown file
* @param chunkSize Size of each chunk in characters
* @param chunkOverlap Number of characters to overlap between chunks
*/
export async function indexMarkdown(
markdownPath: string,
chunkSize: number = 1000,
chunkOverlap: number = 200
): Promise<void> {
try {
log.info('Starting Markdown indexing process...');
// Initialize vector store
await initializeVectorStore(markdownPath, chunkSize, chunkOverlap);
log.info('Markdown indexing completed successfully');
} catch (error) {
log.error('Markdown indexing failed:', error);
throw error;
}
}
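// Example (sketch): indexing a single file with the default chunking parameters
// (1000 characters per chunk, 200 characters of overlap). Path is illustrative.
//
//   await indexMarkdown('./docs/README.md');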
/**
* Index all Markdown files in a directory
* @param dirPath Path to the directory containing Markdown files
* @param chunkSize Size of each chunk in characters
* @param chunkOverlap Number of characters to overlap between chunks
* @returns Array of indexed Markdown file paths
*/
export async function indexAllMarkdownFiles(
dirPath: string,
chunkSize: number = 1000,
chunkOverlap: number = 200
): Promise<string[]> {
try {
log.info(
`Starting indexing of all Markdown files in directory: ${dirPath}`
);
// Get all Markdown files in the directory
const markdownFiles = await getMarkdownFilesInDirectory(dirPath);
if (markdownFiles.length === 0) {
log.info('No Markdown files found in the directory');
return [];
}
// Clear the documents file before starting
await clearDocumentsFile();
// Index each Markdown file
const indexedFiles: string[] = [];
for (let i = 0; i < markdownFiles.length; i++) {
const markdownFile = markdownFiles[i];
try {
log.info(
`Indexing Markdown ${i + 1}/${markdownFiles.length}: ${markdownFile}`
);
// Extract text from Markdown
log.info('Extracting text from Markdown...');
const markdownText = await extractTextFromMarkdown(markdownFile);
log.info(`Extracted ${markdownText.length} characters from Markdown`);
// Create text splitter
const textSplitter = new RecursiveCharacterTextSplitter({
chunkSize,
chunkOverlap,
});
// Split text into documents
log.info('Splitting text into chunks...');
const documents = await textSplitter.createDocuments([markdownText]);
log.info(`Created ${documents.length} document chunks`);
// Add file metadata to each document
const filename = path.basename(markdownFile);
const relativePath = path.relative(dirPath, markdownFile);
documents.forEach(doc => {
doc.metadata = {
...doc.metadata,
source: markdownFile,
filename: filename,
relativePath: relativePath,
};
});
        // Store documents in memory vector store
        log.info('Storing documents in memory vector store...');
        if (!memoryVectorStore) {
          // First successfully indexed file: create a new vector store.
          // Checking for null (rather than i === 0) avoids a silent no-op
          // when the first file fails and the store was never created.
          memoryVectorStore = await MemoryVectorStore.fromDocuments(
            documents,
            getEmbeddings()
          );
        } else {
          // Subsequent files: add to the existing vector store
          await memoryVectorStore.addDocuments(documents);
        }
// Save documents to file (append for all Markdown files except the first one)
await saveDocuments(documents, i > 0);
indexedFiles.push(markdownFile);
log.info(`Successfully indexed Markdown: ${filename}`);
} catch (error) {
log.error(`Error indexing Markdown ${markdownFile}:`, error);
// Continue with next file even if one fails
}
}
log.info(
`Successfully indexed ${indexedFiles.length} out of ${markdownFiles.length} Markdown files`
);
return indexedFiles;
} catch (error) {
log.error('Error indexing all Markdown files:', error);
throw error;
}
}
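// Example (sketch): re-indexing a documentation tree and reporting which files
// succeeded. The directory path is illustrative.
//
//   const indexed = await indexAllMarkdownFiles('./docs', 1000, 200);
//   log.info(`Indexed ${indexed.length} files: ${indexed.join(', ')}`);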
/**
* Query the vector store for similar documents
* @param query The query text
* @param topK Number of results to return
* @returns Array of documents with their content and metadata
*/
export async function queryVectorStore(
query: string,
topK: number = 25
): Promise<Document[]> {
try {
// Check if the vector store has been initialized in memory
if (!memoryVectorStore) {
// Try to load documents from file
const documents = await loadDocuments();
// If documents exist, create a new vector store
if (documents && documents.length > 0) {
log.info('Creating vector store from saved documents...');
memoryVectorStore = await MemoryVectorStore.fromDocuments(
documents,
getEmbeddings()
);
} else {
        throw new Error(
          'Vector store has not been initialized. Please index a Markdown file first.'
        );
}
}
// Query the vector store
const results = await memoryVectorStore.similaritySearch(query, topK);
return results;
} catch (error) {
log.error('Error querying vector store:', error);
throw error;
}
}
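// Example (sketch): querying in a later process. The store is rebuilt from
// uploads/documents.json, re-embedding the saved chunks. Query text and topK
// are illustrative; filename is only set by directory indexing, hence the
// fallback.
//
//   const results = await queryVectorStore('how do I configure logging?', 5);
//   for (const doc of results) {
//     log.info(`${doc.metadata.filename ?? 'unknown'}: ${doc.pageContent.slice(0, 80)}`);
//   }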
// This allows the script to be run directly from the command line
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
// Parse command line arguments
const args = process.argv.slice(2);
let markdownPath: string;
let chunkSize = 1000; // Default chunk size
let chunkOverlap = 200; // Default overlap
let indexSingleFile = false;
// Get Markdown path or directory path
if (args.length > 0 && args[0]) {
// Use provided path
markdownPath = path.resolve(process.cwd(), args[0]);
// Check if the provided path is a file or directory
if (fs.existsSync(markdownPath) && fs.statSync(markdownPath).isFile()) {
indexSingleFile = true;
}
  } else {
    // Use default path to resources directory (relative to this module)
    markdownPath = path.resolve(__dirname, '../../resources');
  }
// Get chunk size if provided
if (args.length > 1 && !isNaN(Number(args[1]))) {
chunkSize = Number(args[1]);
}
// Get overlap if provided
if (args.length > 2 && !isNaN(Number(args[2]))) {
chunkOverlap = Number(args[2]);
}
// Log embeddings provider that will be used
log.info('Using sentence-transformers embeddings (no API key required)');
// Run the indexing process
if (indexSingleFile) {
// Index a single Markdown file
log.info(`Indexing single Markdown file: ${markdownPath}`);
indexMarkdown(markdownPath, chunkSize, chunkOverlap)
.then(() => {
process.exit(0);
})
.catch(error => {
log.error('Indexing failed:', error);
process.exit(1);
});
} else {
// Index all Markdown files in the directory
log.info(`Indexing all Markdown files in directory: ${markdownPath}`);
indexAllMarkdownFiles(markdownPath, chunkSize, chunkOverlap)
.then(indexedFiles => {
log.info(`Successfully indexed ${indexedFiles.length} Markdown files`);
process.exit(0);
})
.catch(error => {
log.error('Indexing failed:', error);
process.exit(1);
});
}
}
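// Example invocations (assuming a compiled markdown-indexer.js entry point;
// substitute ts-node/tsx as appropriate):
//
//   node markdown-indexer.js ./docs             # index every .md file under ./docs
//   node markdown-indexer.js ./docs/guide.md    # index a single file
//   node markdown-indexer.js ./docs 500 100     # custom chunk size and overlap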