db-mcp

text.ts•40.2 KiB

/** * SQLite Text Processing Tools * * String manipulation and pattern matching: * regex, split, concat, format, fuzzy match, phonetic, normalize, validate. * 13 tools total. */ import { z } from "zod"; import type { SqliteAdapter } from "../SqliteAdapter.js"; import type { ToolDefinition, RequestContext } from "../../../types/index.js"; import { readOnly, write } from "../../../utils/annotations.js"; import { validateWhereClause, sanitizeIdentifier, } from "../../../utils/index.js"; import { RegexMatchOutputSchema, TextSplitOutputSchema, RegexReplaceOutputSchema, } from "../output-schemas.js"; // Text tool schemas const RegexExtractSchema = z.object({ table: z.string().describe("Table name"), column: z.string().describe("Column to extract from"), pattern: z.string().describe("Regular expression pattern"), groupIndex: z.number().optional().default(0).describe("Capture group index"), whereClause: z.string().optional(), limit: z.number().optional().default(100), }); const RegexMatchSchema = z.object({ table: z.string().describe("Table name"), column: z.string().describe("Column to match"), pattern: z.string().describe("Regular expression pattern"), whereClause: z.string().optional(), limit: z.number().optional().default(100), }); const TextSplitSchema = z.object({ table: z.string().describe("Table name"), column: z.string().describe("Column to split"), delimiter: z.string().describe("Delimiter string"), whereClause: z.string().optional(), limit: z.number().optional().default(100), }); const TextConcatSchema = z.object({ table: z.string().describe("Table name"), columns: z.array(z.string()).describe("Columns to concatenate"), separator: z .string() .optional() .default("") .describe("Separator between values"), whereClause: z.string().optional(), limit: z.number().optional().default(100), }); const TextReplaceSchema = z.object({ table: z.string().describe("Table name"), column: z.string().describe("Column to update"), searchPattern: z.string().describe("Text to search for"), replaceWith: z.string().describe("Replacement text"), whereClause: z.string().describe("WHERE clause"), }); const TextTrimSchema = z.object({ table: z.string().describe("Table name"), column: z.string().describe("Column to trim"), mode: z.enum(["both", "left", "right"]).optional().default("both"), whereClause: z.string().optional(), limit: z.number().optional().default(100), }); const TextCaseSchema = z.object({ table: z.string().describe("Table name"), column: z.string().describe("Column to transform"), mode: z.enum(["upper", "lower"]).describe("Case transformation"), whereClause: z.string().optional(), limit: z.number().optional().default(100), }); const TextSubstringSchema = z.object({ table: z.string().describe("Table name"), column: z.string().describe("Column to extract from"), start: z.number().describe("Start position (1-indexed)"), length: z.number().optional().describe("Number of characters"), whereClause: z.string().optional(), limit: z.number().optional().default(100), }); // New text tool schemas const FuzzyMatchSchema = z.object({ table: z.string().describe("Table name"), column: z.string().describe("Column to search"), search: z.string().describe("Search string"), maxDistance: z .number() .optional() .default(3) .describe("Maximum Levenshtein distance"), tokenize: z .boolean() .optional() .default(true) .describe( "Split column values into words and match against tokens (default: true). Set false to match entire column value.", ), limit: z.number().optional().default(10), }); const PhoneticMatchSchema = z.object({ table: z.string().describe("Table name"), column: z.string().describe("Column to search"), search: z.string().describe("Search string"), algorithm: z.enum(["soundex", "metaphone"]).optional().default("soundex"), limit: z.number().optional().default(100), includeRowData: z .boolean() .optional() .default(true) .describe("Include full row data in results (default: true)"), }); const TextNormalizeSchema = z.object({ table: z.string().describe("Table name"), column: z.string().describe("Column to normalize"), mode: z .enum(["nfc", "nfd", "nfkc", "nfkd", "strip_accents"]) .describe("Normalization mode"), whereClause: z.string().optional(), limit: z.number().optional().default(100), }); const TextValidateSchema = z.object({ table: z.string().describe("Table name"), column: z.string().describe("Column to validate"), pattern: z .enum(["email", "phone", "url", "uuid", "ipv4", "custom"]) .describe("Validation pattern"), customPattern: z .string() .optional() .describe("Custom regex (required if pattern=custom)"), whereClause: z.string().optional(), limit: z.number().optional().default(100), }); const AdvancedSearchSchema = z.object({ table: z.string().describe("Table name"), column: z.string().describe("Column to search"), searchTerm: z.string().describe("Search term"), techniques: z .array(z.enum(["exact", "fuzzy", "phonetic"])) .optional() .default(["exact", "fuzzy", "phonetic"]) .describe("Search techniques to use"), fuzzyThreshold: z .number() .optional() .default(0.6) .describe( "Fuzzy match similarity threshold (0-1). Lower values are more lenient: 0.3-0.4 for loose matching (e.g., 'laptob' matches 'laptop'), 0.6-0.8 for strict matching.", ), whereClause: z.string().optional(), limit: z.number().optional().default(100), }); /** * Get all text processing tools */ export function getTextTools(adapter: SqliteAdapter): ToolDefinition[] { return [ createRegexExtractTool(adapter), createRegexMatchTool(adapter), createTextSplitTool(adapter), createTextConcatTool(adapter), createTextReplaceTool(adapter), createTextTrimTool(adapter), createTextCaseTool(adapter), createTextSubstringTool(adapter), createFuzzyMatchTool(adapter), createPhoneticMatchTool(adapter), createTextNormalizeTool(adapter), createTextValidateTool(adapter), createAdvancedSearchTool(adapter), ]; } /** * Extract text using regex pattern * Note: SQLite doesn't have native regex, we do this in JS */ function createRegexExtractTool(adapter: SqliteAdapter): ToolDefinition { return { name: "sqlite_regex_extract", description: "Extract text matching a regex pattern. Processed in JavaScript after fetching data.", group: "text", inputSchema: RegexExtractSchema, outputSchema: RegexMatchOutputSchema, requiredScopes: ["read"], annotations: readOnly("Regex Extract"), handler: async (params: unknown, _context: RequestContext) => { const input = RegexExtractSchema.parse(params); // Validate and quote identifiers const table = sanitizeIdentifier(input.table); const column = sanitizeIdentifier(input.column); let sql = `SELECT rowid as id, ${column} as value FROM ${table}`; if (input.whereClause) { validateWhereClause(input.whereClause); sql += ` WHERE ${input.whereClause}`; } sql += ` LIMIT ${input.limit}`; const result = await adapter.executeReadQuery(sql); // Apply regex in JavaScript const regex = new RegExp(input.pattern); const extracts = (result.rows ?? []) .map((row) => { const rawValue = row["value"]; const value = typeof rawValue === "string" ? rawValue : JSON.stringify(rawValue ?? ""); const match = regex.exec(value); // Safely coerce rowid to number, defaulting to row index or 0 const rawRowid = row["id"]; const rowid = typeof rawRowid === "number" ? rawRowid : typeof rawRowid === "string" ? parseInt(rawRowid, 10) || 0 : 0; return { rowid, original: value, extracted: match ? (match[input.groupIndex] ?? match[0]) : null, }; }) .filter((r) => r.extracted !== null); return { success: true, rowCount: extracts.length, matches: extracts, }; }, }; } /** * Match rows using regex pattern */ function createRegexMatchTool(adapter: SqliteAdapter): ToolDefinition { return { name: "sqlite_regex_match", description: "Find rows where column matches a regex pattern. Processed in JavaScript.", group: "text", inputSchema: RegexMatchSchema, outputSchema: RegexMatchOutputSchema, requiredScopes: ["read"], annotations: readOnly("Regex Match"), handler: async (params: unknown, _context: RequestContext) => { const input = RegexMatchSchema.parse(params); // Validate and quote identifiers const table = sanitizeIdentifier(input.table); const column = sanitizeIdentifier(input.column); let sql = `SELECT rowid as id, ${column} as value FROM ${table}`; if (input.whereClause) { validateWhereClause(input.whereClause); sql += ` WHERE ${input.whereClause}`; } sql += ` LIMIT ${input.limit}`; const result = await adapter.executeReadQuery(sql); // Apply regex in JavaScript const regex = new RegExp(input.pattern); const matches = (result.rows ?? []) .filter((row) => { const rawValue = row["value"]; const value = typeof rawValue === "string" ? rawValue : JSON.stringify(rawValue ?? ""); return regex.test(value); }) .map((row) => { // Ensure rowid is a number for output schema compliance const rawRowid = row["id"]; const rowid = typeof rawRowid === "number" ? rawRowid : typeof rawRowid === "string" ? parseInt(rawRowid, 10) || 0 : 0; return { ...row, rowid }; }); return { success: true, rowCount: matches.length, matches, }; }, }; } /** * Split text into array */ function createTextSplitTool(adapter: SqliteAdapter): ToolDefinition { return { name: "sqlite_text_split", description: "Split a text column by delimiter into array results.", group: "text", inputSchema: TextSplitSchema, outputSchema: TextSplitOutputSchema, requiredScopes: ["read"], annotations: readOnly("Text Split"), handler: async (params: unknown, _context: RequestContext) => { const input = TextSplitSchema.parse(params); // Validate and quote identifiers const table = sanitizeIdentifier(input.table); const column = sanitizeIdentifier(input.column); let sql = `SELECT rowid as id, ${column} as value FROM ${table}`; if (input.whereClause) { validateWhereClause(input.whereClause); sql += ` WHERE ${input.whereClause}`; } sql += ` LIMIT ${input.limit}`; const result = await adapter.executeReadQuery(sql); // Split in JavaScript - return per-row results for traceability const rows = (result.rows ?? []).map((row) => { const rawRowid = row["id"]; const rowid = typeof rawRowid === "number" ? rawRowid : typeof rawRowid === "string" ? parseInt(rawRowid, 10) || 0 : 0; const rawValue = row["value"]; const original = typeof rawValue === "string" ? rawValue : rawValue === null || rawValue === undefined ? "" : JSON.stringify(rawValue); const parts = original.split(input.delimiter); return { rowid, original: rawValue === null ? null : original, parts, }; }); return { success: true, rowCount: rows.length, rows, }; }, }; } /** * Concatenate columns */ function createTextConcatTool(adapter: SqliteAdapter): ToolDefinition { return { name: "sqlite_text_concat", description: "Concatenate multiple columns with optional separator.", group: "text", inputSchema: TextConcatSchema, requiredScopes: ["read"], annotations: readOnly("Text Concat"), handler: async (params: unknown, _context: RequestContext) => { const input = TextConcatSchema.parse(params); // Validate and quote identifiers const table = sanitizeIdentifier(input.table); const quotedCols = input.columns.map((c) => sanitizeIdentifier(c)); // Build concatenation expression using || operator const sep = input.separator.replace(/'/g, "''"); // Build: COALESCE(col1, '') || 'sep' || COALESCE(col2, '') || ... const concatExpr = quotedCols .map((c) => `COALESCE(${c}, '')`) .join(` || '${sep}' || `); let sql = `SELECT ${concatExpr} as concatenated FROM ${table}`; if (input.whereClause) { validateWhereClause(input.whereClause); sql += ` WHERE ${input.whereClause}`; } sql += ` LIMIT ${input.limit}`; const result = await adapter.executeReadQuery(sql); return { success: true, rowCount: result.rows?.length ?? 0, values: result.rows?.map((r) => r["concatenated"]), }; }, }; } /** * Replace text in column */ function createTextReplaceTool(adapter: SqliteAdapter): ToolDefinition { return { name: "sqlite_text_replace", description: "Replace text in a column using SQLite replace() function.", group: "text", inputSchema: TextReplaceSchema, outputSchema: RegexReplaceOutputSchema, requiredScopes: ["write"], annotations: write("Text Replace"), handler: async (params: unknown, _context: RequestContext) => { const input = TextReplaceSchema.parse(params); // Validate and quote identifiers const table = sanitizeIdentifier(input.table); const column = sanitizeIdentifier(input.column); const search = input.searchPattern.replace(/'/g, "''"); const replace = input.replaceWith.replace(/'/g, "''"); validateWhereClause(input.whereClause); const sql = `UPDATE ${table} SET ${column} = replace(${column}, '${search}', '${replace}') WHERE ${input.whereClause}`; const result = await adapter.executeWriteQuery(sql); return { success: true, rowsAffected: result.rowsAffected, }; }, }; } /** * Trim whitespace */ function createTextTrimTool(adapter: SqliteAdapter): ToolDefinition { return { name: "sqlite_text_trim", description: "Trim whitespace from text column values.", group: "text", inputSchema: TextTrimSchema, requiredScopes: ["read"], annotations: readOnly("Text Trim"), handler: async (params: unknown, _context: RequestContext) => { const input = TextTrimSchema.parse(params); // Validate and quote identifiers const table = sanitizeIdentifier(input.table); const column = sanitizeIdentifier(input.column); let trimFunc: string; switch (input.mode) { case "left": trimFunc = "ltrim"; break; case "right": trimFunc = "rtrim"; break; default: trimFunc = "trim"; } let sql = `SELECT rowid, ${column} as original, ${trimFunc}(${column}) as trimmed FROM ${table}`; if (input.whereClause) { validateWhereClause(input.whereClause); sql += ` WHERE ${input.whereClause}`; } sql += ` LIMIT ${input.limit}`; const result = await adapter.executeReadQuery(sql); return { success: true, rowCount: result.rows?.length ?? 0, results: result.rows, }; }, }; } /** * Change text case */ function createTextCaseTool(adapter: SqliteAdapter): ToolDefinition { return { name: "sqlite_text_case", description: "Convert text to uppercase or lowercase.", group: "text", inputSchema: TextCaseSchema, requiredScopes: ["read"], annotations: readOnly("Text Case"), handler: async (params: unknown, _context: RequestContext) => { const input = TextCaseSchema.parse(params); // Validate and quote identifiers const table = sanitizeIdentifier(input.table); const column = sanitizeIdentifier(input.column); const caseFunc = input.mode === "upper" ? "upper" : "lower"; let sql = `SELECT rowid, ${column} as original, ${caseFunc}(${column}) as transformed FROM ${table}`; if (input.whereClause) { validateWhereClause(input.whereClause); sql += ` WHERE ${input.whereClause}`; } sql += ` LIMIT ${input.limit}`; const result = await adapter.executeReadQuery(sql); return { success: true, rowCount: result.rows?.length ?? 0, results: result.rows, }; }, }; } /** * Extract substring */ function createTextSubstringTool(adapter: SqliteAdapter): ToolDefinition { return { name: "sqlite_text_substring", description: "Extract a substring from text column using substr().", group: "text", inputSchema: TextSubstringSchema, requiredScopes: ["read"], annotations: readOnly("Text Substring"), handler: async (params: unknown, _context: RequestContext) => { const input = TextSubstringSchema.parse(params); // Validate and quote identifiers const table = sanitizeIdentifier(input.table); const column = sanitizeIdentifier(input.column); const substrExpr = input.length !== undefined ? `substr(${column}, ${input.start}, ${input.length})` : `substr(${column}, ${input.start})`; let sql = `SELECT rowid, ${column} as original, ${substrExpr} as substring FROM ${table}`; if (input.whereClause) { validateWhereClause(input.whereClause); sql += ` WHERE ${input.whereClause}`; } sql += ` LIMIT ${input.limit}`; const result = await adapter.executeReadQuery(sql); return { success: true, rowCount: result.rows?.length ?? 0, results: result.rows, }; }, }; } // ============================================================================= // New Text Tools: Fuzzy, Phonetic, Normalize, Validate // ============================================================================= /** * Calculate Levenshtein distance between two strings */ function levenshtein(a: string, b: string): number { const aLower = a.toLowerCase(); const bLower = b.toLowerCase(); // Handle edge cases if (aLower === bLower) return 0; if (aLower.length === 0) return bLower.length; if (bLower.length === 0) return aLower.length; // Create matrix with proper initialization const rows = bLower.length + 1; const cols = aLower.length + 1; const matrix: number[][] = []; for (let i = 0; i < rows; i++) { matrix.push(new Array<number>(cols).fill(0)); } // Initialize first column and row for (let i = 0; i < rows; i++) { const row = matrix[i]; if (row) row[0] = i; } for (let j = 0; j < cols; j++) { const firstRow = matrix[0]; if (firstRow) firstRow[j] = j; } // Fill in the rest of the matrix for (let i = 1; i < rows; i++) { for (let j = 1; j < cols; j++) { const currentRow = matrix[i]; const prevRow = matrix[i - 1]; if (!currentRow || !prevRow) continue; const cost = bLower[i - 1] === aLower[j - 1] ? 0 : 1; const del = (prevRow[j] ?? 0) + 1; const ins = (currentRow[j - 1] ?? 0) + 1; const sub = (prevRow[j - 1] ?? 0) + cost; currentRow[j] = Math.min(del, ins, sub); } } const lastRow = matrix[bLower.length]; return lastRow?.[aLower.length] ?? 0; } /** * Simple Metaphone implementation */ function metaphone(word: string): string { const vowels = "AEIOU"; let result = ""; const w = word.toUpperCase().replace(/[^A-Z]/g, ""); for (let i = 0; i < w.length && result.length < 4; i++) { const c = w.charAt(i); const prev = i > 0 ? w.charAt(i - 1) : ""; const next = i < w.length - 1 ? w.charAt(i + 1) : ""; // Skip duplicate adjacent letters if (c === prev && c !== "C") continue; // Skip vowels except at start if (vowels.includes(c) && i > 0) continue; // Consonant rules (simplified) if (c === "B" && i === w.length - 1 && prev === "M") continue; if (c === "C") { if (next === "H") { result += "X"; i++; } else if ("IEY".includes(next)) result += "S"; else result += "K"; } else if (c === "D") { const afterNext = w.charAt(i + 2); if (next === "G" && "IEY".includes(afterNext)) { result += "J"; i++; } else result += "T"; } else if (c === "G") { if (next === "H") continue; if ("IEY".includes(next)) result += "J"; else result += "K"; } else if (c === "K" && prev === "C") continue; else if (c === "P" && next === "H") { result += "F"; i++; } else if (c === "Q") result += "K"; else if (c === "S" && next === "H") { result += "X"; i++; } else if (c === "T" && next === "H") { result += "0"; i++; } else if (c === "W" && vowels.includes(next)) result += "W"; else if (c === "X") result += "KS"; else if (c === "Z") result += "S"; else if (!"HW".includes(c)) result += c; } return result; } /** * Strip accents from text */ function stripAccents(text: string): string { return text.normalize("NFD").replace(/[\u0300-\u036f]/g, ""); } /** * Simple Soundex implementation for phonetic matching */ function soundex(word: string): string { if (!word) return "0000"; const upper = word.toUpperCase().replace(/[^A-Z]/g, ""); if (!upper) return "0000"; const first = upper[0] ?? "0"; const mapping: Record<string, string> = { B: "1", F: "1", P: "1", V: "1", C: "2", G: "2", J: "2", K: "2", Q: "2", S: "2", X: "2", Z: "2", D: "3", T: "3", L: "4", M: "5", N: "5", R: "6", }; let result = first; for (let i = 1; i < upper.length && result.length < 4; i++) { const char = upper[i]; if (char && mapping[char]) { const code = mapping[char]; if (code && !result.endsWith(code)) { result += code; } } } return (result + "000").slice(0, 4); } /** * Validation patterns */ const VALIDATION_PATTERNS: Record<string, RegExp> = { email: /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/, phone: /^[+]?[(]?[0-9]{1,4}[)]?[-\s./0-9]{7,}$/, url: /^https?:\/\/[^\s/$.?#].[^\s]*$/i, uuid: /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i, ipv4: /^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/, }; /** * Fuzzy match using Levenshtein distance */ function createFuzzyMatchTool(adapter: SqliteAdapter): ToolDefinition { return { name: "sqlite_fuzzy_match", description: "Find fuzzy matches using Levenshtein distance. By default, splits values into tokens and matches against each word (use tokenize:false to match entire value). Use maxDistance 1-3 for similar-length strings.", group: "text", inputSchema: FuzzyMatchSchema, outputSchema: z.object({ success: z.boolean(), matchCount: z.number(), tokenized: z.boolean(), matches: z.array( z.object({ value: z.string(), matchedToken: z.string().optional(), tokenDistance: z.number().optional(), distance: z.number(), }), ), }), requiredScopes: ["read"], annotations: readOnly("Fuzzy Match"), handler: async (params: unknown, _context: RequestContext) => { const input = FuzzyMatchSchema.parse(params); // Validate and quote identifiers const table = sanitizeIdentifier(input.table); const column = sanitizeIdentifier(input.column); const sql = `SELECT ${column} FROM ${table} WHERE ${column} IS NOT NULL LIMIT 1000`; const result = await adapter.executeReadQuery(sql); const matches: { value: string; matchedToken?: string; tokenDistance?: number; distance: number; }[] = []; for (const row of result.rows ?? []) { const rawValue = row[input.column]; const value = typeof rawValue === "string" ? rawValue : JSON.stringify(rawValue ?? ""); if (input.tokenize) { // Token-based matching: split into words and find best match const tokens = value.split(/\s+/).filter((t) => t.length > 0); let bestToken = ""; let bestDistance = Infinity; for (const token of tokens) { const dist = levenshtein(input.search, token); if (dist < bestDistance) { bestDistance = dist; bestToken = token; } } if (bestDistance <= input.maxDistance) { matches.push({ value, matchedToken: bestToken, tokenDistance: bestDistance, distance: bestDistance, }); } } else { // Legacy behavior: match against entire column value const distance = levenshtein(input.search, value); if (distance <= input.maxDistance) { matches.push({ value, distance }); } } } // Sort by distance (ascending) and limit matches.sort((a, b) => a.distance - b.distance); const limited = matches.slice(0, input.limit); return { success: true, matchCount: limited.length, tokenized: input.tokenize, matches: limited, }; }, }; } /** * Phonetic match using Soundex or Metaphone */ function createPhoneticMatchTool(adapter: SqliteAdapter): ToolDefinition { return { name: "sqlite_phonetic_match", description: "Find phonetically similar values using Soundex (SQLite native) or Metaphone algorithm.", group: "text", inputSchema: PhoneticMatchSchema, outputSchema: z.object({ success: z.boolean(), searchCode: z.string(), matchCount: z.number(), matches: z.array( z.object({ value: z.string(), phoneticCode: z.string(), row: z.record(z.string(), z.unknown()).optional(), }), ), }), requiredScopes: ["read"], annotations: readOnly("Phonetic Match"), handler: async (params: unknown, _context: RequestContext) => { const input = PhoneticMatchSchema.parse(params); // Validate and quote identifiers const table = sanitizeIdentifier(input.table); const column = sanitizeIdentifier(input.column); const searchCode = input.algorithm === "metaphone" ? metaphone(input.search) : soundex(input.search); // Compute locally to ensure it's always available let sql: string; if (input.algorithm === "soundex") { // Try SQLite's native soundex function first, fall back to JS implementation sql = `SELECT *, soundex(${column}) as _phonetic FROM ${table} WHERE soundex(${column}) = soundex('${input.search.replace(/'/g, "''")}') LIMIT ${input.limit}`; try { const result = await adapter.executeReadQuery(sql); const matches = (result.rows ?? []).map((row) => { const rawValue = row[input.column]; const rawPhonetic = row["_phonetic"]; const match: { value: string; phoneticCode: string; row?: Record<string, unknown>; } = { value: typeof rawValue === "string" ? rawValue : JSON.stringify(rawValue ?? ""), phoneticCode: typeof rawPhonetic === "string" ? rawPhonetic : "", }; if (input.includeRowData) { match.row = row; } return match; }); return { success: true, searchCode, // Use pre-computed local soundex code matchCount: matches.length, matches, }; } catch (error) { // If SQLite soundex() is unavailable (WASM mode), fall back to JS implementation if ( error instanceof Error && error.message.toLowerCase().includes("no such function: soundex") ) { // Fall through to JS-based soundex matching below sql = `SELECT * FROM ${table} WHERE ${column} IS NOT NULL LIMIT 1000`; const result = await adapter.executeReadQuery(sql); const matches: { value: string; phoneticCode: string; row?: Record<string, unknown>; }[] = []; for (const row of result.rows ?? []) { const rawValue = row[input.column]; const value = typeof rawValue === "string" ? rawValue : JSON.stringify(rawValue ?? ""); const code = soundex(value); // Use JS-based soundex if (code === searchCode) { const match: { value: string; phoneticCode: string; row?: Record<string, unknown>; } = { value, phoneticCode: code }; if (input.includeRowData) { match.row = row; } matches.push(match); } } return { success: true, searchCode, matchCount: matches.length, matches: matches.slice(0, input.limit), }; } throw error; } } else { // Metaphone in JS sql = `SELECT * FROM ${table} WHERE ${column} IS NOT NULL LIMIT 1000`; const result = await adapter.executeReadQuery(sql); const matches: { value: string; phoneticCode: string; row?: Record<string, unknown>; }[] = []; for (const row of result.rows ?? []) { const rawValue = row[input.column]; const value = typeof rawValue === "string" ? rawValue : JSON.stringify(rawValue ?? ""); const code = metaphone(value); if (code === searchCode) { const match: { value: string; phoneticCode: string; row?: Record<string, unknown>; } = { value, phoneticCode: code }; if (input.includeRowData) { match.row = row; } matches.push(match); } } return { success: true, searchCode, matchCount: matches.length, matches: matches.slice(0, input.limit), }; } }, }; } /** * Normalize text (Unicode normalization or accent stripping) */ function createTextNormalizeTool(adapter: SqliteAdapter): ToolDefinition { return { name: "sqlite_text_normalize", description: "Normalize text using Unicode normalization (NFC, NFD, NFKC, NFKD) or strip accents.", group: "text", inputSchema: TextNormalizeSchema, outputSchema: z.object({ success: z.boolean(), rowCount: z.number(), rows: z.array( z.object({ original: z.string(), normalized: z.string(), }), ), }), requiredScopes: ["read"], annotations: readOnly("Text Normalize"), handler: async (params: unknown, _context: RequestContext) => { const input = TextNormalizeSchema.parse(params); // Validate and quote identifiers const table = sanitizeIdentifier(input.table); const column = sanitizeIdentifier(input.column); let sql = `SELECT ${column} as original FROM ${table}`; if (input.whereClause) { validateWhereClause(input.whereClause); sql += ` WHERE ${input.whereClause}`; } sql += ` LIMIT ${input.limit}`; const result = await adapter.executeReadQuery(sql); const rows = (result.rows ?? []).map((row) => { const rawOriginal = row["original"]; const original = typeof rawOriginal === "string" ? rawOriginal : JSON.stringify(rawOriginal ?? ""); let normalized: string; if (input.mode === "strip_accents") { normalized = stripAccents(original); } else { normalized = original.normalize( input.mode.toUpperCase() as "NFC" | "NFD" | "NFKC" | "NFKD", ); } return { original, normalized }; }); return { success: true, rowCount: rows.length, rows, }; }, }; } /** * Validate text against common patterns */ function createTextValidateTool(adapter: SqliteAdapter): ToolDefinition { return { name: "sqlite_text_validate", description: "Validate text values against patterns: email, phone, URL, UUID, IPv4, or custom regex.", group: "text", inputSchema: TextValidateSchema, outputSchema: z.object({ success: z.boolean(), totalRows: z.number(), validCount: z.number(), invalidCount: z.number(), invalidRows: z.array( z.object({ value: z.string().nullable(), rowid: z.number().optional(), }), ), }), requiredScopes: ["read"], annotations: readOnly("Text Validate"), handler: async (params: unknown, _context: RequestContext) => { const input = TextValidateSchema.parse(params); // Validate and quote identifiers const table = sanitizeIdentifier(input.table); const column = sanitizeIdentifier(input.column); // Get validation pattern let pattern: RegExp; if (input.pattern === "custom") { if (!input.customPattern) { throw new Error("customPattern is required when pattern='custom'"); } // Normalize pattern: handle common JSON double-escaping issues // e.g., "\\." in JSON becomes "\." in JavaScript string, but some clients // may send "\\\\." which becomes "\\" - normalize excessive backslashes const normalizedPattern = input.customPattern.replace(/\\\\/g, "\\"); try { pattern = new RegExp(normalizedPattern); } catch { throw new Error( `Invalid regex pattern: ${input.customPattern} (normalized to: ${normalizedPattern})`, ); } } else { const foundPattern = VALIDATION_PATTERNS[input.pattern]; if (!foundPattern) { throw new Error(`Unknown pattern: ${input.pattern}`); } pattern = foundPattern; } let sql = `SELECT rowid, ${column} as value FROM ${table}`; if (input.whereClause) { validateWhereClause(input.whereClause); sql += ` WHERE ${input.whereClause}`; } sql += ` LIMIT ${input.limit}`; const result = await adapter.executeReadQuery(sql); const invalidRows: { value: string | null; rowid?: number }[] = []; let validCount = 0; for (const row of result.rows ?? []) { const rawValue = row["value"]; // Handle null/empty values with user-friendly display const value = rawValue === null || rawValue === undefined ? "" : typeof rawValue === "string" ? rawValue : JSON.stringify(rawValue); // Display actual value: null for null/undefined, otherwise the value (truncated if long) const displayValue = rawValue === null || rawValue === undefined ? null : value.length > 100 ? value.slice(0, 100) + "..." : value; if (pattern.test(value)) { validCount++; } else { const rowid = row["rowid"]; if (typeof rowid === "number") { invalidRows.push({ value: displayValue, rowid }); } else { invalidRows.push({ value: displayValue }); } } } return { success: true, totalRows: result.rows?.length ?? 0, validCount, invalidCount: invalidRows.length, invalidRows, }; }, }; } /** * Output schema for advanced search */ const AdvancedSearchOutputSchema = z.object({ success: z.boolean(), searchTerm: z.string(), techniques: z.array(z.string()), matchCount: z.number(), matches: z.array( z.object({ rowid: z.number(), text: z.string(), matchTypes: z.array(z.string()), bestScore: z.number(), bestType: z.string(), }), ), }); /** * Advanced search combining multiple text processing techniques */ function createAdvancedSearchTool(adapter: SqliteAdapter): ToolDefinition { return { name: "sqlite_advanced_search", description: "Advanced search combining exact, fuzzy (Levenshtein), and phonetic (Soundex) matching", group: "text", inputSchema: AdvancedSearchSchema, outputSchema: AdvancedSearchOutputSchema, requiredScopes: ["read"], annotations: readOnly("Advanced Search"), handler: async (params: unknown, _context: RequestContext) => { const input = AdvancedSearchSchema.parse(params); // Validate and quote identifiers const table = sanitizeIdentifier(input.table); const column = sanitizeIdentifier(input.column); // Fetch candidate rows let whereClause = ""; if (input.whereClause) { validateWhereClause(input.whereClause); whereClause = ` AND ${input.whereClause}`; } const query = `SELECT rowid as id, ${column} AS value FROM ${table} WHERE ${column} IS NOT NULL${whereClause} LIMIT 1000`; const result = await adapter.executeQuery(query); if (!result.rows || result.rows.length === 0) { return { success: true, searchTerm: input.searchTerm, techniques: input.techniques, matchCount: 0, matches: [], }; } const searchLower = input.searchTerm.toLowerCase(); const searchSoundex = soundex(input.searchTerm); interface Match { rowid: number; text: string; matchTypes: string[]; bestScore: number; bestType: string; } const allMatches: Match[] = []; for (const row of result.rows) { const rawValue: unknown = row["value"]; const text = typeof rawValue === "string" ? rawValue : typeof rawValue === "number" ? String(rawValue) : ""; const textLower = text.toLowerCase(); const matches: { type: string; score: number }[] = []; // Exact match (case-insensitive substring) if (input.techniques.includes("exact")) { if (textLower.includes(searchLower)) { matches.push({ type: "exact", score: 1.0 }); } } // Fuzzy match (Levenshtein ratio) if (input.techniques.includes("fuzzy")) { const distance = levenshtein(input.searchTerm, text); const maxLen = Math.max(input.searchTerm.length, text.length); const similarity = maxLen === 0 ? 1 : 1 - distance / maxLen; if (similarity >= input.fuzzyThreshold) { matches.push({ type: "fuzzy", score: similarity }); } } // Phonetic match (Soundex) if (input.techniques.includes("phonetic")) { const words = text.split(/\s+/); for (const word of words) { if (soundex(word) === searchSoundex) { matches.push({ type: "phonetic", score: 0.8 }); break; } } } if (matches.length > 0) { const best = matches.reduce((a, b) => (a.score > b.score ? a : b)); // Safely coerce rowid to number, defaulting to 0 if undefined/null const rawRowid = row["id"]; const rowid = typeof rawRowid === "number" ? rawRowid : typeof rawRowid === "string" ? parseInt(rawRowid, 10) || 0 : 0; allMatches.push({ rowid, text: text.length > 100 ? text.slice(0, 100) + "..." : text, matchTypes: matches.map((m) => m.type), bestScore: Math.round(best.score * 1000) / 1000, bestType: best.type, }); } } // Sort by score and limit allMatches.sort((a, b) => b.bestScore - a.bestScore); const limited = allMatches.slice(0, input.limit); return { success: true, searchTerm: input.searchTerm, techniques: input.techniques, matchCount: limited.length, matches: limited, }; }, }; }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/neverinfamous/db-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

text.ts•40.2 KiB