PostgreSQL MCP

advanced.ts•35.1 KiB

/** * PostgreSQL Statistics Tools - Advanced Statistics * * Advanced statistical analysis tools: time series, distribution, hypothesis testing, sampling. */ import type { PostgresAdapter } from '../../PostgresAdapter.js'; import type { ToolDefinition, RequestContext } from '../../../../types/index.js'; import { z } from 'zod'; import { readOnly } from '../../../../utils/annotations.js'; import { getToolIcons } from '../../../../utils/icons.js'; // ============================================================================= // Parameter Preprocessing // ============================================================================= /** * Valid interval units for time series analysis */ const VALID_INTERVALS = ['minute', 'hour', 'day', 'week', 'month', 'year'] as const; /** * Interval shorthand mappings */ const INTERVAL_SHORTHANDS: Record<string, string> = { 'daily': 'day', 'hourly': 'hour', 'weekly': 'week', 'monthly': 'month', 'yearly': 'year', 'minutely': 'minute' }; /** * Preprocess time series parameters: * - Extract interval unit from PostgreSQL-style intervals ('1 day' → 'day', '2 hours' → 'hour') * - Normalize to lowercase * - Handle shorthands: daily→day, hourly→hour, weekly→week, monthly→month * - Alias: column → valueColumn, time → timeColumn * - Alias: tableName → table * - Default interval to 'day' if not provided */ function preprocessTimeSeriesParams(input: unknown): unknown { if (typeof input !== 'object' || input === null) { return input; } const result = { ...input as Record<string, unknown> }; // Alias: tableName → table if (result['tableName'] !== undefined && result['table'] === undefined) { result['table'] = result['tableName']; } // Alias: column → valueColumn if (result['column'] !== undefined && result['valueColumn'] === undefined) { result['valueColumn'] = result['column']; } // Alias: time → timeColumn if (result['time'] !== undefined && result['timeColumn'] === undefined) { result['timeColumn'] = result['time']; } // Alias: bucket → interval if (result['bucket'] !== undefined && result['interval'] === undefined) { result['interval'] = result['bucket']; } if (typeof result['interval'] === 'string') { let interval = result['interval'].toLowerCase().trim(); // Handle shorthands: daily → day, hourly → hour, etc. const shorthand = INTERVAL_SHORTHANDS[interval]; if (shorthand !== undefined) { interval = shorthand; } // Extract unit from PostgreSQL-style interval: '1 day', '2 hours', etc. const match = /^\d+\s*(\w+?)s?$/.exec(interval); if (match?.[1] !== undefined) { interval = match[1]; } // Handle plural forms: 'days' → 'day', 'hours' → 'hour' if (interval.endsWith('s') && VALID_INTERVALS.includes(interval.slice(0, -1) as typeof VALID_INTERVALS[number])) { interval = interval.slice(0, -1); } result['interval'] = interval; } else if (result['interval'] === undefined) { // Default interval to 'day' if not provided result['interval'] = 'day'; } // Alias: filter → where if (result['filter'] !== undefined && result['where'] === undefined) { result['where'] = result['filter']; } return result; } /** * Preprocess hypothesis test parameters: * - Normalize testType variants: 'ttest', 't-test', 'T_TEST' → 't_test' * - Default testType to 't_test' if not provided * - Alias: tableName → table, col → column */ function preprocessHypothesisParams(input: unknown): unknown { if (typeof input !== 'object' || input === null) { return input; } const result = { ...input as Record<string, unknown> }; // Alias: tableName → table if (result['tableName'] !== undefined && result['table'] === undefined) { result['table'] = result['tableName']; } // Alias: col → column if (result['col'] !== undefined && result['column'] === undefined) { result['column'] = result['col']; } if (typeof result['testType'] === 'string') { const normalized = result['testType'].toLowerCase().trim(); // t_test variants: t, ttest, t-test, t_test, T_TEST if (normalized === 't' || /^t[-_]?test$/.test(normalized)) { result['testType'] = 't_test'; } // z_test variants: z, ztest, z-test, z_test, Z_TEST else if (normalized === 'z' || /^z[-_]?test$/.test(normalized)) { result['testType'] = 'z_test'; } } else if (result['testType'] === undefined) { // Auto-detect: if populationStdDev or sigma provided, default to z_test if (result['populationStdDev'] !== undefined || result['sigma'] !== undefined) { result['testType'] = 'z_test'; } else { // Default testType to 't_test' if not provided result['testType'] = 't_test'; } } // Alias: filter → where if (result['filter'] !== undefined && result['where'] === undefined) { result['where'] = result['filter']; } return result; } /** * Preprocess distribution parameters: * - Alias: tableName → table, col → column */ function preprocessDistributionParams(input: unknown): unknown { if (typeof input !== 'object' || input === null) { return input; } const result = { ...input as Record<string, unknown> }; // Alias: tableName → table if (result['tableName'] !== undefined && result['table'] === undefined) { result['table'] = result['tableName']; } // Alias: col → column if (result['col'] !== undefined && result['column'] === undefined) { result['column'] = result['col']; } // Alias: filter → where if (result['filter'] !== undefined && result['where'] === undefined) { result['where'] = result['filter']; } return result; } /** * Preprocess sampling parameters: * - Alias: tableName → table, columns → select */ function preprocessSamplingParams(input: unknown): unknown { if (typeof input !== 'object' || input === null) { return input; } const result = { ...input as Record<string, unknown> }; // Alias: tableName → table if (result['tableName'] !== undefined && result['table'] === undefined) { result['table'] = result['tableName']; } // Alias: columns → select if (result['columns'] !== undefined && result['select'] === undefined) { result['select'] = result['columns']; } // Alias: filter → where if (result['filter'] !== undefined && result['where'] === undefined) { result['where'] = result['filter']; } return result; } // ============================================================================= // Advanced Statistics Schemas // ============================================================================= export const StatsTimeSeriesSchema = z.preprocess( preprocessTimeSeriesParams, z.object({ table: z.string().describe('Table name'), valueColumn: z.string().describe('Numeric column to aggregate'), timeColumn: z.string().describe('Timestamp column'), interval: z.enum(['minute', 'hour', 'day', 'week', 'month', 'year']).describe('Time bucket size (default: day)'), aggregation: z.enum(['sum', 'avg', 'min', 'max', 'count']).optional().describe('Aggregation function (default: avg)'), schema: z.string().optional().describe('Schema name'), where: z.string().optional().describe('Filter condition'), limit: z.number().optional().describe('Max time buckets to return'), groupBy: z.string().optional().describe('Column to group time series by') }) ); export const StatsDistributionSchema = z.preprocess( preprocessDistributionParams, z.object({ table: z.string().describe('Table name'), column: z.string().describe('Numeric column'), buckets: z.number().optional().describe('Number of histogram buckets (default: 10)'), schema: z.string().optional().describe('Schema name'), where: z.string().optional().describe('Filter condition'), groupBy: z.string().optional().describe('Column to group distribution by') }).refine((data) => data.buckets === undefined || data.buckets > 0, { message: 'buckets must be greater than 0', path: ['buckets'] }) ); export const StatsHypothesisSchema = z.preprocess( preprocessHypothesisParams, z.object({ table: z.string().describe('Table name'), column: z.string().describe('Numeric column'), testType: z.enum(['t_test', 'z_test']).describe('Type of hypothesis test: t_test or z_test (accepts shorthand: t, z, ttest, ztest)'), hypothesizedMean: z.number().optional().describe('Hypothesized population mean'), mean: z.number().optional().describe('Alias for hypothesizedMean'), expected: z.number().optional().describe('Alias for hypothesizedMean'), populationStdDev: z.number().optional().describe('Known population standard deviation (required for z-test)'), sigma: z.number().optional().describe('Alias for populationStdDev'), schema: z.string().optional().describe('Schema name'), where: z.string().optional().describe('Filter condition'), groupBy: z.string().optional().describe('Column to group hypothesis test by') }).transform((data) => ({ table: data.table, column: data.column, testType: data.testType, hypothesizedMean: data.hypothesizedMean ?? data.mean ?? data.expected ?? 0, populationStdDev: data.populationStdDev ?? data.sigma, schema: data.schema, where: data.where, groupBy: data.groupBy })).refine((data) => data.hypothesizedMean !== 0 || data.hypothesizedMean === 0, { // This allows 0 as a valid hypothesized mean - refinement always passes message: 'hypothesizedMean (or mean/expected alias) is required' }) ); export const StatsSamplingSchema = z.preprocess( preprocessSamplingParams, z.object({ table: z.string().describe('Table name'), method: z.enum(['random', 'bernoulli', 'system']).optional().describe('Sampling method (default: random). Note: system uses page-level sampling and may return 0 rows on small tables'), sampleSize: z.number().optional().describe('Number of rows for random sampling (must be > 0)'), percentage: z.number().optional().describe('Percentage for bernoulli/system sampling (0-100)'), schema: z.string().optional().describe('Schema name'), select: z.array(z.string()).optional().describe('Columns to select'), where: z.string().optional().describe('Filter condition') }).refine((data) => data.sampleSize === undefined || data.sampleSize > 0, { message: 'sampleSize must be greater than 0', path: ['sampleSize'] }).refine((data) => data.percentage === undefined || (data.percentage >= 0 && data.percentage <= 100), { message: 'percentage must be between 0 and 100', path: ['percentage'] }) ); // ============================================================================= // Tool Implementations // ============================================================================= /** * Time series analysis */ export function createStatsTimeSeriesTool(adapter: PostgresAdapter): ToolDefinition { return { name: 'pg_stats_time_series', description: 'Aggregate data into time buckets for time series analysis. Use groupBy to get separate time series per category.', group: 'stats', inputSchema: StatsTimeSeriesSchema, annotations: readOnly('Time Series Analysis'), icons: getToolIcons('stats', readOnly('Time Series Analysis')), handler: async (params: unknown, _context: RequestContext) => { const { table, valueColumn, timeColumn, interval, aggregation, schema, where, limit, groupBy } = StatsTimeSeriesSchema.parse(params) as { table: string; valueColumn: string; timeColumn: string; interval: string; aggregation?: string; schema?: string; where?: string; limit?: number; groupBy?: string; }; const schemaPrefix = schema ? `"${schema}".` : ''; const whereClause = where ? `WHERE ${where}` : ''; const agg = aggregation ?? 'avg'; const lim = limit ?? 100; // Validate timeColumn is a timestamp/date type const typeCheckQuery = ` SELECT data_type FROM information_schema.columns WHERE table_schema = '${schema ?? 'public'}' AND table_name = '${table}' AND column_name = '${timeColumn}' `; const typeResult = await adapter.executeQuery(typeCheckQuery); const typeRow = typeResult.rows?.[0] as { data_type: string } | undefined; if (!typeRow) { return { error: `Column "${timeColumn}" not found in table "${schema ?? 'public'}.${table}"` }; } const validTypes = ['timestamp without time zone', 'timestamp with time zone', 'date', 'time', 'time without time zone', 'time with time zone']; if (!validTypes.includes(typeRow.data_type)) { return { error: `Column "${timeColumn}" is type "${typeRow.data_type}" but must be a timestamp or date type for time series analysis` }; } // Helper to map bucket row const mapBucket = (row: Record<string, unknown>): { timeBucket: Date; value: number; count: number } => ({ timeBucket: row['time_bucket'] as Date, value: Number(row['value']), count: Number(row['count']) }); if (groupBy !== undefined) { // Grouped time series const sql = ` SELECT "${groupBy}" as group_key, DATE_TRUNC('${interval}', "${timeColumn}") as time_bucket, ${agg.toUpperCase()}("${valueColumn}")::numeric(20,6) as value, COUNT(*) as count FROM ${schemaPrefix}"${table}" ${whereClause} GROUP BY "${groupBy}", DATE_TRUNC('${interval}', "${timeColumn}") ORDER BY "${groupBy}", time_bucket DESC `; const result = await adapter.executeQuery(sql); const rows = result.rows ?? []; // Group results by group_key const groupsMap = new Map<unknown, { timeBucket: Date; value: number; count: number }[]>(); for (const row of rows) { const key = row['group_key']; if (!groupsMap.has(key)) { groupsMap.set(key, []); } const bucketList = groupsMap.get(key); if (bucketList !== undefined && bucketList.length < lim) { bucketList.push(mapBucket(row)); } } const groups = Array.from(groupsMap.entries()).map(([key, buckets]) => ({ groupKey: key, buckets })); return { table: `${schema ?? 'public'}.${table}`, valueColumn, timeColumn, interval, aggregation: agg, groupBy, groups, count: groups.length }; } // Ungrouped time series (original behavior) const sql = ` SELECT DATE_TRUNC('${interval}', "${timeColumn}") as time_bucket, ${agg.toUpperCase()}("${valueColumn}")::numeric(20,6) as value, COUNT(*) as count FROM ${schemaPrefix}"${table}" ${whereClause} GROUP BY DATE_TRUNC('${interval}', "${timeColumn}") ORDER BY time_bucket DESC LIMIT ${String(lim)} `; const result = await adapter.executeQuery(sql); const buckets = (result.rows ?? []).map(row => mapBucket(row)); return { table: `${schema ?? 'public'}.${table}`, valueColumn, timeColumn, interval, aggregation: agg, buckets }; } }; } /** * Distribution analysis with histogram */ export function createStatsDistributionTool(adapter: PostgresAdapter): ToolDefinition { return { name: 'pg_stats_distribution', description: 'Analyze data distribution with histogram buckets, skewness, and kurtosis. Use groupBy to get distribution per category.', group: 'stats', inputSchema: StatsDistributionSchema, annotations: readOnly('Distribution Analysis'), icons: getToolIcons('stats', readOnly('Distribution Analysis')), handler: async (params: unknown, _context: RequestContext) => { const parsed = StatsDistributionSchema.parse(params) as { table: string; column: string; buckets?: number; schema?: string; where?: string; groupBy?: string; }; const { table, column, buckets, schema, where, groupBy } = parsed; const schemaPrefix = schema ? `"${schema}".` : ''; const whereClause = where ? `WHERE ${where}` : ''; const numBuckets = buckets ?? 10; // Helper to compute skewness and kurtosis for a given group const computeMoments = async (groupFilter?: string): Promise<{ minVal: number; maxVal: number; skewness: number | null; kurtosis: number | null; } | null> => { const filterClause = groupFilter ? (whereClause ? `${whereClause} AND ${groupFilter}` : `WHERE ${groupFilter}`) : whereClause; const statsQuery = ` WITH stats AS ( SELECT MIN("${column}") as min_val, MAX("${column}") as max_val, AVG("${column}") as mean, STDDEV_POP("${column}") as stddev, COUNT("${column}") as n FROM ${schemaPrefix}"${table}" ${filterClause} ), moments AS ( SELECT s.min_val, s.max_val, s.mean, s.stddev, s.n, CASE WHEN s.stddev > 0 AND s.n > 2 THEN (SUM(POWER(("${column}" - s.mean) / s.stddev, 3)) / s.n)::numeric(10,6) ELSE NULL END as skewness, CASE WHEN s.stddev > 0 AND s.n > 3 THEN ((SUM(POWER(("${column}" - s.mean) / s.stddev, 4)) / s.n) - 3)::numeric(10,6) ELSE NULL END as kurtosis FROM ${schemaPrefix}"${table}" t, stats s ${filterClause} GROUP BY s.min_val, s.max_val, s.mean, s.stddev, s.n ) SELECT * FROM moments `; const result = await adapter.executeQuery(statsQuery); const row = result.rows?.[0]; if (row?.['min_val'] == null || row['max_val'] == null) { return null; } return { minVal: Number(row['min_val']), maxVal: Number(row['max_val']), skewness: row['skewness'] !== null ? Number(row['skewness']) : null, kurtosis: row['kurtosis'] !== null ? Number(row['kurtosis']) : null }; }; // Helper to generate histogram for given min/max const generateHistogram = async (minVal: number, maxVal: number, groupFilter?: string): Promise<{ bucket: number; frequency: number; rangeMin: number; rangeMax: number; }[]> => { const filterClause = groupFilter ? (whereClause ? `${whereClause} AND ${groupFilter}` : `WHERE ${groupFilter}`) : whereClause; const histogramQuery = ` SELECT WIDTH_BUCKET("${column}", ${String(minVal)}, ${String(maxVal + 0.0001)}, ${String(numBuckets)}) as bucket, COUNT(*) as frequency, MIN("${column}") as bucket_min, MAX("${column}") as bucket_max FROM ${schemaPrefix}"${table}" ${filterClause} GROUP BY WIDTH_BUCKET("${column}", ${String(minVal)}, ${String(maxVal + 0.0001)}, ${String(numBuckets)}) ORDER BY bucket `; const result = await adapter.executeQuery(histogramQuery); return (result.rows ?? []).map(row => ({ bucket: Number(row['bucket']), frequency: Number(row['frequency']), rangeMin: Number(row['bucket_min']), rangeMax: Number(row['bucket_max']) })); }; if (groupBy !== undefined) { // Get distinct groups first const groupsQuery = ` SELECT DISTINCT "${groupBy}" as group_key FROM ${schemaPrefix}"${table}" ${whereClause} ORDER BY "${groupBy}" `; const groupsResult = await adapter.executeQuery(groupsQuery); const groupKeys = (groupsResult.rows ?? []).map(r => r['group_key']); // Process each group const groups: { groupKey: unknown; range: { min: number; max: number }; bucketWidth: number; skewness: number | null; kurtosis: number | null; histogram: { bucket: number; frequency: number; rangeMin: number; rangeMax: number }[]; }[] = []; for (const groupKey of groupKeys) { const groupFilter = typeof groupKey === 'string' ? `"${groupBy}" = '${groupKey.replace(/'/g, "''")}'` : `"${groupBy}" = ${String(groupKey)}`; const moments = await computeMoments(groupFilter); if (moments === null) continue; const { minVal, maxVal, skewness, kurtosis } = moments; const bucketWidth = Math.round((maxVal - minVal) / numBuckets * 1e6) / 1e6; const histogram = await generateHistogram(minVal, maxVal, groupFilter); groups.push({ groupKey, range: { min: minVal, max: maxVal }, bucketWidth, skewness, kurtosis, histogram }); } return { table: `${schema ?? 'public'}.${table}`, column, groupBy, groups, count: groups.length }; } // Ungrouped distribution (existing logic) const moments = await computeMoments(); if (moments === null) { return { error: 'No data or all nulls in column' }; } const { minVal, maxVal, skewness, kurtosis } = moments; const bucketWidth = Math.round((maxVal - minVal) / numBuckets * 1e6) / 1e6; const histogram = await generateHistogram(minVal, maxVal); return { table: `${schema ?? 'public'}.${table}`, column, range: { min: minVal, max: maxVal }, bucketWidth, skewness, kurtosis, histogram }; } }; } /** * Hypothesis testing (t-test or z-test) */ export function createStatsHypothesisTool(adapter: PostgresAdapter): ToolDefinition { return { name: 'pg_stats_hypothesis', description: 'Perform one-sample t-test or z-test against a hypothesized mean. For z-test, provide populationStdDev (sigma) for accurate results. Use groupBy to test each group separately.', group: 'stats', inputSchema: StatsHypothesisSchema, annotations: readOnly('Hypothesis Testing'), icons: getToolIcons('stats', readOnly('Hypothesis Testing')), handler: async (params: unknown, _context: RequestContext) => { const { table, column, testType, hypothesizedMean, populationStdDev, schema, where, groupBy } = StatsHypothesisSchema.parse(params) as { table: string; column: string; testType: string; hypothesizedMean: number; populationStdDev?: number; groupBy?: string; schema?: string; where?: string; }; const schemaPrefix = schema ? `"${schema}".` : ''; const whereClause = where ? `WHERE ${where}` : ''; // Helper to calculate test results from row stats const calculateTestResults = (n: number, sampleMean: number, sampleStdDev: number): { sampleSize: number; sampleMean: number; sampleStdDev: number; populationStdDev: number | null; standardError: number; testStatistic: number; degreesOfFreedom: number | null; interpretation: string; note: string; } | { error: string; sampleSize: number } => { if (n < 2 || isNaN(sampleStdDev) || sampleStdDev === 0) { return { error: 'Insufficient data or zero variance', sampleSize: n }; } let stddevUsed: number; let stddevNote: string | undefined; if (testType === 'z_test') { if (populationStdDev !== undefined) { stddevUsed = populationStdDev; } else { stddevUsed = sampleStdDev; stddevNote = 'No populationStdDev provided; using sample stddev (less accurate for z-test)'; } } else { stddevUsed = sampleStdDev; } const standardError = stddevUsed / Math.sqrt(n); const testStatistic = (sampleMean - hypothesizedMean) / standardError; const degreesOfFreedom = n - 1; // Build note with warnings let noteText = stddevNote ?? 'For exact p-values, use external statistical software'; if (n < 30) { noteText = `Small sample size (n=${String(n)}): results may be less reliable. ` + noteText; } return { sampleSize: n, sampleMean, sampleStdDev, populationStdDev: testType === 'z_test' ? (populationStdDev ?? null) : null, standardError, testStatistic, degreesOfFreedom: testType === 't_test' ? degreesOfFreedom : null, interpretation: Math.abs(testStatistic) > 1.96 ? 'Test statistic suggests potential significance at α=0.05 level' : 'Test statistic does not suggest significance at α=0.05 level', note: noteText }; }; if (groupBy !== undefined) { // Grouped hypothesis tests const sql = ` SELECT "${groupBy}" as group_key, COUNT("${column}") as n, AVG("${column}")::numeric(20,6) as mean, STDDEV_SAMP("${column}")::numeric(20,6) as stddev FROM ${schemaPrefix}"${table}" ${whereClause} GROUP BY "${groupBy}" ORDER BY "${groupBy}" `; const result = await adapter.executeQuery(sql); const rows = result.rows ?? []; const groups = rows.map(row => { const n = Number(row['n']); const sampleMean = Number(row['mean']); const sampleStdDev = Number(row['stddev']); return { groupKey: row['group_key'], results: calculateTestResults(n, sampleMean, sampleStdDev) }; }); return { table: `${schema ?? 'public'}.${table}`, column, testType, hypothesizedMean, groupBy, groups, count: groups.length }; } // Ungrouped hypothesis test const sql = ` SELECT COUNT("${column}") as n, AVG("${column}")::numeric(20,6) as mean, STDDEV_SAMP("${column}")::numeric(20,6) as stddev FROM ${schemaPrefix}"${table}" ${whereClause} `; const result = await adapter.executeQuery(sql); const row = result.rows?.[0] as { n: string | number; mean: string | number; stddev: string | number } | undefined; if (!row) return { error: 'No data found' }; const n = Number(row.n); const sampleMean = Number(row.mean); const sampleStdDev = Number(row.stddev); const testResults = calculateTestResults(n, sampleMean, sampleStdDev); // If error, return at top level (not nested in results) if ('error' in testResults) { return testResults; } return { table: `${schema ?? 'public'}.${table}`, column, testType, hypothesizedMean, results: testResults }; } }; } /** * Random sampling */ export function createStatsSamplingTool(adapter: PostgresAdapter): ToolDefinition { return { name: 'pg_stats_sampling', description: 'Get a random sample of rows. Use sampleSize for exact row count (any method), or percentage for approximate sampling with bernoulli/system methods.', group: 'stats', inputSchema: StatsSamplingSchema, annotations: readOnly('Random Sampling'), icons: getToolIcons('stats', readOnly('Random Sampling')), handler: async (params: unknown, _context: RequestContext) => { const { table, method, sampleSize, percentage, schema, select, where } = StatsSamplingSchema.parse(params); const schemaPrefix = schema ? `"${schema}".` : ''; const columns = select && select.length > 0 ? select.map(c => `"${c}"`).join(', ') : '*'; const whereClause = where ? `WHERE ${where}` : ''; const samplingMethod = method ?? 'random'; let sql: string; let note: string | undefined; // If sampleSize is provided, always use ORDER BY RANDOM() LIMIT n for exact counts // TABLESAMPLE BERNOULLI/SYSTEM are percentage-based and cannot guarantee exact row counts if (sampleSize !== undefined) { const limit = sampleSize; sql = ` SELECT ${columns} FROM ${schemaPrefix}"${table}" ${whereClause} ORDER BY RANDOM() LIMIT ${String(limit)} `; if (percentage !== undefined) { note = `sampleSize (${String(sampleSize)}) takes precedence over percentage (${String(percentage)}%). Using ORDER BY RANDOM() LIMIT for exact row count.`; } else if (samplingMethod !== 'random') { note = `Using ORDER BY RANDOM() LIMIT for exact ${String(sampleSize)} row count. TABLESAMPLE ${samplingMethod.toUpperCase()} is percentage-based and cannot guarantee exact counts.`; } } else if (samplingMethod === 'random') { // Default random sampling with default limit const limit = 100; sql = ` SELECT ${columns} FROM ${schemaPrefix}"${table}" ${whereClause} ORDER BY RANDOM() LIMIT ${String(limit)} `; if (percentage !== undefined) { note = `percentage (${String(percentage)}%) is ignored for random method. Use method:'bernoulli' or method:'system' for percentage-based sampling, or use sampleSize for exact row count.`; } } else { // TABLESAMPLE with percentage (approximate row count) const pct = percentage ?? 10; sql = ` SELECT ${columns} FROM ${schemaPrefix}"${table}" TABLESAMPLE ${samplingMethod.toUpperCase()}(${String(pct)}) ${whereClause} `; note = `TABLESAMPLE ${samplingMethod.toUpperCase()}(${String(pct)}%) returns approximately ${String(pct)}% of rows. Actual count varies based on table size and sampling algorithm.`; } const result = await adapter.executeQuery(sql); const rows = result.rows ?? []; const response: { table: string; method: string; sampleSize: number; rows: unknown[]; note?: string; } = { table: `${schema ?? 'public'}.${table}`, method: samplingMethod, sampleSize: rows.length, rows }; if (note !== undefined) { response.note = note; } // Add note if requested sampleSize exceeded available rows if (sampleSize !== undefined && rows.length < sampleSize) { const existingNote = response.note !== undefined ? response.note + ' ' : ''; response.note = existingNote + `Requested ${String(sampleSize)} rows but only ${String(rows.length)} available.`; } return response; } }; }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/antonygold59/postgresql-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

advanced.ts•35.1 KiB