/**
* PostgreSQL pgvector - Advanced Operations
*/
import type { PostgresAdapter } from "../../PostgresAdapter.js";
import type {
ToolDefinition,
RequestContext,
} from "../../../../types/index.js";
import { z } from "zod";
import { readOnly } from "../../../../utils/annotations.js";
import { getToolIcons } from "../../../../utils/icons.js";
import {
sanitizeIdentifier,
sanitizeTableName,
} from "../../../../utils/identifiers.js";
import { truncateVector } from "./basic.js";
import {
VectorClusterOutputSchema,
VectorIndexOptimizeOutputSchema,
HybridSearchOutputSchema,
VectorPerformanceOutputSchema,
VectorDimensionReduceOutputSchema,
VectorEmbedOutputSchema,
} from "../../schemas/index.js";
/**
* Parse a PostgreSQL vector string to a number array.
*/
function parseVector(vecStr: unknown): number[] | null {
if (typeof vecStr !== "string") return null;
try {
const cleaned = vecStr.replace(/[[\]()]/g, "");
return cleaned.split(",").map(Number);
} catch {
return null;
}
}
export function createVectorClusterTool(
adapter: PostgresAdapter,
): ToolDefinition {
// Schema with parameter smoothing
const ClusterSchemaBase = z.object({
table: z.string().optional().describe("Table name"),
tableName: z.string().optional().describe("Alias for table"),
column: z.string().optional().describe("Vector column"),
col: z.string().optional().describe("Alias for column"),
k: z.number().optional().describe("Number of clusters"),
clusters: z
.number()
.optional()
.describe("Alias for k (number of clusters)"),
iterations: z.number().optional().describe("Max iterations (default: 10)"),
sampleSize: z.number().optional().describe("Sample size for large tables"),
schema: z.string().optional().describe("Database schema (default: public)"),
});
const ClusterSchema = ClusterSchemaBase.transform((data) => ({
table: data.table ?? data.tableName ?? "",
column: data.column ?? data.col ?? "",
k: data.k ?? data.clusters,
iterations: data.iterations,
sampleSize: data.sampleSize,
schema: data.schema,
})).refine((data) => data.k !== undefined, {
message: "k (or clusters alias) is required",
});
return {
name: "pg_vector_cluster",
description:
"Perform K-means clustering on vectors. Returns cluster centroids only (not row assignments). To assign rows to clusters, compare row vectors to centroids using pg_vector_distance.",
group: "vector",
inputSchema: ClusterSchemaBase,
outputSchema: VectorClusterOutputSchema,
annotations: readOnly("Vector Cluster"),
icons: getToolIcons("vector", readOnly("Vector Cluster")),
handler: async (params: unknown, _context: RequestContext) => {
const parsed = ClusterSchema.parse(params);
// Refine guarantees k is defined, but add explicit check for TypeScript
const k = parsed.k;
if (k === undefined) {
throw new Error("k (or clusters alias) is required");
}
if (k < 1) {
return {
success: false,
error: "k must be at least 1 (number of clusters)",
suggestion: "Provide k >= 1, typically between 2 and 20",
};
}
const maxIter = parsed.iterations ?? 10;
const sample = parsed.sampleSize ?? 10000;
const tableName = sanitizeTableName(parsed.table, parsed.schema);
const columnName = sanitizeIdentifier(parsed.column);
const sampleSql = `
SELECT ${columnName} as vec
FROM ${tableName}
WHERE ${columnName} IS NOT NULL
ORDER BY RANDOM()
LIMIT ${String(sample)}
`;
const sampleResult = await adapter.executeQuery(sampleSql);
const vectors = (sampleResult.rows ?? []) as { vec: string }[];
if (vectors.length < k) {
return {
success: false,
error: `Cannot create ${String(k)} clusters with only ${String(vectors.length)} data points. Reduce k to at most ${String(vectors.length)} or increase sampleSize.`,
k: k,
availableDataPoints: vectors.length,
sampleSize: sample,
};
}
const initialCentroids = vectors.slice(0, k).map((v) => v.vec);
const clusterSql = `
WITH sample_vectors AS (
SELECT ROW_NUMBER() OVER () as id, ${columnName} as vec
FROM ${tableName}
WHERE ${columnName} IS NOT NULL
LIMIT ${String(sample)}
),
centroids AS (
SELECT unnest($1::vector[]) as centroid
)
SELECT
c.centroid,
COUNT(*) as cluster_size,
AVG(s.vec) as new_centroid
FROM sample_vectors s
CROSS JOIN LATERAL (
SELECT centroid, ROW_NUMBER() OVER (ORDER BY s.vec <-> centroid) as rn
FROM centroids
) c
WHERE c.rn = 1
GROUP BY c.centroid
`;
let centroids = initialCentroids;
for (let i = 0; i < maxIter; i++) {
try {
const result = await adapter.executeQuery(clusterSql, [centroids]);
centroids = (result.rows ?? []).map(
(r: Record<string, unknown>) => r["new_centroid"] as string,
);
} catch {
break;
}
}
// Truncate large centroids for display (like pg_vector_aggregate does)
const parsedCentroids = centroids.map((c) => {
const parsed = parseVector(c);
if (parsed === null) {
return { vector: c };
}
// For large vectors, use preview format (first 10 dimensions)
if (parsed.length > 10) {
const truncated = truncateVector(parsed, 10);
return {
preview: truncated.preview,
dimensions: truncated.dimensions,
truncated: truncated.truncated,
};
}
return { vector: parsed };
});
return {
k: k,
iterations: maxIter,
sampleSize: vectors.length,
centroids: parsedCentroids,
note: "For production clustering, consider using specialized libraries",
};
},
};
}
export function createVectorIndexOptimizeTool(
adapter: PostgresAdapter,
): ToolDefinition {
// Schema with parameter smoothing
const IndexOptimizeSchemaBase = z.object({
table: z.string().optional().describe("Table name"),
tableName: z.string().optional().describe("Alias for table"),
column: z.string().optional().describe("Vector column"),
col: z.string().optional().describe("Alias for column"),
schema: z.string().optional().describe("Database schema (default: public)"),
});
const IndexOptimizeSchema = IndexOptimizeSchemaBase.transform((data) => ({
table: data.table ?? data.tableName ?? "",
column: data.column ?? data.col ?? "",
schema: data.schema,
}));
return {
name: "pg_vector_index_optimize",
description:
"Analyze vector column and recommend optimal index parameters for IVFFlat/HNSW.",
group: "vector",
inputSchema: IndexOptimizeSchemaBase,
outputSchema: VectorIndexOptimizeOutputSchema,
annotations: readOnly("Vector Index Optimize"),
icons: getToolIcons("vector", readOnly("Vector Index Optimize")),
handler: async (params: unknown, _context: RequestContext) => {
const parsed = IndexOptimizeSchema.parse(params ?? {});
const tableName = sanitizeTableName(parsed.table, parsed.schema);
const columnName = sanitizeIdentifier(parsed.column);
const schemaName = parsed.schema ?? "public";
const statsSql = `
SELECT
reltuples::bigint as estimated_rows,
pg_size_pretty(pg_total_relation_size('${tableName}'::regclass)) as table_size
FROM pg_class c
JOIN pg_namespace n ON c.relnamespace = n.oid
WHERE c.relname = $1 AND n.nspname = $2
`;
const statsResult = await adapter.executeQuery(statsSql, [
parsed.table,
schemaName,
]);
// PostgreSQL returns bigint as string, cast as needed
const stats = (statsResult.rows?.[0] ?? {}) as {
estimated_rows: string | number;
table_size: string;
};
// Validate column is actually a vector type before calling vector_dims
const typeCheckSql = `
SELECT udt_name FROM information_schema.columns
WHERE table_schema = $1 AND table_name = $2 AND column_name = $3
`;
const typeResult = await adapter.executeQuery(typeCheckSql, [
schemaName,
parsed.table,
parsed.column,
]);
if ((typeResult.rows?.length ?? 0) === 0) {
return {
success: false,
error: `Column '${parsed.column}' does not exist in table '${parsed.table}'`,
suggestion: "Use pg_describe_table to find available columns",
};
}
const udtName = typeResult.rows?.[0]?.["udt_name"] as string | undefined;
if (udtName !== "vector") {
return {
success: false,
error: `Column '${parsed.column}' is not a vector column (type: ${udtName ?? "unknown"})`,
suggestion: "Use a column with vector type for index optimization",
};
}
const dimSql = `
SELECT vector_dims(${columnName}) as dimensions
FROM ${tableName}
WHERE ${columnName} IS NOT NULL
LIMIT 1
`;
const dimResult = await adapter.executeQuery(dimSql);
const dimensions = (
dimResult.rows?.[0] as { dimensions: number } | undefined
)?.dimensions;
const indexSql = `
SELECT i.indexname, i.indexdef
FROM pg_indexes i
WHERE i.tablename = $1 AND i.schemaname = $2
AND i.indexdef LIKE '%vector%'
`;
const indexResult = await adapter.executeQuery(indexSql, [
parsed.table,
schemaName,
]);
// Convert PostgreSQL bigint string to number for output schema compliance
const rows = Number(stats.estimated_rows ?? 0);
const recommendations = [];
if (rows < 10000) {
recommendations.push({
type: "none",
reason: "Table is small enough for brute force search",
});
} else if (rows < 100000) {
recommendations.push({
type: "ivfflat",
lists: Math.min(100, Math.round(Math.sqrt(rows))),
reason: "IVFFlat recommended for medium tables",
});
} else {
recommendations.push({
type: "hnsw",
m: dimensions !== undefined && dimensions > 768 ? 32 : 16,
efConstruction: 64,
reason: "HNSW recommended for large tables with high recall",
});
recommendations.push({
type: "ivfflat",
lists: Math.round(Math.sqrt(rows)),
reason: "IVFFlat is faster to build but lower recall",
});
}
return {
table: parsed.table,
column: parsed.column,
dimensions,
estimatedRows: rows,
tableSize: stats.table_size,
existingIndexes: indexResult.rows,
recommendations,
};
},
};
}
export function createHybridSearchTool(
adapter: PostgresAdapter,
): ToolDefinition {
// Schema with parameter smoothing
const HybridSearchSchemaBase = z.object({
table: z.string().optional().describe("Table name"),
tableName: z.string().optional().describe("Alias for table"),
vectorColumn: z.string().optional().describe("Vector column"),
vectorCol: z.string().optional().describe("Alias for vectorColumn"),
textColumn: z.string().describe("Text column for FTS"),
vector: z.array(z.number()).describe("Query vector"),
textQuery: z.string().describe("Text search query"),
vectorWeight: z
.number()
.optional()
.describe("Weight for vector score (0-1, default: 0.5)"),
limit: z.number().optional().describe("Max results"),
select: z
.array(z.string())
.optional()
.describe("Columns to return (defaults to non-vector columns)"),
});
const HybridSearchSchema = HybridSearchSchemaBase.transform((data) => ({
table: data.table ?? data.tableName ?? "",
vectorColumn: data.vectorColumn ?? data.vectorCol ?? "",
textColumn: data.textColumn,
vector: data.vector,
textQuery: data.textQuery,
vectorWeight: data.vectorWeight,
limit: data.limit,
select: data.select,
}));
return {
name: "pg_hybrid_search",
description:
"Combined vector similarity and full-text search with weighted scoring.",
group: "vector",
inputSchema: HybridSearchSchemaBase,
outputSchema: HybridSearchOutputSchema,
annotations: readOnly("Hybrid Search"),
icons: getToolIcons("vector", readOnly("Hybrid Search")),
handler: async (params: unknown, _context: RequestContext) => {
const parsed = HybridSearchSchema.parse(params);
// Validate required parameters before using them
if (parsed.table === "") {
return {
success: false,
error: "table (or tableName) parameter is required",
requiredParams: [
"table",
"vectorColumn",
"textColumn",
"vector",
"textQuery",
],
};
}
if (parsed.vectorColumn === "") {
return {
success: false,
error: "vectorColumn (or vectorCol) parameter is required",
requiredParams: [
"table",
"vectorColumn",
"textColumn",
"vector",
"textQuery",
],
};
}
// Parse schema.table format (embedded schema takes priority)
let resolvedTable = parsed.table;
let resolvedSchema: string | undefined;
if (parsed.table.includes(".")) {
const parts = parsed.table.split(".");
resolvedSchema = parts[0];
resolvedTable = parts[1] ?? parsed.table;
}
const schemaName = resolvedSchema ?? "public";
const tableName = sanitizeTableName(resolvedTable, schemaName);
// Check column type - reject if it's a tsvector
const colTypeSql = `
SELECT data_type, udt_name
FROM information_schema.columns
WHERE table_schema = $1 AND table_name = $2 AND column_name = $3
`;
const colTypeResult = await adapter.executeQuery(colTypeSql, [
schemaName,
resolvedTable,
parsed.vectorColumn,
]);
const colType = colTypeResult.rows?.[0] as
| { data_type?: string; udt_name?: string }
| undefined;
if (
colType?.udt_name === "tsvector" ||
colType?.data_type === "tsvector"
) {
return {
success: false,
error: `Column '${parsed.vectorColumn}' is tsvector, not vector. For hybrid search, vectorColumn must be a pgvector column (type 'vector'). Use textColumn for text search.`,
suggestion: `Specify a different vector column, or check your table structure with pg_describe_table`,
};
}
if (colType?.udt_name !== "vector" && colType !== undefined) {
const actualType = colType.udt_name ?? colType.data_type ?? "unknown";
return {
success: false,
error: `Column '${parsed.vectorColumn}' has type '${actualType}', not 'vector'. Hybrid search requires a pgvector column.`,
columnType: actualType,
};
}
const vectorWeight = parsed.vectorWeight ?? 0.5;
// Fix floating point precision (e.g., 0.30000000000000004 -> 0.3)
const textWeight = Math.round((1 - vectorWeight) * 1000) / 1000;
const limitVal = parsed.limit ?? 10;
const vectorStr = `[${parsed.vector.join(",")}]`;
// Build select clause - use specified columns, excluding vector column if using t.*
let selectCols: string;
if (parsed.select !== undefined && parsed.select.length > 0) {
// Use only the explicitly selected columns
selectCols = parsed.select.map((c) => `t."${c}"`).join(", ");
} else {
// Get all columns except vector columns to avoid token waste
const colsSql = `
SELECT column_name
FROM information_schema.columns
WHERE table_schema = $1 AND table_name = $2
AND udt_name != 'vector'
ORDER BY ordinal_position
`;
const colsResult = await adapter.executeQuery(colsSql, [
schemaName,
resolvedTable,
]);
const cols = (colsResult.rows ?? []).map(
(r: Record<string, unknown>) => r["column_name"] as string,
);
selectCols =
cols.length > 0 ? cols.map((c) => `t."${c}"`).join(", ") : "t.*";
}
const sql = `
WITH vector_scores AS (
SELECT
ctid,
1 - ("${parsed.vectorColumn}" <=> '${vectorStr}'::vector) as vector_score
FROM ${tableName}
WHERE "${parsed.vectorColumn}" IS NOT NULL
ORDER BY "${parsed.vectorColumn}" <=> '${vectorStr}'::vector
LIMIT ${String(limitVal * 3)}
),
text_scores AS (
SELECT
ctid,
ts_rank(to_tsvector('english', "${parsed.textColumn}"), plainto_tsquery($1)) as text_score
FROM ${tableName}
WHERE to_tsvector('english', "${parsed.textColumn}") @@ plainto_tsquery($1)
)
SELECT
${selectCols},
COALESCE(v.vector_score, 0) * ${String(vectorWeight)} +
COALESCE(ts.text_score, 0) * ${String(textWeight)} as combined_score,
COALESCE(v.vector_score, 0) as vector_score,
COALESCE(ts.text_score, 0) as text_score
FROM ${tableName} t
LEFT JOIN vector_scores v ON t.ctid = v.ctid
LEFT JOIN text_scores ts ON t.ctid = ts.ctid
WHERE v.ctid IS NOT NULL OR ts.ctid IS NOT NULL
ORDER BY combined_score DESC
LIMIT ${String(limitVal)}
`;
try {
const result = await adapter.executeQuery(sql, [parsed.textQuery]);
return {
results: result.rows,
count: result.rows?.length ?? 0,
vectorWeight,
textWeight,
};
} catch (error: unknown) {
if (error instanceof Error) {
// Parse column not found errors
const colMatch = /column "([^"]+)" does not exist/.exec(
error.message,
);
if (colMatch) {
const missingCol = colMatch[1] ?? "";
// Determine which parameter has the issue
let paramName = "column";
if (missingCol === parsed.textColumn) {
paramName = "textColumn";
} else if (missingCol === parsed.vectorColumn) {
paramName = "vectorColumn";
}
return {
success: false,
error: `Column '${missingCol}' does not exist in table '${resolvedTable}'`,
parameterWithIssue: paramName,
suggestion: "Use pg_describe_table to find available columns",
};
}
// Parse dimension mismatch errors
const dimMatch = /different vector dimensions (\d+) and (\d+)/.exec(
error.message,
);
if (dimMatch) {
const expectedDim = dimMatch[1] ?? "0";
const providedDim = dimMatch[2] ?? "0";
return {
success: false,
error: `Vector dimension mismatch: column expects ${expectedDim} dimensions, but you provided ${providedDim} dimensions.`,
expectedDimensions: parseInt(expectedDim, 10),
providedDimensions: parseInt(providedDim, 10),
suggestion:
"Ensure your query vector has the same dimensions as the column.",
};
}
// Parse relation not found errors
const relationMatch = /relation "([^"]+)" does not exist/.exec(
error.message,
);
if (relationMatch) {
const missingRelation = relationMatch[1] ?? "";
return {
success: false,
error: `Table '${missingRelation}' does not exist`,
suggestion:
"Use pg_list_tables to find available tables, or check the schema name",
};
}
// Return generic database error as {success: false} instead of throwing
return {
success: false,
error: error.message,
suggestion: "Check your query parameters and table structure",
};
}
// For non-Error exceptions, return generic error
return {
success: false,
error: "An unexpected error occurred",
details: String(error),
};
}
},
};
}
export function createVectorPerformanceTool(
adapter: PostgresAdapter,
): ToolDefinition {
// Schema with parameter smoothing
const PerformanceSchemaBase = z.object({
table: z.string().optional().describe("Table name"),
tableName: z.string().optional().describe("Alias for table"),
column: z.string().optional().describe("Vector column"),
col: z.string().optional().describe("Alias for column"),
testVector: z
.array(z.number())
.optional()
.describe("Test vector for benchmarking"),
schema: z.string().optional().describe("Database schema (default: public)"),
});
const PerformanceSchema = PerformanceSchemaBase.transform((data) => ({
table: data.table ?? data.tableName ?? "",
column: data.column ?? data.col ?? "",
testVector: data.testVector,
schema: data.schema,
}));
return {
name: "pg_vector_performance",
description:
"Analyze vector search performance and index effectiveness. Provide testVector for benchmarking (recommended).",
group: "vector",
inputSchema: PerformanceSchemaBase,
outputSchema: VectorPerformanceOutputSchema,
annotations: readOnly("Vector Performance"),
icons: getToolIcons("vector", readOnly("Vector Performance")),
handler: async (params: unknown, _context: RequestContext) => {
const parsed = PerformanceSchema.parse(params);
// Validate required params
if (parsed.table === "") {
return {
success: false,
error: "table (or tableName) parameter is required",
requiredParams: ["table", "column"],
};
}
if (parsed.column === "") {
return {
success: false,
error:
"column (or col) parameter is required for the vector column name",
requiredParams: ["table", "column"],
};
}
const tableName = sanitizeTableName(parsed.table, parsed.schema);
const columnName = sanitizeIdentifier(parsed.column);
const schemaName = parsed.schema ?? "public";
// Check if column exists
const colCheckSql = `
SELECT 1 FROM information_schema.columns
WHERE table_schema = $1 AND table_name = $2 AND column_name = $3
`;
const colCheckResult = await adapter.executeQuery(colCheckSql, [
schemaName,
parsed.table,
parsed.column,
]);
if ((colCheckResult.rows?.length ?? 0) === 0) {
return {
success: false,
error: `Column '${parsed.column}' does not exist in table '${parsed.table}'`,
suggestion: "Verify the column name using pg_describe_table",
};
}
const indexSql = `
SELECT
i.indexname,
i.indexdef,
pg_size_pretty(pg_relation_size((i.schemaname || '.' || i.indexname)::regclass)) as index_size,
s.idx_scan,
s.idx_tup_read
FROM pg_indexes i
LEFT JOIN pg_stat_user_indexes s ON s.indexrelname = i.indexname AND s.schemaname = i.schemaname
WHERE i.tablename = $1 AND i.schemaname = $2
AND i.indexdef LIKE '%vector%'
`;
const indexResult = await adapter.executeQuery(indexSql, [
parsed.table,
schemaName,
]);
const statsSql = `
SELECT
reltuples::bigint as estimated_rows,
pg_size_pretty(pg_relation_size('${tableName}'::regclass)) as table_size
FROM pg_class c
JOIN pg_namespace n ON c.relnamespace = n.oid
WHERE c.relname = $1 AND n.nspname = $2
`;
const statsResult = await adapter.executeQuery(statsSql, [
parsed.table,
schemaName,
]);
// PostgreSQL returns bigint as string, cast as needed
const stats = (statsResult.rows?.[0] ?? {}) as {
estimated_rows?: string | number;
table_size?: string;
};
let benchmark = null;
let testVectorSource: string | undefined;
let testVector = parsed.testVector;
// Auto-generate test vector from first row if not provided
if (testVector === undefined) {
try {
const sampleSql = `SELECT ${columnName}::text as vec FROM ${tableName} WHERE ${columnName} IS NOT NULL LIMIT 1`;
const sampleResult = await adapter.executeQuery(sampleSql);
const sampleRow = sampleResult.rows?.[0] as
| { vec?: string }
| undefined;
if (sampleRow?.vec !== undefined) {
// Parse vector string like "[0.1,0.2,0.3]" to array
const vecStr = sampleRow.vec.replace(/[[\]]/g, "");
testVector = vecStr.split(",").map(Number);
testVectorSource = "auto-generated from first row";
}
} catch {
// Silently ignore - benchmark just won't be available
}
} else {
testVectorSource = "user-provided";
}
if (testVector !== undefined && testVector.length > 0) {
const vectorStr = `[${testVector.join(",")}]`;
const benchSql = `
EXPLAIN ANALYZE
SELECT * FROM ${tableName}
ORDER BY ${columnName} <-> '${vectorStr}'::vector
LIMIT 10
`;
const benchResult = await adapter.executeQuery(benchSql);
// Truncate large vectors in EXPLAIN output to reduce payload size
// Pattern matches vector literals like '[0.1,0.2,...,0.9]'::vector
const vectorPattern = /\[[\d.,\s-e]+\]'::vector/g;
const truncatedRows = (benchResult.rows ?? []).map(
(row: Record<string, unknown>) => {
const planLine = row["QUERY PLAN"] as string | undefined;
if (planLine && planLine.length > 200) {
// Truncate long vector literals in query plan
const truncated = planLine.replace(
vectorPattern,
`[...${String(testVector.length)} dims]'::vector`,
);
return { "QUERY PLAN": truncated };
}
return row;
},
);
benchmark = truncatedRows;
}
// Convert PostgreSQL bigint strings to numbers for output schema compliance
const estimatedRows = Number(stats.estimated_rows ?? 0);
// Map indexes to convert bigint stats to numbers (idx_scan, idx_tup_read)
const indexes = (indexResult.rows ?? []).map(
(row: Record<string, unknown>) => ({
...row,
idx_scan: row["idx_scan"] != null ? Number(row["idx_scan"]) : null,
idx_tup_read:
row["idx_tup_read"] != null ? Number(row["idx_tup_read"]) : null,
}),
);
const response: Record<string, unknown> = {
table: parsed.table,
column: parsed.column,
tableSize: stats.table_size,
// PostgreSQL returns -1 for tables that haven't been analyzed; normalize to 0
estimatedRows: estimatedRows < 0 ? 0 : estimatedRows,
indexes,
benchmark,
recommendations:
(indexResult.rows?.length ?? 0) === 0
? [
"No vector index found - consider creating one for better performance",
]
: [],
};
if (testVectorSource !== undefined) {
response["testVectorSource"] = testVectorSource;
}
if (benchmark === null) {
response["hint"] =
"No vectors in table to auto-generate test. Provide testVector param for benchmarking.";
}
return response;
},
};
}
export function createVectorDimensionReduceTool(
adapter: PostgresAdapter,
): ToolDefinition {
// Define base schema that exposes all properties correctly to MCP
const VectorDimensionReduceSchemaBase = z.object({
// Direct vector mode
vector: z
.array(z.number())
.optional()
.describe("Vector to reduce (for direct mode)"),
// Table-based mode - include aliases for Split Schema compliance
table: z.string().optional().describe("Table name (for table mode)"),
tableName: z.string().optional().describe("Alias for table"),
column: z
.string()
.optional()
.describe("Vector column name (for table mode)"),
col: z.string().optional().describe("Alias for column"),
idColumn: z
.string()
.optional()
.describe("ID column to include in results (default: id)"),
limit: z.number().optional().describe("Max rows to process (default: 100)"),
// Common parameters - targetDimensions is required
targetDimensions: z
.number()
.optional()
.describe("Target number of dimensions"),
dimensions: z.number().optional().describe("Alias for targetDimensions"),
seed: z.number().optional().describe("Random seed for reproducibility"),
summarize: z
.boolean()
.optional()
.describe(
"Summarize reduced vectors to preview format in table mode (default: true)",
),
});
// Schema with alias resolution applied via refinement
const VectorDimensionReduceSchema = VectorDimensionReduceSchemaBase.transform(
(data) => {
// Handle aliases: dimensions -> targetDimensions, tableName -> table, col -> column
const resolvedTargetDimensions = data.targetDimensions ?? data.dimensions;
return {
...data,
table: data.table ?? data.tableName,
column: data.column ?? data.col,
targetDimensions: resolvedTargetDimensions,
};
},
).refine((data) => data.targetDimensions !== undefined, {
message: "targetDimensions (or dimensions alias) is required",
});
// Helper function for dimension reduction
const reduceVector = (
vector: number[],
targetDim: number,
seed: number,
): number[] => {
const originalDim = vector.length;
const seededRandom = (s: number): number => {
const x = Math.sin(s) * 10000;
return x - Math.floor(x);
};
const reduced: number[] = [];
const scaleFactor = Math.sqrt(originalDim / targetDim);
for (let i = 0; i < targetDim; i++) {
let sum = 0;
for (let j = 0; j < originalDim; j++) {
const randVal = seededRandom(seed + i * originalDim + j) > 0.5 ? 1 : -1;
sum += (vector[j] ?? 0) * randVal;
}
reduced.push(sum / scaleFactor);
}
return reduced;
};
return {
name: "pg_vector_dimension_reduce",
description:
"Reduce vector dimensions using random projection. Supports direct vector input OR table-based extraction.",
group: "vector",
// Use base schema for MCP so properties are properly exposed in tool schema
inputSchema: VectorDimensionReduceSchemaBase,
outputSchema: VectorDimensionReduceOutputSchema,
annotations: readOnly("Vector Dimension Reduce"),
icons: getToolIcons("vector", readOnly("Vector Dimension Reduce")),
handler: async (params: unknown, _context: RequestContext) => {
// Use transformed schema with alias resolution for validation
const parsed = VectorDimensionReduceSchema.parse(params);
// Refine guarantees targetDimensions is defined, but add explicit check for type narrowing
const targetDim = parsed.targetDimensions;
if (targetDim === undefined) {
throw new Error("targetDimensions (or dimensions alias) is required");
}
const seed = parsed.seed ?? 42;
// Direct vector mode
if (parsed.vector !== undefined) {
const originalDim = parsed.vector.length;
if (targetDim >= originalDim) {
return {
success: false,
error: "Target dimensions must be less than original",
originalDimensions: originalDim,
targetDimensions: targetDim,
suggestion: `Reduce from ${String(originalDim)} to a smaller number`,
};
}
return {
originalDimensions: originalDim,
targetDimensions: targetDim,
reduced: reduceVector(parsed.vector, targetDim, seed),
method: "random_projection",
note: "For PCA or UMAP, use external libraries",
};
}
// Table-based mode
if (parsed.table !== undefined && parsed.column !== undefined) {
const idCol = parsed.idColumn ?? "id";
const limitVal = parsed.limit ?? 100;
// Fetch vectors from table
const sql = `
SELECT "${idCol}" as id, "${parsed.column}"::text as vector_text
FROM "${parsed.table}"
WHERE "${parsed.column}" IS NOT NULL
LIMIT ${String(limitVal)}
`;
const result = await adapter.executeQuery(sql);
if ((result.rows?.length ?? 0) === 0) {
return {
error: "No vectors found in table",
table: parsed.table,
column: parsed.column,
};
}
// Determine if we should summarize (default true for table mode)
const shouldSummarize = parsed.summarize ?? true;
// Parse and reduce each vector
const reducedRows: {
id: unknown;
original_dimensions: number;
reduced:
| number[]
| {
preview: number[] | null;
dimensions: number;
truncated: boolean;
};
}[] = [];
let originalDim = 0;
for (const row of result.rows ?? []) {
const vectorText = row["vector_text"] as string;
// Parse PostgreSQL vector format: [0.1, 0.2, ...]
const vectorMatch = /\[([\d.,\s-e]+)\]/.exec(vectorText);
if (vectorMatch?.[1] === undefined) continue;
const vector = vectorMatch[1]
.split(",")
.map((s) => parseFloat(s.trim()));
if (originalDim === 0) originalDim = vector.length;
if (targetDim >= vector.length) continue;
const reducedVector = reduceVector(vector, targetDim, seed);
// Apply summarization if requested
reducedRows.push({
id: row["id"],
original_dimensions: vector.length,
reduced: shouldSummarize
? truncateVector(reducedVector)
: reducedVector,
});
}
const response: Record<string, unknown> = {
mode: "table",
table: parsed.table,
column: parsed.column,
originalDimensions: originalDim,
targetDimensions: targetDim,
processedCount: reducedRows.length,
rows: reducedRows,
method: "random_projection",
note: "For PCA or UMAP, use external libraries",
};
// Add summarize indicator when summarization was applied
if (shouldSummarize) {
response["summarized"] = true;
response["hint"] =
"Vectors summarized to preview format. Use summarize: false for full vectors.";
}
return response;
}
return {
error:
"Either vector (for direct mode) or table+column (for table mode) must be provided",
usage: {
directMode: "{ vector: [0.1, 0.2, ...], targetDimensions: 50 }",
tableMode:
'{ table: "embeddings", column: "vector", targetDimensions: 50, limit: 100 }',
},
};
},
};
}
export function createVectorEmbedTool(): ToolDefinition {
const EmbedSchema = z.object({
text: z.string().describe("Text to embed"),
dimensions: z
.number()
.optional()
.describe("Vector dimensions (default: 384)"),
summarize: z
.boolean()
.optional()
.describe("Truncate embedding for display (default: true)"),
});
return {
name: "pg_vector_embed",
description:
"Generate text embeddings. Returns a simple hash-based embedding for demos (use external APIs for production).",
group: "vector",
inputSchema: EmbedSchema,
outputSchema: VectorEmbedOutputSchema,
annotations: readOnly("Vector Embed"),
icons: getToolIcons("vector", readOnly("Vector Embed")),
// eslint-disable-next-line @typescript-eslint/require-await
handler: async (params: unknown, _context: RequestContext) => {
const parsed = EmbedSchema.parse(params ?? {});
// Validate non-empty text
if (parsed.text === undefined || parsed.text === "") {
return {
success: false,
error: "text parameter is required and must be non-empty",
suggestion: "Provide text content to generate an embedding",
};
}
const dims = parsed.dimensions ?? 384;
const shouldSummarize = parsed.summarize ?? true;
const vector: number[] = [];
for (let i = 0; i < dims; i++) {
let hash = 0;
for (let j = 0; j < parsed.text.length; j++) {
hash = ((hash << 5) - hash + parsed.text.charCodeAt(j) + i) | 0;
}
vector.push(Math.sin(hash) * 0.5);
}
const magnitude = Math.sqrt(vector.reduce((sum, x) => sum + x * x, 0));
const normalized = vector.map((x) => x / magnitude);
// Always return object format for output schema compliance
// When summarized: use truncateVector helper
// When not summarized: wrap full vector in object format with truncated: false
const embeddingOutput = shouldSummarize
? truncateVector(normalized)
: {
preview: normalized,
dimensions: dims,
truncated: false,
};
return {
embedding: embeddingOutput,
dimensions: dims,
textLength: parsed.text.length,
warning:
"This is a demo embedding using hash functions. For production, use OpenAI, Cohere, or other embedding APIs.",
};
},
};
}