Skip to main content
Glama

SingleStore MCP Server

generate_synthetic_data

Create and insert synthetic data into a specified table, supporting custom column generators and batch processing for efficient data population in SingleStore databases.

Instructions

Generate and insert synthetic data into an existing table

Input Schema

NameRequiredDescriptionDefault
batch_sizeNoNumber of rows to insert in each batch
column_generatorsNoCustom generators for specific columns (optional)
countNoNumber of rows to generate and insert
tableYesName of the table to insert data into

Input Schema (JSON Schema)

{ "properties": { "batch_size": { "default": 1000, "description": "Number of rows to insert in each batch", "type": "number" }, "column_generators": { "additionalProperties": { "properties": { "end": { "description": "Ending value for random number generator", "type": "number" }, "formula": { "description": "SQL expression for formula generator", "type": "string" }, "start": { "description": "Starting value for sequence generator", "type": "number" }, "type": { "description": "Type of generator to use", "enum": [ "sequence", "random", "values", "formula" ], "type": "string" }, "values": { "description": "Array of values to choose from for values generator", "items": { "type": "string" }, "type": "array" } }, "type": "object" }, "description": "Custom generators for specific columns (optional)", "type": "object" }, "count": { "default": 100, "description": "Number of rows to generate and insert", "type": "number" }, "table": { "description": "Name of the table to insert data into", "type": "string" } }, "required": [ "table" ], "type": "object" }

Implementation Reference

  • Main handler for generate_synthetic_data tool. Queries table schema, generates synthetic values using column types or custom generators, inserts data in batches excluding auto-increment columns, returns insertion stats.
    case 'generate_synthetic_data': { if (!request.params.arguments || typeof request.params.arguments.table !== 'string') { throw new McpError( ErrorCode.InvalidParams, 'Table parameter must be a string' ); } // First convert to unknown, then to our expected type const args = request.params.arguments as unknown as GenerateSyntheticDataArguments; const table = args.table; const count = Number(args.count || 100); const batchSize = Math.min(Number(args.batch_size || 1000), 5000); const columnGenerators = args.column_generators || {}; try { // Get table schema to understand column types const [columns] = await conn.query( 'DESCRIBE ??', [table] ) as [ColumnRowDataPacket[], mysql.FieldPacket[]]; if (columns.length === 0) { throw new McpError( ErrorCode.InvalidParams, `Table ${table} does not exist or has no columns` ); } // Identify auto-increment columns to exclude from insert const autoIncrementColumns = new Set( columns .filter(col => col.Extra?.includes('auto_increment')) .map(col => col.Field) ); // Filter out auto-increment columns const insertableColumns = columns.filter(col => !autoIncrementColumns.has(col.Field)); if (insertableColumns.length === 0) { throw new McpError( ErrorCode.InvalidParams, `Table ${table} has only auto-increment columns, cannot insert data` ); } // Generate data in batches let totalInserted = 0; const startTime = Date.now(); for (let batchStart = 0; batchStart < count; batchStart += batchSize) { const batchCount = Math.min(batchSize, count - batchStart); const rows = []; // Generate rows for this batch for (let i = 0; i < batchCount; i++) { const row: Record<string, any> = {}; for (const column of insertableColumns) { const columnName = column.Field; const columnType = column.Type.toLowerCase(); const isNullable = column.Null === 'YES'; // Check if we have a custom generator for this column if (columnGenerators[columnName]) { const generator = columnGenerators[columnName] as ColumnGenerator; switch (generator.type) { case 'sequence': row[columnName] = (generator.start || 0) + batchStart + i; break; case 'random': if (columnType.includes('int')) { const min = generator.start || 0; const max = generator.end || 1000000; row[columnName] = Math.floor(Math.random() * (max - min + 1)) + min; } else if (columnType.includes('float') || columnType.includes('double') || columnType.includes('decimal')) { const min = generator.start || 0; const max = generator.end || 1000; row[columnName] = Math.random() * (max - min) + min; } break; case 'values': if (Array.isArray(generator.values) && generator.values.length > 0) { const index = Math.floor(Math.random() * generator.values.length); row[columnName] = generator.values[index]; } break; case 'formula': // Formulas are applied during the INSERT statement row[columnName] = null; break; default: // Fall back to default generation row[columnName] = this.generateValueForColumn(columnType, isNullable); } } else { // Use default generation based on column type row[columnName] = this.generateValueForColumn(columnType, isNullable); } } rows.push(row); } // Prepare column names for the INSERT statement const columnNames = insertableColumns.map(col => col.Field); // Prepare placeholders for the VALUES clause const placeholders = rows.map(() => `(${columnNames.map(() => '?').join(', ')})` ).join(', '); // Flatten the values for the query const values = rows.flatMap(row => columnNames.map(col => row[col]) ); // Execute the INSERT statement const [result] = await conn.query( `INSERT INTO ${table} (${columnNames.join(', ')}) VALUES ${placeholders}`, values ) as [ResultSetHeader, mysql.FieldPacket[]]; totalInserted += result.affectedRows; } const duration = (Date.now() - startTime) / 1000; return { content: [ { type: 'text', text: JSON.stringify({ success: true, table, rows_inserted: totalInserted, duration_seconds: duration, rows_per_second: Math.round(totalInserted / duration) }, null, 2) } ] }; } catch (error: unknown) { const err = error as Error; throw new McpError( ErrorCode.InternalError, `Failed to generate synthetic data: ${err.message}` ); } }
  • TypeScript interface defining the input arguments for the generate_synthetic_data tool.
    interface GenerateSyntheticDataArguments { table: string; count?: number; batch_size?: number; column_generators?: Record<string, ColumnGenerator>; }
  • Input schema definition for the generate_synthetic_data tool in the ListToolsRequestSchema response.
    { name: 'generate_synthetic_data', description: 'Generate and insert synthetic data into an existing table', inputSchema: { type: 'object', properties: { table: { type: 'string', description: 'Name of the table to insert data into' }, count: { type: 'number', description: 'Number of rows to generate and insert', default: 100 }, batch_size: { type: 'number', description: 'Number of rows to insert in each batch', default: 1000 }, column_generators: { type: 'object', description: 'Custom generators for specific columns (optional)', additionalProperties: { type: 'object', properties: { type: { type: 'string', enum: ['sequence', 'random', 'values', 'formula'], description: 'Type of generator to use' }, start: { type: 'number', description: 'Starting value for sequence generator' }, end: { type: 'number', description: 'Ending value for random number generator' }, values: { type: 'array', items: { type: 'string' }, description: 'Array of values to choose from for values generator' }, formula: { type: 'string', description: 'SQL expression for formula generator' } } } } }, required: ['table'] } },
  • src/index.ts:1175-1379 (registration)
    Registration of the tool in the MCP server's ListToolsRequestSchema handler, including it in the available tools list.
    this.server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: [ { name: 'generate_er_diagram', description: 'Generate a Mermaid ER diagram of the database schema', inputSchema: { type: 'object', properties: {}, required: [], }, }, { name: 'list_tables', description: 'List all tables in the database', inputSchema: { type: 'object', properties: {}, required: [], }, }, { name: 'query_table', description: 'Execute a query on a table', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'SQL query to execute', }, }, required: ['query'], }, }, { name: 'describe_table', description: 'Get detailed information about a table', inputSchema: { type: 'object', properties: { table: { type: 'string', description: 'Name of the table to describe', }, }, required: ['table'], }, }, { name: 'run_read_query', description: 'Execute a read-only (SELECT) query on the database', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'SQL SELECT query to execute', }, }, required: ['query'], }, }, { name: 'create_table', description: 'Create a new table in the database with specified columns and constraints', inputSchema: { type: 'object', properties: { table_name: { type: 'string', description: 'Name of the table to create' }, columns: { type: 'array', items: { type: 'object', properties: { name: { type: 'string', description: 'Column name' }, type: { type: 'string', description: 'Data type (e.g., INT, VARCHAR(255), etc.)' }, nullable: { type: 'boolean', description: 'Whether the column can be NULL' }, default: { type: 'string', description: 'Default value for the column' }, auto_increment: { type: 'boolean', description: 'Whether the column should auto increment' } }, required: ['name', 'type'] }, description: 'List of columns to create' }, table_options: { type: 'object', properties: { shard_key: { type: 'array', items: { type: 'string' }, description: 'Columns to use as shard key' }, sort_key: { type: 'array', items: { type: 'string' }, description: 'Columns to use as sort key' }, is_reference: { type: 'boolean', description: 'Whether this is a reference table' }, compression: { type: 'string', enum: ['SPARSE'], description: 'Table compression type' }, auto_increment_start: { type: 'number', description: 'Starting value for auto increment columns' } } } }, required: ['table_name', 'columns'] } }, { name: 'generate_synthetic_data', description: 'Generate and insert synthetic data into an existing table', inputSchema: { type: 'object', properties: { table: { type: 'string', description: 'Name of the table to insert data into' }, count: { type: 'number', description: 'Number of rows to generate and insert', default: 100 }, batch_size: { type: 'number', description: 'Number of rows to insert in each batch', default: 1000 }, column_generators: { type: 'object', description: 'Custom generators for specific columns (optional)', additionalProperties: { type: 'object', properties: { type: { type: 'string', enum: ['sequence', 'random', 'values', 'formula'], description: 'Type of generator to use' }, start: { type: 'number', description: 'Starting value for sequence generator' }, end: { type: 'number', description: 'Ending value for random number generator' }, values: { type: 'array', items: { type: 'string' }, description: 'Array of values to choose from for values generator' }, formula: { type: 'string', description: 'SQL expression for formula generator' } } } } }, required: ['table'] } }, { name: 'optimize_sql', description: 'Analyze a SQL query using PROFILE and provide optimization recommendations', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'SQL query to analyze and optimize' } }, required: ['query'] } } ], }));
  • Helper function to generate random values based on MySQL column types, used by the synthetic data generator.
    private generateValueForColumn(columnType: string, isNullable: boolean): any { // Handle NULL values for nullable columns (10% chance) if (isNullable && Math.random() < 0.1) { return null; } // Integer types if (columnType.includes('int')) { return Math.floor(Math.random() * 1000000); } // Decimal/numeric types if (columnType.includes('decimal') || columnType.includes('numeric') || columnType.includes('float') || columnType.includes('double')) { return parseFloat((Math.random() * 1000).toFixed(2)); } // Date and time types if (columnType.includes('date')) { const start = new Date(2020, 0, 1); const end = new Date(); return new Date(start.getTime() + Math.random() * (end.getTime() - start.getTime())) .toISOString().split('T')[0]; } if (columnType.includes('time')) { const hours = Math.floor(Math.random() * 24); const minutes = Math.floor(Math.random() * 60); const seconds = Math.floor(Math.random() * 60); return `${hours.toString().padStart(2, '0')}:${minutes.toString().padStart(2, '0')}:${seconds.toString().padStart(2, '0')}`; } if (columnType.includes('datetime') || columnType.includes('timestamp')) { const start = new Date(2020, 0, 1); const end = new Date(); return new Date(start.getTime() + Math.random() * (end.getTime() - start.getTime())) .toISOString().slice(0, 19).replace('T', ' '); } // Boolean/bit types if (columnType.includes('bool') || columnType.includes('bit(1)')) { return Math.random() > 0.5 ? 1 : 0; } // Text types if (columnType.includes('char') || columnType.includes('text')) { // Extract the length for varchar/char if specified let length = 10; const matches = columnType.match(/\((\d+)\)/); if (matches && matches[1]) { length = Math.min(parseInt(matches[1], 10), 50); // Cap at 50 to avoid huge strings } return randomBytes(Math.ceil(length / 2)) .toString('hex') .slice(0, length); } // JSON type if (columnType.includes('json')) { return JSON.stringify({ id: Math.floor(Math.random() * 1000), name: `Item ${Math.floor(Math.random() * 100)}`, value: Math.random() * 100 }); } // Enum or set types if (columnType.includes('enum') || columnType.includes('set')) { // Extract values from enum/set definition: enum('value1','value2') const matches = columnType.match(/'([^']+)'/g); if (matches && matches.length > 0) { const values = matches.map(m => m.replace(/'/g, '')); return values[Math.floor(Math.random() * values.length)]; } } // Default fallback for unknown types return 'unknown_type_data'; }

Other Tools

Related Tools

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/madhukarkumar/singlestore-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server