Skip to main content
Glama
madhukarkumar

SingleStore MCP Server

generate_synthetic_data

Create and insert synthetic data into a specified table, supporting custom column generators and batch processing for efficient data population in SingleStore databases.

Instructions

Generate and insert synthetic data into an existing table

Input Schema

TableJSON Schema
NameRequiredDescriptionDefault
batch_sizeNoNumber of rows to insert in each batch
column_generatorsNoCustom generators for specific columns (optional)
countNoNumber of rows to generate and insert
tableYesName of the table to insert data into

Implementation Reference

  • Main handler for generate_synthetic_data tool. Queries table schema, generates synthetic values using column types or custom generators, inserts data in batches excluding auto-increment columns, returns insertion stats.
    case 'generate_synthetic_data': { if (!request.params.arguments || typeof request.params.arguments.table !== 'string') { throw new McpError( ErrorCode.InvalidParams, 'Table parameter must be a string' ); } // First convert to unknown, then to our expected type const args = request.params.arguments as unknown as GenerateSyntheticDataArguments; const table = args.table; const count = Number(args.count || 100); const batchSize = Math.min(Number(args.batch_size || 1000), 5000); const columnGenerators = args.column_generators || {}; try { // Get table schema to understand column types const [columns] = await conn.query( 'DESCRIBE ??', [table] ) as [ColumnRowDataPacket[], mysql.FieldPacket[]]; if (columns.length === 0) { throw new McpError( ErrorCode.InvalidParams, `Table ${table} does not exist or has no columns` ); } // Identify auto-increment columns to exclude from insert const autoIncrementColumns = new Set( columns .filter(col => col.Extra?.includes('auto_increment')) .map(col => col.Field) ); // Filter out auto-increment columns const insertableColumns = columns.filter(col => !autoIncrementColumns.has(col.Field)); if (insertableColumns.length === 0) { throw new McpError( ErrorCode.InvalidParams, `Table ${table} has only auto-increment columns, cannot insert data` ); } // Generate data in batches let totalInserted = 0; const startTime = Date.now(); for (let batchStart = 0; batchStart < count; batchStart += batchSize) { const batchCount = Math.min(batchSize, count - batchStart); const rows = []; // Generate rows for this batch for (let i = 0; i < batchCount; i++) { const row: Record<string, any> = {}; for (const column of insertableColumns) { const columnName = column.Field; const columnType = column.Type.toLowerCase(); const isNullable = column.Null === 'YES'; // Check if we have a custom generator for this column if (columnGenerators[columnName]) { const generator = columnGenerators[columnName] as ColumnGenerator; switch (generator.type) { case 'sequence': row[columnName] = (generator.start || 0) + batchStart + i; break; case 'random': if (columnType.includes('int')) { const min = generator.start || 0; const max = generator.end || 1000000; row[columnName] = Math.floor(Math.random() * (max - min + 1)) + min; } else if (columnType.includes('float') || columnType.includes('double') || columnType.includes('decimal')) { const min = generator.start || 0; const max = generator.end || 1000; row[columnName] = Math.random() * (max - min) + min; } break; case 'values': if (Array.isArray(generator.values) && generator.values.length > 0) { const index = Math.floor(Math.random() * generator.values.length); row[columnName] = generator.values[index]; } break; case 'formula': // Formulas are applied during the INSERT statement row[columnName] = null; break; default: // Fall back to default generation row[columnName] = this.generateValueForColumn(columnType, isNullable); } } else { // Use default generation based on column type row[columnName] = this.generateValueForColumn(columnType, isNullable); } } rows.push(row); } // Prepare column names for the INSERT statement const columnNames = insertableColumns.map(col => col.Field); // Prepare placeholders for the VALUES clause const placeholders = rows.map(() => `(${columnNames.map(() => '?').join(', ')})` ).join(', '); // Flatten the values for the query const values = rows.flatMap(row => columnNames.map(col => row[col]) ); // Execute the INSERT statement const [result] = await conn.query( `INSERT INTO ${table} (${columnNames.join(', ')}) VALUES ${placeholders}`, values ) as [ResultSetHeader, mysql.FieldPacket[]]; totalInserted += result.affectedRows; } const duration = (Date.now() - startTime) / 1000; return { content: [ { type: 'text', text: JSON.stringify({ success: true, table, rows_inserted: totalInserted, duration_seconds: duration, rows_per_second: Math.round(totalInserted / duration) }, null, 2) } ] }; } catch (error: unknown) { const err = error as Error; throw new McpError( ErrorCode.InternalError, `Failed to generate synthetic data: ${err.message}` ); } }
  • TypeScript interface defining the input arguments for the generate_synthetic_data tool.
    interface GenerateSyntheticDataArguments { table: string; count?: number; batch_size?: number; column_generators?: Record<string, ColumnGenerator>; }
  • Input schema definition for the generate_synthetic_data tool in the ListToolsRequestSchema response.
    { name: 'generate_synthetic_data', description: 'Generate and insert synthetic data into an existing table', inputSchema: { type: 'object', properties: { table: { type: 'string', description: 'Name of the table to insert data into' }, count: { type: 'number', description: 'Number of rows to generate and insert', default: 100 }, batch_size: { type: 'number', description: 'Number of rows to insert in each batch', default: 1000 }, column_generators: { type: 'object', description: 'Custom generators for specific columns (optional)', additionalProperties: { type: 'object', properties: { type: { type: 'string', enum: ['sequence', 'random', 'values', 'formula'], description: 'Type of generator to use' }, start: { type: 'number', description: 'Starting value for sequence generator' }, end: { type: 'number', description: 'Ending value for random number generator' }, values: { type: 'array', items: { type: 'string' }, description: 'Array of values to choose from for values generator' }, formula: { type: 'string', description: 'SQL expression for formula generator' } } } } }, required: ['table'] } },
  • src/index.ts:1175-1379 (registration)
    Registration of the tool in the MCP server's ListToolsRequestSchema handler, including it in the available tools list.
    this.server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: [ { name: 'generate_er_diagram', description: 'Generate a Mermaid ER diagram of the database schema', inputSchema: { type: 'object', properties: {}, required: [], }, }, { name: 'list_tables', description: 'List all tables in the database', inputSchema: { type: 'object', properties: {}, required: [], }, }, { name: 'query_table', description: 'Execute a query on a table', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'SQL query to execute', }, }, required: ['query'], }, }, { name: 'describe_table', description: 'Get detailed information about a table', inputSchema: { type: 'object', properties: { table: { type: 'string', description: 'Name of the table to describe', }, }, required: ['table'], }, }, { name: 'run_read_query', description: 'Execute a read-only (SELECT) query on the database', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'SQL SELECT query to execute', }, }, required: ['query'], }, }, { name: 'create_table', description: 'Create a new table in the database with specified columns and constraints', inputSchema: { type: 'object', properties: { table_name: { type: 'string', description: 'Name of the table to create' }, columns: { type: 'array', items: { type: 'object', properties: { name: { type: 'string', description: 'Column name' }, type: { type: 'string', description: 'Data type (e.g., INT, VARCHAR(255), etc.)' }, nullable: { type: 'boolean', description: 'Whether the column can be NULL' }, default: { type: 'string', description: 'Default value for the column' }, auto_increment: { type: 'boolean', description: 'Whether the column should auto increment' } }, required: ['name', 'type'] }, description: 'List of columns to create' }, table_options: { type: 'object', properties: { shard_key: { type: 'array', items: { type: 'string' }, description: 'Columns to use as shard key' }, sort_key: { type: 'array', items: { type: 'string' }, description: 'Columns to use as sort key' }, is_reference: { type: 'boolean', description: 'Whether this is a reference table' }, compression: { type: 'string', enum: ['SPARSE'], description: 'Table compression type' }, auto_increment_start: { type: 'number', description: 'Starting value for auto increment columns' } } } }, required: ['table_name', 'columns'] } }, { name: 'generate_synthetic_data', description: 'Generate and insert synthetic data into an existing table', inputSchema: { type: 'object', properties: { table: { type: 'string', description: 'Name of the table to insert data into' }, count: { type: 'number', description: 'Number of rows to generate and insert', default: 100 }, batch_size: { type: 'number', description: 'Number of rows to insert in each batch', default: 1000 }, column_generators: { type: 'object', description: 'Custom generators for specific columns (optional)', additionalProperties: { type: 'object', properties: { type: { type: 'string', enum: ['sequence', 'random', 'values', 'formula'], description: 'Type of generator to use' }, start: { type: 'number', description: 'Starting value for sequence generator' }, end: { type: 'number', description: 'Ending value for random number generator' }, values: { type: 'array', items: { type: 'string' }, description: 'Array of values to choose from for values generator' }, formula: { type: 'string', description: 'SQL expression for formula generator' } } } } }, required: ['table'] } }, { name: 'optimize_sql', description: 'Analyze a SQL query using PROFILE and provide optimization recommendations', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'SQL query to analyze and optimize' } }, required: ['query'] } } ], }));
  • Helper function to generate random values based on MySQL column types, used by the synthetic data generator.
    private generateValueForColumn(columnType: string, isNullable: boolean): any { // Handle NULL values for nullable columns (10% chance) if (isNullable && Math.random() < 0.1) { return null; } // Integer types if (columnType.includes('int')) { return Math.floor(Math.random() * 1000000); } // Decimal/numeric types if (columnType.includes('decimal') || columnType.includes('numeric') || columnType.includes('float') || columnType.includes('double')) { return parseFloat((Math.random() * 1000).toFixed(2)); } // Date and time types if (columnType.includes('date')) { const start = new Date(2020, 0, 1); const end = new Date(); return new Date(start.getTime() + Math.random() * (end.getTime() - start.getTime())) .toISOString().split('T')[0]; } if (columnType.includes('time')) { const hours = Math.floor(Math.random() * 24); const minutes = Math.floor(Math.random() * 60); const seconds = Math.floor(Math.random() * 60); return `${hours.toString().padStart(2, '0')}:${minutes.toString().padStart(2, '0')}:${seconds.toString().padStart(2, '0')}`; } if (columnType.includes('datetime') || columnType.includes('timestamp')) { const start = new Date(2020, 0, 1); const end = new Date(); return new Date(start.getTime() + Math.random() * (end.getTime() - start.getTime())) .toISOString().slice(0, 19).replace('T', ' '); } // Boolean/bit types if (columnType.includes('bool') || columnType.includes('bit(1)')) { return Math.random() > 0.5 ? 1 : 0; } // Text types if (columnType.includes('char') || columnType.includes('text')) { // Extract the length for varchar/char if specified let length = 10; const matches = columnType.match(/\((\d+)\)/); if (matches && matches[1]) { length = Math.min(parseInt(matches[1], 10), 50); // Cap at 50 to avoid huge strings } return randomBytes(Math.ceil(length / 2)) .toString('hex') .slice(0, length); } // JSON type if (columnType.includes('json')) { return JSON.stringify({ id: Math.floor(Math.random() * 1000), name: `Item ${Math.floor(Math.random() * 100)}`, value: Math.random() * 100 }); } // Enum or set types if (columnType.includes('enum') || columnType.includes('set')) { // Extract values from enum/set definition: enum('value1','value2') const matches = columnType.match(/'([^']+)'/g); if (matches && matches.length > 0) { const values = matches.map(m => m.replace(/'/g, '')); return values[Math.floor(Math.random() * values.length)]; } } // Default fallback for unknown types return 'unknown_type_data'; }

Other Tools

Related Tools

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/madhukarkumar/singlestore-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server