# GEPA MCP Server Integration Guide
This guide provides comprehensive examples and patterns for integrating the GEPA MCP Server with various programming languages, frameworks, and development workflows.
## Table of Contents
- [Quick Start](#quick-start)
- [MCP Client Setup](#mcp-client-setup)
- [Integration Patterns](#integration-patterns)
- [Language Examples](#language-examples)
- [Framework Integration](#framework-integration)
- [CI/CD Integration](#cicd-integration)
- [Best Practices](#best-practices)
## Quick Start
### 1. Install and Start GEPA MCP Server
```bash
# Install dependencies
npm install
# Build the server
npm run build
# Start the server (stdio mode)
npm run mcp:start
```
### 2. Basic MCP Client Setup
```typescript
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
// Create MCP client
const client = new Client({
name: "gepa-integration-client",
version: "1.0.0"
});
// Connect via stdio
const transport = new StdioClientTransport({
command: "node",
args: ["dist/mcp/server.js"]
});
await client.connect(transport);
```
### 3. Your First Evolution
```typescript
// Start evolution process
const evolution = await client.callTool({
name: "gepa_start_evolution",
arguments: {
taskDescription: "Generate clean, documented TypeScript functions",
seedPrompt: "Create a TypeScript function that:",
config: {
populationSize: 15,
generations: 8
}
}
});
console.log(evolution.content[0].text);
```
## MCP Client Setup
### Node.js/TypeScript Client
```typescript
// client.ts
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
export class GEPAClient {
private client: Client;
private transport: StdioClientTransport;
constructor() {
this.client = new Client({
name: "gepa-client",
version: "1.0.0"
});
this.transport = new StdioClientTransport({
command: "node",
args: ["dist/mcp/server.js"],
cwd: "/path/to/gepa-mcp-server"
});
}
async connect(): Promise<void> {
await this.client.connect(this.transport);
}
async callTool(name: string, args: any): Promise<any> {
// Use the SDK's callTool helper; a raw client.request would also
// require passing the CallToolResult schema as a second argument.
return await this.client.callTool({
name,
arguments: args
});
}
async disconnect(): Promise<void> {
await this.client.close();
}
}
// Usage
const gepa = new GEPAClient();
await gepa.connect();
try {
const result = await gepa.callTool('gepa_start_evolution', {
taskDescription: 'Generate API documentation'
});
console.log(result);
} finally {
await gepa.disconnect();
}
```
### Python Client
```python
# gepa_client.py
import asyncio
import json
import subprocess
from typing import Dict, Any, Optional
class GEPAClient:
def __init__(self, server_path: str):
self.server_path = server_path
self.process = None
async def connect(self):
"""Start the GEPA MCP server process"""
self.process = await asyncio.create_subprocess_exec(
'node', f'{self.server_path}/dist/mcp/server.js',
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE  # NOTE: drain or redirect stderr in production; an unread PIPE can block the server
)
async def call_tool(self, tool_name: str, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Call a GEPA tool with the given arguments"""
if not self.process:
raise RuntimeError("Client not connected. Call connect() first.")
request = {
"jsonrpc": "2.0",
"id": 1,
"method": "tools/call",
"params": {
"name": tool_name,
"arguments": arguments
}
}
# Send request
request_json = json.dumps(request) + '\n'
self.process.stdin.write(request_json.encode())
await self.process.stdin.drain()
# Read response
response_line = await self.process.stdout.readline()
response = json.loads(response_line.decode())
return response.get('result', response)
async def disconnect(self):
"""Close the connection to the server"""
if self.process:
self.process.terminate()
await self.process.wait()
# Usage example
async def main():
client = GEPAClient('/path/to/gepa-mcp-server')
await client.connect()
try:
# Start evolution
result = await client.call_tool('gepa_start_evolution', {
'taskDescription': 'Generate Python data classes',
'seedPrompt': 'Create a Python dataclass for:',
'config': {
'populationSize': 20,
'generations': 10
}
})
print(result['content'][0]['text'])
finally:
await client.disconnect()
if __name__ == "__main__":
asyncio.run(main())
```
### Java Client
```java
// GEPAClient.java
import java.io.*;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.JsonNode;
public class GEPAClient implements AutoCloseable {
private Process serverProcess;
private BufferedWriter stdin;
private BufferedReader stdout;
private ObjectMapper objectMapper;
private int requestId = 0;
public GEPAClient(String serverPath) throws IOException {
this.objectMapper = new ObjectMapper();
// Start GEPA server process
ProcessBuilder pb = new ProcessBuilder(
"node", serverPath + "/dist/mcp/server.js"
);
this.serverProcess = pb.start();
this.stdin = new BufferedWriter(
new OutputStreamWriter(serverProcess.getOutputStream())
);
this.stdout = new BufferedReader(
new InputStreamReader(serverProcess.getInputStream())
);
}
public CompletableFuture<JsonNode> callTool(String toolName, Map<String, Object> arguments) {
return CompletableFuture.supplyAsync(() -> {
try {
// Create request
Map<String, Object> request = new HashMap<>();
request.put("jsonrpc", "2.0");
request.put("id", ++requestId);
request.put("method", "tools/call");
Map<String, Object> params = new HashMap<>();
params.put("name", toolName);
params.put("arguments", arguments);
request.put("params", params);
// Send request
String requestJson = objectMapper.writeValueAsString(request);
stdin.write(requestJson + "\n");
stdin.flush();
// Read response
String responseLine = stdout.readLine();
JsonNode response = objectMapper.readTree(responseLine);
return response.get("result");
} catch (Exception e) {
throw new RuntimeException(e);
}
});
}
@Override
public void close() throws Exception {
if (stdin != null) stdin.close();
if (stdout != null) stdout.close();
if (serverProcess != null) {
serverProcess.destroy();
serverProcess.waitFor();
}
}
// Usage example
public static void main(String[] args) throws Exception {
try (GEPAClient client = new GEPAClient("/path/to/gepa-mcp-server")) {
Map<String, Object> evolutionArgs = new HashMap<>();
evolutionArgs.put("taskDescription", "Generate Java POJOs");
evolutionArgs.put("seedPrompt", "Create a Java class that represents:");
Map<String, Object> config = new HashMap<>();
config.put("populationSize", 15);
config.put("generations", 8);
evolutionArgs.put("config", config);
JsonNode result = client.callTool("gepa_start_evolution", evolutionArgs).get();
System.out.println(result.get("content").get(0).get("text").asText());
}
}
}
```
## Integration Patterns
### 1. Complete Evolution Workflow
```typescript
class EvolutionWorkflow {
constructor(private client: GEPAClient) {}
async runCompleteEvolution(
taskDescription: string,
taskIds: string[],
evaluationCriteria: EvaluationCriteria
): Promise<OptimalCandidate> {
// 1. Start evolution
const evolution = await this.client.callTool('gepa_start_evolution', {
taskDescription,
config: {
populationSize: evaluationCriteria.populationSize || 20,
generations: evaluationCriteria.generations || 10
}
});
const evolutionId = this.extractEvolutionId(evolution);
// 2. Generate and evaluate candidates
const candidates = await this.generateCandidates(evolutionId);
const evaluatedCandidates = [];
for (const candidate of candidates) {
// Evaluate each candidate
const evaluation = await this.client.callTool('gepa_evaluate_prompt', {
promptId: candidate.id,
taskIds,
rolloutCount: evaluationCriteria.rolloutCount || 5,
parallel: true
});
// Record trajectories for each evaluation
const trajectories = await this.recordEvaluationTrajectories(
candidate.id,
taskIds,
evaluation
);
evaluatedCandidates.push({
candidate,
evaluation,
trajectories
});
}
// 3. Perform reflection analysis on failed trajectories
const failedTrajectories = evaluatedCandidates
.flatMap(ec => ec.trajectories)
.filter(t => !t.success);
if (failedTrajectories.length > 0) {
const reflection = await this.client.callTool('gepa_reflect', {
trajectoryIds: failedTrajectories.map(t => t.id),
targetPromptId: this.findBestCandidateId(evaluatedCandidates),
analysisDepth: 'deep'
});
// Apply reflection insights for next generation...
}
// 4. Get optimal candidate from Pareto frontier
const optimal = await this.client.callTool('gepa_select_optimal', {
taskContext: taskDescription,
performanceWeight: evaluationCriteria.performanceWeight || 0.7,
diversityWeight: evaluationCriteria.diversityWeight || 0.3
});
return this.parseOptimalCandidate(optimal);
}
private async recordEvaluationTrajectories(
promptId: string,
taskIds: string[],
evaluation: any
): Promise<Trajectory[]> {
const trajectories = [];
for (const taskId of taskIds) {
// Simulate trajectory recording (would be real execution data)
const trajectory = await this.client.callTool('gepa_record_trajectory', {
promptId,
taskId,
executionSteps: this.generateExecutionSteps(taskId),
result: {
success: Math.random() > 0.2,
score: Math.random() * 0.5 + 0.5,
output: { taskResult: `Result for ${taskId}` }
},
metadata: {
llmModel: 'claude-3-sonnet',
executionTime: Math.random() * 3000 + 1000,
tokenUsage: Math.random() * 1000 + 500
}
});
trajectories.push(trajectory);
}
return trajectories;
}
// Helper methods...
private extractEvolutionId(evolution: any): string {
// Parse evolution ID from response
return 'evolution_' + Date.now();
}
private async generateCandidates(evolutionId: string): Promise<Candidate[]> {
// Generate initial candidate population
return [];
}
private generateExecutionSteps(taskId: string): ExecutionStep[] {
return [
{
action: 'parse_task',
timestamp: new Date().toISOString(),
success: true,
reasoning: `Parsing task requirements for ${taskId}`
},
{
action: 'generate_solution',
timestamp: new Date().toISOString(),
success: true,
reasoning: 'Generating solution based on parsed requirements'
}
];
}
}
// Usage
const workflow = new EvolutionWorkflow(client);
const optimal = await workflow.runCompleteEvolution(
'Generate comprehensive unit tests for TypeScript functions',
['unit_test_basic', 'unit_test_edge_cases', 'unit_test_mocking'],
{
populationSize: 25,
generations: 15,
rolloutCount: 8,
performanceWeight: 0.8,
diversityWeight: 0.2
}
);
```
### 2. Automated Prompt Optimization Pipeline
```typescript
class PromptOptimizationPipeline {
constructor(
private client: GEPAClient,
private config: PipelineConfig
) {}
async optimizePrompt(
initialPrompt: string,
taskDescription: string,
testSuite: TestCase[]
): Promise<OptimizedPrompt> {
// 1. Baseline evaluation
const baseline = await this.evaluateBaseline(initialPrompt, testSuite);
// 2. Start evolution with baseline as seed
const evolution = await this.client.callTool('gepa_start_evolution', {
taskDescription,
seedPrompt: initialPrompt,
config: this.config.evolution
});
// 3. Iterative improvement cycle
let generation = 0;
let bestCandidate = null;
const improvementHistory = [];
while (generation < this.config.maxGenerations) {
// Generate candidate variations
const candidates = await this.generateCandidateGeneration(
generation,
bestCandidate || initialPrompt
);
// Evaluate candidates against test suite
const evaluationResults = await Promise.all(
candidates.map(candidate =>
this.evaluateCandidate(candidate, testSuite)
)
);
// Record trajectories for reflection
await this.recordGenerationTrajectories(candidates, evaluationResults);
// Analyze failures and get improvement suggestions
const reflection = await this.analyzeFailures(
evaluationResults.filter(r => !r.success),
bestCandidate?.id
);
// Update best candidate
const currentBest = this.selectBestCandidate(evaluationResults);
if (!bestCandidate || currentBest.score > bestCandidate.score) {
bestCandidate = currentBest;
}
improvementHistory.push({
generation,
bestScore: bestCandidate.score,
improvements: reflection.suggestions,
candidateCount: candidates.length
});
// Check convergence
if (this.hasConverged(improvementHistory)) {
console.log(`Converged after ${generation + 1} generations`);
break;
}
generation++;
}
// 4. Select final optimal candidate
const optimal = await this.client.callTool('gepa_select_optimal', {
taskContext: taskDescription,
performanceWeight: 0.85,
diversityWeight: 0.15
});
return {
originalPrompt: initialPrompt,
optimizedPrompt: optimal.candidate.content,
improvementScore: optimal.score - baseline.score,
generations: generation + 1,
history: improvementHistory,
testResults: await this.runFinalValidation(optimal.candidate, testSuite)
};
}
private async evaluateBaseline(
prompt: string,
testSuite: TestCase[]
): Promise<EvaluationResult> {
const results = await Promise.all(
testSuite.map(test => this.runTest(prompt, test))
);
return {
prompt,
score: results.reduce((sum, r) => sum + r.score, 0) / results.length,
success: results.every(r => r.success),
details: results
};
}
private async runTest(prompt: string, test: TestCase): Promise<TestResult> {
// Execute test case with prompt (implementation specific)
// This would integrate with your actual testing framework
const startTime = Date.now();
try {
const result = await this.executePromptWithTest(prompt, test);
const executionTime = Date.now() - startTime;
return {
testId: test.id,
success: result.success,
score: result.score,
executionTime,
output: result.output,
error: result.error
};
} catch (error) {
return {
testId: test.id,
success: false,
score: 0,
executionTime: Date.now() - startTime,
error: error.message
};
}
}
private hasConverged(history: ImprovementHistory[]): boolean {
if (history.length < 3) return false;
const recentScores = history.slice(-3).map(h => h.bestScore);
const improvement = recentScores[2] - recentScores[0];
return improvement < this.config.convergenceThreshold;
}
}
// Usage
const pipeline = new PromptOptimizationPipeline(client, {
evolution: {
populationSize: 20,
generations: 15,
mutationRate: 0.15
},
maxGenerations: 25,
convergenceThreshold: 0.01
});
const testSuite = [
{
id: 'basic_functionality',
input: 'Simple function requirements',
expectedOutput: 'Clean TypeScript function',
criteria: 'Correctness and syntax'
},
{
id: 'edge_cases',
input: 'Complex edge case scenarios',
expectedOutput: 'Robust error handling',
criteria: 'Error handling and validation'
}
];
const optimized = await pipeline.optimizePrompt(
'Write a TypeScript function',
'Generate production-ready TypeScript functions with proper error handling',
testSuite
);
console.log(`Improved by ${(optimized.improvementScore * 100).toFixed(1)}% over ${optimized.generations} generations`);
```
### 3. Continuous Learning System
```typescript
class ContinuousLearningSystem {
private performanceHistory: Map<string, PerformanceMetrics[]> = new Map();
private recentTrajectories: Trajectory[] = [];
constructor(
private client: GEPAClient,
private config: LearningConfig
) {
this.startPeriodicAnalysis();
}
async recordProductionUsage(
promptId: string,
taskType: string,
executionData: ExecutionData
): Promise<void> {
// Record trajectory for real-world usage
await this.client.callTool('gepa_record_trajectory', {
promptId,
taskId: `production_${taskType}_${Date.now()}`,
executionSteps: executionData.steps,
result: executionData.result,
metadata: {
environment: 'production',
taskType,
...executionData.metadata
}
});
// Update performance tracking
this.updatePerformanceHistory(promptId, taskType, executionData.result);
}
async triggerAdaptiveImprovement(taskType: string): Promise<void> {
const recentFailures = this.recentTrajectories
.filter(t => t.taskType === taskType && !t.success)
.slice(-10); // Last 10 failures
if (recentFailures.length >= this.config.failureThreshold) {
console.log(`Triggering adaptive improvement for ${taskType}`);
// Get current best prompt for this task type
const currentBest = await this.getCurrentBestPrompt(taskType);
// Analyze failure patterns
const reflection = await this.client.callTool('gepa_reflect', {
trajectoryIds: recentFailures.map(t => t.id),
targetPromptId: currentBest.id,
analysisDepth: 'deep',
focusAreas: ['error_recovery', 'edge_case_handling']
});
// Start targeted evolution based on analysis
const evolution = await this.client.callTool('gepa_start_evolution', {
taskDescription: `Improve ${taskType} handling based on production failures`,
seedPrompt: currentBest.content,
config: {
populationSize: 15,
generations: 8,
mutationRate: 0.2 // Higher mutation for exploration
}
});
// Automatically evaluate and deploy if significant improvement
await this.evaluateAndDeploy(evolution, taskType);
}
}
private startPeriodicAnalysis(): void {
setInterval(async () => {
await this.performPeriodicAnalysis();
}, this.config.analysisInterval);
}
private async performPeriodicAnalysis(): Promise<void> {
// Analyze performance trends across all task types
for (const [taskType, metrics] of this.performanceHistory) {
const recentMetrics = metrics.slice(-20); // Last 20 executions
const avgPerformance = recentMetrics.reduce((sum, m) => sum + m.score, 0) / recentMetrics.length;
if (avgPerformance < this.config.performanceThreshold) {
console.log(`Performance degradation detected for ${taskType}: ${avgPerformance.toFixed(3)}`);
await this.triggerAdaptiveImprovement(taskType);
}
}
// Create backup of current state
await this.client.callTool('gepa_create_backup', {
label: `periodic_backup_${new Date().toISOString().split('T')[0]}`,
includeTrajectories: true
});
}
private async evaluateAndDeploy(
evolution: any,
taskType: string
): Promise<void> {
// Get improved candidates from Pareto frontier
const candidates = await this.client.callTool('gepa_get_pareto_frontier', {
minPerformance: 0.8,
limit: 3
});
// Test candidates against recent production scenarios
const validationResults = await this.validateCandidates(
candidates.candidates,
taskType
);
// Deploy best candidate if improvement is significant
const bestValidation = validationResults
.sort((a, b) => b.score - a.score)[0];
if (bestValidation.score > this.getCurrentPerformance(taskType) + 0.1) {
await this.deployPrompt(bestValidation.candidate, taskType);
console.log(`Deployed improved prompt for ${taskType}: +${(bestValidation.score * 100).toFixed(1)}%`);
}
}
private async validateCandidates(
candidates: Candidate[],
taskType: string
): Promise<ValidationResult[]> {
// Run validation against production-like scenarios
const validationTasks = this.getValidationTasks(taskType);
return await Promise.all(
candidates.map(async candidate => {
const results = await Promise.all(
validationTasks.map(task =>
this.runValidationTask(candidate, task)
)
);
return {
candidate,
score: results.reduce((sum, r) => sum + r.score, 0) / results.length,
success: results.every(r => r.success),
details: results
};
})
);
}
}
// Usage
const learningSystem = new ContinuousLearningSystem(client, {
failureThreshold: 5,
performanceThreshold: 0.7,
analysisInterval: 3600000 // 1 hour
});
// In your production application
app.post('/api/generate', async (req, res) => {
const result = await executePrompt(req.body.prompt, req.body.input);
// Record usage for continuous learning
await learningSystem.recordProductionUsage(
req.body.promptId,
req.body.taskType,
{
steps: result.executionSteps,
result: result.outcome,
metadata: {
userId: req.user.id,
timestamp: new Date(),
requestId: req.id
}
}
);
res.json(result);
});
```
## Framework Integration
### Express.js API Server
```typescript
// server.ts
import express from 'express';
import { GEPAClient } from './gepa-client';
const app = express();
const gepaClient = new GEPAClient();
app.use(express.json());
// Initialize GEPA client
app.listen(3000, async () => {
await gepaClient.connect();
console.log('Server running with GEPA integration');
});
// Evolution endpoint
app.post('/api/evolution/start', async (req, res) => {
try {
const { taskDescription, seedPrompt, config } = req.body;
const result = await gepaClient.callTool('gepa_start_evolution', {
taskDescription,
seedPrompt,
config
});
res.json({
success: true,
evolutionId: extractEvolutionId(result),
message: result.content[0].text
});
} catch (error) {
res.status(500).json({
success: false,
error: error.message
});
}
});
// Evaluation endpoint
app.post('/api/evolution/evaluate', async (req, res) => {
try {
const { promptId, taskIds, rolloutCount } = req.body;
const result = await gepaClient.callTool('gepa_evaluate_prompt', {
promptId,
taskIds,
rolloutCount: rolloutCount || 5,
parallel: true
});
res.json({
success: true,
evaluation: parseEvaluationResult(result)
});
} catch (error) {
res.status(500).json({
success: false,
error: error.message
});
}
});
// Pareto frontier endpoint
app.get('/api/optimization/pareto-frontier', async (req, res) => {
try {
const { minPerformance, taskFilter, limit } = req.query;
const result = await gepaClient.callTool('gepa_get_pareto_frontier', {
minPerformance: minPerformance ? parseFloat(String(minPerformance)) : undefined,
taskFilter: taskFilter ? String(taskFilter).split(',') : undefined,
limit: limit ? parseInt(String(limit), 10) : 10
});
res.json({
success: true,
frontier: parseFrontierResult(result)
});
} catch (error) {
res.status(500).json({
success: false,
error: error.message
});
}
});
// Health check with recovery status
app.get('/api/health', async (req, res) => {
try {
const status = await gepaClient.callTool('gepa_recovery_status', {
includeMetrics: true
});
res.json({
success: true,
status: parseHealthStatus(status),
timestamp: new Date().toISOString()
});
} catch (error) {
res.status(503).json({
success: false,
error: 'GEPA server unavailable',
details: error.message
});
}
});
// Graceful shutdown
process.on('SIGTERM', async () => {
await gepaClient.disconnect();
process.exit(0);
});
```
### React Dashboard Component
```typescript
// EvolutionDashboard.tsx
import React, { useState, useEffect } from 'react';
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
import { Button } from '@/components/ui/button';
import { Progress } from '@/components/ui/progress';
interface EvolutionDashboardProps {
apiBaseUrl: string;
}
export const EvolutionDashboard: React.FC<EvolutionDashboardProps> = ({
apiBaseUrl
}) => {
const [evolution, setEvolution] = useState(null);
const [frontier, setFrontier] = useState([]);
const [isLoading, setIsLoading] = useState(false);
const [healthStatus, setHealthStatus] = useState(null);
useEffect(() => {
fetchHealthStatus();
fetchParetoFrontier();
// Periodic updates
const interval = setInterval(() => {
fetchHealthStatus();
fetchParetoFrontier();
}, 30000);
return () => clearInterval(interval);
}, []);
const fetchHealthStatus = async () => {
try {
const response = await fetch(`${apiBaseUrl}/api/health`);
const data = await response.json();
setHealthStatus(data.status);
} catch (error) {
console.error('Failed to fetch health status:', error);
}
};
const fetchParetoFrontier = async () => {
try {
const response = await fetch(`${apiBaseUrl}/api/optimization/pareto-frontier`);
const data = await response.json();
setFrontier(data.frontier?.candidates || []);
} catch (error) {
console.error('Failed to fetch Pareto frontier:', error);
}
};
const startEvolution = async (taskDescription: string) => {
setIsLoading(true);
try {
const response = await fetch(`${apiBaseUrl}/api/evolution/start`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
taskDescription,
config: {
populationSize: 20,
generations: 10
}
})
});
const data = await response.json();
setEvolution(data);
} catch (error) {
console.error('Failed to start evolution:', error);
} finally {
setIsLoading(false);
}
};
return (
<div className="space-y-6">
{/* Health Status */}
<Card>
<CardHeader>
<CardTitle>System Health</CardTitle>
</CardHeader>
<CardContent>
{healthStatus && (
<div className="grid grid-cols-2 md:grid-cols-4 gap-4">
<div className="text-center">
<div className={`text-2xl font-bold ${
healthStatus.overall === 'HEALTHY' ? 'text-green-600' : 'text-red-600'
}`}>
{healthStatus.overall}
</div>
<div className="text-sm text-gray-600">Overall Status</div>
</div>
<div className="text-center">
<div className="text-2xl font-bold">{healthStatus.metrics?.backupsAvailable || 0}</div>
<div className="text-sm text-gray-600">Backups Available</div>
</div>
<div className="text-center">
<div className="text-2xl font-bold">
{(healthStatus.recoveryHistory?.successRate * 100 || 0).toFixed(1)}%
</div>
<div className="text-sm text-gray-600">Success Rate</div>
</div>
<div className="text-center">
<div className="text-2xl font-bold">{frontier.length}</div>
<div className="text-sm text-gray-600">Optimal Candidates</div>
</div>
</div>
)}
</CardContent>
</Card>
{/* Evolution Control */}
<Card>
<CardHeader>
<CardTitle>Start New Evolution</CardTitle>
</CardHeader>
<CardContent className="space-y-4">
<div>
<label className="block text-sm font-medium mb-2">
Task Description
</label>
<textarea
className="w-full px-3 py-2 border border-gray-300 rounded-md"
rows={3}
placeholder="Describe the task you want to optimize prompts for..."
id="taskDescription"
/>
</div>
<Button
onClick={() => {
const desc = (document.getElementById('taskDescription') as HTMLTextAreaElement).value;
if (desc.trim()) startEvolution(desc);
}}
disabled={isLoading}
>
{isLoading ? 'Starting Evolution...' : 'Start Evolution'}
</Button>
{evolution && (
<div className="mt-4 p-4 bg-green-50 border border-green-200 rounded-md">
<div className="font-medium">Evolution Started!</div>
<div className="text-sm text-gray-600 mt-1">
Evolution ID: {evolution.evolutionId}
</div>
</div>
)}
</CardContent>
</Card>
{/* Pareto Frontier */}
<Card>
<CardHeader>
<CardTitle>Top Performing Candidates</CardTitle>
</CardHeader>
<CardContent>
{frontier.length > 0 ? (
<div className="space-y-3">
{frontier.slice(0, 5).map((candidate, index) => (
<div key={candidate.id} className="flex items-center space-x-4 p-3 border rounded-lg">
<div className="text-lg font-bold text-gray-600">#{index + 1}</div>
<div className="flex-1">
<div className="font-medium">{candidate.id}</div>
<div className="text-sm text-gray-600">
Generation {candidate.generation} • Score: {candidate.score?.toFixed(3)}
</div>
</div>
<Progress value={candidate.score * 100} className="w-24" />
</div>
))}
</div>
) : (
<div className="text-center text-gray-500 py-8">
No candidates available. Start an evolution to see results.
</div>
)}
</CardContent>
</Card>
</div>
);
};
```
## CI/CD Integration
### GitHub Actions Workflow
```yaml
# .github/workflows/gepa-optimization.yml
name: GEPA Prompt Optimization
on:
push:
branches: [main]
paths: ['prompts/**']
schedule:
- cron: '0 2 * * *' # Daily at 2 AM
workflow_dispatch:
inputs:
task_description:
description: 'Task description for optimization'
required: true
default: 'Optimize prompts for code generation'
jobs:
optimize-prompts:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: '20'
cache: 'npm'
- name: Install dependencies
run: |
npm install
cd gepa-mcp-server && npm install && npm run build
- name: Start GEPA server
run: |
cd gepa-mcp-server
npm run mcp:start &
sleep 10 # Wait for server to start
- name: Run prompt optimization
run: |
node scripts/optimize-prompts.js
env:
TASK_DESCRIPTION: ${{ github.event.inputs.task_description || 'Daily prompt optimization' }}
MAX_GENERATIONS: 15
POPULATION_SIZE: 25
- name: Evaluate optimized prompts
run: |
node scripts/evaluate-prompts.js
- name: Create backup
run: |
node scripts/create-backup.js
- name: Generate optimization report
run: |
node scripts/generate-report.js > optimization-report.md
- name: Create Pull Request
if: success()
uses: peter-evans/create-pull-request@v5
with:
token: ${{ secrets.GITHUB_TOKEN }}
commit-message: 'feat: optimize prompts via GEPA evolution'
title: 'Automated Prompt Optimization'
body-path: optimization-report.md
branch: gepa-optimization-${{ github.run_number }}
- name: Upload optimization artifacts
uses: actions/upload-artifact@v4
with:
name: optimization-results
path: |
optimization-report.md
optimized-prompts/
evolution-history.json
```
### Optimization Script
```javascript
// scripts/optimize-prompts.js
const { GEPAClient } = require('../lib/gepa-client');
const fs = require('fs').promises;
const path = require('path');
async function main() {
const client = new GEPAClient();
await client.connect();
try {
const taskDescription = process.env.TASK_DESCRIPTION;
const maxGenerations = parseInt(process.env.MAX_GENERATIONS || '10');
const populationSize = parseInt(process.env.POPULATION_SIZE || '20');
console.log(`Starting optimization: ${taskDescription}`);
// Read current prompts
const promptsDir = path.join(process.cwd(), 'prompts');
const promptFiles = await fs.readdir(promptsDir);
const optimizationResults = [];
for (const promptFile of promptFiles.filter(f => f.endsWith('.txt'))) {
const promptPath = path.join(promptsDir, promptFile);
const currentPrompt = await fs.readFile(promptPath, 'utf-8');
console.log(`Optimizing ${promptFile}...`);
// Start evolution
const evolution = await client.callTool('gepa_start_evolution', {
taskDescription: `${taskDescription} (${promptFile})`,
seedPrompt: currentPrompt,
config: {
populationSize,
generations: maxGenerations,
mutationRate: 0.15
}
});
// Simulate evaluation and selection process
// (In real implementation, you'd run your actual test suite)
// Get optimal result
const optimal = await client.callTool('gepa_select_optimal', {
taskContext: taskDescription,
performanceWeight: 0.8,
diversityWeight: 0.2
});
// Save optimized prompt
const optimizedPath = path.join(process.cwd(), 'optimized-prompts', promptFile);
await fs.mkdir(path.dirname(optimizedPath), { recursive: true });
await fs.writeFile(optimizedPath, optimal.candidate.content);
optimizationResults.push({
file: promptFile,
originalLength: currentPrompt.length,
optimizedLength: optimal.candidate.content.length,
improvementScore: optimal.score,
generation: optimal.candidate.generation
});
console.log(`✅ Optimized ${promptFile}: ${optimal.score.toFixed(3)} score`);
}
// Save optimization history
await fs.writeFile(
'evolution-history.json',
JSON.stringify(optimizationResults, null, 2)
);
console.log('Optimization completed successfully!');
} catch (error) {
console.error('Optimization failed:', error);
process.exit(1);
} finally {
await client.disconnect();
}
}
main().catch((error) => {
console.error('Unexpected error:', error);
process.exit(1);
});
```
## Best Practices
### 1. Error Handling and Resilience
```typescript
class ResilientGEPAClient {
private client: GEPAClient;
private retryConfig: RetryConfig;
private circuitBreaker: CircuitBreaker;
constructor(config: ResilientClientConfig) {
this.client = new GEPAClient();
this.retryConfig = config.retry;
this.circuitBreaker = new CircuitBreaker(config.circuitBreaker);
}
async callToolWithResilience(
toolName: string,
args: any,
options: ResilienceOptions = {}
): Promise<any> {
return this.circuitBreaker.execute(async () => {
return this.retryWithBackoff(async () => {
try {
const result = await this.client.callTool(toolName, args);
if (result.isError) {
throw new GEPAError(result.content[0].text);
}
return result;
} catch (error) {
// Log error with context
console.error(`GEPA tool call failed: ${toolName}`, {
args,
error: error.message,
timestamp: new Date().toISOString()
});
// Check if error is retryable
if (this.isRetryableError(error)) {
throw error;
} else {
throw new NonRetryableError(error.message);
}
}
}, options.retryAttempts || this.retryConfig.maxAttempts);
});
}
private async retryWithBackoff<T>(
operation: () => Promise<T>,
maxAttempts: number
): Promise<T> {
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
try {
return await operation();
} catch (error) {
if (attempt === maxAttempts) {
throw error;
}
const delay = this.calculateBackoffDelay(attempt);
console.warn(`Attempt ${attempt} failed, retrying in ${delay}ms...`);
await this.sleep(delay);
}
}
// Unreachable: the final attempt either returns or rethrows above.
throw new Error('retryWithBackoff: exhausted attempts');
}
private calculateBackoffDelay(attempt: number): number {
return Math.min(
this.retryConfig.baseDelay * Math.pow(2, attempt - 1),
this.retryConfig.maxDelay
);
}
private isRetryableError(error: Error): boolean {
return error.message.includes('timeout') ||
error.message.includes('connection') ||
error.message.includes('unavailable');
}
private sleep(ms: number): Promise<void> {
return new Promise(resolve => setTimeout(resolve, ms));
}
}
```
### 2. Performance Monitoring
```typescript
/**
 * Records per-tool latency and heap-delta metrics around GEPA tool calls
 * and aggregates them into a summary report.
 */
class PerformanceMonitor {
  private metrics: Map<string, PerformanceMetric[]> = new Map();

  /**
   * Runs `operation`, recording duration and heap delta under `toolName`.
   * Failures are recorded (success: false) and rethrown unchanged.
   */
  async monitorToolCall<T>(
    toolName: string,
    operation: () => Promise<T>
  ): Promise<T> {
    const startTime = performance.now();
    const startMemory = process.memoryUsage();
    try {
      const result = await operation();
      this.recordSuccess(toolName, startTime, startMemory);
      return result;
    } catch (error) {
      this.recordFailure(toolName, startTime, startMemory, error);
      throw error;
    }
  }

  private recordSuccess(
    toolName: string,
    startTime: number,
    startMemory: NodeJS.MemoryUsage
  ): void {
    const endTime = performance.now();
    const endMemory = process.memoryUsage();
    const metric: PerformanceMetric = {
      toolName,
      duration: endTime - startTime,
      memoryDelta: endMemory.heapUsed - startMemory.heapUsed,
      success: true,
      timestamp: new Date()
    };
    this.addMetric(toolName, metric);
  }

  // Was referenced but never defined: record a failed call (success: false).
  private recordFailure(
    toolName: string,
    startTime: number,
    startMemory: NodeJS.MemoryUsage,
    _error: unknown
  ): void {
    const endTime = performance.now();
    const endMemory = process.memoryUsage();
    this.addMetric(toolName, {
      toolName,
      duration: endTime - startTime,
      memoryDelta: endMemory.heapUsed - startMemory.heapUsed,
      success: false,
      timestamp: new Date()
    });
  }

  // Was referenced but never defined: append a metric to the per-tool bucket.
  private addMetric(toolName: string, metric: PerformanceMetric): void {
    const bucket = this.metrics.get(toolName);
    if (bucket) {
      bucket.push(metric);
    } else {
      this.metrics.set(toolName, [metric]);
    }
  }

  // Was referenced but never defined: nearest-rank percentile (0..1 scale).
  private calculatePercentile(values: number[], percentile: number): number {
    if (values.length === 0) {
      return 0;
    }
    const sorted = [...values].sort((a, b) => a - b);
    const rank = Math.ceil(percentile * sorted.length) - 1;
    return sorted[Math.min(sorted.length - 1, Math.max(0, rank))];
  }

  /** Aggregates recorded metrics per tool and across all tools. */
  getPerformanceReport(): PerformanceReport {
    const report: PerformanceReport = {
      tools: {},
      overall: {
        totalCalls: 0,
        successRate: 0,
        averageLatency: 0,
        memoryEfficiency: 0
      }
    };
    let overallSuccesses = 0;
    let overallLatency = 0;
    for (const [toolName, metrics] of this.metrics) {
      const successfulCalls = metrics.filter(m => m.success);
      const okCount = successfulCalls.length;
      const latencySum = successfulCalls.reduce((sum, m) => sum + m.duration, 0);
      report.tools[toolName] = {
        totalCalls: metrics.length,
        successfulCalls: okCount,
        successRate: okCount / metrics.length,
        // Guard against NaN when a tool has no successful calls yet.
        averageLatency: okCount > 0 ? latencySum / okCount : 0,
        p95Latency: this.calculatePercentile(successfulCalls.map(m => m.duration), 0.95),
        averageMemoryUsage: okCount > 0
          ? successfulCalls.reduce((sum, m) => sum + Math.abs(m.memoryDelta), 0) / okCount
          : 0
      };
      report.overall.totalCalls += metrics.length;
      overallSuccesses += okCount;
      overallLatency += latencySum;
    }
    if (report.overall.totalCalls > 0) {
      report.overall.successRate = overallSuccesses / report.overall.totalCalls;
    }
    if (overallSuccesses > 0) {
      report.overall.averageLatency = overallLatency / overallSuccesses;
    }
    // NOTE(review): memoryEfficiency semantics are not defined in SOURCE; left at 0.
    return report;
  }
}
// Usage with monitoring: wrap each resilient tool call so latency and
// memory metrics are captured even when the call ultimately fails.
// NOTE(review): `config` is assumed to be a ResilientClientConfig supplied
// by the surrounding application — not defined in this snippet.
const monitor = new PerformanceMonitor();
const client = new ResilientGEPAClient(config);
const result = await monitor.monitorToolCall('gepa_start_evolution', () =>
client.callToolWithResilience('gepa_start_evolution', {
taskDescription: 'Generate high-quality documentation'
})
);
```
### 3. Configuration Management
```typescript
// config/gepa-config.ts
/**
 * Full configuration surface for a GEPA MCP Server integration,
 * grouped by concern. Defaults are supplied by createDefaultConfig().
 */
export interface GEPAConfiguration {
  /** MCP client transport settings. */
  client: {
    /** Path to the GEPA MCP server executable. */
    serverPath: string;
    /** Per-request timeout (default 30000 — presumably milliseconds; confirm). */
    timeout: number;
    maxConcurrentConnections: number;
  };
  /** Evolutionary-search parameters used when a call supplies none. */
  evolution: {
    defaultPopulationSize: number;
    defaultGenerations: number;
    defaultMutationRate: number;
    /** Hard cap on candidates produced per generation. */
    maxCandidatesPerGeneration: number;
  };
  /** Candidate-evaluation settings. */
  evaluation: {
    defaultRolloutCount: number;
    maxParallelEvaluations: number;
    /** Default 60000 — presumably milliseconds; confirm against server. */
    evaluationTimeout: number;
  };
  /** Scoring weights and stopping criterion for optimization. */
  optimization: {
    defaultPerformanceWeight: number;
    defaultDiversityWeight: number;
    convergenceThreshold: number;
  };
  /** Retry/circuit-breaker behavior (delays in ms — see calculateBackoffDelay). */
  resilience: {
    maxRetries: number;
    baseDelay: number;
    maxDelay: number;
    /** Failures before the circuit breaker opens. */
    circuitBreakerThreshold: number;
  };
  /** Metrics collection and alerting. */
  monitoring: {
    enableMetrics: boolean;
    metricsRetentionDays: number;
    /** Alert when observed values cross these limits (memoryUsage in bytes). */
    alertThresholds: {
      successRate: number;
      latency: number;
      memoryUsage: number;
    };
  };
}
/**
 * Parses an integer environment value in base 10.
 * Returns `fallback` when the variable is unset or not a valid number,
 * instead of silently propagating NaN into the configuration.
 */
const parseEnvInt = (value: string | undefined, fallback: number): number => {
  const parsed = Number.parseInt(value ?? '', 10);
  return Number.isNaN(parsed) ? fallback : parsed;
};

/**
 * Builds the default GEPA configuration, with client transport settings
 * overridable via GEPA_SERVER_PATH, GEPA_TIMEOUT, and GEPA_MAX_CONNECTIONS.
 */
export const createDefaultConfig = (): GEPAConfiguration => ({
  client: {
    serverPath: process.env.GEPA_SERVER_PATH || './gepa-mcp-server',
    timeout: parseEnvInt(process.env.GEPA_TIMEOUT, 30000),
    maxConcurrentConnections: parseEnvInt(process.env.GEPA_MAX_CONNECTIONS, 5)
  },
  evolution: {
    defaultPopulationSize: 20,
    defaultGenerations: 10,
    defaultMutationRate: 0.15,
    maxCandidatesPerGeneration: 50
  },
  evaluation: {
    defaultRolloutCount: 5,
    maxParallelEvaluations: 10,
    evaluationTimeout: 60000
  },
  optimization: {
    defaultPerformanceWeight: 0.7,
    defaultDiversityWeight: 0.3,
    convergenceThreshold: 0.01
  },
  resilience: {
    maxRetries: 3,
    baseDelay: 1000,
    maxDelay: 10000,
    circuitBreakerThreshold: 5
  },
  monitoring: {
    enableMetrics: true,
    metricsRetentionDays: 30,
    alertThresholds: {
      successRate: 0.95,
      latency: 5000,
      memoryUsage: 512 * 1024 * 1024 // 512MB
    }
  }
});
// Environment-specific configurations
/**
 * Returns the configuration for the given environment, starting from the
 * defaults and overriding only what that environment needs:
 * - production: larger population/generations, more retries
 * - testing: tiny population/generations, fewer rollouts
 * - anything else: the untouched defaults
 */
export const getConfig = (environment: string = 'development'): GEPAConfiguration => {
  const defaults = createDefaultConfig();

  if (environment === 'production') {
    return {
      ...defaults,
      evolution: {
        ...defaults.evolution,
        defaultPopulationSize: 30,
        defaultGenerations: 15
      },
      resilience: {
        ...defaults.resilience,
        maxRetries: 5
      }
    };
  }

  if (environment === 'testing') {
    return {
      ...defaults,
      evolution: {
        ...defaults.evolution,
        defaultPopulationSize: 5,
        defaultGenerations: 3
      },
      evaluation: {
        ...defaults.evaluation,
        defaultRolloutCount: 2
      }
    };
  }

  return defaults;
};
```
This comprehensive integration guide provides the foundation for successfully implementing GEPA MCP Server in various environments and use cases. The examples demonstrate production-ready patterns for error handling, monitoring, and configuration management that ensure reliable operation at scale.