import { z } from "zod";
import { SequenceGenerator, EvolutionParameters } from "../utils/sequenceUtils.js";
/**
 * Tool descriptor for `evolve_sequence`: a JSON-schema style `definition`
 * plus the `handler` that runs a mutation/selection simulation on a
 * population cloned from a single starting sequence.
 */
export const evolveSequence = {
  definition: {
    name: "evolve_sequence",
    description: "Simulate evolution of sequences over multiple generations",
    inputSchema: {
      type: "object",
      properties: {
        sequence: {
          type: "string",
          description: "Starting sequence"
        },
        generations: {
          type: "number",
          description: "Number of generations to simulate",
          minimum: 1
        },
        populationSize: {
          type: "number",
          description: "Population size for each generation",
          minimum: 2
        },
        mutationRate: {
          type: "number",
          description: "Mutation rate per generation (0-1)",
          minimum: 0,
          maximum: 1
        },
        selectionPressure: {
          type: "number",
          description: "Selection pressure (0-1), 0 = no selection, 1 = strong selection",
          minimum: 0,
          maximum: 1
        },
        fitnessFunction: {
          type: "string",
          description: "Fitness function: 'gc-content', 'length', 'hydrophobic', or 'custom'",
          enum: ["gc-content", "length", "hydrophobic", "custom"]
        },
        targetValue: {
          type: "number",
          description: "Target value for fitness function (e.g., target GC content)"
        },
        seed: {
          type: "number",
          description: "Random seed for reproducible results (optional)"
        },
        trackLineages: {
          type: "boolean",
          description: "Track individual lineages through generations",
          default: false
        },
        outputFormat: {
          type: "string",
          description: "Output format: 'summary', 'detailed', or 'fasta'",
          enum: ["summary", "detailed", "fasta"]
        }
      },
      required: ["sequence", "generations", "populationSize", "mutationRate"]
    },
  },

  /**
   * Runs the simulation and returns the result as pretty-printed JSON text.
   *
   * Each generation: score every individual, sort best-first, record
   * statistics, then (unless it is the last generation) build the next
   * population. With `selectionPressure > 0` offspring are drawn from the top
   * `max(2, floor(populationSize * (1 - selectionPressure)))` individuals;
   * otherwise every individual produces exactly one mutated descendant.
   *
   * The starting sequence is upper-cased and stripped of whitespace before
   * use; `summary.initialSequence` still echoes the raw input.
   *
   * NOTE(review): lineage ids and parent selection use `Math.random`, so they
   * are not controlled by `seed`. Whether `SequenceGenerator` exposes a seeded
   * random source that could replace them is not visible here — confirm.
   *
   * @returns `{ content: [{ type: "text", text }] }` where `text` is the JSON
   *   report (`summary`, `evolutionHistory`, and optionally `fastaOutput`,
   *   `lineageData`, `finalPopulation`).
   * @throws Error if `generations` is not a non-negative integer,
   *   `populationSize` is not a positive integer, or `sequence` contains no
   *   non-whitespace characters.
   */
  async handler({
    sequence,
    generations,
    populationSize,
    mutationRate,
    selectionPressure = 0,
    fitnessFunction = "gc-content",
    targetValue = 0.5,
    seed,
    trackLineages = false,
    outputFormat = "summary"
  }: {
    sequence: string;
    generations: number;
    populationSize: number;
    mutationRate: number;
    selectionPressure?: number;
    fitnessFunction?: string;
    targetValue?: number;
    seed?: number;
    trackLineages?: boolean;
    outputFormat?: string;
  }) {
    type Individual = {
      sequence: string;
      fitness: number;
      generation: number;
      lineageId: string;
    };
    type GenerationStats = {
      generation: number;
      populationSize: number;
      bestFitness: number;
      worstFitness: number;
      averageFitness: number;
      bestSequence: string;
      diversity: number;
    };
    type LineageRecord = {
      generation: number;
      lineageId: string;
      sequence: string;
      fitness: number;
    };

    // A fractional generation count would leave the final population
    // unevaluated, and a negative one leaves the history empty; reject both
    // up front with a clear message.
    if (!Number.isInteger(generations) || generations < 0) {
      throw new Error("generations must be a non-negative integer");
    }
    if (!Number.isInteger(populationSize) || populationSize < 1) {
      throw new Error("populationSize must be a positive integer");
    }

    // Use the same cleaned string for alphabet detection and for the
    // population, so whitespace is never mutated or counted in fitness.
    const startingSequence = sequence.replace(/\s/g, '').toUpperCase();
    if (startingSequence.length === 0) {
      throw new Error("sequence must contain at least one non-whitespace character");
    }

    const generator = new SequenceGenerator(seed);
    const isDNA = /^[ATGC]+$/i.test(startingSequence);

    // Single place that picks the mutation routine for the detected alphabet.
    const mutate = (parentSequence: string): string =>
      isDNA
        ? generator.mutateDNA(parentSequence, { substitutionRate: mutationRate })
        : mutateProtein(parentSequence, mutationRate, generator);

    let population: Individual[] = Array.from({ length: populationSize }, () => ({
      sequence: startingSequence,
      fitness: 0,
      generation: 0,
      lineageId: Math.random().toString(36).slice(2, 11)
    }));

    const evolutionHistory: GenerationStats[] = [];
    const lineageData: LineageRecord[] | undefined = trackLineages ? [] : undefined;

    for (let gen = 0; gen <= generations; gen++) {
      for (const individual of population) {
        individual.fitness = calculateFitness(individual.sequence, fitnessFunction, targetValue, isDNA);
      }
      // Best-first ordering; everything below relies on index 0 being the fittest.
      population.sort((a, b) => b.fitness - a.fitness);

      const totalFitness = population.reduce((sum, ind) => sum + ind.fitness, 0);
      evolutionHistory.push({
        generation: gen,
        populationSize: population.length,
        bestFitness: population[0].fitness,
        worstFitness: population[population.length - 1].fitness,
        averageFitness: totalFitness / population.length,
        bestSequence: population[0].sequence,
        diversity: calculateDiversity(population.map(ind => ind.sequence))
      });

      if (lineageData) {
        for (const individual of population) {
          lineageData.push({
            generation: gen,
            lineageId: individual.lineageId,
            sequence: individual.sequence,
            fitness: individual.fitness
          });
        }
      }

      // The last generation is only evaluated, never bred from.
      if (gen < generations) {
        const nextPopulation: Individual[] = [];
        if (selectionPressure > 0) {
          const selectionThreshold = Math.floor(populationSize * (1 - selectionPressure));
          // Always keep at least two candidates (when available) as parents.
          const survivors = population.slice(0, Math.max(2, selectionThreshold));
          while (nextPopulation.length < populationSize) {
            const parent = survivors[Math.floor(Math.random() * survivors.length)];
            nextPopulation.push({
              sequence: mutate(parent.sequence),
              fitness: 0,
              generation: gen + 1,
              lineageId: `${parent.lineageId}_${nextPopulation.length}`
            });
          }
        } else {
          for (const individual of population) {
            nextPopulation.push({
              sequence: mutate(individual.sequence),
              fitness: 0,
              generation: gen + 1,
              lineageId: individual.lineageId
            });
          }
        }
        population = nextPopulation;
      }
    }

    const finalBest = population[0];
    const rawImprovement = finalBest.fitness / evolutionHistory[0].bestFitness;

    const summary = {
      initialSequence: sequence,
      finalBestSequence: finalBest.sequence,
      totalGenerations: generations,
      populationSize,
      mutationRate,
      selectionPressure,
      fitnessFunction,
      targetValue,
      finalBestFitness: finalBest.fitness,
      // A zero initial fitness makes the ratio Infinity/NaN, which JSON cannot
      // represent; report null explicitly rather than relying on stringify.
      improvementRatio: Number.isFinite(rawImprovement) ? rawImprovement : null,
      // `??` so that a legitimate seed of 0 is not reported as "random".
      seed: seed ?? "random"
    };

    // Non-detailed output keeps roughly 20 evenly spaced generations plus the last one.
    const samplingInterval = Math.max(1, Math.floor(generations / 20));
    const result: Record<string, unknown> = {
      summary,
      evolutionHistory: outputFormat === 'detailed'
        ? evolutionHistory
        : evolutionHistory.filter((_, i) => i % samplingInterval === 0 || i === generations)
    };

    if (outputFormat === 'fasta') {
      // Top ten individuals of the final (sorted) population as FASTA records.
      result.fastaOutput = population
        .slice(0, Math.min(10, population.length))
        .map((ind, i) =>
          `>evolved_seq_${i + 1}_gen_${generations} fitness=${ind.fitness.toFixed(4)}\n${ind.sequence}`
        )
        .join('\n\n');
    }
    if (lineageData) {
      result.lineageData = lineageData;
    }
    if (outputFormat === 'detailed') {
      result.finalPopulation = population.slice(0, 10);
    }

    return {
      content: [{
        type: "text",
        text: JSON.stringify(result, null, 2)
      }]
    };
  }
};
/**
 * Scores a sequence against `targetValue` using the named fitness function.
 *
 * - `'gc-content'`: `1 - |GC fraction - target|`; 0 for non-DNA or empty input.
 * - `'length'`: `1 - |length - target| / max(length, target)`; 1 on an exact
 *   match (including length 0 with target 0), 0 when the scale is not positive.
 * - `'hydrophobic'`: `1 - |hydrophobic fraction - target|` over the residues
 *   A, V, I, L, M, F, Y, W (case-insensitive); 0 for DNA or empty input.
 * - anything else (including `'custom'`): a fresh `Math.random()` value.
 *
 * @param sequence Sequence to score.
 * @param fitnessFunction Name of the scoring rule.
 * @param targetValue Target the score is measured against.
 * @param isDNA Whether the sequence should be treated as DNA.
 * @returns The fitness score; never `NaN` for the named functions.
 */
function calculateFitness(sequence: string, fitnessFunction: string, targetValue: number, isDNA: boolean): number {
  switch (fitnessFunction) {
    case 'gc-content': {
      // Empty input would divide by zero and poison sorting with NaN.
      if (!isDNA || sequence.length === 0) return 0;
      const gcCount = (sequence.match(/[GC]/gi) ?? []).length;
      return 1 - Math.abs(gcCount / sequence.length - targetValue);
    }
    case 'length': {
      const distance = Math.abs(sequence.length - targetValue);
      if (distance === 0) return 1;
      const scale = Math.max(sequence.length, targetValue);
      // Only reachable with an empty sequence and a negative target.
      if (scale <= 0) return 0;
      return 1 - distance / scale;
    }
    case 'hydrophobic': {
      if (isDNA || sequence.length === 0) return 0;
      const hydrophobic = new Set(['A', 'V', 'I', 'L', 'M', 'F', 'Y', 'W']);
      let hydrophobicCount = 0;
      // Upper-case each residue so this matches the case-insensitive GC check.
      for (const residue of sequence.split('')) {
        if (hydrophobic.has(residue.toUpperCase())) hydrophobicCount++;
      }
      return 1 - Math.abs(hydrophobicCount / sequence.length - targetValue);
    }
    default:
      return Math.random();
  }
}
/**
 * Measures population diversity as the fraction of distinct sequences:
 * number of unique strings divided by the total number of strings.
 *
 * @param sequences Sequences of every individual in the population.
 * @returns A value in (0, 1] for non-empty input, or 0 for an empty list
 *   (avoiding the NaN that 0/0 would produce).
 */
function calculateDiversity(sequences: string[]): number {
  if (sequences.length === 0) return 0;
  return new Set(sequences).size / sequences.length;
}
/**
 * Applies independent point substitutions to a protein sequence.
 *
 * Every position is replaced with probability `mutationRate` by one of the
 * 20 standard amino-acid letters, always different from the current letter.
 *
 * NOTE(review): `generator` is accepted but never read; all randomness here
 * comes from `Math.random`.
 *
 * @param sequence Sequence to mutate, processed one UTF-16 unit at a time.
 * @param mutationRate Per-position substitution probability.
 * @param generator Currently unused.
 * @returns A new string of the same length with the substitutions applied.
 */
function mutateProtein(sequence: string, mutationRate: number, generator: SequenceGenerator): string {
  const alphabet = 'ARNDCQEGHILKMFPSTWYV';

  // Redraw until the pick differs from the residue being replaced.
  const drawDifferentResidue = (current: string): string => {
    let candidate = alphabet.charAt(Math.floor(Math.random() * alphabet.length));
    while (candidate === current) {
      candidate = alphabet.charAt(Math.floor(Math.random() * alphabet.length));
    }
    return candidate;
  };

  return sequence
    .split('')
    .map(residue => (Math.random() < mutationRate ? drawDifferentResidue(residue) : residue))
    .join('');
}