import { z } from "zod";
import { SequenceGenerator } from "../utils/sequenceUtils.js";
/**
 * Tool descriptor and handler for generating simulated DNA sequences.
 *
 * `definition` carries the tool name, description and a JSON-Schema-style
 * `inputSchema`; `handler` performs the generation and returns a single text
 * content item whose `text` is a pretty-printed JSON report.
 */
export const generateDNA = {
  definition: {
    name: "generate_dna_sequence",
    description: "Generate random DNA sequences with specified parameters",
    inputSchema: {
      type: "object",
      properties: {
        length: {
          type: "number",
          description: "Length of the DNA sequence to generate"
        },
        gcContent: {
          type: "number",
          description: "GC content (0-1), default is 0.5",
          minimum: 0,
          maximum: 1
        },
        count: {
          type: "number",
          description: "Number of sequences to generate, default is 1",
          minimum: 1
        },
        seed: {
          type: "number",
          description: "Random seed for reproducible results (optional)"
        },
        model: {
          type: "string",
          description: "Generation model: 'random', 'markov', or 'codon-biased'",
          enum: ["random", "markov", "codon-biased"]
        },
        outputFormat: {
          type: "string",
          description: "Output format: 'fasta' or 'plain'",
          enum: ["fasta", "plain"]
        }
      },
      required: ["length"]
    },
  },

  /**
   * Generates `count` sequences with the requested model and reports them
   * together with summary statistics.
   *
   * @param length - Requested length of each sequence.
   * @param gcContent - Target GC fraction, passed through to the generators (default 0.5).
   * @param count - Number of sequences to produce (default 1).
   * @param seed - Forwarded to the `SequenceGenerator` constructor; echoed back in
   *   the statistics, or `"random"` when omitted.
   * @param model - `"random"`, `"markov"` or `"codon-biased"`; any other value falls
   *   back to the `"random"` generator.
   * @param outputFormat - `"fasta"` puts FASTA text in `sequences`; `"plain"` puts the
   *   record objects in `sequences` and newline-joined bases in `rawOutput`.
   * @returns An object with one text content item containing the JSON report.
   *
   * NOTE(review): the markov and codon-biased helpers in this file draw from
   * `Math.random`, so `seed` only makes the `"random"` model reproducible.
   */
  async handler({
    length,
    gcContent = 0.5,
    count = 1,
    seed,
    model = "random",
    outputFormat = "fasta"
  }: {
    length: number;
    gcContent?: number;
    count?: number;
    seed?: number;
    model?: string;
    outputFormat?: string;
  }) {
    const generator = new SequenceGenerator(seed);
    const sequences: Array<{
      id: string;
      sequence: string;
      length: number;
      gcContent: number;
      model: string;
    }> = [];

    for (let i = 0; i < count; i++) {
      let sequence: string;
      switch (model) {
        case "markov":
          sequence = generateMarkovDNA(length, gcContent, generator);
          break;
        case "codon-biased":
          sequence = generateCodonBiasedDNA(length, generator);
          break;
        case "random":
        default:
          // Unknown model names deliberately fall back to the plain random generator.
          sequence = generator.generateRandomDNA(length, gcContent);
      }

      // Guard the division: an empty sequence would otherwise yield NaN (0 / 0).
      const gcBases = (sequence.match(/[GC]/g) || []).length;
      const actualGC = sequence.length > 0 ? gcBases / sequence.length : 0;

      sequences.push({
        id: `sim_dna_${i + 1}`,
        sequence,
        length: sequence.length,
        // Stored as a percentage rounded to two decimals.
        gcContent: Math.round(actualGC * 10000) / 100,
        model
      });
    }

    let output = '';
    if (outputFormat === 'fasta') {
      output = sequences.map(seq =>
        `>${seq.id} length=${seq.length} gc=${seq.gcContent}% model=${seq.model}\n${seq.sequence}`
      ).join('\n\n');
    } else {
      output = sequences.map(seq => seq.sequence).join('\n');
    }

    // With no sequences (count <= 0) report zeros instead of NaN, which
    // JSON.stringify would silently serialize as null.
    const total = sequences.length;
    const lengthSum = sequences.reduce((sum, seq) => sum + seq.length, 0);
    const gcSum = sequences.reduce((sum, seq) => sum + seq.gcContent, 0);
    const stats = {
      totalSequences: total,
      averageLength: total > 0 ? Math.round(lengthSum / total) : 0,
      averageGC: total > 0 ? Math.round(gcSum / total * 100) / 100 : 0,
      model,
      // `??` rather than `||` so that a legitimate seed of 0 is reported as 0.
      seed: seed ?? "random"
    };

    return {
      content: [{
        type: "text",
        text: JSON.stringify({
          statistics: stats,
          sequences: outputFormat === 'fasta' ? output : sequences,
          rawOutput: outputFormat === 'plain' ? output : undefined
        }, null, 2)
      }]
    };
  }
};
/**
 * Generates a DNA sequence with a first-order Markov chain over A/T/G/C.
 *
 * The first base comes from `generator.generateRandomDNA(1, gcContent)`; every
 * later base depends only on whether the previous base was G/C or A/T. The
 * chain is "sticky": the chance of emitting G/C is 0.2 higher after a G/C than
 * after an A/T. The transition probabilities are derived from `gcContent` so
 * the chain's long-run GC fraction equals the requested value; at
 * `gcContent = 0.5` they reduce to the fixed 0.3/0.3/0.2/0.2 matrix (up to
 * floating-point rounding).
 *
 * @param length - Number of bases to produce; values that are not greater than
 *   zero (including NaN) yield an empty string.
 * @param gcContent - Target GC fraction; clamped to [0, 1] for the transition
 *   probabilities, with a non-finite value treated as 0.5.
 * @param generator - Source of the first base.
 * @returns The generated sequence.
 *
 * NOTE(review): transitions are drawn with `Math.random`, so only the first
 * base is affected by the generator's seed.
 */
function generateMarkovDNA(length: number, gcContent: number, generator: SequenceGenerator): string {
  // Previously a request for zero bases still returned the single seed base.
  if (!(length > 0)) {
    return "";
  }

  const targetGC = Number.isFinite(gcContent) ? Math.min(1, Math.max(0, gcContent)) : 0.5;

  // Two-class chain (GC vs AT) with P(GC | AT) = a and P(GC | GC) = a + s has
  // stationary GC fraction a / (1 - s); choosing a = (1 - s) * target makes the
  // stationary fraction equal the target while keeping both values in [0, 1].
  const stickiness = 0.2;
  const gcAfterAT = (1 - stickiness) * targetGC;
  const gcAfterGC = gcAfterAT + stickiness;

  const bases: string[] = [generator.generateRandomDNA(1, gcContent)];
  for (let i = 1; i < length; i++) {
    const previous = bases[i - 1];
    const pGC = previous === "G" || previous === "C" ? gcAfterGC : gcAfterAT;
    const pAT = 1 - pGC;
    const rand = Math.random();

    // Buckets are laid out in A, T, G, C order; each class splits its
    // probability evenly between its two bases. The final branch is a
    // catch-all so rounding error can never leave a position unfilled.
    let next: string;
    if (rand < pAT / 2) {
      next = "A";
    } else if (rand < pAT) {
      next = "T";
    } else if (rand < pAT + pGC / 2) {
      next = "G";
    } else {
      next = "C";
    }
    bases.push(next);
  }
  return bases.join("");
}
/**
 * Generates a DNA sequence built from whole sense codons.
 *
 * For each codon position an amino acid is picked uniformly from the 20 in the
 * table (the stop entry `*` is excluded), then one of that amino acid's codons
 * is picked uniformly. Enough codons are generated to cover `length`, and the
 * result is trimmed to exactly `length` bases, so the final codon may be
 * partial when `length` is not a multiple of three.
 *
 * @param length - Number of bases to return; zero or negative yields "".
 * @param generator - Currently unused; kept for signature compatibility.
 * @returns A sequence of exactly `length` bases.
 *
 * NOTE(review): randomness comes from `Math.random`, so output is not
 * reproducible from the generator's seed.
 */
function generateCodonBiasedDNA(length: number, generator: SequenceGenerator): string {
  const codonUsage = {
    'F': ['TTT', 'TTC'], 'L': ['TTA', 'TTG', 'CTT', 'CTC', 'CTA', 'CTG'],
    'S': ['TCT', 'TCC', 'TCA', 'TCG', 'AGT', 'AGC'], 'Y': ['TAT', 'TAC'],
    'C': ['TGT', 'TGC'], 'W': ['TGG'], 'P': ['CCT', 'CCC', 'CCA', 'CCG'],
    'H': ['CAT', 'CAC'], 'Q': ['CAA', 'CAG'], 'R': ['CGT', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'],
    'I': ['ATT', 'ATC', 'ATA'], 'M': ['ATG'], 'T': ['ACT', 'ACC', 'ACA', 'ACG'],
    'N': ['AAT', 'AAC'], 'K': ['AAA', 'AAG'], 'V': ['GTT', 'GTC', 'GTA', 'GTG'],
    'A': ['GCT', 'GCC', 'GCA', 'GCG'], 'D': ['GAT', 'GAC'], 'E': ['GAA', 'GAG'],
    'G': ['GGT', 'GGC', 'GGA', 'GGG'], '*': ['TAA', 'TAG', 'TGA']
  };
  const aminoAcids = Object.keys(codonUsage).filter(aa => aa !== '*');

  // Round up so the codons always cover `length`; rounding down returned a
  // sequence up to two bases shorter than requested.
  const targetCodons = Math.ceil(length / 3);

  const pieces: string[] = [];
  for (let i = 0; i < targetCodons; i++) {
    const aa = aminoAcids[Math.floor(Math.random() * aminoAcids.length)];
    const codons = codonUsage[aa as keyof typeof codonUsage];
    pieces.push(codons[Math.floor(Math.random() * codons.length)]);
  }

  // Trim any overshoot from the last codon.
  return pieces.join('').substring(0, length);
}