import { z } from "zod";
import { SequenceGenerator, AMINO_ACIDS } from "../utils/sequenceUtils.js";
/**
 * Tool descriptor for generating simulated protein sequences.
 *
 * `definition` carries the tool name, description and input schema;
 * `handler` builds the sequences, analyses each one and returns the result
 * as a single pretty-printed JSON text item.
 */
export const generateProtein = {
  definition: {
    name: "generate_protein_sequence",
    description: "Generate random protein sequences with specified parameters",
    inputSchema: {
      type: "object",
      properties: {
        length: {
          type: "number",
          description: "Length of the protein sequence to generate"
        },
        count: {
          type: "number",
          description: "Number of sequences to generate, default is 1",
          minimum: 1
        },
        seed: {
          type: "number",
          description: "Random seed for reproducible results (optional)"
        },
        model: {
          type: "string",
          description: "Generation model: 'random', 'hydrophobic-bias', or 'disorder-prone'",
          enum: ["random", "hydrophobic-bias", "disorder-prone"]
        },
        composition: {
          type: "object",
          description: "Custom amino acid composition (frequencies should sum to 1)",
          additionalProperties: {
            type: "number",
            minimum: 0,
            maximum: 1
          }
        },
        outputFormat: {
          type: "string",
          description: "Output format: 'fasta' or 'plain'",
          enum: ["fasta", "plain"]
        }
      },
      required: ["length"]
    },
  },

  /**
   * Generates `count` sequences of `length` residues using `model`.
   *
   * @param length - Number of residues per sequence.
   * @param count - How many sequences to build (defaults to 1).
   * @param seed - Passed to the `SequenceGenerator` constructor.
   * @param model - "random", "hydrophobic-bias" or "disorder-prone"; any other
   *   value is handled like "random" without a composition.
   * @param composition - Residue weights; only consulted by the "random" model.
   * @param outputFormat - "fasta" (default) or "plain".
   * @returns An object whose `content` array holds one text item. Its text is
   *   JSON with `statistics`, `sequences` (the FASTA string in "fasta" mode,
   *   otherwise the per-sequence records) and `rawOutput` (newline-joined
   *   sequences, present only in "plain" mode).
   */
  async handler({
    length,
    count = 1,
    seed,
    model = "random",
    composition,
    outputFormat = "fasta"
  }: {
    length: number;
    count?: number;
    seed?: number;
    model?: string;
    composition?: Record<string, number>;
    outputFormat?: string;
  }) {
    const generator = new SequenceGenerator(seed);
    const sequences = [];

    for (let i = 0; i < count; i++) {
      let sequence: string;
      switch (model) {
        case "random":
          // A custom composition is only honoured by the "random" model.
          if (composition) {
            sequence = generateCustomComposition(length, composition, generator);
          } else {
            sequence = generator.generateRandomProtein(length);
          }
          break;
        case "hydrophobic-bias":
          sequence = generateHydrophobicBiased(length, generator);
          break;
        case "disorder-prone":
          sequence = generateDisorderProne(length, generator);
          break;
        default:
          // Unrecognised model names fall back to generateRandomProtein.
          sequence = generator.generateRandomProtein(length);
      }

      const analysis = analyzeProtein(sequence);
      sequences.push({
        id: `sim_protein_${i + 1}`,
        sequence,
        length: sequence.length,
        model,
        ...analysis
      });
    }

    let output = '';
    if (outputFormat === 'fasta') {
      // One header line per record, records separated by a blank line.
      output = sequences.map(seq =>
        `>${seq.id} length=${seq.length} hydrophobic=${seq.hydrophobicRatio}% model=${seq.model}\n${seq.sequence}`
      ).join('\n\n');
    } else {
      output = sequences.map(seq => seq.sequence).join('\n');
    }

    // Mean that yields 0 instead of NaN when no sequences were produced
    // (NaN would otherwise be serialised as null by JSON.stringify).
    const sequenceCount = sequences.length;
    const mean = (total: number): number =>
      sequenceCount === 0 ? 0 : total / sequenceCount;

    const stats = {
      totalSequences: sequenceCount,
      averageLength: Math.round(mean(sequences.reduce((sum, seq) => sum + seq.length, 0))),
      averageHydrophobic: Math.round(mean(sequences.reduce((sum, seq) => sum + seq.hydrophobicRatio, 0)) * 100) / 100,
      model,
      // `??` rather than `||`: a seed of 0 is a real seed and must be reported as such.
      seed: seed ?? "random"
    };

    return {
      content: [{
        type: "text",
        text: JSON.stringify({
          statistics: stats,
          sequences: outputFormat === 'fasta' ? output : sequences,
          rawOutput: outputFormat === 'plain' ? output : undefined
        }, null, 2)
      }]
    };
  }
};
/**
 * Builds a sequence by sampling residues in proportion to `composition`.
 *
 * Weights are treated as relative: the random draw is scaled by their sum, so
 * a composition that does not add up to exactly 1 (including floating-point
 * shortfall) is still sampled proportionally. Entries whose weight is not a
 * finite positive number are never selected.
 *
 * NOTE(review): randomness comes from Math.random(); the `generator` argument
 * is not used here, so any state it holds does not influence the output.
 *
 * @param length - Number of residues to emit.
 * @param composition - Map from residue symbol to relative weight.
 * @param generator - Accepted for signature parity with the other models; unused.
 * @returns The generated sequence. If no entry has a usable weight, every
 *   position is filled with 'A' (the historical fallback).
 */
function generateCustomComposition(length: number, composition: Record<string, number>, generator: SequenceGenerator): string {
  const residues: string[] = [];
  const cumulativeWeights: number[] = [];
  let totalWeight = 0;

  for (const [residue, weight] of Object.entries(composition)) {
    if (!Number.isFinite(weight) || weight <= 0) continue;
    totalWeight += weight;
    residues.push(residue);
    cumulativeWeights.push(totalWeight);
  }

  // Used when rounding pushes the scaled draw up to the final bound, or when
  // there is nothing to sample from at all.
  const fallback = residues[residues.length - 1] ?? 'A';

  let sequence = '';
  for (let i = 0; i < length; i++) {
    const draw = Math.random() * totalWeight;
    const index = cumulativeWeights.findIndex(bound => draw < bound);
    sequence += index === -1 ? fallback : residues[index];
  }
  return sequence;
}
/**
 * Produces a sequence in which each position is drawn from the hydrophobic
 * residue pool when a random roll is below 0.6 and from the hydrophilic pool
 * otherwise; the residue within the chosen pool is picked by a second roll.
 *
 * NOTE(review): both rolls use Math.random(); the `generator` argument is not
 * used, so any state it holds does not influence the output.
 *
 * @param length - Number of residues to emit.
 * @param generator - Unused by this function.
 * @returns The generated sequence.
 */
function generateHydrophobicBiased(length: number, generator: SequenceGenerator): string {
  const hydrophobicPool = ['A', 'V', 'I', 'L', 'M', 'F', 'Y', 'W'];
  const hydrophilicPool = ['R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'K', 'P', 'S', 'T'];

  let result = '';
  let emitted = 0;
  while (emitted < length) {
    // First roll selects the pool, second roll selects the residue in it.
    const pool = Math.random() < 0.6 ? hydrophobicPool : hydrophilicPool;
    const pick = Math.floor(Math.random() * pool.length);
    result += pool[pick];
    emitted += 1;
  }
  return result;
}
/**
 * Produces a sequence biased toward the disorder-prone residue pool.
 *
 * For every position a first roll picks the pool: below 0.5 the
 * disorder-prone pool, below 0.75 the neutral pool, otherwise the order-prone
 * pool. A second roll picks the residue inside that pool.
 *
 * NOTE(review): both rolls use Math.random(); the `generator` argument is not
 * used, so any state it holds does not influence the output.
 *
 * @param length - Number of residues to emit.
 * @param generator - Unused by this function.
 * @returns The generated sequence.
 */
function generateDisorderProne(length: number, generator: SequenceGenerator): string {
  const disorderPool = ['A', 'R', 'G', 'Q', 'S', 'P', 'E', 'K'];
  const neutralPool = ['N', 'D', 'C', 'H', 'M', 'T'];
  const orderPool = ['V', 'I', 'Y', 'F', 'W', 'L'];

  // Maps the pool-selection roll onto one of the three residue pools.
  const poolFor = (roll: number): string[] => {
    if (roll < 0.5) return disorderPool;
    if (roll < 0.75) return neutralPool;
    return orderPool;
  };

  let result = '';
  let emitted = 0;
  while (emitted < length) {
    const pool = poolFor(Math.random());
    const pick = Math.floor(Math.random() * pool.length);
    result += pool[pick];
    emitted += 1;
  }
  return result;
}
/**
 * Computes residue-class percentages and per-residue composition of a sequence.
 *
 * Residue classes (matched against upper-case one-letter codes):
 * hydrophobic = A V I L M F Y W, charged = R K D E, aromatic = F Y W.
 *
 * @param sequence - Sequence to analyse.
 * @returns Percentages (0-100, rounded to two decimals) of hydrophobic,
 *   charged and aromatic residues, plus `composition`, which maps every
 *   distinct character in the sequence to its percentage. An empty sequence
 *   yields zeros and an empty composition rather than NaN.
 */
function analyzeProtein(sequence: string): {
  hydrophobicRatio: number;
  chargedRatio: number;
  aromaticRatio: number;
  composition: Record<string, number>;
} {
  const total = sequence.length;
  if (total === 0) {
    // Avoid 0/0: NaN ratios would propagate into averages and serialise as null.
    return { hydrophobicRatio: 0, chargedRatio: 0, aromaticRatio: 0, composition: {} };
  }

  const hydrophobic = new Set(['A', 'V', 'I', 'L', 'M', 'F', 'Y', 'W']);
  const charged = new Set(['R', 'K', 'D', 'E']);
  const aromatic = new Set(['F', 'Y', 'W']);

  const counts: Record<string, number> = {};
  let hydrophobicCount = 0;
  let chargedCount = 0;
  let aromaticCount = 0;

  for (const aa of sequence) {
    counts[aa] = (counts[aa] || 0) + 1;
    if (hydrophobic.has(aa)) hydrophobicCount++;
    if (charged.has(aa)) chargedCount++;
    if (aromatic.has(aa)) aromaticCount++;
  }

  // Share of the whole sequence as a percentage with two decimal places.
  const toPercent = (count: number): number =>
    Math.round((count / total) * 10000) / 100;

  return {
    hydrophobicRatio: toPercent(hydrophobicCount),
    chargedRatio: toPercent(chargedCount),
    aromaticRatio: toPercent(aromaticCount),
    composition: Object.fromEntries(
      Object.entries(counts).map(([aa, count]) => [aa, toPercent(count)])
    )
  };
}