import { describe, it, expect, beforeEach } from 'vitest';
import { simulateFastq } from '../../src/tools/simulateFastq';
import { TestUtils } from '../utils/testHelpers';
describe('simulateFastq Integration Tests', () => {
const realGenomeSequence = 'ATGAAACGCATTAGCACCACCATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGACAGTGCGGGCTTTTTTTTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCCAGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGCGATGATTGAAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTTTGACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCGCAATTGAAAACTTTCGTCGATCAGGAATTTGCCCAAATAAAACATGTCCTGCATGGCATTAGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGCTGATTTGCCGTGGCGAGAAAATGTCGATCGCCATTATGGCCGGCGTATTAGAAGCGCGCGGTCACAACGTTACTGTTATCGATCCGGTCGAAAAACTGCTGGCAGTGGGGCATTACCTCGAATCTACCGTCGATATTGCTGAGTCCACCCGCCGTATTGCGGCAAGCCGCATTCCGGCTGATCACATGGTGCTGATGGCAGGTTTCACCGCCGGTAATGAAAAAGGCGAACTGGTGGTGCTTGGACGCAACGGTTCCGACTACTCTGCTGCGGTGCTGGCTGCCTGTTTACGCGCCGATTGTTGCGAGATTTGGACGGACGTTGACGGGGTCTATACCTGCGACCCGCGTCAGGTGCCCGATGCGAGGTTGTTGAAGTCGATGTCCTACCAGGAAGCGATGGAGCTTTCCTACTTCGGCGCTAAAGTTCTTCACCCCCGCACCATTACCCCCATCGCCCAGTTCCAGATCCCTTGCCTGATTAAAAATACCGGAAATCCTCAAGCACCAGGTACGCTCATTGGTGCCAGCCGTGATGAAGACGAATTACCGGTCAAGGGCATTTCCAATCTGAATAACATGGCAATGTTCAGCGTTTCTGGTCCGGGGATGAAAGGGATGGTCGGCATGGCGGCGCGCGTCTTTGCAGCGATGTCACGCGCCCGTATTTCCGTGGTGCTGATTACGCAATCATCTTCCGAATACAGCATCAGTTTCTGACGATTAATGGCGGCAACGCCACCAGGAAGGTGCTGGCTTCGGCTGATACGACCCAGATCTACGCTCCCCCGCCGTCCATGAAATCGAATGCCAATGATCCTGCCATATTCGCCAATCCGGATGGCATGACCAATGACCAGGCCAACACCGTGCGCATCGTGAAAGACAGTTCCAACCCCGGCAACCAGATCAACGCATTCGAAAAATCCACGAAAGCAGTCGTCGGCGAATACAAGAAGCCCGACAGTGCTTTCAACTTCGCTGCCCTCTTCGTCGATGGCATGGAAGCCGATGTGGATAAGATGGAGTTGGTTCGCGCGTTCAAGAAGCCAGGCGCAGTAGATGATGATTACCAGGAAAAGATCCCCGATGGCTTTGCCATTCAGATGTCGAACCCCGGCAATGGTGATGTCGCCGCGATCAAGAAAGCTGTCAAGCTGGGCAAGGGCAACGATGTGGTCGATGATGGTCGGTTCGAAACAGGCAAAACGATTCACGATGGCGTTGATGGCATGGCCCGGGACAACCGGACGGTGATCAATGCCGTGCTAGATTCGATCGACGTTGACATCGAAAAGTACACCGACAACGTGCCAGTTGGCGTGGCCTTCTACGAGGCCGGAAAAGACGAGTCCGGCGATTCCGGCAAGTTGCTGAAGATCGCCCCTGGCGGTGGCGGCATCGATGGCGTGAAGGTCATCGAAAGCACGCGTCTGAAGTAG';
describe('Real-world sequence simulation', () => {
  // Paired-end run against the shared fixture sequence: checks the reported
  // summary statistics and the shape of every returned read pair.
  it('should simulate reads from a realistic genome sequence', async () => {
    const response = await simulateFastq.handler({
      referenceSequence: realGenomeSequence,
      readLength: 150,
      coverage: 30,
      readType: 'paired-end',
      insertSize: 300,
      insertSizeStd: 50,
      errorRate: 0.01,
      mutationRate: 0.001,
      qualityModel: 'illumina',
      seed: 12345,
      outputFormat: 'json'
    });
    const payload = JSON.parse(response.content[0].text);

    // Summary statistics echo the request.
    expect(payload.statistics.totalReads).toBeGreaterThan(0);
    expect(payload.statistics.coverage).toBeCloseTo(30, 0);
    expect(payload.statistics.readType).toBe('paired-end');

    // The reads collection is a non-empty array.
    expect(Array.isArray(payload.reads)).toBe(true);
    expect(payload.reads.length).toBeGreaterThan(0);

    // Every entry carries both mates, each with a sequence and a quality
    // string of exactly the requested read length.
    for (const pair of payload.reads) {
      expect(pair.read1).toBeDefined();
      expect(pair.read2).toBeDefined();
      expect(pair.read1.sequence.length).toBe(150);
      expect(pair.read2.sequence.length).toBe(150);
      expect(pair.read1.qualities.length).toBe(150);
      expect(pair.read2.qualities.length).toBe(150);
    }
  });

  // Timing smoke test: a tenfold repetition of the fixture must be simulated
  // within a fixed wall-clock budget.
  it('should handle large reference sequences efficiently', async () => {
    const largeReference = realGenomeSequence.repeat(10); // ~25kb sequence
    const measurement = await TestUtils.measureExecutionTime(async () =>
      simulateFastq.handler({
        referenceSequence: largeReference,
        readLength: 100,
        coverage: 5,
        seed: 12345
      })
    );
    const payload = JSON.parse(measurement.result.content[0].text);

    // Should complete in reasonable time (less than 10 seconds)
    expect(measurement.timeMs).toBeLessThan(10000);
    expect(payload.statistics.totalReads).toBeGreaterThan(0);
  });
});
describe('Platform-specific quality models', () => {
  // ASCII offset subtracted from each quality character to obtain its score.
  const PHRED_OFFSET = 33;

  // One generated test per quality model; expectedMinQ/expectedMaxQ bound the
  // mean score (with a margin of 5 on either side).
  const platforms = [
    { name: 'illumina', expectedMinQ: 20, expectedMaxQ: 40 },
    { name: '454', expectedMinQ: 15, expectedMaxQ: 35 },
    { name: 'ion-torrent', expectedMinQ: 10, expectedMaxQ: 25 },
    { name: 'pacbio', expectedMinQ: 8, expectedMaxQ: 18 }
  ];

  for (const platform of platforms) {
    it(`should generate appropriate quality ranges for ${platform.name}`, async () => {
      const response = await simulateFastq.handler({
        referenceSequence: realGenomeSequence.substring(0, 500),
        readLength: 100,
        coverage: 5,
        qualityModel: platform.name,
        errorRate: 0.01,
        seed: 12345,
        outputFormat: 'json'
      });
      const payload = JSON.parse(response.content[0].text);

      // Flatten every quality string into numeric scores. Entries may expose
      // the string directly or under `read1`.
      const scores: number[] = [];
      for (const entry of payload.reads) {
        const encoded = entry.qualities || entry.read1?.qualities;
        if (!encoded) {
          continue;
        }
        for (let pos = 0; pos < encoded.length; pos += 1) {
          scores.push(encoded.charCodeAt(pos) - PHRED_OFFSET);
        }
      }
      expect(scores.length).toBeGreaterThan(0);

      const lowest = scores.reduce((lo, q) => Math.min(lo, q), Infinity);
      const highest = scores.reduce((hi, q) => Math.max(hi, q), -Infinity);
      const total = scores.reduce((sum, q) => sum + q, 0);
      const mean = total / scores.length;

      // Scores must stay inside the printable range, and the mean must sit in
      // the platform's expected window.
      expect(lowest).toBeGreaterThanOrEqual(0);
      expect(highest).toBeLessThanOrEqual(93);
      expect(mean).toBeGreaterThan(platform.expectedMinQ - 5);
      expect(mean).toBeLessThan(platform.expectedMaxQ + 5);
    });
  }
});
describe('Error rate validation', () => {
  // For several requested error rates (with mutations disabled), checks that
  // the fraction of reads flagged with errors stays below ten times the
  // requested rate.
  // NOTE(review): the observed value is a per-read fraction while `errorRate`
  // is presumably a per-base probability — confirm the comparison is intended.
  it('should maintain expected error rates across large simulations', async () => {
    const errorRates = [0.001, 0.01, 0.05];
    for (const errorRate of errorRates) {
      const result = await simulateFastq.handler({
        referenceSequence: realGenomeSequence,
        readLength: 100,
        coverage: 20,
        errorRate,
        mutationRate: 0, // No biological mutations
        seed: 12345,
        outputFormat: 'json'
      });
      const data = JSON.parse(result.content[0].text);

      // Count reads with errors. A single-end entry counts as one read; a
      // paired-end entry (both `read1` and `read2` present) counts as two.
      let readsWithErrors = 0;
      let totalReads = 0;
      for (const read of data.reads) {
        if (read.errors) readsWithErrors++;
        totalReads++;
        if (read.read1?.errors) readsWithErrors++;
        if (read.read2?.errors) readsWithErrors++;
        if (read.read1 && read.read2) totalReads++; // Paired-end
      }

      // An empty result must fail loudly instead of silently skipping the
      // rate check below (which would also divide by zero).
      expect(totalReads).toBeGreaterThan(0);

      const observedErrorRate = readsWithErrors / totalReads;
      // Upper bound only, with generous tolerance for statistical variation.
      expect(observedErrorRate).toBeLessThanOrEqual(errorRate * 10);
    }
  });
});
describe('Coverage accuracy', () => {
  // Requests several coverage depths over the same 1000-base prefix of the
  // fixture and compares the reported coverage with the requested one.
  it('should achieve target coverage within acceptable tolerance', async () => {
    const reference = realGenomeSequence.substring(0, 1000); // Use consistent length
    for (const requested of [1, 5, 10, 50]) {
      const response = await simulateFastq.handler({
        referenceSequence: reference,
        readLength: 100,
        coverage: requested,
        seed: 12345
      });
      const reported = JSON.parse(response.content[0].text).statistics.coverage;

      // Allowed deviation is 20% of the target, never less than 0.2 absolute.
      const allowedDeviation = Math.max(0.2, requested * 0.2);
      const deviation = Math.abs(reported - requested);
      expect(deviation).toBeLessThanOrEqual(allowedDeviation);
    }
  });
});
describe('Insert size distribution for paired-end reads', () => {
  // Collects `read1.insertSize` from every pair and compares the sample mean
  // and (population) standard deviation with the requested parameters.
  it('should generate insert sizes following normal distribution', async () => {
    const meanInsert = 250;
    const stdInsert = 30;
    const response = await simulateFastq.handler({
      referenceSequence: realGenomeSequence,
      readLength: 75,
      coverage: 10,
      readType: 'paired-end',
      insertSize: meanInsert,
      insertSizeStd: stdInsert,
      seed: 12345,
      outputFormat: 'json'
    });
    const payload = JSON.parse(response.content[0].text);

    // Keep only pairs that report a truthy insert size on their first mate.
    const observed: number[] = [];
    for (const pair of payload.reads) {
      const size = pair.read1?.insertSize;
      if (size) {
        observed.push(size);
      }
    }
    expect(observed.length).toBeGreaterThan(0);

    const sampleCount = observed.length;
    const sampleMean = observed.reduce((acc, size) => acc + size, 0) / sampleCount;
    const sampleVariance =
      observed.reduce((acc, size) => acc + (size - sampleMean) ** 2, 0) / sampleCount;
    const sampleStd = Math.sqrt(sampleVariance);

    // Mean should be close to target (within 10%)
    const relativeMeanError = Math.abs(sampleMean - meanInsert) / meanInsert;
    expect(relativeMeanError).toBeLessThan(0.1);

    // Spread must fall between half and double the requested deviation.
    expect(sampleStd).toBeGreaterThan(stdInsert * 0.5);
    expect(sampleStd).toBeLessThan(stdInsert * 2.0);
  });
});
describe('FASTQ format compliance', () => {
  // Validates the four-line record layout of the `fastqOutput` string for a
  // single-end run: header, sequence, '+' separator, quality string.
  it('should generate valid FASTQ format for single-end reads', async () => {
    const result = await simulateFastq.handler({
      referenceSequence: realGenomeSequence.substring(0, 500),
      readLength: 100,
      coverage: 3,
      readType: 'single-end',
      seed: 12345,
      outputFormat: 'fastq'
    });
    const data = JSON.parse(result.content[0].text);
    const fastqOutput = data.fastqOutput;
    expect(fastqOutput).toBeDefined();
    expect(typeof fastqOutput).toBe('string');

    // Parse FASTQ records by fixed four-line grouping. Splitting on '\n@' is
    // not safe: '@' (ASCII 64) is itself a valid quality character, so a
    // quality line may legitimately begin with it.
    const lines: string[] = fastqOutput.trim().split('\n');
    expect(lines.length % 4).toBe(0);
    const recordCount = lines.length / 4;
    expect(recordCount).toBeGreaterThan(0);

    for (let start = 0; start < lines.length; start += 4) {
      const [header, sequence, separator, quality] = lines.slice(start, start + 4);
      expect(header).toMatch(/^@sim_read_\d+$/); // Header
      expect(sequence).toMatch(/^[ATGC]+$/); // Sequence
      expect(separator).toBe('+'); // Plus line
      expect(quality.length).toBe(sequence.length); // Quality length matches sequence

      // Validate quality characters are in the printable ASCII range
      for (let i = 0; i < quality.length; i++) {
        const charCode = quality.charCodeAt(i);
        expect(charCode).toBeGreaterThanOrEqual(33);
        expect(charCode).toBeLessThanOrEqual(126);
      }
    }
  });

  // For a paired-end run, checks that headers carry /1 and /2 suffixes and
  // that both mates occur equally often.
  it('should generate valid FASTQ format for paired-end reads', async () => {
    const result = await simulateFastq.handler({
      referenceSequence: realGenomeSequence.substring(0, 500),
      readLength: 75,
      coverage: 2,
      readType: 'paired-end',
      seed: 12345,
      outputFormat: 'fastq'
    });
    const data = JSON.parse(result.content[0].text);
    const fastqOutput = data.fastqOutput;

    // Should contain both /1 and /2 reads
    expect(fastqOutput).toContain('/1');
    expect(fastqOutput).toContain('/2');

    // Count read pairs
    const read1Count = (fastqOutput.match(/@sim_read_\d+\/1/g) || []).length;
    const read2Count = (fastqOutput.match(/@sim_read_\d+\/2/g) || []).length;
    expect(read1Count).toBe(read2Count);
    expect(read1Count).toBeGreaterThan(0);
  });
});
describe('Mutation vs error differentiation', () => {
  // With both rates non-zero, at least one read must be flagged with
  // `mutations` and at least one with `errors`.
  it('should distinguish between biological mutations and sequencing errors', async () => {
    const response = await simulateFastq.handler({
      referenceSequence: realGenomeSequence.substring(0, 1000),
      readLength: 100,
      coverage: 10,
      errorRate: 0.02,
      mutationRate: 0.01,
      seed: 12345,
      outputFormat: 'json'
    });
    const payload = JSON.parse(response.content[0].text);

    let flaggedMutations = 0;
    let flaggedErrors = 0;
    for (const entry of payload.reads) {
      // The flags may sit on the entry itself (single-end) or on either
      // mate (paired-end); absent mates are skipped via optional chaining.
      const carriers = [entry, entry.read1, entry.read2];
      for (const carrier of carriers) {
        if (carrier?.mutations) {
          flaggedMutations += 1;
        }
        if (carrier?.errors) {
          flaggedErrors += 1;
        }
      }
    }

    // Both mutations and errors should occur with these rates
    expect(flaggedMutations).toBeGreaterThan(0);
    expect(flaggedErrors).toBeGreaterThan(0);
  });
});
describe('Reproducibility', () => {
  // Three invocations with identical parameters (including the seed) must
  // return byte-identical text payloads.
  it('should produce identical results with same seed across multiple runs', async () => {
    const params = {
      referenceSequence: realGenomeSequence.substring(0, 500),
      readLength: 100,
      coverage: 5,
      readType: 'paired-end' as const,
      seed: 98765
    };
    const runCount = 3;
    const runs = await Promise.all(
      Array.from({ length: runCount }, () => simulateFastq.handler(params))
    );

    // Compare every run against the first one.
    const reference = runs[0].content[0].text;
    for (const run of runs) {
      expect(run.content[0].text).toBe(reference);
    }
  });

  // Changing only the seed must change the returned text payload.
  it('should produce different results with different seeds', async () => {
    const sharedParams = {
      referenceSequence: realGenomeSequence.substring(0, 500),
      readLength: 100,
      coverage: 5
    };
    const firstRun = await simulateFastq.handler({ ...sharedParams, seed: 11111 });
    const secondRun = await simulateFastq.handler({ ...sharedParams, seed: 22222 });

    const firstText = firstRun.content[0].text;
    const secondText = secondRun.content[0].text;
    expect(firstText).not.toBe(secondText);
  });
});
describe('Edge cases and robustness', () => {
  // An 8-base reference with 6-base reads must still yield at least one read.
  it('should handle very short reference sequences', async () => {
    const response = await simulateFastq.handler({
      referenceSequence: 'ATCGATCG',
      readLength: 6,
      coverage: 2,
      seed: 12345
    });
    const { statistics } = JSON.parse(response.content[0].text);
    expect(statistics.totalReads).toBeGreaterThan(0);
  });

  // Requesting 100x over a 200-base reference must report more than 50x.
  it('should handle high coverage requests', async () => {
    const response = await simulateFastq.handler({
      referenceSequence: realGenomeSequence.substring(0, 200),
      readLength: 50,
      coverage: 100,
      seed: 12345
    });
    const { statistics } = JSON.parse(response.content[0].text);
    expect(statistics.coverage).toBeGreaterThan(50);
    expect(statistics.totalReads).toBeGreaterThan(0);
  });

  // With both rates set to zero, every read must carry explicit `false`
  // flags for mutations and errors.
  it('should handle zero mutation and error rates', async () => {
    const response = await simulateFastq.handler({
      referenceSequence: realGenomeSequence.substring(0, 300),
      readLength: 75,
      coverage: 3,
      errorRate: 0,
      mutationRate: 0,
      seed: 12345,
      outputFormat: 'json'
    });
    const payload = JSON.parse(response.content[0].text);

    // Should not have mutations or errors
    for (const entry of payload.reads) {
      expect(entry.mutations).toBe(false);
      expect(entry.errors).toBe(false);
    }
  });
});
});