import { describe, it, expect, beforeEach } from 'vitest';
import { simulateFastq } from '../../src/tools/simulateFastq';
import { SequenceGenerator } from '../../src/utils/sequenceUtils';
import { TestUtils } from '../utils/testHelpers';
describe('simulateFastq Tool', () => {
const testReference = 'ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCG';
describe('Basic functionality', () => {
it('should generate FASTQ reads with correct statistics', async () => {
const result = await simulateFastq.handler({
referenceSequence: testReference,
readLength: 100,
coverage: 10,
seed: 12345
});
expect(result.content).toHaveLength(1);
expect(result.content[0].type).toBe('text');
const data = JSON.parse(result.content[0].text);
expect(data.statistics.readLength).toBe(100);
expect(data.statistics.coverage).toBeCloseTo(10, 0);
expect(data.statistics.readType).toBe('single-end');
});
it('should generate single-end reads by default', async () => {
const result = await simulateFastq.handler({
referenceSequence: testReference,
readLength: 50,
coverage: 5,
seed: 12345
});
const data = JSON.parse(result.content[0].text);
expect(data.statistics.readType).toBe('single-end');
expect(data.statistics.totalReads).toBeGreaterThan(0);
});
it('should generate paired-end reads when requested', async () => {
const result = await simulateFastq.handler({
referenceSequence: testReference,
readLength: 50,
coverage: 5,
readType: 'paired-end',
seed: 12345
});
const data = JSON.parse(result.content[0].text);
expect(data.statistics.readType).toBe('paired-end');
expect(data.statistics.totalReads).toBeGreaterThan(0);
expect(data.statistics.totalReads % 2).toBe(0); // Should be even for paired reads
});
it('should use seed for reproducible results', async () => {
const seed = 54321;
const result1 = await simulateFastq.handler({
referenceSequence: testReference,
readLength: 75,
coverage: 3,
seed
});
const result2 = await simulateFastq.handler({
referenceSequence: testReference,
readLength: 75,
coverage: 3,
seed
});
expect(result1.content[0].text).toBe(result2.content[0].text);
});
});
describe('Quality models', () => {
const testCases = ['illumina', '454', 'ion-torrent', 'pacbio'];
testCases.forEach(qualityModel => {
it(`should support ${qualityModel} quality model`, async () => {
const result = await simulateFastq.handler({
referenceSequence: testReference,
readLength: 50,
coverage: 2,
qualityModel,
seed: 12345
});
const data = JSON.parse(result.content[0].text);
expect(data.statistics.qualityModel).toBe(qualityModel);
});
});
});
describe('Error and mutation rates', () => {
it('should apply sequencing errors', async () => {
const result = await simulateFastq.handler({
referenceSequence: testReference,
readLength: 100,
coverage: 5,
errorRate: 0.1, // High error rate
mutationRate: 0, // No biological mutations
seed: 12345,
outputFormat: 'json'
});
const data = JSON.parse(result.content[0].text);
expect(data.statistics.errorRate).toBe(0.1);
// With high error rate, some reads should have errors
if (data.reads && data.reads.length > 0) {
const hasErrors = data.reads.some((read: any) => read.errors === true);
expect(hasErrors).toBe(true);
}
});
it('should apply biological mutations', async () => {
const result = await simulateFastq.handler({
referenceSequence: testReference,
readLength: 100,
coverage: 5,
errorRate: 0, // No sequencing errors
mutationRate: 0.05, // High mutation rate
seed: 12345,
outputFormat: 'json'
});
const data = JSON.parse(result.content[0].text);
expect(data.statistics.mutationRate).toBe(0.05);
// With high mutation rate, some reads should have mutations
if (data.reads && data.reads.length > 0) {
const hasMutations = data.reads.some((read: any) => read.mutations === true);
expect(hasMutations).toBe(true);
}
});
});
describe('Output formats', () => {
it('should output FASTQ format by default', async () => {
const result = await simulateFastq.handler({
referenceSequence: testReference,
readLength: 50,
coverage: 2,
seed: 12345
});
const data = JSON.parse(result.content[0].text);
expect(data.fastqOutput).toBeDefined();
expect(typeof data.fastqOutput).toBe('string');
// Should start with FASTQ record
expect(data.fastqOutput).toMatch(/^@sim_read_/);
expect(data.fastqOutput).toContain('\n+\n');
});
it('should output JSON format when requested', async () => {
const result = await simulateFastq.handler({
referenceSequence: testReference,
readLength: 50,
coverage: 2,
outputFormat: 'json',
seed: 12345
});
const data = JSON.parse(result.content[0].text);
expect(data.reads).toBeDefined();
expect(Array.isArray(data.reads)).toBe(true);
expect(data.fastqOutput).toBeUndefined();
});
});
describe('Coverage calculations', () => {
it('should generate appropriate number of reads for coverage', async () => {
const coverageValues = [1, 5, 10, 20];
for (const targetCoverage of coverageValues) {
const result = await simulateFastq.handler({
referenceSequence: testReference,
readLength: 100,
coverage: targetCoverage,
seed: 12345
});
const data = JSON.parse(result.content[0].text);
const actualCoverage = data.statistics.coverage;
// Should be within 50% of target coverage (coverage calculation can vary with discrete reads)
expect(actualCoverage).toBeCloseTo(targetCoverage, 0);
const relativeError = Math.abs(actualCoverage - targetCoverage) / targetCoverage;
expect(relativeError).toBeLessThan(0.5);
}
});
});
describe('Paired-end specifics', () => {
it('should generate reads with correct insert sizes', async () => {
const result = await simulateFastq.handler({
referenceSequence: testReference,
readLength: 50,
coverage: 3,
readType: 'paired-end',
insertSize: 200,
insertSizeStd: 20,
outputFormat: 'json',
seed: 12345
});
const data = JSON.parse(result.content[0].text);
expect(data.reads).toBeDefined();
if (data.reads && data.reads.length > 0) {
// Check that reads have insert size information
const firstPair = data.reads[0];
expect(firstPair.read1.insertSize).toBeDefined();
expect(firstPair.read2.insertSize).toBeDefined();
expect(firstPair.read1.insertSize).toBe(firstPair.read2.insertSize);
// Insert size should be reasonable
expect(firstPair.read1.insertSize).toBeGreaterThan(100);
expect(firstPair.read1.insertSize).toBeLessThan(500);
}
});
it('should generate forward and reverse reads', async () => {
const result = await simulateFastq.handler({
referenceSequence: testReference,
readLength: 50,
coverage: 2,
readType: 'paired-end',
outputFormat: 'json',
seed: 12345
});
const data = JSON.parse(result.content[0].text);
if (data.reads && data.reads.length > 0) {
const firstPair = data.reads[0];
expect(firstPair.read1.strand).toBe('+');
expect(firstPair.read2.strand).toBe('-');
expect(firstPair.read1.id).toContain('/1');
expect(firstPair.read2.id).toContain('/2');
}
});
});
describe('Input validation', () => {
it('should handle minimum read length', async () => {
const result = await simulateFastq.handler({
referenceSequence: testReference,
readLength: 50,
coverage: 1,
seed: 12345
});
const data = JSON.parse(result.content[0].text);
expect(data.statistics.readLength).toBe(50);
});
it('should handle short reference sequences', async () => {
const shortRef = 'ATCGATCGATCG';
const result = await simulateFastq.handler({
referenceSequence: shortRef,
readLength: 10,
coverage: 2,
seed: 12345
});
const data = JSON.parse(result.content[0].text);
expect(data.statistics.totalReads).toBeGreaterThan(0);
});
it('should handle edge case error rates', async () => {
const testCases = [
{ errorRate: 0.0 },
{ errorRate: 0.1 }
];
for (const testCase of testCases) {
const result = await simulateFastq.handler({
referenceSequence: testReference,
readLength: 50,
coverage: 1,
...testCase,
seed: 12345
});
const data = JSON.parse(result.content[0].text);
expect(data.statistics.errorRate).toBe(testCase.errorRate);
}
});
});
describe('Citation and metadata', () => {
it('should include proper citation in statistics', async () => {
const result = await simulateFastq.handler({
referenceSequence: testReference,
readLength: 50,
coverage: 1,
seed: 12345
});
const data = JSON.parse(result.content[0].text);
expect(data.statistics.citation).toContain('Stephens et al. (2016)');
expect(data.statistics.citation).toContain('PLOS ONE');
expect(data.statistics.citation).toContain('10.1371/journal.pone.0167047');
});
it('should include all required statistics', async () => {
const result = await simulateFastq.handler({
referenceSequence: testReference,
readLength: 75,
coverage: 5,
readType: 'paired-end',
errorRate: 0.02,
mutationRate: 0.001,
qualityModel: 'illumina',
seed: 12345
});
const data = JSON.parse(result.content[0].text);
const stats = data.statistics;
expect(stats.totalReads).toBeDefined();
expect(stats.readLength).toBe(75);
expect(stats.coverage).toBeDefined();
expect(stats.readType).toBe('paired-end');
expect(stats.errorRate).toBe(0.02);
expect(stats.mutationRate).toBe(0.001);
expect(stats.qualityModel).toBe('illumina');
expect(stats.seed).toBe(12345);
expect(stats.citation).toBeDefined();
});
});
describe('Tool definition', () => {
it('should have correct tool definition structure', () => {
expect(simulateFastq.definition.name).toBe('simulate_fastq_file');
expect(simulateFastq.definition.description).toContain('Simulate FASTQ sequencing reads');
expect(simulateFastq.definition.description).toContain('NEAT methodology');
expect(simulateFastq.definition.description).toContain('Stephens et al. (2016)');
expect(simulateFastq.definition.inputSchema.type).toBe('object');
expect(simulateFastq.definition.inputSchema.required).toEqual(['referenceSequence', 'readLength', 'coverage']);
});
it('should have proper parameter definitions', () => {
const props = simulateFastq.definition.inputSchema.properties;
expect(props.referenceSequence.type).toBe('string');
expect(props.readLength.type).toBe('number');
expect(props.readLength.minimum).toBe(50);
expect(props.readLength.maximum).toBe(300);
expect(props.coverage.type).toBe('number');
expect(props.coverage.minimum).toBe(1);
expect(props.coverage.maximum).toBe(1000);
expect(props.readType.enum).toEqual(['single-end', 'paired-end']);
expect(props.qualityModel.enum).toEqual(['illumina', '454', 'ion-torrent', 'pacbio']);
expect(props.outputFormat.enum).toEqual(['fastq', 'json']);
});
});
});
describe('simulateFastq Helper Functions', () => {
let generator: SequenceGenerator;
beforeEach(() => {
generator = new SequenceGenerator(12345);
});
describe('Quality score generation', () => {
it('should generate valid quality scores for different platforms', () => {
const platforms = ['illumina', '454', 'ion-torrent', 'pacbio'];
platforms.forEach(platform => {
// Test both error and non-error conditions
[true, false].forEach(hasError => {
// Note: This tests the internal generateQualityScore function indirectly
// through the main simulation
expect(true).toBe(true); // Placeholder - actual testing done through integration
});
});
});
});
describe('Reverse complement function', () => {
it('should correctly reverse complement DNA sequences', () => {
// Note: Testing internal reverseComplement function indirectly
const testCases = [
{ input: 'ATCG', expected: 'CGAT' },
{ input: 'AAAA', expected: 'TTTT' },
{ input: 'GCGC', expected: 'GCGC' }
];
// This would need to be tested through paired-end generation
// where reverse complement is used
expect(true).toBe(true); // Placeholder
});
});
describe('FASTQ format validation', () => {
it('should generate valid FASTQ records', async () => {
const result = await simulateFastq.handler({
referenceSequence: 'ATCGATCGATCGATCGATCGATCGATCGATCGATCG',
readLength: 20,
coverage: 1,
seed: 12345
});
const data = JSON.parse(result.content[0].text);
const fastqOutput = data.fastqOutput;
// Validate FASTQ format
const records = fastqOutput.split('\n@').map(record =>
record.startsWith('@') ? record : '@' + record
);
records.forEach(record => {
if (record.trim()) {
const lines = record.trim().split('\n');
expect(lines.length).toBe(4); // Header, sequence, +, quality
expect(lines[0]).toMatch(/^@sim_read_\d+/); // Header
expect(lines[1]).toMatch(/^[ATGC]+$/); // Sequence
expect(lines[2]).toBe('+'); // Plus line
expect(lines[3].length).toBe(lines[1].length); // Quality same length as sequence
}
});
});
it('should generate quality strings with valid ASCII range', async () => {
const result = await simulateFastq.handler({
referenceSequence: 'ATCGATCGATCGATCGATCGATCGATCGATCGATCG',
readLength: 50,
coverage: 2,
seed: 12345,
outputFormat: 'json'
});
const data = JSON.parse(result.content[0].text);
if (data.reads && data.reads.length > 0) {
data.reads.forEach((read: any) => {
const qualities = read.qualities || read.read1?.qualities;
if (qualities) {
for (let i = 0; i < qualities.length; i++) {
const charCode = qualities.charCodeAt(i);
expect(charCode).toBeGreaterThanOrEqual(33); // Minimum FASTQ quality
expect(charCode).toBeLessThanOrEqual(126); // Maximum printable ASCII
}
}
});
}
});
});
describe('Statistical validation', () => {
it('should maintain expected error rates across reads', async () => {
const errorRate = 0.05;
const result = await simulateFastq.handler({
referenceSequence: 'ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCG',
readLength: 100,
coverage: 10,
errorRate,
mutationRate: 0, // No biological mutations
seed: 12345,
outputFormat: 'json'
});
const data = JSON.parse(result.content[0].text);
expect(data.statistics.errorRate).toBe(errorRate);
// Statistical validation would require analyzing actual vs expected error counts
// This is a placeholder for more detailed statistical testing
expect(true).toBe(true);
});
});
});