validate-content.ts•53.1 kB
import { Tool } from '@modelcontextprotocol/sdk/types.js';
import * as fs from 'fs/promises';
import * as path from 'path';
import { exec } from 'child_process';
import { promisify } from 'util';
import { handleMemoryRecall } from '../memory/index.js';
// ESM-compatible dirname replacement - fallback for test environments
function getDirname(): string {
// Use process.cwd() as fallback for all environments to avoid import.meta issues
return process.cwd();
}
const currentDir = getDirname();
const execAsync = promisify(exec);
interface ValidationOptions {
contentPath: string;
analysisId?: string;
validationType: 'accuracy' | 'completeness' | 'compliance' | 'all';
includeCodeValidation: boolean;
confidence: 'strict' | 'moderate' | 'permissive';
}
interface ConfidenceMetrics {
overall: number;
breakdown: {
technologyDetection: number;
frameworkVersionAccuracy: number;
codeExampleRelevance: number;
architecturalAssumptions: number;
businessContextAlignment: number;
};
riskFactors: RiskFactor[];
}
interface RiskFactor {
type: 'high' | 'medium' | 'low';
category: string;
description: string;
impact: string;
mitigation: string;
}
interface UncertaintyFlag {
area: string;
severity: 'low' | 'medium' | 'high' | 'critical';
description: string;
potentialImpact: string;
clarificationNeeded: string;
fallbackStrategy: string;
}
interface ValidationIssue {
type: 'error' | 'warning' | 'info';
category: 'accuracy' | 'completeness' | 'compliance' | 'performance';
location: {
file: string;
line?: number;
section?: string;
};
description: string;
evidence: string[];
suggestedFix: string;
confidence: number;
}
interface CodeValidationResult {
overallSuccess: boolean;
exampleResults: ExampleValidation[];
confidence: number;
}
interface ExampleValidation {
example: string;
compilationSuccess: boolean;
executionSuccess: boolean;
issues: ValidationIssue[];
confidence: number;
}
export interface ValidationResult {
success: boolean;
confidence: ConfidenceMetrics;
issues: ValidationIssue[];
uncertainties: UncertaintyFlag[];
codeValidation?: CodeValidationResult;
recommendations: string[];
nextSteps: string[];
}
class ContentAccuracyValidator {
private projectContext: any;
private tempDir: string;
constructor() {
this.tempDir = path.join(currentDir, '.tmp');
}
async validateContent(options: ValidationOptions): Promise<ValidationResult> {
const result: ValidationResult = {
success: false,
confidence: this.initializeConfidenceMetrics(),
issues: [],
uncertainties: [],
recommendations: [],
nextSteps: [],
};
// Load project context if analysis ID provided
if (options.analysisId) {
this.projectContext = await this.loadProjectContext(options.analysisId);
}
// Determine if we should analyze application code vs documentation
const isApplicationValidation = await this.shouldAnalyzeApplicationCode(options.contentPath);
// Perform different types of validation based on request
if (options.validationType === 'all' || options.validationType === 'accuracy') {
await this.validateAccuracy(options.contentPath, result);
}
if (options.validationType === 'all' || options.validationType === 'completeness') {
await this.validateCompleteness(options.contentPath, result);
}
if (options.validationType === 'all' || options.validationType === 'compliance') {
if (isApplicationValidation) {
await this.validateApplicationStructureCompliance(options.contentPath, result);
} else {
await this.validateDiataxisCompliance(options.contentPath, result);
}
}
// Code validation if requested
if (options.includeCodeValidation) {
result.codeValidation = await this.validateCodeExamples(options.contentPath);
// Set code example relevance confidence based on code validation results
if (result.codeValidation) {
const successRate =
result.codeValidation.exampleResults.length > 0
? result.codeValidation.exampleResults.filter((e) => e.compilationSuccess).length /
result.codeValidation.exampleResults.length
: 1;
result.confidence.breakdown.codeExampleRelevance = Math.round(successRate * 100);
}
} else {
// If code validation is skipped, assume reasonable confidence
result.confidence.breakdown.codeExampleRelevance = 75;
}
// Set framework version accuracy based on technology detection confidence
result.confidence.breakdown.frameworkVersionAccuracy = Math.min(
90,
result.confidence.breakdown.technologyDetection + 10,
);
// Set architectural assumptions confidence based on file structure and content analysis
const filesAnalyzed = await this.getMarkdownFiles(options.contentPath);
const hasStructuredContent = filesAnalyzed.length > 3; // Basic heuristic
result.confidence.breakdown.architecturalAssumptions = hasStructuredContent ? 80 : 60;
// Calculate overall confidence and success
this.calculateOverallMetrics(result);
// Generate recommendations and next steps
this.generateRecommendations(result, options);
return result;
}
private initializeConfidenceMetrics(): ConfidenceMetrics {
return {
overall: 0,
breakdown: {
technologyDetection: 0,
frameworkVersionAccuracy: 0,
codeExampleRelevance: 0,
architecturalAssumptions: 0,
businessContextAlignment: 0,
},
riskFactors: [],
};
}
private async loadProjectContext(analysisId: string): Promise<any> {
// Try to get analysis from memory system first
try {
const memoryRecall = await handleMemoryRecall({
query: analysisId,
type: 'analysis',
limit: 1,
});
// Handle the memory recall result structure
if (memoryRecall && memoryRecall.memories && memoryRecall.memories.length > 0) {
const memory = memoryRecall.memories[0];
// Handle wrapped content structure
if (memory.data && memory.data.content && Array.isArray(memory.data.content)) {
// Extract the JSON from the first text content
const firstContent = memory.data.content[0];
if (firstContent && firstContent.type === 'text' && firstContent.text) {
try {
return JSON.parse(firstContent.text);
} catch (parseError) {
console.warn('Failed to parse analysis content from memory:', parseError);
}
}
}
// Try direct content or data access
if (memory.content) {
return memory.content;
}
if (memory.data) {
return memory.data;
}
}
} catch (error) {
console.warn('Failed to retrieve from memory system:', error);
}
// Fallback to reading from cached analysis file
try {
const analysisPath = path.join('.documcp', 'analyses', `${analysisId}.json`);
const content = await fs.readFile(analysisPath, 'utf-8');
return JSON.parse(content);
} catch {
// Return default context if no analysis found
return {
metadata: { projectName: 'unknown', primaryLanguage: 'JavaScript' },
technologies: {},
dependencies: { packages: [] },
};
}
}
private async validateAccuracy(contentPath: string, result: ValidationResult): Promise<void> {
const files = await this.getMarkdownFiles(contentPath);
for (const file of files) {
const content = await fs.readFile(file, 'utf-8');
// Check for common accuracy issues
await this.checkTechnicalAccuracy(file, content, result);
await this.checkFrameworkVersionCompatibility(file, content, result);
await this.checkCommandAccuracy(file, content, result);
await this.checkLinkValidity(file, content, result);
}
// Update confidence based on findings
this.updateAccuracyConfidence(result);
}
private async checkTechnicalAccuracy(
filePath: string,
content: string,
result: ValidationResult,
): Promise<void> {
// Check for deprecated patterns
const deprecatedPatterns = [
{ pattern: /npm install -g/, suggestion: 'Use npx instead of global installs' },
{ pattern: /var\s+\w+/, suggestion: 'Use const or let instead of var' },
{ pattern: /function\(\)/, suggestion: 'Consider using arrow functions' },
{ pattern: /http:\/\//, suggestion: 'Use HTTPS URLs for security' },
];
for (const { pattern, suggestion } of deprecatedPatterns) {
if (pattern.test(content)) {
result.issues.push({
type: 'warning',
category: 'accuracy',
location: { file: path.basename(filePath) },
description: `Potentially outdated pattern detected: ${pattern.source}`,
evidence: [content.match(pattern)?.[0] || ''],
suggestedFix: suggestion,
confidence: 80,
});
}
}
// Check for missing error handling in code examples
const codeBlocks = this.extractCodeBlocks(content);
for (const block of codeBlocks) {
if (block.language === 'javascript' || block.language === 'typescript') {
if (
block.code.includes('await') &&
!block.code.includes('try') &&
!block.code.includes('catch')
) {
result.issues.push({
type: 'warning',
category: 'accuracy',
location: { file: path.basename(filePath) },
description: 'Async code without error handling',
evidence: [block.code.substring(0, 100)],
suggestedFix: 'Add try-catch blocks for async operations',
confidence: 90,
});
}
}
}
}
private async checkFrameworkVersionCompatibility(
filePath: string,
content: string,
result: ValidationResult,
): Promise<void> {
if (!this.projectContext) return;
// Check if mentioned versions align with project dependencies
const versionPattern = /@(\d+\.\d+\.\d+)/g;
const matches = content.match(versionPattern);
if (matches) {
for (const match of matches) {
const version = match.replace('@', '');
result.uncertainties.push({
area: 'version-compatibility',
severity: 'medium',
description: `Version ${version} mentioned in documentation`,
potentialImpact: 'May not match actual project dependencies',
clarificationNeeded: 'Verify version compatibility with project',
fallbackStrategy: 'Use generic version-agnostic examples',
});
}
}
}
private async checkCommandAccuracy(
filePath: string,
content: string,
result: ValidationResult,
): Promise<void> {
const codeBlocks = this.extractCodeBlocks(content);
for (const block of codeBlocks) {
if (block.language === 'bash' || block.language === 'sh') {
// Check for common command issues
const commands = block.code
.split('\n')
.filter((line) => line.trim() && !line.startsWith('#'));
for (const command of commands) {
// Check for potentially dangerous commands
const dangerousPatterns = [/rm -rf \//, /sudo rm/, /chmod 777/, /> \/dev\/null 2>&1/];
for (const pattern of dangerousPatterns) {
if (pattern.test(command)) {
result.issues.push({
type: 'error',
category: 'accuracy',
location: { file: path.basename(filePath) },
description: 'Potentially dangerous command in documentation',
evidence: [command],
suggestedFix: 'Review and provide safer alternative',
confidence: 95,
});
}
}
// Check for non-portable commands (Windows vs Unix)
if (command.includes('\\') && command.includes('/')) {
result.issues.push({
type: 'warning',
category: 'accuracy',
location: { file: path.basename(filePath) },
description: 'Mixed path separators in command',
evidence: [command],
suggestedFix: 'Use consistent path separators or provide OS-specific examples',
confidence: 85,
});
}
}
}
}
}
private async checkLinkValidity(
filePath: string,
content: string,
result: ValidationResult,
): Promise<void> {
const linkPattern = /\[([^\]]+)\]\(([^)]+)\)/g;
const links: Array<{ text: string; url: string }> = [];
let match;
while ((match = linkPattern.exec(content)) !== null) {
links.push({ text: match[1], url: match[2] });
}
for (const link of links) {
// Check internal links
if (!link.url.startsWith('http')) {
const targetPath = path.resolve(path.dirname(filePath), link.url);
try {
await fs.access(targetPath);
} catch {
result.issues.push({
type: 'error',
category: 'accuracy',
location: { file: path.basename(filePath) },
description: `Broken internal link: ${link.url}`,
evidence: [link.text],
suggestedFix: 'Fix the link path or create the missing file',
confidence: 100,
});
}
}
// Flag external links for manual verification
if (link.url.startsWith('http')) {
result.uncertainties.push({
area: 'external-links',
severity: 'low',
description: `External link: ${link.url}`,
potentialImpact: 'Link may become outdated or broken',
clarificationNeeded: 'Verify link is still valid',
fallbackStrategy: 'Archive important external content locally',
});
}
}
}
private async validateCompleteness(contentPath: string, result: ValidationResult): Promise<void> {
const files = await this.getMarkdownFiles(contentPath);
const structure = await this.analyzeDiataxisStructure(contentPath);
// Check for missing essential sections
const requiredSections = ['tutorials', 'how-to', 'reference', 'explanation'];
const missingSections = requiredSections.filter(
(section) => !structure.sections.includes(section),
);
if (missingSections.length > 0) {
result.issues.push({
type: 'warning',
category: 'completeness',
location: { file: 'documentation structure' },
description: `Missing Diataxis sections: ${missingSections.join(', ')}`,
evidence: structure.sections,
suggestedFix: 'Add missing Diataxis sections for complete documentation',
confidence: 100,
});
}
// Check content depth in each section
for (const section of structure.sections) {
const sectionFiles = files.filter((f) => f.includes(`/${section}/`));
if (sectionFiles.length < 2) {
result.issues.push({
type: 'info',
category: 'completeness',
location: { file: section },
description: `Limited content in ${section} section`,
evidence: [`Only ${sectionFiles.length} files`],
suggestedFix: 'Consider adding more comprehensive coverage',
confidence: 75,
});
}
}
// Update completeness confidence
result.confidence.breakdown.businessContextAlignment = Math.max(
0,
100 - missingSections.length * 25,
);
}
private async validateDiataxisCompliance(
contentPath: string,
result: ValidationResult,
): Promise<void> {
const files = await this.getMarkdownFiles(contentPath);
for (const file of files) {
const content = await fs.readFile(file, 'utf-8');
const section = this.identifyDiataxisSection(file);
if (section) {
await this.checkSectionCompliance(file, content, section, result);
}
}
}
private async validateApplicationStructureCompliance(
contentPath: string,
result: ValidationResult,
): Promise<void> {
// Analyze application source code for Diataxis compliance
await this.validateSourceCodeDocumentation(contentPath, result);
await this.validateApplicationArchitecture(contentPath, result);
await this.validateInlineDocumentationPatterns(contentPath, result);
}
private async validateSourceCodeDocumentation(
contentPath: string,
result: ValidationResult,
): Promise<void> {
const sourceFiles = await this.getSourceFiles(contentPath);
for (const file of sourceFiles) {
const content = await fs.readFile(file, 'utf-8');
// Check for proper JSDoc/TSDoc documentation
await this.checkInlineDocumentationQuality(file, content, result);
// Check for README files and their structure
if (file.endsWith('README.md')) {
await this.validateReadmeStructure(file, content, result);
}
// Check for proper module/class documentation
await this.checkModuleDocumentation(file, content, result);
}
}
private async validateApplicationArchitecture(
contentPath: string,
result: ValidationResult,
): Promise<void> {
// Check if the application structure supports different types of documentation
const hasToolsDir = await this.pathExists(path.join(contentPath, 'tools'));
const hasTypesDir = await this.pathExists(path.join(contentPath, 'types'));
// Check for workflows directory (currently not used but may be useful for future validation)
// const hasWorkflowsDir = await this.pathExists(path.join(contentPath, 'workflows'));
if (!hasToolsDir) {
result.issues.push({
type: 'warning',
category: 'compliance',
location: { file: 'application structure' },
description:
'No dedicated tools directory found - may impact reference documentation organization',
evidence: ['Missing /tools directory'],
suggestedFix: 'Organize tools into dedicated directory for better reference documentation',
confidence: 80,
});
}
if (!hasTypesDir) {
result.issues.push({
type: 'info',
category: 'compliance',
location: { file: 'application structure' },
description: 'No types directory found - may impact API reference documentation',
evidence: ['Missing /types directory'],
suggestedFix: 'Consider organizing types for better API documentation',
confidence: 70,
});
}
}
private async validateInlineDocumentationPatterns(
contentPath: string,
result: ValidationResult,
): Promise<void> {
const sourceFiles = await this.getSourceFiles(contentPath);
for (const file of sourceFiles) {
const content = await fs.readFile(file, 'utf-8');
// Check for proper function documentation that could support tutorials
const functions = this.extractFunctions(content);
for (const func of functions) {
if (func.isExported && !func.hasDocumentation) {
result.issues.push({
type: 'warning',
category: 'compliance',
location: { file: path.basename(file), line: func.line },
description: `Exported function '${func.name}' lacks documentation`,
evidence: [func.signature],
suggestedFix: 'Add JSDoc/TSDoc documentation to support tutorial and reference content',
confidence: 85,
});
}
}
// Check for proper error handling documentation
const errorPatterns = content.match(/throw new \w*Error/g);
if (errorPatterns && errorPatterns.length > 0) {
const hasErrorDocs = content.includes('@throws') || content.includes('@error');
if (!hasErrorDocs) {
result.issues.push({
type: 'info',
category: 'compliance',
location: { file: path.basename(file) },
description: 'Error throwing code found without error documentation',
evidence: errorPatterns,
suggestedFix: 'Document error conditions to support troubleshooting guides',
confidence: 75,
});
}
}
}
}
private identifyDiataxisSection(filePath: string): string | null {
const sections = ['tutorials', 'how-to', 'reference', 'explanation'];
for (const section of sections) {
if (filePath.includes(`/${section}/`)) {
return section;
}
}
return null;
}
private async checkSectionCompliance(
filePath: string,
content: string,
section: string,
result: ValidationResult,
): Promise<void> {
const complianceRules = this.getDiataxisComplianceRules(section);
for (const rule of complianceRules) {
if (!rule.check(content)) {
result.issues.push({
type: 'warning',
category: 'compliance',
location: { file: path.basename(filePath), section },
description: rule.message,
evidence: [rule.evidence?.(content) || ''],
suggestedFix: rule.fix,
confidence: rule.confidence,
});
}
}
}
private getDiataxisComplianceRules(section: string) {
const rules: any = {
tutorials: [
{
check: (content: string) =>
content.includes('## Prerequisites') || content.includes('## Requirements'),
message: 'Tutorial should include prerequisites section',
fix: 'Add a prerequisites or requirements section',
confidence: 90,
},
{
check: (content: string) => /step|Step|STEP/.test(content),
message: 'Tutorial should be organized in clear steps',
fix: 'Structure content with numbered steps or clear progression',
confidence: 85,
},
{
check: (content: string) => content.includes('```'),
message: 'Tutorial should include practical code examples',
fix: 'Add code blocks with working examples',
confidence: 80,
},
],
'how-to': [
{
check: (content: string) => /how to|How to|HOW TO/.test(content),
message: 'How-to guide should focus on specific tasks',
fix: 'Frame content around achieving specific goals',
confidence: 75,
},
{
check: (content: string) => content.length > 500,
message: 'How-to guide should provide detailed guidance',
fix: 'Expand with more detailed instructions',
confidence: 70,
},
],
reference: [
{
check: (content: string) => /##|###/.test(content),
message: 'Reference should be well-structured with clear sections',
fix: 'Add proper headings and organization',
confidence: 95,
},
{
check: (content: string) => /\|.*\|/.test(content),
message: 'Reference should include tables for structured information',
fix: 'Consider using tables for parameters, options, etc.',
confidence: 60,
},
],
explanation: [
{
check: (content: string) => content.includes('why') || content.includes('Why'),
message: 'Explanation should address the "why" behind concepts',
fix: 'Include rationale and context for decisions/concepts',
confidence: 80,
},
{
check: (content: string) => content.length > 800,
message: 'Explanation should provide in-depth coverage',
fix: 'Expand with more comprehensive explanation',
confidence: 70,
},
],
};
return rules[section] || [];
}
private async validateCodeExamples(contentPath: string): Promise<CodeValidationResult> {
const files = await this.getMarkdownFiles(contentPath);
const allExamples: ExampleValidation[] = [];
for (const file of files) {
const content = await fs.readFile(file, 'utf-8');
const codeBlocks = this.extractCodeBlocks(content);
for (const block of codeBlocks) {
if (this.isValidatableLanguage(block.language)) {
const validation = await this.validateCodeBlock(block, file);
allExamples.push(validation);
}
}
}
return {
overallSuccess: allExamples.every((e) => e.compilationSuccess),
exampleResults: allExamples,
confidence: this.calculateCodeValidationConfidence(allExamples),
};
}
private extractCodeBlocks(
content: string,
): Array<{ language: string; code: string; id: string }> {
const codeBlockPattern = /```(\w+)?\n([\s\S]*?)```/g;
const blocks: Array<{ language: string; code: string; id: string }> = [];
let match;
let index = 0;
while ((match = codeBlockPattern.exec(content)) !== null) {
blocks.push({
language: match[1] || 'text',
code: match[2].trim(),
id: `block-${index++}`,
});
}
return blocks;
}
private isValidatableLanguage(language: string): boolean {
const validatable = ['javascript', 'typescript', 'js', 'ts', 'json', 'bash', 'sh'];
return validatable.includes(language.toLowerCase());
}
private async validateCodeBlock(
block: { language: string; code: string; id: string },
filePath: string,
): Promise<ExampleValidation> {
const validation: ExampleValidation = {
example: block.id,
compilationSuccess: false,
executionSuccess: false,
issues: [],
confidence: 0,
};
try {
if (block.language === 'typescript' || block.language === 'ts') {
await this.validateTypeScriptCode(block.code, validation);
} else if (block.language === 'javascript' || block.language === 'js') {
await this.validateJavaScriptCode(block.code, validation);
} else if (block.language === 'json') {
await this.validateJSONCode(block.code, validation);
} else if (block.language === 'bash' || block.language === 'sh') {
await this.validateBashCode(block.code, validation);
}
} catch (error: any) {
validation.issues.push({
type: 'error',
category: 'accuracy',
location: { file: path.basename(filePath) },
description: `Code validation failed: ${error.message}`,
evidence: [block.code.substring(0, 100)],
suggestedFix: 'Review and fix syntax errors',
confidence: 95,
});
}
return validation;
}
private async validateTypeScriptCode(code: string, validation: ExampleValidation): Promise<void> {
// Ensure temp directory exists
await fs.mkdir(this.tempDir, { recursive: true });
const tempFile = path.join(this.tempDir, `temp-${Date.now()}.ts`);
try {
// Write code to temporary file
await fs.writeFile(tempFile, code, 'utf-8');
// Try to compile with TypeScript
const { stderr } = await execAsync(`npx tsc --noEmit --skipLibCheck ${tempFile}`);
if (stderr && stderr.includes('error')) {
validation.issues.push({
type: 'error',
category: 'accuracy',
location: { file: 'code-example' },
description: 'TypeScript compilation error',
evidence: [stderr],
suggestedFix: 'Fix TypeScript syntax and type errors',
confidence: 90,
});
} else {
validation.compilationSuccess = true;
validation.confidence = 85;
}
} catch (error: any) {
if (error.stderr && error.stderr.includes('error')) {
validation.issues.push({
type: 'error',
category: 'accuracy',
location: { file: 'code-example' },
description: 'TypeScript compilation failed',
evidence: [error.stderr],
suggestedFix: 'Fix compilation errors',
confidence: 95,
});
}
} finally {
// Clean up temp file
try {
await fs.unlink(tempFile);
} catch {
// Ignore cleanup errors
}
}
}
private async validateJavaScriptCode(code: string, validation: ExampleValidation): Promise<void> {
try {
// Basic syntax check using Node.js
new Function(code);
validation.compilationSuccess = true;
validation.confidence = 75;
} catch (error: any) {
validation.issues.push({
type: 'error',
category: 'accuracy',
location: { file: 'code-example' },
description: `JavaScript syntax error: ${error.message}`,
evidence: [code.substring(0, 100)],
suggestedFix: 'Fix JavaScript syntax errors',
confidence: 90,
});
}
}
private async validateJSONCode(code: string, validation: ExampleValidation): Promise<void> {
try {
JSON.parse(code);
validation.compilationSuccess = true;
validation.confidence = 95;
} catch (error: any) {
validation.issues.push({
type: 'error',
category: 'accuracy',
location: { file: 'code-example' },
description: `Invalid JSON: ${error.message}`,
evidence: [code.substring(0, 100)],
suggestedFix: 'Fix JSON syntax errors',
confidence: 100,
});
}
}
private async validateBashCode(code: string, validation: ExampleValidation): Promise<void> {
// Basic bash syntax validation
const lines = code.split('\n').filter((line) => line.trim() && !line.startsWith('#'));
for (const line of lines) {
// Check for basic syntax issues
if (line.includes('&&') && line.includes('||')) {
validation.issues.push({
type: 'warning',
category: 'accuracy',
location: { file: 'code-example' },
description: 'Complex command chaining may be confusing',
evidence: [line],
suggestedFix: 'Consider breaking into separate commands or adding explanation',
confidence: 60,
});
}
// Check for unquoted variables in dangerous contexts
if (line.includes('rm') && /\$\w+/.test(line) && !/'.*\$.*'/.test(line)) {
validation.issues.push({
type: 'warning',
category: 'accuracy',
location: { file: 'code-example' },
description: 'Unquoted variable in potentially dangerous command',
evidence: [line],
suggestedFix: 'Quote variables to prevent word splitting',
confidence: 80,
});
}
}
validation.compilationSuccess =
validation.issues.filter((i) => i.type === 'error').length === 0;
validation.confidence = validation.compilationSuccess ? 70 : 20;
}
private calculateCodeValidationConfidence(examples: ExampleValidation[]): number {
if (examples.length === 0) return 0;
const totalConfidence = examples.reduce((sum, ex) => sum + ex.confidence, 0);
return Math.round(totalConfidence / examples.length);
}
public async getMarkdownFiles(contentPath: string, maxDepth: number = 5): Promise<string[]> {
const files: string[] = [];
const excludedDirs = new Set([
'node_modules',
'.git',
'dist',
'build',
'.next',
'.nuxt',
'coverage',
'.tmp',
'tmp',
'.cache',
'.vscode',
'.idea',
'logs',
'.logs',
'.npm',
'.yarn',
]);
const scan = async (dir: string, currentDepth: number = 0): Promise<void> => {
if (currentDepth > maxDepth) return;
try {
const entries = await fs.readdir(dir, { withFileTypes: true });
for (const entry of entries) {
const fullPath = path.join(dir, entry.name);
if (entry.isDirectory()) {
// Skip excluded directories
if (excludedDirs.has(entry.name) || entry.name.startsWith('.')) {
continue;
}
// Prevent symlink loops
try {
const stats = await fs.lstat(fullPath);
if (stats.isSymbolicLink()) {
continue;
}
} catch {
continue;
}
await scan(fullPath, currentDepth + 1);
} else if (entry.name.endsWith('.md')) {
files.push(fullPath);
// Limit total files to prevent memory issues
if (files.length > 500) {
console.warn('Markdown file limit reached (500), stopping scan');
return;
}
}
}
} catch (error) {
// Skip directories that can't be read
console.warn(`Warning: Could not read directory ${dir}:`, error);
}
};
try {
await scan(contentPath);
} catch (error) {
console.warn('Error scanning directory:', error);
}
return files;
}
private async getSourceFiles(contentPath: string, maxDepth: number = 5): Promise<string[]> {
const files: string[] = [];
const excludedDirs = new Set([
'node_modules',
'.git',
'dist',
'build',
'.next',
'.nuxt',
'coverage',
'.tmp',
'tmp',
'.cache',
'.vscode',
'.idea',
'logs',
'.logs',
'.npm',
'.yarn',
]);
const scan = async (dir: string, currentDepth: number = 0): Promise<void> => {
if (currentDepth > maxDepth) return;
try {
const entries = await fs.readdir(dir, { withFileTypes: true });
for (const entry of entries) {
const fullPath = path.join(dir, entry.name);
if (entry.isDirectory()) {
// Skip excluded directories
if (excludedDirs.has(entry.name) || entry.name.startsWith('.')) {
continue;
}
// Prevent symlink loops
try {
const stats = await fs.lstat(fullPath);
if (stats.isSymbolicLink()) {
continue;
}
} catch {
continue;
}
await scan(fullPath, currentDepth + 1);
} else if (
entry.name.endsWith('.ts') ||
entry.name.endsWith('.js') ||
entry.name.endsWith('.md')
) {
files.push(fullPath);
// Limit total files to prevent memory issues
if (files.length > 1000) {
console.warn('File limit reached (1000), stopping scan');
return;
}
}
}
} catch (error) {
// Skip directories that can't be read
console.warn(`Warning: Could not read directory ${dir}:`, error);
}
};
try {
await scan(contentPath);
} catch (error) {
console.warn('Error scanning directory:', error);
}
return files;
}
private async pathExists(filePath: string): Promise<boolean> {
try {
await fs.access(filePath);
return true;
} catch {
return false;
}
}
private extractFunctions(content: string): Array<{
name: string;
line: number;
signature: string;
isExported: boolean;
hasDocumentation: boolean;
}> {
const functions: Array<{
name: string;
line: number;
signature: string;
isExported: boolean;
hasDocumentation: boolean;
}> = [];
const lines = content.split('\n');
for (let i = 0; i < lines.length; i++) {
const line = lines[i];
// Match function declarations and exports
const functionMatch = line.match(/^(export\s+)?(async\s+)?function\s+(\w+)/);
const arrowMatch = line.match(/^(export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(async\s+)?\(/);
if (functionMatch) {
const name = functionMatch[3];
const isExported = !!functionMatch[1];
const hasDocumentation = this.checkForDocumentation(lines, i);
functions.push({
name,
line: i + 1,
signature: line.trim(),
isExported,
hasDocumentation,
});
} else if (arrowMatch) {
const name = arrowMatch[2];
const isExported = !!arrowMatch[1];
const hasDocumentation = this.checkForDocumentation(lines, i);
functions.push({
name,
line: i + 1,
signature: line.trim(),
isExported,
hasDocumentation,
});
}
}
return functions;
}
private async checkInlineDocumentationQuality(
_file: string,
_content: string,
_result: ValidationResult,
): Promise<void> {
// Implementation for checking JSDoc/TSDoc quality
// This could check for proper parameter documentation, return types, etc.
}
private async validateReadmeStructure(
_file: string,
content: string,
result: ValidationResult,
): Promise<void> {
// Check if README follows good structure
const hasTitle = /^#\s+/.test(content);
const hasDescription = content.includes('## Description') || content.includes('## Overview');
const hasInstallation = content.includes('## Installation') || content.includes('## Setup');
const hasUsage = content.includes('## Usage') || content.includes('## Getting Started');
if (!hasTitle) {
result.issues.push({
type: 'warning',
category: 'compliance',
location: { file: 'README.md' },
description: 'README missing clear title',
evidence: ['No H1 heading found'],
suggestedFix: 'Add clear title with # heading',
confidence: 90,
});
}
if (!hasDescription && !hasInstallation && !hasUsage) {
result.issues.push({
type: 'warning',
category: 'compliance',
location: { file: 'README.md' },
description: 'README lacks essential sections (description, installation, usage)',
evidence: ['Missing standard README sections'],
suggestedFix:
'Add sections for description, installation, and usage following Diataxis principles',
confidence: 85,
});
}
}
private async checkModuleDocumentation(
_file: string,
_content: string,
_result: ValidationResult,
): Promise<void> {
// Implementation for checking module-level documentation
// This could check for file-level JSDoc, proper exports documentation, etc.
}
private checkForDocumentation(lines: string[], functionLineIndex: number): boolean {
// Look backwards from the function line to find documentation
let checkIndex = functionLineIndex - 1;
// Skip empty lines
while (checkIndex >= 0 && lines[checkIndex].trim() === '') {
checkIndex--;
}
// Check if we found the end of a JSDoc comment
if (checkIndex >= 0 && lines[checkIndex].trim() === '*/') {
// Look backwards to find the start of the JSDoc block
let jsDocStart = checkIndex;
while (jsDocStart >= 0) {
const line = lines[jsDocStart].trim();
if (line.startsWith('/**')) {
return true; // Found complete JSDoc block
}
if (!line.startsWith('*') && line !== '*/') {
break; // Not part of JSDoc block
}
jsDocStart--;
}
}
// Also check for single-line JSDoc comments
if (
checkIndex >= 0 &&
lines[checkIndex].trim().startsWith('/**') &&
lines[checkIndex].includes('*/')
) {
return true;
}
return false;
}
private async shouldAnalyzeApplicationCode(contentPath: string): Promise<boolean> {
// Check if the path contains application source code vs documentation
const hasSrcDir = await this.pathExists(path.join(contentPath, 'src'));
const hasPackageJson = await this.pathExists(path.join(contentPath, 'package.json'));
const hasTypescriptFiles = (await this.getSourceFiles(contentPath)).some((file) =>
file.endsWith('.ts'),
);
// If path ends with 'src' or is a project root with src/, analyze as application code
if (
contentPath.endsWith('/src') ||
contentPath.endsWith('\\src') ||
(hasSrcDir && hasPackageJson)
) {
return true;
}
// If path contains TypeScript/JavaScript files and package.json, treat as application code
if (hasTypescriptFiles && hasPackageJson) {
return true;
}
// If path is specifically a documentation directory, analyze as documentation
if (contentPath.includes('/docs') || contentPath.includes('\\docs')) {
return false;
}
return false;
}
private async analyzeDiataxisStructure(contentPath: string): Promise<{ sections: string[] }> {
const sections: string[] = [];
try {
const entries = await fs.readdir(contentPath, { withFileTypes: true });
for (const entry of entries) {
if (entry.isDirectory()) {
const dirName = entry.name;
if (['tutorials', 'how-to', 'reference', 'explanation'].includes(dirName)) {
sections.push(dirName);
}
}
}
} catch {
// Directory doesn't exist
}
return { sections };
}
private updateAccuracyConfidence(result: ValidationResult): void {
const errorCount = result.issues.filter((i) => i.type === 'error').length;
const warningCount = result.issues.filter((i) => i.type === 'warning').length;
// Base confidence starts high and decreases with issues
let confidence = 95;
confidence -= errorCount * 20;
confidence -= warningCount * 5;
confidence = Math.max(0, confidence);
result.confidence.breakdown.technologyDetection = confidence;
}
private calculateOverallMetrics(result: ValidationResult): void {
const breakdown = result.confidence.breakdown;
const values = Object.values(breakdown).filter((v) => v > 0);
if (values.length > 0) {
result.confidence.overall = Math.round(values.reduce((a, b) => a + b, 0) / values.length);
}
// Determine overall success
const criticalIssues = result.issues.filter((i) => i.type === 'error').length;
result.success = criticalIssues === 0;
// Add risk factors based on issues
if (criticalIssues > 0) {
result.confidence.riskFactors.push({
type: 'high',
category: 'accuracy',
description: `${criticalIssues} critical accuracy issues found`,
impact: 'Users may encounter broken examples or incorrect information',
mitigation: 'Fix all critical issues before publication',
});
}
const uncertaintyCount = result.uncertainties.length;
if (uncertaintyCount > 5) {
result.confidence.riskFactors.push({
type: 'medium',
category: 'completeness',
description: `${uncertaintyCount} areas requiring clarification`,
impact: 'Documentation may lack specificity for user context',
mitigation: 'Address high-priority uncertainties with user input',
});
}
}
private generateRecommendations(result: ValidationResult, _options: ValidationOptions): void {
const recommendations: string[] = [];
const nextSteps: string[] = [];
// Generate recommendations based on issues found
const errorCount = result.issues.filter((i) => i.type === 'error').length;
if (errorCount > 0) {
recommendations.push(`Fix ${errorCount} critical accuracy issues before publication`);
nextSteps.push('Review and resolve all error-level validation issues');
}
const warningCount = result.issues.filter((i) => i.type === 'warning').length;
if (warningCount > 0) {
recommendations.push(`Address ${warningCount} potential accuracy concerns`);
nextSteps.push('Review warning-level issues and apply fixes where appropriate');
}
const uncertaintyCount = result.uncertainties.filter(
(u) => u.severity === 'high' || u.severity === 'critical',
).length;
if (uncertaintyCount > 0) {
recommendations.push(`Clarify ${uncertaintyCount} high-uncertainty areas`);
nextSteps.push('Gather user input on areas flagged for clarification');
}
// Code validation recommendations
if (result.codeValidation && !result.codeValidation.overallSuccess) {
recommendations.push('Fix code examples that fail compilation or execution tests');
nextSteps.push('Test all code examples in appropriate development environment');
}
// Completeness recommendations
const missingCompliance = result.issues.filter((i) => i.category === 'compliance').length;
if (missingCompliance > 0) {
recommendations.push('Improve Diataxis framework compliance for better user experience');
nextSteps.push('Restructure content to better align with Diataxis principles');
}
// General recommendations based on confidence level
if (result.confidence.overall < 70) {
recommendations.push(
'Overall confidence is below recommended threshold - consider comprehensive review',
);
nextSteps.push('Conduct manual review of generated content before publication');
}
if (recommendations.length === 0) {
recommendations.push('Content validation passed - ready for publication');
nextSteps.push('Deploy documentation and monitor for user feedback');
}
result.recommendations = recommendations;
result.nextSteps = nextSteps;
}
}
export const validateDiataxisContent: Tool = {
name: 'validate_diataxis_content',
description:
'Validate the accuracy, completeness, and compliance of generated Diataxis documentation',
inputSchema: {
type: 'object',
properties: {
contentPath: {
type: 'string',
description: 'Path to the documentation directory to validate',
},
analysisId: {
type: 'string',
description: 'Optional repository analysis ID for context-aware validation',
},
validationType: {
type: 'string',
enum: ['accuracy', 'completeness', 'compliance', 'all'],
default: 'all',
description: 'Type of validation to perform',
},
includeCodeValidation: {
type: 'boolean',
default: true,
description: 'Whether to validate code examples for correctness',
},
confidence: {
type: 'string',
enum: ['strict', 'moderate', 'permissive'],
default: 'moderate',
description: 'Validation confidence level - stricter levels catch more issues',
},
},
required: ['contentPath'],
},
};
export async function handleValidateDiataxisContent(args: any): Promise<ValidationResult> {
const validator = new ContentAccuracyValidator();
// Add timeout protection to prevent infinite hangs
const timeoutMs = 120000; // 2 minutes
let timeoutHandle: NodeJS.Timeout;
const timeoutPromise = new Promise<ValidationResult>((_, reject) => {
timeoutHandle = setTimeout(() => {
reject(
new Error(
`Validation timed out after ${
timeoutMs / 1000
} seconds. This may be due to a large directory structure. Try validating a smaller subset or specific directory.`,
),
);
}, timeoutMs);
});
const validationPromise = validator.validateContent(args);
try {
const result = await Promise.race([validationPromise, timeoutPromise]);
clearTimeout(timeoutHandle!);
return result;
} catch (error: any) {
clearTimeout(timeoutHandle!);
// Return a partial result with error information
return {
success: false,
confidence: {
overall: 0,
breakdown: {
technologyDetection: 0,
frameworkVersionAccuracy: 0,
codeExampleRelevance: 0,
architecturalAssumptions: 0,
businessContextAlignment: 0,
},
riskFactors: [
{
type: 'high',
category: 'validation',
description: 'Validation process failed or timed out',
impact: 'Unable to complete content validation',
mitigation: 'Try validating a smaller directory or specific subset of files',
},
],
},
issues: [],
uncertainties: [],
recommendations: [
'Validation failed or timed out',
'Consider validating smaller directory subsets',
'Check for very large files or deep directory structures',
`Error: ${error.message}`,
],
nextSteps: [
'Verify the content path is correct and accessible',
'Try validating specific subdirectories instead of the entire project',
'Check for circular symlinks or very deep directory structures',
],
};
}
}
interface GeneralValidationResult {
success: boolean;
linksChecked: number;
brokenLinks: string[];
codeBlocksValidated: number;
codeErrors: string[];
recommendations: string[];
summary: string;
}
export async function validateGeneralContent(args: any): Promise<GeneralValidationResult> {
const {
contentPath,
validationType = 'all',
includeCodeValidation = true,
followExternalLinks = false,
} = args;
const result: GeneralValidationResult = {
success: true,
linksChecked: 0,
brokenLinks: [],
codeBlocksValidated: 0,
codeErrors: [],
recommendations: [],
summary: '',
};
try {
// Get all markdown files
const validator = new ContentAccuracyValidator();
const files = await validator.getMarkdownFiles(contentPath);
// Check links if requested
if (validationType === 'all' || validationType === 'links') {
for (const file of files) {
const content = await fs.readFile(file, 'utf-8');
const links = extractLinksFromMarkdown(content);
for (const link of links) {
result.linksChecked++;
// Skip external links unless explicitly requested
if (link.startsWith('http') && !followExternalLinks) continue;
// Check internal links
if (!link.startsWith('http')) {
const fullPath = path.resolve(path.dirname(file), link);
try {
await fs.access(fullPath);
} catch {
result.brokenLinks.push(`${path.basename(file)}: ${link}`);
result.success = false;
}
}
}
}
}
// Validate code blocks if requested
if (includeCodeValidation && (validationType === 'all' || validationType === 'code')) {
for (const file of files) {
const content = await fs.readFile(file, 'utf-8');
const codeBlocks = extractCodeBlocks(content);
for (const block of codeBlocks) {
result.codeBlocksValidated++;
// Basic syntax validation
if (block.language && block.code.trim()) {
if (block.language === 'javascript' || block.language === 'js') {
try {
// Basic JS syntax check - look for common issues
if (block.code.includes('console.log') && !block.code.includes(';')) {
result.codeErrors.push(`${path.basename(file)}: Missing semicolon in JS code`);
}
} catch (error) {
result.codeErrors.push(`${path.basename(file)}: JS syntax error - ${error}`);
result.success = false;
}
}
}
}
}
}
// Generate recommendations
if (result.brokenLinks.length > 0) {
result.recommendations.push(`Fix ${result.brokenLinks.length} broken internal links`);
result.recommendations.push('Run documentation build to catch additional link issues');
}
if (result.codeErrors.length > 0) {
result.recommendations.push(`Review and fix ${result.codeErrors.length} code syntax issues`);
}
if (result.success) {
result.recommendations.push('Content validation passed - no critical issues found');
}
// Create summary
result.summary = `Validated ${files.length} files, ${result.linksChecked} links, ${
result.codeBlocksValidated
} code blocks. ${
result.success
? 'PASSED'
: `ISSUES FOUND: ${result.brokenLinks.length + result.codeErrors.length}`
}`;
return result;
} catch (error) {
result.success = false;
result.recommendations.push(`Validation failed: ${error}`);
result.summary = `Validation error: ${error}`;
return result;
}
}
// Helper function to extract links from markdown
function extractLinksFromMarkdown(content: string): string[] {
const linkRegex = /\[([^\]]*)\]\(([^)]+)\)/g;
const links: string[] = [];
let match;
while ((match = linkRegex.exec(content)) !== null) {
links.push(match[2]); // The URL part
}
return links;
}
// Helper function to extract code blocks from markdown
function extractCodeBlocks(content: string): { language: string; code: string }[] {
const codeBlockRegex = /```(\w+)?\n([\s\S]*?)```/g;
const blocks: { language: string; code: string }[] = [];
let match;
while ((match = codeBlockRegex.exec(content)) !== null) {
blocks.push({
language: match[1] || 'text',
code: match[2],
});
}
return blocks;
}