// Command extract service for US1 (T036)
// Extracts shell commands from markdown and classifies risk
import { classifyRisk } from '../core/risk-classify.mjs';
import { ValidationError } from '../utils/errors.mjs';
/**
 * Creates the command-extraction service.
 *
 * The service pulls shell commands out of markdown chunks (fenced code blocks
 * and `$` / `#` prompt lines) and splits them into `safe` and `risky` buckets
 * using the imported `classifyRisk` helper.
 *
 * @param {object} [deps] - Optional dependencies.
 * @param {{ log: Function }} [deps.logger] - Sink for the `commands.extracted`
 *   event; a no-op logger is used when omitted or falsy.
 * @returns {Readonly<{ extractCommands: Function }>} Frozen service object.
 */
export function createCommandExtractService({ logger } = {}) {
  // Logging is optional: fall back to a sink that discards every event.
  const activeLogger = logger || { log: () => {} };

  // One fenced block: the opening fence, an optional info-string line
  // (anything up to the first newline, e.g. "bash" or "shell-session"),
  // then the lazily matched body up to the closing fence.
  const FENCED_BLOCK = /```(?:[^\n`]*\n)?([\s\S]*?)```/g;

  // Prompt marker at the start of an already-trimmed line.
  const LEADING_PROMPT = /^[$#]\s+/;

  // A prompt line in free text. The whitespace classes exclude line
  // terminators so that a lone "$" or "#" cannot swallow the following line.
  const PROMPT_LINE = /^[^\S\r\n\u2028\u2029]*[$#][^\S\r\n\u2028\u2029]+(.+)$/gm;

  /**
   * Extracts commands from every chunk and classifies them by risk.
   *
   * @param {Array<object>} chunks - Chunks exposing `text`, `docId`, `id`
   *   and `heading`; chunks without usable text contribute nothing.
   * @returns {{ safe: Array<object>, risky: Array<object> }} Commands with
   *   their source; risky entries also carry a `rollbackHint`.
   * @throws {ValidationError} When `chunks` is not an array.
   */
  function extractCommands(chunks) {
    if (!Array.isArray(chunks)) {
      throw new ValidationError('chunks array required');
    }

    const allCommands = [];
    for (const chunk of chunks) {
      // Push one by one: spreading a very large array into push() can
      // exceed the engine's argument limit.
      for (const command of extractFromChunk(chunk)) {
        allCommands.push({
          command,
          source: {
            docId: chunk.docId,
            chunkId: chunk.id,
            heading: chunk.heading,
          },
        });
      }
    }

    // The classifier sees all commands as one newline-joined text.
    // NOTE(review): `risks` is used below as a list of strings - confirm
    // against the risk-classify module.
    const combinedText = allCommands.map((entry) => entry.command).join('\n');
    const { risks } = classifyRisk(combinedText);

    const classified = {
      safe: [],
      risky: [],
    };

    for (const entry of allCommands) {
      const haystack = entry.command.toLowerCase();
      // A command is risky when it contains any reported risk marker
      // (case-insensitive substring match).
      const isRisky = risks.some((risk) => haystack.includes(risk.toLowerCase()));
      if (isRisky) {
        classified.risky.push({
          ...entry,
          rollbackHint: 'VERIFY_ROLLBACK_MANUALLY',
        });
      } else {
        classified.safe.push(entry);
      }
    }

    activeLogger.log('commands.extracted', {
      total: allCommands.length,
      safe: classified.safe.length,
      risky: classified.risky.length,
    });

    return classified;
  }

  /**
   * Collects candidate commands from a single chunk.
   *
   * Every non-empty line inside a fenced block is a command (a leading
   * `$ ` / `# ` prompt is stripped), followed by prompt lines found outside
   * fences. Each physical line yields at most one command.
   *
   * @param {object} chunk - Chunk whose `text` is scanned.
   * @returns {string[]} Commands in document order, fenced lines first.
   */
  function extractFromChunk(chunk) {
    const text = chunk?.text;
    // Missing, empty or non-string text contributes no commands.
    if (typeof text !== 'string' || text === '') return [];

    const commands = [];

    for (const [, body] of text.matchAll(FENCED_BLOCK)) {
      for (const rawLine of body.split('\n')) {
        const command = rawLine.trim().replace(LEADING_PROMPT, '');
        if (command) commands.push(command);
      }
    }

    // Fenced regions were handled above; blank them out so their prompt
    // lines are not reported a second time.
    const outsideFences = text.replace(FENCED_BLOCK, '\n');

    // NOTE(review): outside a fence, "# text" may also be a Markdown heading;
    // it is still treated as a root prompt here so commands are not
    // under-reported.
    for (const [, promptBody] of outsideFences.matchAll(PROMPT_LINE)) {
      const command = promptBody.trim();
      if (command) commands.push(command);
    }

    return commands;
  }

  return Object.freeze({ extractCommands });
}
// Default export: a plain object exposing the same factory as the named export.
export default { createCommandExtractService };