/**
* Alignment Detector
*
* Task 2.2: Alignment Detector
* Constraint: Blind execution → Alignment verification (contradiction/agnostic/aligned)
*
* Witness Outcome: Query "What do you think about rm -rf /?" → "Contradiction: violates RESOURCE_STEWARDSHIP"
*
* Acceptance Criteria:
* - [x] Tool analyzes proposed action against constraints
* - [x] Returns: contradiction | agnostic | aligned
* - [x] Contradiction blocks execution
* - [x] Aligned proceeds without approval
*/
/**
 * Verdict produced by an alignment check.
 *
 * As returned by `AlignmentDetector.checkAlignment` in this file:
 * - 'contradiction' is paired with the 'deny' action,
 * - 'agnostic' is paired with the 'request_approval' action,
 * - 'aligned' is paired with the 'proceed' action.
 */
export type AlignmentResult = 'aligned' | 'agnostic' | 'contradiction';
/**
 * Outcome of checking a single proposed action.
 */
export interface AlignmentCheck {
/** Classification of the action (contradiction / agnostic / aligned). */
alignment: AlignmentResult;
/** Human-readable explanation; the detector in this file omits it for 'aligned' results. */
reason?: string;
/** What the caller should do next with the proposed action. */
action: 'proceed' | 'request_approval' | 'deny';
// The detector in this file reports 999 here for contradictions, i.e. a level no approval can satisfy.
requiredLevel: number; // M4: Permission level required (1=read, 2=write, 3=execute)
}
/**
 * A hard rule that a proposed action must not violate.
 */
export interface Constraint {
/** Identifier used in log lines and in the `reason` of a denied check. */
name: string;
/** Short explanation appended to the `reason` of a denied check. */
description: string;
/**
 * Predicate evaluated for every proposed action.
 * Return `true` when the action VIOLATES this constraint (the detector then
 * denies it); return `false` when the constraint has no objection.
 */
check: (action: string, args?: any) => boolean;
}
/**
 * Alignment Detector
 *
 * Screens a proposed action (an action name plus optional arguments) against
 * a list of hard constraints and classifies it as:
 *
 * - `contradiction` - a constraint matched; the action is denied (level 999).
 * - `agnostic`      - no constraint matched, but the action name is sensitive;
 *                     approval must be requested.
 * - `aligned`       - nothing matched; the action may proceed.
 *
 * Diagnostics are written with `console.error`.
 */
export class AlignmentDetector {
  /** Level 3 name fragments: execute and administrative operations. */
  private static readonly EXECUTE_PATTERNS: readonly string[] = [
    'execute', 'run', 'start', 'stop', 'restart', 'kill', 'spawn', 'delete-resource',
  ];

  /**
   * Level 2 name fragments: write operations.
   * 'delete' belongs here: without it a plain "delete-*" action was flagged
   * for approval but reported at level 1 (read).
   */
  private static readonly WRITE_PATTERNS: readonly string[] = [
    'write', 'create', 'update', 'modify', 'delete', 'remove', 'insert', 'upsert',
  ];

  /** Name fragments that make an otherwise unobjectionable action require approval. */
  private static readonly SENSITIVE_ACTIONS: readonly string[] = [
    'write',
    'delete',
    'execute',
    'modify',
    'create',
    'update',
  ];

  /** RESOURCE_STEWARDSHIP: destructive filesystem operations. */
  private static readonly DESTRUCTIVE_PATTERNS: readonly RegExp[] = [
    /rm\s+-rf\s+\//, // rm -rf /
    /format\s+c:/i, // format c:
    /del\s+\/s/i, // del /s
    /destroy/i,
    /wipe/i,
  ];

  /** PRIVACY_PRESERVATION: attempts to access sensitive data. */
  private static readonly SENSITIVE_DATA_PATTERNS: readonly RegExp[] = [
    /password/i,
    /secret/i,
    /token/i,
    /credential/i,
    /private[_-]?key/i,
  ];

  /** PERMISSION_BOUNDARY: privilege escalation attempts. */
  private static readonly ESCALATION_PATTERNS: readonly RegExp[] = [
    /sudo/i,
    /su\s+root/i,
    /chmod\s+777/i,
    /chown\s+root/i,
  ];

  private constraints: Constraint[];

  /**
   * @param constraints Constraints to enforce. When omitted, the default set
   *   (RESOURCE_STEWARDSHIP, PRIVACY_PRESERVATION, PERMISSION_BOUNDARY) is used.
   *   The array is copied, so later `addConstraint` calls never mutate the
   *   caller's array.
   */
  constructor(constraints?: Constraint[]) {
    this.constraints = constraints ? [...constraints] : this.getDefaultConstraints();
  }

  /**
   * Check if an action aligns with constraints.
   * M4: Now returns required permission level.
   *
   * Constraints are evaluated in insertion order and the first violated one
   * determines the `reason`. An exception thrown by a constraint's `check`
   * propagates to the caller.
   *
   * @param action Name of the proposed action.
   * @param args   Optional arguments of the action; passed to every constraint.
   * @returns The verdict, the follow-up the caller should take, and the
   *   permission level the action requires.
   */
  checkAlignment(action: string, args?: any): AlignmentCheck {
    // Check for contradictions (violates hard constraints)
    const violated = this.constraints.find(constraint => constraint.check(action, args));
    if (violated) {
      console.error(`[AlignmentDetector] Contradiction: ${action} violates ${violated.name}`);
      return {
        alignment: 'contradiction',
        reason: `Violates ${violated.name}: ${violated.description}`,
        action: 'deny',
        requiredLevel: 999, // M4: Contradictions cannot be approved
      };
    }

    // M4: Determine required permission level
    const requiredLevel = this.determineRequiredLevel(action);

    // Check if action requires elevated permissions (agnostic)
    if (this.requiresApproval(action)) {
      console.error(`[AlignmentDetector] Agnostic: ${action} requires level ${requiredLevel}`);
      return {
        alignment: 'agnostic',
        reason: `Action requires permission level ${requiredLevel}`,
        action: 'request_approval',
        requiredLevel,
      };
    }

    // Action is aligned with constraints
    console.error(`[AlignmentDetector] Aligned: ${action} proceeds (level ${requiredLevel})`);
    return {
      alignment: 'aligned',
      action: 'proceed',
      requiredLevel,
    };
  }

  /**
   * M4: Determine required permission level for action
   *
   * IMPORTANT: This method infers permission levels from action names using
   * case-insensitive substring matching. Tool developers must follow naming
   * conventions for correct level assignment:
   *
   * Level 1 (read): anything that matches neither list below (default)
   * Level 2 (write): write, create, update, modify, delete, remove, insert, upsert
   * Level 3 (execute): execute, run, start, stop, restart, kill, spawn, delete-resource
   *
   * Level 3 is tested first, so "delete-resource" is level 3 while any other
   * "delete" is level 2.
   *
   * TODO(M5): Replace with explicit level declarations in tool interface.
   * Multi-tool deployment requires tools to declare action levels explicitly
   * to avoid false positives (e.g., "execute-query" shouldn't be level 3).
   */
  private determineRequiredLevel(action: string): number {
    const actionLower = action.toLowerCase();
    if (AlignmentDetector.EXECUTE_PATTERNS.some(pattern => actionLower.includes(pattern))) {
      return 3;
    }
    if (AlignmentDetector.WRITE_PATTERNS.some(pattern => actionLower.includes(pattern))) {
      return 2;
    }
    // Level 1: Read operations (default)
    return 1;
  }

  /**
   * Check if action requires approval based on sensitivity, i.e. whether the
   * lower-cased action name contains one of the sensitive fragments.
   */
  private requiresApproval(action: string): boolean {
    const actionLower = action.toLowerCase();
    return AlignmentDetector.SENSITIVE_ACTIONS.some(sensitive => actionLower.includes(sensitive));
  }

  /**
   * Flatten an action and its arguments into one searchable text.
   *
   * The action name comes first, followed by every object key and every
   * primitive value found in `args` (depth-first, in iteration order), joined
   * with newlines. Because the constraint patterns use `\s+` between words, a
   * command split across an argv-style array (['rm', '-rf', '/']) is matched
   * just like the single string 'rm -rf /'.
   *
   * Already-visited objects are skipped, so circular structures terminate;
   * nothing is serialised with JSON.stringify, so BigInt values are fine too.
   */
  private static flattenToText(action: string, args?: any): string {
    const fragments: string[] = [action];
    const visited = new Set<object>();

    const walk = (value: unknown): void => {
      if (value == null) {
        return;
      }
      if (typeof value === 'string') {
        fragments.push(value);
        return;
      }
      if (typeof value === 'number' || typeof value === 'boolean' || typeof value === 'bigint') {
        fragments.push(String(value));
        return;
      }
      if (typeof value !== 'object' || visited.has(value)) {
        return; // functions/symbols carry no text; visited objects would loop
      }
      visited.add(value);
      if (Array.isArray(value)) {
        for (const item of value) {
          walk(item);
        }
        return;
      }
      for (const [key, nested] of Object.entries(value)) {
        fragments.push(key);
        walk(nested);
      }
    };

    walk(args);
    return fragments.join('\n');
  }

  /** True when any pattern matches the flattened action + arguments text. */
  private static matchesAny(patterns: readonly RegExp[], action: string, args?: any): boolean {
    const text = AlignmentDetector.flattenToText(action, args);
    return patterns.some(pattern => pattern.test(text));
  }

  /**
   * Default constraint set
   *
   * These constraints define hard boundaries that cannot be crossed. Every
   * constraint inspects BOTH the action name and the full contents of `args`,
   * so a forbidden command cannot slip through inside an object argument, and
   * a forbidden action name cannot be masked by passing a string argument.
   */
  private getDefaultConstraints(): Constraint[] {
    return [
      {
        name: 'RESOURCE_STEWARDSHIP',
        description: 'Protect system resources from destructive operations',
        check: (action: string, args?: any) =>
          AlignmentDetector.matchesAny(AlignmentDetector.DESTRUCTIVE_PATTERNS, action, args),
      },
      {
        name: 'PRIVACY_PRESERVATION',
        description: 'Protect sensitive user information',
        check: (action: string, args?: any) =>
          AlignmentDetector.matchesAny(AlignmentDetector.SENSITIVE_DATA_PATTERNS, action, args),
      },
      {
        name: 'PERMISSION_BOUNDARY',
        description: 'Respect permission levels and access control',
        check: (action: string, args?: any) =>
          AlignmentDetector.matchesAny(AlignmentDetector.ESCALATION_PATTERNS, action, args),
      },
    ];
  }

  /**
   * Add custom constraint. It is evaluated after all existing constraints.
   */
  addConstraint(constraint: Constraint): void {
    this.constraints.push(constraint);
    console.error(`[AlignmentDetector] Added constraint: ${constraint.name}`);
  }

  /**
   * Get all active constraints (a shallow copy; mutating the returned array
   * does not affect the detector).
   */
  getConstraints(): Constraint[] {
    return [...this.constraints];
  }

  /**
   * Serialize state for hot-reload (stateless, returns null)
   */
  getState(): null {
    return null;
  }

  /**
   * Create from state after hot-reload (stateless, creates fresh instance).
   * The fresh instance carries the default constraints only.
   */
  static fromState(_state: any): AlignmentDetector {
    return new AlignmentDetector();
  }
}