/**
* Fact extraction from content
*/
import type { MemoryType } from '../types/index.js';
/**
 * Heuristically decide whether content reads like a factual statement.
 *
 * After trimming, content is rejected when it is shorter than five
 * characters, is phrased as a question, opens with a polite request, opens
 * with a greeting or sign-off, or opens with hedging/filler dialogue.
 * Anything that passes every check is treated as a fact.
 *
 * @param content - Raw text to inspect.
 * @returns `true` when none of the rejection heuristics match.
 */
export function isFact(content: string): boolean {
  const trimmed = content.trim();

  // Too short to carry a meaningful statement.
  if (trimmed.length < 5) {
    return false;
  }

  const nonFactPatterns: RegExp[] = [
    // Questions: an auxiliary verb either first ("Is it ...?") or right after
    // a leading word ("What is ...?"), followed later by a question mark.
    /^(?:\w+\s+)?(?:is|are|do|does|can|could|would|should|will|has|have)\s+.*\?/i,
    // Commands / polite requests.
    /^(?:please|kindly|can you|could you|would you)\s+/i,
    // Greetings and sign-offs. The trailing \b keeps words that merely start
    // with the same letters ("History", "Thanksgiving") from being rejected.
    /^(?:hello|hi|hey|goodbye|bye|thanks|thank you)\b/i,
    // Temporary dialogue / hedging openers; \b avoids e.g. "Maybelline".
    /^(?:let me|i think|maybe|perhaps|possibly)\b/i,
  ];

  // Seems like a fact when no rejection pattern applies.
  return !nonFactPatterns.some((pattern) => pattern.test(trimmed));
}
/**
 * Classify content as a relationship, an entity description, or a plain fact.
 *
 * - `'relationship'`: the content contains a relationship phrase (dependency,
 *   reporting line, causation, association, membership) and at least two
 *   entities were supplied.
 * - `'entity'`: the content looks like a description of a single thing and
 *   exactly one entity was supplied.
 * - `'fact'`: everything else.
 *
 * @param content - Text to classify.
 * @param entities - Entities already extracted from the content; only the
 *   count is used here.
 * @returns The inferred memory type.
 */
export function classifyMemoryType(content: string, entities: string[]): MemoryType {
  // Match against the original casing: the "Name - description" pattern is
  // case-sensitive and could never match a lowercased copy. Trimming lets the
  // ^-anchored patterns tolerate leading whitespace.
  const text = content.trim();

  // Relationship indicators
  const relationshipPatterns = [
    /\b(depends on|requires|needs|uses|extends|implements)\b/i,
    /\b(reports to|works with|manages|leads|owns)\b/i,
    /\b(caused by|results in|leads to|triggers)\b/i,
    /\b(related to|associated with|connected to|linked to)\b/i,
    /\b(is a|is an|part of|member of|belongs to)\b/i,
  ];
  const hasRelationship = relationshipPatterns.some((pattern) => pattern.test(text));
  if (hasRelationship && entities.length >= 2) {
    return 'relationship';
  }

  // Entity indicators (description of a single thing)
  const entityPatterns = [
    /^([A-Z][a-zA-Z\s]+)\s+-\s+/, // "Name - description"
    /^([A-Z][a-zA-Z\s]+)\s+is\s+(a|an)\s+/i, // "Name is a..."
    /\b(person|organization|company|team|project|tool|library|framework)\b/i,
  ];
  const hasEntityIndicator = entityPatterns.some((pattern) => pattern.test(text));
  if (hasEntityIndicator && entities.length === 1) {
    return 'entity';
  }

  // Default to fact
  return 'fact';
}
/**
 * Report whether content contains wording that signals a user preference.
 *
 * Three case-insensitive cues are checked in turn: preference verbs
 * ("prefer", "like", "want", ...), first-person habit phrases
 * ("my choice", "i usually", ...), and a frequency adverb directly followed
 * by "use(s)" or "do(es)".
 *
 * @param content - Text to inspect.
 * @returns `true` as soon as any cue is found, otherwise `false`.
 */
export function isUserPreference(content: string): boolean {
  // Preference verbs.
  if (/\b(prefer|prefers|like|likes|want|wants|choose|chooses|favor|favors)\b/i.test(content)) {
    return true;
  }

  // First-person statements of habit or choice.
  if (/\b(my preference|my choice|i use|i usually|i typically)\b/i.test(content)) {
    return true;
  }

  // Frequency adverb immediately followed by a usage verb.
  return /\b(always|never|usually|typically|generally)\s+(use|uses|do|does)\b/i.test(content);
}
/**
 * Distinguish explicit statements from implicit (hedged) ones.
 *
 * Content counts as explicit only when it contains definite language
 * (copulas and modals such as "is"/"must", certainty adverbs, or universal
 * quantifiers) and contains no hedging language (possibility words,
 * appearance verbs, or opinion phrases). All matching is case-insensitive.
 *
 * @param content - Text to inspect.
 * @returns `true` when definite wording is present and hedging is absent.
 */
export function isExplicit(content: string): boolean {
  const matchesAny = (patterns: RegExp[]): boolean =>
    patterns.some((re) => re.test(content));

  // Any hedge disqualifies the statement, whatever else it contains.
  const hedged = matchesAny([
    /\b(might|may|could|possibly|perhaps|maybe|probably)\b/i,
    /\b(seems|appears|looks like|suggests|indicates)\b/i,
    /\b(i think|i believe|i guess|in my opinion)\b/i,
  ]);
  if (hedged) {
    return false;
  }

  // Without hedging, the statement is explicit if it uses definite language.
  return matchesAny([
    /\b(is|are|was|were|will be|has|have|must|shall)\b/i,
    /\b(definitely|certainly|absolutely|clearly|obviously)\b/i,
    /\b(always|never|every|all|none)\b/i,
  ]);
}
/**
 * Calculate a heuristic complexity score for content, in the range 0-1.
 *
 * The score is the sum of four factors, in steps of 0.1:
 * - character length: >100 adds 0.3, >50 adds 0.2, >20 adds 0.1
 * - word count: >20 adds 0.2, >10 adds 0.1
 * - technical terms (capitalized words, acronyms, `obj.method()` patterns):
 *   >5 adds 0.3, >2 adds 0.2, >0 adds 0.1
 * - presence of any number, version string or ISO-style date adds 0.2
 *
 * @param content - Text to score.
 * @returns A score between 0 and 1 inclusive.
 */
export function calculateComplexity(content: string): number {
  // Accumulate in integer tenths so that sums such as 0.1 + 0.2 do not pick
  // up floating-point noise (0.30000000000000004).
  let tenths = 0;

  // Length factor (longer = more complex)
  const length = content.length;
  if (length > 100) tenths += 3;
  else if (length > 50) tenths += 2;
  else if (length > 20) tenths += 1;

  // Word count factor. Counting runs of non-whitespace avoids the empty
  // tokens that split() yields for leading/trailing whitespace or an empty
  // string.
  const words = (content.match(/\S+/g) || []).length;
  if (words > 20) tenths += 2;
  else if (words > 10) tenths += 1;

  // Technical terms (capitalized words, acronyms, technical patterns)
  const technicalTerms = content.match(/\b[A-Z][a-z]+|[A-Z]{2,}|[a-z]+\.[a-z]+\(\)/g) || [];
  if (technicalTerms.length > 5) tenths += 3;
  else if (technicalTerms.length > 2) tenths += 2;
  else if (technicalTerms.length > 0) tenths += 1;

  // Numbers, dates, specific details
  const specifics = content.match(/\b\d+|v\d+\.\d+|\d{4}-\d{2}-\d{2}\b/g) || [];
  if (specifics.length > 0) tenths += 2;

  return Math.min(tenths, 10) / 10;
}
/**
 * Normalize content for storage.
 *
 * Strips surrounding whitespace, collapses every run of whitespace
 * (including newlines and tabs) into a single space, and converts curly
 * double/single quotation marks to their ASCII equivalents.
 *
 * @param content - Raw text.
 * @returns The normalized text.
 */
export function normalizeContent(content: string): string {
  return content
    .trim()
    // \s already covers newlines, so no separate newline pass is needed.
    .replace(/\s+/g, ' ')
    // Curly double quotes (U+201C, U+201D) -> straight double quote.
    .replace(/[\u201C\u201D]/g, '"')
    // Curly single quotes / apostrophes (U+2018, U+2019) -> straight apostrophe.
    .replace(/[\u2018\u2019]/g, "'");
}
/**
 * Outcome of validating content before storage, as returned by
 * `validateContent`.
 */
export interface ValidationResult {
/** `true` when no errors were recorded; warnings do not affect this flag. */
valid: boolean;
/** Messages for problems that make the content invalid (e.g. length limits). */
errors: string[];
/** Advisory messages that do not make the content invalid. */
warnings: string[];
}
/**
 * Validate content before storage.
 *
 * Errors (which make the result invalid):
 * - fewer than 5 characters once surrounding whitespace is removed
 * - more than 10,000 characters
 *
 * Warnings (advisory only):
 * - the content does not pass `isFact`
 * - type `'entity'`: the content does not start with a capital letter
 * - type `'relationship'`: no connection-like word is present
 *
 * @param content - Text to validate.
 * @param type - Memory type the content is going to be stored as.
 * @returns Validity flag plus the collected error and warning messages.
 */
export function validateContent(content: string, type: MemoryType): ValidationResult {
  const errors: string[] = [];
  const warnings: string[] = [];

  // Surrounding whitespace is ignored for the minimum-length and
  // capital-letter checks, matching how isFact trims its input; otherwise
  // whitespace-only content could count as long enough.
  const trimmed = content.trim();

  // Check length
  if (trimmed.length < 5) {
    errors.push('Content too short (minimum 5 characters)');
  }
  if (content.length > 10000) {
    errors.push('Content too long (maximum 10,000 characters)');
  }

  // Check if it's actually a fact
  if (!isFact(content)) {
    warnings.push('Content may not be a factual statement');
  }

  // Type-specific validation
  if (type === 'entity' && !/^[A-Z]/.test(trimmed)) {
    // Entities should have a clear name
    warnings.push('Entity content should start with a capital letter');
  }

  if (type === 'relationship') {
    // Relationships should mention a connection; each stem may carry any
    // word-character suffix ("depends", "linked", ...).
    const hasConnection =
      /\b(depend|require|need|use|extend|implement|report|work|manage|lead|own|cause|result|trigger|relate|associate|connect|link|is|part|member|belong)\w*\b/i.test(
        content,
      );
    if (!hasConnection) {
      warnings.push('Relationship content should describe a connection');
    }
  }

  return {
    valid: errors.length === 0,
    errors,
    warnings,
  };
}