/**
* Auto-Flag Content Detection System
* Conservative approach: Flag for review, never auto-delete
*
* Detects potentially problematic content based on:
* - Spam patterns (spam keywords, excessive links, repeated characters)
* - Hate speech (slurs, targeted harassment)
* - Excessive profanity
* - Suspicious patterns (ALL CAPS, excessive punctuation, very short posts)
* - Author signals (new account, negative reputation, empty profile)
*/
import type { UserProfile } from '@/types/forum';
/**
* Aggregate outcome of running the auto-flag checks on one piece of content.
* Produced by `autoFlagContent` and consumed by `getFlagExplanation`.
*/
export interface FlagResult {
/** True when the content should be flagged for review. */
shouldFlag: boolean;
/** Review priority; `'low'` is what is reported when no check triggered. */
priority: 'critical' | 'high' | 'normal' | 'low';
/**
* Empty when no check triggered; otherwise `'Auto-flagged: '` followed by the
* details (or type) of each triggered check. It is populated whenever at
* least one check triggered, even if `shouldFlag` is false.
*/
reason: string;
/** Mean confidence of the triggered checks; 0 when none triggered. */
confidence: number; // 0.0 to 1.0
/** Result of every check that was run, triggered or not. */
checks: CheckResult[];
}
/**
* Outcome of a single detection check (spam, hate speech, profanity, ...).
*/
interface CheckResult {
/** Identifier of the check, e.g. `'spam'`, `'hate_speech'`, `'author_behavior'`. */
type: string;
/** Whether the check's own threshold was exceeded. */
triggered: boolean;
/** Score the check assigned to the content; each check caps it at 1.0. */
confidence: number;
/** Comma-separated list of the signals that fired, when there were any. */
details?: string;
}
// Conservative keyword lists (only clear violations)
/**
* Terms treated as unambiguous hate speech by `checkHateSpeech`.
* Entries are looked up as substrings of the lowercased post, so keep them
* lowercase. The list is intentionally empty in this file.
*/
const HATE_SPEECH_KEYWORDS: string[] = [
// Slurs and clear hate speech
// Note: Implementation should use more comprehensive lists
// This is a minimal example for demonstration
];
/**
 * Patterns whose presence counts as a spam-keyword signal in `checkSpam`.
 *
 * These are deliberately NOT global (`g`): they are only used for yes/no
 * matching, and a global regex keeps `lastIndex` between `.test()` calls,
 * which makes the same text match on one call and miss on the next.
 */
const SPAM_PATTERNS: readonly RegExp[] = [
  /\b(buy now|click here|limited offer|act now)\b/i,
  /\b(viagra|cialis|casino|lottery)\b/i,
  /https?:\/\/[^\s]{3,}/i, // URL pattern
];
/**
* Words counted by `checkProfanity`. Each entry is matched as a whole word,
* case-insensitively. The list is intentionally empty in this file.
*/
const PROFANITY_KEYWORDS: string[] = [
// Excessive profanity detection (more than 3 matches in one post)
// Note: Implementation should use more comprehensive lists
];
/**
 * Runs every content check (plus the author check when an author is given)
 * and folds the outcomes into one flag decision.
 *
 * Decision rules, evaluated in order over the checks that triggered:
 * - highest confidence >= 0.9  -> flag, `critical`
 * - three or more triggered    -> flag, `high`
 * - highest confidence >= 0.7  -> flag, `high`
 * - two or more triggered      -> flag, `normal`
 * - anything else              -> not flagged (priority is left at `normal`)
 *
 * @param content - Raw text of the post to inspect.
 * @param author - Optional author profile; when present its signals are checked too.
 * @returns The decision, the mean confidence of the triggered checks, and every
 *   individual check result. `reason` is filled in whenever at least one check
 *   triggered, even when `shouldFlag` ends up false.
 */
export function autoFlagContent(content: string, author?: UserProfile): FlagResult {
  const checks: CheckResult[] = [
    checkSpam(content),
    checkHateSpeech(content),
    checkProfanity(content),
    checkSuspiciousPatterns(content),
  ];
  if (author) {
    checks.push(checkAuthorBehavior(author));
  }

  const hits = checks.filter((check) => check.triggered);

  // Nothing fired: report a clean result.
  if (hits.length === 0) {
    return { shouldFlag: false, priority: 'low', reason: '', confidence: 0, checks };
  }

  const confidences = hits.map((hit) => hit.confidence);
  const peak = Math.max(...confidences);
  let total = 0;
  for (const value of confidences) {
    total += value;
  }
  const mean = total / hits.length;

  // Conservative: flag only on a strong single signal or several weaker ones.
  let shouldFlag = true;
  let priority: FlagResult['priority'];
  if (peak >= 0.9) {
    priority = 'critical';
  } else if (hits.length >= 3 || peak >= 0.7) {
    priority = 'high';
  } else if (hits.length >= 2) {
    priority = 'normal';
  } else {
    shouldFlag = false;
    priority = 'normal';
  }

  const summary = hits.map((hit) => hit.details || hit.type).join(', ');

  return {
    shouldFlag,
    priority,
    reason: `Auto-flagged: ${summary}`,
    confidence: mean,
    checks,
  };
}
/**
 * Scores content for spam signals.
 *
 * Signals and weights:
 * - more than 3 URLs: +0.3
 * - any `SPAM_PATTERNS` match: +0.2 (counted once)
 * - a run of 6 or more identical characters: +0.1
 *
 * The check triggers only when the total exceeds 0.5, which with these
 * weights means all three signals fired.
 *
 * @param content - Raw post text.
 * @returns A `'spam'` check result; `details` lists the signals that fired.
 */
function checkSpam(content: string): CheckResult {
  let spamScore = 0;
  const details: string[] = [];

  // Excessive links (>3 URLs)
  const urlCount = (content.match(/http[s]?:\/\/[^\s]+/gi) || []).length;
  if (urlCount > 3) {
    spamScore += 0.3;
    details.push(`${urlCount} URLs`);
  }

  // Spam keywords. `search` always scans from the start of the string and
  // leaves the pattern's `lastIndex` untouched, so a pattern carrying the `g`
  // flag cannot leak match state from one call into the next (which `.test()`
  // on a global regex does, causing every other identical post to be missed).
  const hasSpamKeyword = SPAM_PATTERNS.some((pattern) => content.search(pattern) !== -1);
  if (hasSpamKeyword) {
    spamScore += 0.2;
    details.push('spam keywords');
  }

  // Repeated characters (e.g., "!!!!!!" or "??????")
  if (/(.)\1{5,}/.test(content)) {
    spamScore += 0.1;
    details.push('repeated characters');
  }

  const confidence = Math.min(spamScore, 1.0);
  return {
    type: 'spam',
    triggered: confidence > 0.5,
    confidence,
    details: details.length > 0 ? details.join(', ') : undefined,
  };
}
/**
 * Phrases treated as targeted harassment.
 *
 * The leading `\b` on the last two keeps them from firing inside ordinary
 * words and phrases ("upskill yourself", "huskys"). None of the patterns is
 * global, so `.test()` carries no state between calls.
 */
const HARASSMENT_PATTERNS: readonly RegExp[] = [
  /you (are|is) (a |an )?(stupid|idiot|moron)/i,
  /\bkill yourself/i,
  /\bkys\b/i,
];

/**
 * Checks for hate speech.
 * Conservative: only clear slurs and targeted harassment are scored.
 *
 * - Any `HATE_SPEECH_KEYWORDS` entry found in the post gives confidence 1.0.
 * - Any harassment phrase gives confidence of at least 0.8.
 *
 * @param content - Raw post text.
 * @returns A `'hate_speech'` check result, triggered when confidence exceeds 0.7.
 */
function checkHateSpeech(content: string): CheckResult {
  let hateScore = 0;
  const details: string[] = [];

  // Keyword lookup is a case-insensitive substring match. Empty entries are
  // skipped because every string "includes" the empty string, which would
  // otherwise flag every post at maximum confidence.
  const lowerContent = content.toLowerCase();
  const hasKeyword = HATE_SPEECH_KEYWORDS.some((keyword) => {
    const needle = keyword.toLowerCase();
    return needle.length > 0 && lowerContent.includes(needle);
  });
  if (hasKeyword) {
    hateScore = 1.0; // Instant high confidence for clear slurs
    details.push('hate speech detected');
  }

  // Targeted harassment (e.g., "you are [insult]")
  if (HARASSMENT_PATTERNS.some((pattern) => pattern.test(content))) {
    hateScore = Math.max(hateScore, 0.8);
    details.push('targeted harassment');
  }

  return {
    type: 'hate_speech',
    triggered: hateScore > 0.7,
    confidence: hateScore,
    details: details.length > 0 ? details.join(', ') : undefined,
  };
}
/**
 * Checks for excessive profanity.
 *
 * Counts whole-word, case-insensitive occurrences of every
 * `PROFANITY_KEYWORDS` entry and triggers only when there are more than 3.
 * When triggered, confidence is `count / 5` capped at 1.0 (4 -> 0.8, 5+ -> 1.0).
 *
 * @param content - Raw post text.
 * @returns A `'profanity'` check result.
 */
function checkProfanity(content: string): CheckResult {
  const lowerContent = content.toLowerCase();
  let profanityCount = 0;

  for (const word of PROFANITY_KEYWORDS) {
    // An empty entry would build /\b\b/g, which matches at every word
    // boundary and wildly inflates the count.
    if (word.length === 0) {
      continue;
    }
    // Escape regex metacharacters so entries such as "f*ck" are matched
    // literally instead of being read as regex syntax (or throwing).
    const escaped = word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const matches = lowerContent.match(new RegExp(`\\b${escaped}\\b`, 'gi'));
    if (matches) {
      profanityCount += matches.length;
    }
  }

  // Conservative: only flag if excessive (>3 words)
  const triggered = profanityCount > 3;
  const confidence = triggered ? Math.min(profanityCount / 5, 1.0) : 0;

  return {
    type: 'profanity',
    triggered,
    confidence,
    details: triggered ? `${profanityCount} profane words` : undefined,
  };
}
/**
 * Checks for stylistic patterns that often accompany low-quality posts.
 *
 * Signals and weights:
 * - more than 20 cased letters of which over 70% are uppercase: +0.3
 * - more than 3 separate runs of three or more `!`/`?`: +0.2
 * - fewer than 10 characters after trimming: +0.1
 *
 * The check triggers when the total exceeds 0.4; with these weights that
 * requires at least the caps and punctuation signals together.
 *
 * Letter case is decided through the string case mappings rather than an
 * ASCII character class, so shouting in other cased scripts (Cyrillic, Greek,
 * accented Latin, ...) is detected too. Characters without case (digits,
 * punctuation, CJK, ...) are ignored, exactly as before for ASCII text.
 *
 * @param content - Raw post text.
 * @returns A `'suspicious_patterns'` check result.
 */
function checkSuspiciousPatterns(content: string): CheckResult {
  let suspicionScore = 0;
  const details: string[] = [];

  // ALL CAPS (>70% uppercase, >20 letters)
  let casedLetters = 0;
  let uppercaseLetters = 0;
  for (let i = 0; i < content.length; i++) {
    const ch = content.charAt(i);
    const lower = ch.toLowerCase();
    if (lower === ch.toUpperCase()) {
      continue; // no case distinction: not a cased letter
    }
    casedLetters++;
    if (ch !== lower) {
      uppercaseLetters++;
    }
  }
  if (casedLetters > 20 && uppercaseLetters / casedLetters > 0.7) {
    suspicionScore += 0.3;
    details.push('excessive caps');
  }

  // Excessive punctuation: counts runs such as "!!!" or "???", not characters
  const punctuationRuns = (content.match(/[!?]{3,}/g) || []).length;
  if (punctuationRuns > 3) {
    suspicionScore += 0.2;
    details.push('excessive punctuation');
  }

  // Very short low-effort posts (<10 characters)
  if (content.trim().length < 10) {
    suspicionScore += 0.1;
    details.push('very short content');
  }

  const confidence = Math.min(suspicionScore, 1.0);
  return {
    type: 'suspicious_patterns',
    triggered: confidence > 0.4,
    confidence,
    details: details.length > 0 ? details.join(', ') : undefined,
  };
}
/**
 * Scores an author profile for signals that tend to accompany abuse.
 *
 * Signals and weights:
 * - account created less than 24 hours ago: +0.2
 * - negative reputation score: +0.3
 * - no display name, bio or avatar: +0.1
 *
 * The check triggers when the total exceeds 0.5.
 *
 * @param author - Profile of the content's author.
 * @returns An `'author_behavior'` check result; `details` lists the signals that fired.
 */
function checkAuthorBehavior(author: UserProfile): CheckResult {
  const msPerHour = 1000 * 60 * 60;
  const ageInHours = (Date.now() - new Date(author.created_at).getTime()) / msPerHour;
  const hasAnyProfileInfo = Boolean(author.display_name || author.bio || author.avatar_url);

  // [fired, weight, label], evaluated in a fixed order.
  const signals: Array<[boolean, number, string]> = [
    [ageInHours < 24, 0.2, 'new account'],
    [author.reputation_score < 0, 0.3, 'negative reputation'],
    [!hasAnyProfileInfo, 0.1, 'no profile'],
  ];

  let score = 0;
  const labels: string[] = [];
  for (const [fired, weight, label] of signals) {
    if (fired) {
      score += weight;
      labels.push(label);
    }
  }

  const confidence = Math.min(score, 1.0);
  return {
    type: 'author_behavior',
    triggered: confidence > 0.5,
    confidence,
    details: labels.length > 0 ? labels.join(', ') : undefined,
  };
}
/**
 * Renders a flag result as a one-line, human-readable summary.
 *
 * @param result - Result produced by `autoFlagContent`.
 * @returns `'Content passed all checks'` when the result is not flagged;
 *   otherwise the priority, the confidence as a rounded percentage, and the
 *   names of the triggered checks with underscores shown as spaces.
 */
export function getFlagExplanation(result: FlagResult): string {
  if (result.shouldFlag) {
    const labels: string[] = [];
    for (const check of result.checks) {
      if (check.triggered) {
        labels.push(check.type.split('_').join(' '));
      }
    }
    const percent = Math.round(result.confidence * 100);
    return `Flagged for review (${result.priority} priority, ${percent}% confidence): ${labels.join(', ')}`;
  }
  return 'Content passed all checks';
}