import { getPageInstance } from '../browser-manager.js';
export interface SmartSelectorGeneratorArgs {
url?: string;
description: string;
context?: string;
}
export interface ContentClassificationArgs {
url?: string;
categories?: { name: string; keywords: string[] }[];
}
/**
* Smart Selector Generator - AI-powered CSS selector generation (Heuristic)
*/
export async function handleSmartSelectorGenerator(args: SmartSelectorGeneratorArgs) {
const { url, description, context } = args;
try {
const page = getPageInstance();
if (!page) {
throw new Error('Browser not initialized. Call browser_init first.');
}
if (url && page.url() !== url) {
await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
}
const result = await page.evaluate((desc: string, ctx: string) => {
const elements = Array.from(document.querySelectorAll('*'));
const scores: any[] = [];
const keywords = desc.toLowerCase().split(/\s+/);
elements.forEach((el: Element) => {
let score = 0;
const text = el.textContent?.toLowerCase() || '';
const tag = el.tagName.toLowerCase();
const className = el.className || '';
const id = el.id || '';
// Score based on text content matching
keywords.forEach(keyword => {
if (text.includes(keyword)) score += 10;
if (typeof className === 'string' && className.includes(keyword)) score += 5;
if (id.includes(keyword)) score += 5;
});
// Score based on context
if (ctx) {
const contextKeywords = ctx.toLowerCase().split(/\s+/);
contextKeywords.forEach(keyword => {
if (text.includes(keyword)) score += 3;
});
}
// Prefer semantic elements
const semanticTags = ['button', 'input', 'a', 'nav', 'header', 'footer', 'article', 'section'];
if (semanticTags.includes(tag)) score += 2;
if (score > 0) {
// Generate selector
let selector = tag;
if (id) selector = `#${CSS.escape(id)}`;
else if (typeof className === 'string' && className.trim()) selector = `${tag}.${CSS.escape(className.trim().split(/\s+/)[0])}`;
scores.push({ selector, score, text: text.substring(0, 100), element: tag });
}
});
// Sort by score
scores.sort((a, b) => b.score - a.score);
return {
bestMatch: scores[0] || null,
alternatives: scores.slice(1, 6),
totalCandidates: scores.length
};
}, description, context || '');
const resultText = `🤖 Smart Selector Generated\n\nBest Match: ${JSON.stringify(result.bestMatch, null, 2)}\nAlternatives: ${JSON.stringify(result.alternatives, null, 2)}\nTotal Candidates: ${result.totalCandidates}`;
return { content: [{ type: 'text', text: resultText }] };
} catch (error: any) {
return { content: [{ type: 'text', text: `❌ Error: ${error.message}` }], isError: true };
}
}
/**
* Content Classification - Classify webpage content into categories
*/
export async function handleContentClassification(args: ContentClassificationArgs) {
const { url, categories } = args;
try {
const page = getPageInstance();
if (!page) {
throw new Error('Browser not initialized. Call browser_init first.');
}
if (url && page.url() !== url) {
await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
}
const content = await page.evaluate(() => {
return {
title: document.title,
text: document.body.innerText,
metaDescription: document.querySelector('meta[name="description"]')?.getAttribute('content') || '',
metaKeywords: document.querySelector('meta[name="keywords"]')?.getAttribute('content') || '',
headings: Array.from(document.querySelectorAll('h1, h2, h3')).map(h => h.textContent).join(' ')
};
});
const allText = [content.title, content.text, content.metaDescription, content.metaKeywords, content.headings].join(' ').toLowerCase();
// Define default categories if not provided
const defaultCategories = [
{ name: 'E-commerce', keywords: ['shop', 'buy', 'cart', 'price', 'product', 'order', 'checkout'] },
{ name: 'News', keywords: ['news', 'article', 'breaking', 'report', 'journalist', 'story'] },
{ name: 'Blog', keywords: ['blog', 'post', 'author', 'comment', 'share', 'subscribe'] },
{ name: 'Social Media', keywords: ['follow', 'like', 'share', 'post', 'friend', 'profile'] },
{ name: 'Educational', keywords: ['learn', 'course', 'tutorial', 'education', 'study', 'lesson'] },
{ name: 'Entertainment', keywords: ['video', 'movie', 'music', 'game', 'play', 'watch'] },
{ name: 'Business', keywords: ['business', 'company', 'service', 'enterprise', 'solution'] },
{ name: 'Technology', keywords: ['tech', 'software', 'app', 'code', 'developer', 'api'] }
];
const categoriesToUse = categories || defaultCategories;
const scores = categoriesToUse.map((cat: any) => {
// Handle both string categories (from inputs) and object categories (from default)
const isString = typeof cat === 'string';
const name = isString ? cat : cat.name;
let keywords: string[] = [];
if (isString) {
// For simple string categories, use the category name itself as the keyword
keywords = [name.toLowerCase()];
} else {
keywords = Array.isArray(cat.keywords) ? cat.keywords : cat.keywords.split(',');
}
let score = 0;
keywords.forEach((keyword: string) => {
const regex = new RegExp(keyword.trim().toLowerCase(), 'g');
const matches = allText.match(regex);
score += matches ? matches.length : 0;
});
return { category: name, score };
});
scores.sort((a, b) => b.score - a.score);
const match = scores[0];
const totalScore = scores.reduce((sum, s) => sum + s.score, 0) || 1;
const confidence = match.score / totalScore;
const resultText = `✅ Content Classification\n\nPrimary Category: ${match.category} (Score: ${match.score})\nConfidence: ${(confidence * 100).toFixed(2)}%\n\nAll Categories:\n${JSON.stringify(scores.slice(0, 5), null, 2)}`;
return {
content: [{ type: 'text', text: resultText }]
};
} catch (error: any) {
return { content: [{ type: 'text', text: `❌ Error: ${error.message}` }], isError: true };
}
}