Skip to main content
Glama
navigation.service.ts•16.1 kB
import type { BrowsingSessionService } from './browsing-session.service.js';
import type { EnhancedContentExtractor } from './enhanced-content-extractor.service.js';
import type {
  NavigationOptions,
  ExtractedLink,
  BrowsingPath,
  FollowLinksParams,
  FollowLinksResult,
  EnhancedWebpageContent
} from '../types/enhanced-types.js';

/** An extracted link that has been assigned a numeric relevance score. */
type ScoredLink = ExtractedLink & { relevanceScore: number };

/**
 * NavigationService
 *
 * Provides contextual navigation capabilities that simulate how humans browse the web.
 * This service can follow relevant links, navigate back and forth between pages,
 * and identify relationships between content across different pages.
 *
 * Page content comes from the injected {@link EnhancedContentExtractor}; visits are
 * recorded through the injected {@link BrowsingSessionService}. Diagnostic output is
 * written with `console.error`.
 */
export class NavigationService {
  private readonly browsingSessionService: BrowsingSessionService;
  private readonly contentExtractor: EnhancedContentExtractor;

  /**
   * @param browsingSessionService Session store used to record visits and query paths.
   * @param contentExtractor Extractor used to fetch page content and links.
   */
  constructor(
    browsingSessionService: BrowsingSessionService,
    contentExtractor: EnhancedContentExtractor
  ) {
    this.browsingSessionService = browsingSessionService;
    this.contentExtractor = contentExtractor;
  }

  /**
   * Follow links from a starting URL based on relevance to keywords.
   *
   * The start page is always extracted and recorded. `depth` counts page levels
   * including the start page: with `depth` of 1 (or `maxLinksToFollow` of 0) only the
   * start page is returned, with `depth` of 2 the selected links of the start page are
   * visited as well, and so on.
   *
   * @param sessionId Identifier passed to the session service for every recorded visit.
   * @param params Start URL, optional keywords, link/depth limits and the
   *   `stayOnDomain` switch.
   * @returns The visited pages (start page first), the navigation path in visit order
   *   and topics derived from the visited page titles.
   * @throws Whatever the extractor or the session service throws for the start page,
   *   and `TypeError` from `new URL(url)` when `stayOnDomain` is set and `url` is not
   *   an absolute URL. Failures on followed links are logged and skipped.
   */
  public async followLinks(
    sessionId: string,
    params: FollowLinksParams
  ): Promise<FollowLinksResult> {
    const { url, keywords, maxLinksToFollow, depth, stayOnDomain } = params;

    // Make sure a session exists before visits are recorded.
    // NOTE(review): the session returned by createSession is not used and every later
    // call still passes the caller's `sessionId` — confirm that createSession registers
    // the session under an id the caller can know.
    if (!this.browsingSessionService.getSession(sessionId)) {
      this.browsingSessionService.createSession(keywords?.join(', '));
    }

    // Extract the start page directly, then record the visit in the session.
    const startContent = await this.contentExtractor.extractEnhancedContent(url);
    await this.browsingSessionService.visitUrl(sessionId, url);

    const result: FollowLinksResult = {
      startUrl: url,
      pagesVisited: [{
        url,
        title: startContent.title,
        relevance: 1.0,
        summary: startContent.summary || this.generatePageSummary(startContent)
      }],
      navigationPath: [url],
      relatedTopics: []
    };

    if (depth <= 0 || maxLinksToFollow <= 0) {
      return result;
    }

    // Reuse the links that were extracted together with the start page.
    const extractedLinks: ExtractedLink[] = startContent.links || [];
    console.error(`Found ${extractedLinks.length} links on starting page`);

    if (extractedLinks.length === 0) {
      console.error('No links found on starting page, returning early');
      return result;
    }

    console.error(`Sample links: ${extractedLinks.slice(0, 5).map((l: ExtractedLink) => `${l.text} -> ${l.url}`).join(', ')}`);

    const relevanceThreshold = 0.1; // Low on purpose, to be less restrictive
    const allowedDomains = stayOnDomain ? [new URL(url).hostname] : [];
    const options: NavigationOptions = {
      followLinks: true,
      maxDepth: depth,
      relevanceThreshold,
      filterByKeywords: keywords,
      excludeDomains: [],
      includeDomains: allowedDomains
    };

    // Resolve relative URLs against the start page and score every link.
    const scoredLinks = extractedLinks.map((link: ExtractedLink) =>
      this.scoreLink(link, url, keywords || [], startContent)
    );

    console.error(`Scored ${scoredLinks.length} links. Scores: ${scoredLinks.slice(0, 10).map(l => `${l.text}:${l.relevanceScore.toFixed(2)}`).join(', ')}`);

    let filteredLinks = scoredLinks.filter(link => link.relevanceScore >= relevanceThreshold);
    console.error(`After relevance filtering (>=${relevanceThreshold}): ${filteredLinks.length} links`);

    if (allowedDomains.length > 0) {
      filteredLinks = filteredLinks.filter(link => this.isAllowedHost(link.url, allowedDomains));
    }

    if (keywords && keywords.length > 0) {
      filteredLinks = filteredLinks.filter(link => this.mentionsAnyKeyword(link, keywords));
    }

    // Highest score first; Array.prototype.sort is stable, so ties keep page order.
    const sortedLinks = [...filteredLinks].sort((a, b) => b.relevanceScore - a.relevanceScore);

    console.error(`After filtering: ${sortedLinks.length} relevant links (scores: ${sortedLinks.slice(0, 5).map(l => l.relevanceScore.toFixed(2)).join(', ')})`);

    // Only follow the top N links
    const linksToFollow = sortedLinks.slice(0, maxLinksToFollow);

    // The start page counts as visited so links back to it are not followed.
    const visitedUrls = new Set<string>([url]);

    await this.followLinksRecursive(
      sessionId,
      linksToFollow,
      visitedUrls,
      depth - 1,
      options,
      result,
      url
    );

    // Extract related topics from all visited pages
    result.relatedTopics = this.extractRelatedTopics(result.pagesVisited);

    return result;
  }

  /**
   * Recursively follow links up to the specified depth.
   *
   * Links are processed one after another. Each successfully extracted page is added
   * to `visitedUrls`, appended to `result.pagesVisited` / `result.navigationPath`, its
   * own links are followed (when more depth remains) and only then is the visit
   * recorded in the browsing session. Any error for a link is logged and the loop
   * continues with the next link.
   *
   * @param sessionId Session identifier forwarded to the session service.
   * @param links Candidate links for this level; `url` is expected to be absolute.
   * @param visitedUrls URLs already extracted during this crawl; mutated.
   * @param remainingDepth Number of levels still allowed; nothing happens at 0 or less.
   * @param options Keyword, domain and threshold filters applied to child links.
   * @param result Accumulator that receives every visited page; mutated.
   * @param parentUrl URL of the page on which `links` were found.
   */
  private async followLinksRecursive(
    sessionId: string,
    links: ExtractedLink[],
    visitedUrls: Set<string>,
    remainingDepth: number,
    options: NavigationOptions,
    result: FollowLinksResult,
    parentUrl: string
  ): Promise<void> {
    console.error(`Following ${links.length} links at depth ${remainingDepth}`);

    if (remainingDepth <= 0 || links.length === 0) {
      return;
    }

    const keywords = options.filterByKeywords ?? [];
    const allowedDomains = options.includeDomains ?? [];
    const threshold = options.relevanceThreshold ?? 0;

    for (const link of links) {
      if (visitedUrls.has(link.url)) {
        continue;
      }

      try {
        // Get content and links from this specific page.
        const content = await this.contentExtractor.extractEnhancedContent(link.url);

        visitedUrls.add(link.url);

        result.pagesVisited.push({
          url: link.url,
          title: content.title,
          relevance: this.calculateRelevanceScore(content, keywords),
          summary: content.summary || this.generatePageSummary(content)
        });
        result.navigationPath.push(link.url);

        // Child links are only worth selecting when the next level can still visit
        // them; at remainingDepth 1 the recursive call would return immediately.
        if (remainingDepth > 1 && content.links && content.links.length > 0) {
          console.error(`Processing ${content.links.length} links from ${link.url} at depth ${remainingDepth}`);

          // Resolve relative URLs against the page they were found on, then score.
          let pageLinks = content.links.map((pageLink: ExtractedLink) =>
            this.scoreLink(pageLink, link.url, keywords, content)
          );

          if (allowedDomains.length > 0) {
            pageLinks = pageLinks.filter(pageLink => this.isAllowedHost(pageLink.url, allowedDomains));
          }

          if (keywords.length > 0) {
            pageLinks = pageLinks.filter(pageLink => this.mentionsAnyKeyword(pageLink, keywords));
          }

          if (threshold > 0) {
            pageLinks = pageLinks.filter(pageLink => pageLink.relevanceScore >= threshold);
          }

          // Branching shrinks as more levels remain below this one: 2 links when
          // remainingDepth is 2, otherwise 1 (never more than 3).
          const sortedPageLinks = [...pageLinks]
            .sort((a, b) => b.relevanceScore - a.relevanceScore)
            .slice(0, Math.min(3, Math.max(1, 4 - remainingDepth)));

          console.error(`Following ${sortedPageLinks.length} relevant links from ${link.url} (scores: ${sortedPageLinks.map(l => l.relevanceScore.toFixed(2)).join(', ')})`);

          if (sortedPageLinks.length > 0) {
            await this.followLinksRecursive(
              sessionId,
              sortedPageLinks,
              visitedUrls,
              remainingDepth - 1,
              options,
              result,
              link.url
            );
          }
        }

        // Also add to browsing session for tracking (after processing links)
        await this.browsingSessionService.visitUrl(sessionId, link.url, parentUrl);
      } catch (error) {
        console.error(`Error following link ${link.url}:`, error);
      }
    }
  }

  /**
   * Generate navigation paths between two URLs.
   *
   * @param sessionId Session whose recorded navigation is searched.
   * @param fromUrl URL where the paths start.
   * @param toUrl URL where the paths end.
   * @returns The paths produced by the browsing session service.
   * @throws Error when no session exists for `sessionId`.
   */
  public async findNavigationPaths(
    sessionId: string,
    fromUrl: string,
    toUrl: string
  ): Promise<BrowsingPath[]> {
    if (!this.browsingSessionService.getSession(sessionId)) {
      throw new Error(`Session ${sessionId} not found`);
    }

    return this.browsingSessionService.generateNavigationPaths(sessionId, fromUrl, toUrl);
  }

  /**
   * Get suggested links from a page based on context.
   *
   * @param sessionId Existing session to work in.
   * @param url Page to take suggestions from; it is visited first unless it already is
   *   the session's current URL.
   * @param context Optional free text; its whitespace-separated words are used as
   *   keywords.
   * @param maxSuggestions Maximum number of links to return (default 5).
   * @returns Up to `maxSuggestions` links, highest relevance score first.
   * @throws Error when no session exists for `sessionId`.
   */
  public async getSuggestedLinks(
    sessionId: string,
    url: string,
    context?: string,
    maxSuggestions: number = 5
  ): Promise<ExtractedLink[]> {
    const session = this.browsingSessionService.getSession(sessionId);
    if (!session) {
      throw new Error(`Session ${sessionId} not found`);
    }

    if (session.currentUrl !== url) {
      // The extraction result is not used here.
      // NOTE(review): presumably kept to fail early on unreachable pages or to warm a
      // cache — confirm whether this call is still needed.
      await this.contentExtractor.extractEnhancedContent(url);
      await this.browsingSessionService.visitUrl(sessionId, url);
    }

    // Drop empty tokens (leading/trailing whitespace): an empty keyword is a substring
    // of every text and would make the keyword filter match everything.
    const contextKeywords = context ? context.split(/\s+/).filter(word => word.length > 0) : [];

    const options: NavigationOptions = {
      followLinks: true,
      maxDepth: 1,
      relevanceThreshold: 0.2,
      filterByKeywords: contextKeywords.length > 0 ? contextKeywords : undefined
    };

    const { links } = await this.browsingSessionService.followRelevantLinks(sessionId, options);

    // Sort a copy so the array owned by the session service is left untouched.
    return [...links]
      .sort((a, b) => (b.relevanceScore || 0) - (a.relevanceScore || 0))
      .slice(0, maxSuggestions);
  }

  /**
   * Extract related topics from visited pages.
   *
   * Placeholder implementation: collects the distinct lower-cased title words longer
   * than three characters, in first-seen order, and returns at most ten of them.
   */
  private extractRelatedTopics(pagesVisited: Array<{
    url: string;
    title: string;
    relevance: number;
    summary: string;
  }>): string[] {
    const topics = new Set<string>();

    for (const page of pagesVisited) {
      const titleWords = page.title
        .toLowerCase()
        .replace(/[^\w\s]/g, '')
        .split(/\s+/)
        .filter(word => word.length > 3);

      for (const word of titleWords) {
        topics.add(word);
      }
    }

    return Array.from(topics).slice(0, 10);
  }

  /**
   * Generate a summary of a webpage: its first three sentences, followed by `...`
   * when there are more, or a generic `Summary of <title>` when the page has no content.
   */
  private generatePageSummary(content: EnhancedWebpageContent): string {
    if (content.content) {
      // Split after sentence-ending punctuation that is followed by whitespace.
      const sentences = content.content.split(/(?<=[.!?])\s+/);
      const firstSentences = sentences.slice(0, 3).join(' ');
      return firstSentences + (sentences.length > 3 ? '...' : '');
    }

    return `Summary of ${content.title}`;
  }

  /**
   * Calculate relevance score for page content based on keywords.
   *
   * Per keyword: 0.1 per whole-word occurrence in title + description + content
   * (capped at 0.3), plus 0.2 when the title contains it and 0.1 when the description
   * contains it. The sum is averaged over all keywords and capped at 1.0.
   *
   * @returns A score in [0, 1]; 0.7 when no keywords are given.
   */
  private calculateRelevanceScore(content: EnhancedWebpageContent, keywords: string[]): number {
    if (keywords.length === 0) return 0.7; // Default good score if no keywords

    // A missing description must contribute nothing, not the text "undefined".
    const textToAnalyze = `${content.title} ${content.description || ''} ${content.content || ''}`.toLowerCase();
    const titleLower = content.title.toLowerCase();
    const descriptionLower = content.description ? content.description.toLowerCase() : '';

    let score = 0;

    for (const keyword of keywords) {
      const keywordLower = keyword.toLowerCase();
      // Keywords are literal text: escape regex metacharacters (e.g. "c++", "node.js").
      // Look-arounds are used instead of \b so that keywords starting or ending with a
      // non-word character can still match as whole terms.
      const regex = new RegExp(`(?<!\\w)${this.escapeRegExp(keywordLower)}(?!\\w)`, 'gi');
      const matches = textToAnalyze.match(regex);

      if (!matches) {
        continue;
      }

      let keywordScore = Math.min(matches.length * 0.1, 0.3); // Up to 0.3 for frequency

      if (titleLower.includes(keywordLower)) {
        keywordScore += 0.2; // Bonus for title matches
      }

      if (descriptionLower && descriptionLower.includes(keywordLower)) {
        keywordScore += 0.1; // Bonus for description matches
      }

      score += keywordScore;
    }

    // Normalize by number of keywords and cap at 1.0
    return Math.min(score / keywords.length, 1.0);
  }

  /**
   * Calculate relevance score for individual links.
   *
   * Per keyword: 0.3 when link text or context contains it, 0.2 when the URL contains
   * it and 0.2 when the context contains it. A link on the same host as the parent
   * page gets +0.1, link text shorter than 5 characters halves the score, link text
   * longer than 20 characters adds 0.1. The total is averaged over the keywords and
   * capped at 1.0.
   *
   * @returns A score in [0, 1]; 0.6 when no keywords are given.
   */
  private calculateLinkRelevance(link: ExtractedLink, keywords: string[], parentContent: EnhancedWebpageContent): number {
    if (keywords.length === 0) return 0.6; // Default score if no keywords

    const linkText = `${link.text} ${link.context || ''}`.toLowerCase();
    const urlLower = link.url.toLowerCase();
    const contextLower = link.context ? link.context.toLowerCase() : '';

    let score = 0;

    for (const keyword of keywords) {
      const keywordLower = keyword.toLowerCase();

      if (linkText.includes(keywordLower)) {
        score += 0.3; // Direct text match
      }

      if (urlLower.includes(keywordLower)) {
        score += 0.2; // URL keyword match
      }

      if (contextLower && contextLower.includes(keywordLower)) {
        score += 0.2; // Context relevance
      }
    }

    // Bonus for internal links (same domain)
    try {
      const linkDomain = new URL(link.url).hostname;
      const parentDomain = new URL(parentContent.url).hostname;
      if (linkDomain === parentDomain) {
        score += 0.1;
      }
    } catch {
      // Ignore URL parsing errors
    }

    // Penalty for very short link text (likely navigation)
    if (link.text.length < 5) {
      score *= 0.5;
    }

    // Bonus for longer, descriptive text
    if (link.text.length > 20) {
      score += 0.1;
    }

    return Math.min(score / keywords.length, 1.0);
  }

  /** Return a copy of `link` with an absolute URL and its relevance score attached. */
  private scoreLink(
    link: ExtractedLink,
    baseUrl: string,
    keywords: string[],
    parentContent: EnhancedWebpageContent
  ): ScoredLink {
    const resolved = { ...link, url: this.toAbsoluteUrl(link.url, baseUrl) };
    return {
      ...resolved,
      relevanceScore: this.calculateLinkRelevance(resolved, keywords, parentContent)
    };
  }

  /** Resolve `href` against `baseUrl`; on failure the error is logged and `href` is returned unchanged. */
  private toAbsoluteUrl(href: string, baseUrl: string): string {
    try {
      if (!href.startsWith('http')) {
        return new URL(href, baseUrl).href;
      }
    } catch (error) {
      console.error(`Failed to convert URL ${href} to absolute:`, error);
    }
    return href;
  }

  /**
   * Tell whether the host of `linkUrl` is one of `allowedDomains` or a subdomain of
   * one. Unparseable URLs are rejected.
   */
  private isAllowedHost(linkUrl: string, allowedDomains: readonly string[]): boolean {
    try {
      const host = new URL(linkUrl).hostname.toLowerCase();
      return allowedDomains.some(domain => {
        const allowed = domain.toLowerCase();
        // A plain substring test would also accept "notexample.com" for "example.com".
        return host === allowed || host.endsWith(`.${allowed}`);
      });
    } catch {
      return false;
    }
  }

  /** Tell whether the link text or context contains at least one keyword (case-insensitive). */
  private mentionsAnyKeyword(link: ExtractedLink, keywords: readonly string[]): boolean {
    const textToCheck = `${link.text} ${link.context || ''}`.toLowerCase();
    return keywords.some(keyword => textToCheck.includes(keyword.toLowerCase()));
  }

  /** Escape every regular-expression metacharacter in `text` so it matches literally. */
  private escapeRegExp(text: string): string {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mixelpixx/Google-Research-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.