Google Search MCP Server

167

source-quality.service.ts•9.23 kB

import { URL } from 'url';
import { SearchResult, WebpageContent } from '../types.js';

/** Coarse category assigned to a source by {@link SourceQualityService}. */
export enum SourceType {
  ACADEMIC = 'academic',
  OFFICIAL_DOCS = 'official_documentation',
  NEWS = 'news',
  BLOG = 'blog',
  FORUM = 'forum',
  SOCIAL = 'social_media',
  COMMERCIAL = 'commercial',
  UNKNOWN = 'unknown'
}

/**
 * Quality assessment produced for a single URL.
 *
 * `last_updated` is part of the shape but is not populated by
 * `SourceQualityService.assessSource` in this file.
 */
export interface SourceQuality {
  url: string;
  domain: string;
  type: SourceType;
  authority_score: number; // 0.0 - 1.0
  recency_score: number; // 0.0 - 1.0
  credibility_score: number; // Combined score
  author?: string;
  publication_date?: string;
  last_updated?: string;
}

/** One date-extraction rule: a pattern with three capture groups and how to read them. */
type DateRule = {
  pattern: RegExp;
  /** Converts the three captured groups into `[year, month (1-12), day]`. */
  toYmd: (a: string, b: string, c: string) => [number, number, number];
};

/**
 * Heuristic, offline scoring of web sources. Everything here is derived from
 * the URL and (optionally) the page text; no network access is performed.
 */
export class SourceQualityService {
  /** Only this many leading characters are scanned for bylines and dates. */
  private static readonly SCAN_LIMIT = 2000;

  /** Score returned when recency cannot be determined. */
  private static readonly UNKNOWN_RECENCY = 0.5;

  /** Base authority score for each source type. */
  private static readonly TYPE_BASE_SCORES: Record<SourceType, number> = {
    [SourceType.ACADEMIC]: 0.95,
    [SourceType.OFFICIAL_DOCS]: 0.90,
    [SourceType.NEWS]: 0.70,
    [SourceType.BLOG]: 0.50,
    [SourceType.FORUM]: 0.45,
    [SourceType.SOCIAL]: 0.30,
    [SourceType.COMMERCIAL]: 0.40,
    [SourceType.UNKNOWN]: 0.35
  };

  /** Registrable domains that receive an extra authority boost. */
  private static readonly HIGH_AUTHORITY_DOMAINS: readonly string[] = [
    'github.com',
    'stackoverflow.com',
    'microsoft.com',
    'python.org',
    'mozilla.org',
    'w3.org',
    'ietf.org',
    'arxiv.org',
    'ieee.org',
    'acm.org',
    'stanford.edu',
    'mit.edu',
    'nature.com',
    'science.org',
    'nytimes.com',
    'reuters.com',
    'bbc.com',
    'npr.org'
  ];

  /**
   * Byline patterns. Case variants of the keyword are spelled out instead of
   * using the `i` flag, because `i` would also let `[A-Z]` match lowercase
   * letters and defeat the "capitalised name" check. Name parts are separated
   * by spaces/tabs only so a name never runs across a line break.
   * The `by` pattern also covers "written by".
   */
  private static readonly AUTHOR_PATTERNS: readonly RegExp[] = (() => {
    const name = '([A-Z][a-z]+(?:[ \\t]+[A-Z][a-z]+)+)';
    return [
      new RegExp('\\b(?:by|By|BY)\\s+' + name),
      new RegExp('\\b(?:author|Author|AUTHOR):\\s*' + name)
    ];
  })();

  /** Three-letter month prefixes, index + 1 = month number. */
  private static readonly MONTH_PREFIXES: readonly string[] = [
    'jan', 'feb', 'mar', 'apr', 'may', 'jun',
    'jul', 'aug', 'sep', 'oct', 'nov', 'dec'
  ];

  /**
   * Date patterns, tried in order. Dates are assembled from the captured
   * numbers rather than handed to `new Date(string)`, so the result does not
   * depend on the host time zone or on engine-specific lenient parsing.
   */
  private static readonly DATE_RULES: readonly DateRule[] = (() => {
    const month =
      '(jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?|aug(?:ust)?|' +
      'sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)';
    const dayYear = '\\.?\\s+(\\d{1,2}),?\\s+(\\d{4})(?!\\d)';
    const fromMonthName = (m: string, d: string, y: string): [number, number, number] => [
      parseInt(y, 10),
      SourceQualityService.monthNumber(m),
      parseInt(d, 10)
    ];
    return [
      // "Published: January 5, 2024"
      { pattern: new RegExp('published[:\\s]+' + month + dayYear, 'i'), toYmd: fromMonthName },
      // "January 5, 2024"
      { pattern: new RegExp('\\b' + month + dayYear, 'i'), toYmd: fromMonthName },
      // "2024-01-05" (also inside "2024-01-05T10:00:00Z")
      {
        pattern: /(?<!\d)(\d{4})-(\d{2})-(\d{2})(?!\d)/,
        toYmd: (y: string, m: string, d: string): [number, number, number] => [
          parseInt(y, 10),
          parseInt(m, 10),
          parseInt(d, 10)
        ]
      },
      // "1/5/2024", read as month/day/year
      {
        pattern: /(?<!\d)(\d{1,2})\/(\d{1,2})\/(\d{4})(?!\d)/,
        toYmd: (m: string, d: string, y: string): [number, number, number] => [
          parseInt(y, 10),
          parseInt(m, 10),
          parseInt(d, 10)
        ]
      }
    ];
  })();

  /**
   * Assess the quality of a source.
   *
   * @param url - Source URL; an unparsable URL yields an empty `domain`.
   * @param content - Optional page text (or a `WebpageContent` whose `content`
   *   field holds the text) used for recency, author and date extraction.
   * @returns Scores in the 0-1 range plus any extracted author / publication date.
   */
  assessSource(url: string, content?: WebpageContent | string): SourceQuality {
    const domain = this.extractDomain(url);
    const sourceType = this.classifySourceType(url, domain);
    const authorityScore = this.assessAuthority(domain, sourceType);
    const recencyScore = this.assessRecency(content);
    const author = this.extractAuthor(content);
    const pubDate = this.extractPublicationDate(content);

    // Combined credibility score (weighted average)
    const credibilityScore = authorityScore * 0.6 + recencyScore * 0.4;

    return {
      url,
      domain,
      type: sourceType,
      authority_score: authorityScore,
      recency_score: recencyScore,
      credibility_score: credibilityScore,
      author,
      publication_date: pubDate
    };
  }

  /**
   * Rank sources by credibility, highest first.
   *
   * @param sources - Search results; each result's `link` is assessed.
   * @param contents - Optional page contents keyed by the same `link`.
   * @returns A new array of copies of the inputs, each extended with
   *   `quality_score`, `source_type` and `authority`. The input array is not
   *   mutated.
   */
  rankSources(sources: SearchResult[], contents?: Map<string, WebpageContent>): SearchResult[] {
    const scoredSources = sources.map(source => {
      const quality = this.assessSource(source.link, contents?.get(source.link));
      return {
        ...source,
        quality_score: quality.credibility_score,
        source_type: quality.type,
        authority: quality.authority_score
      };
    });

    // `scoredSources` is a fresh array, so sorting in place is safe.
    return scoredSources.sort((a, b) => b.quality_score - a.quality_score);
  }

  /**
   * Calculate aggregate quality metrics for a set of assessed sources.
   *
   * @returns Values rounded to two decimals; all zeros for an empty input.
   */
  calculateQualityMetrics(sources: SourceQuality[]): {
    source_diversity: number;
    average_authority: number;
    content_freshness: number;
    total_sources: number;
  } {
    const total = sources.length;
    if (total === 0) {
      return {
        source_diversity: 0,
        average_authority: 0,
        content_freshness: 0,
        total_sources: 0
      };
    }

    const round2 = (value: number): number => Math.round(value * 100) / 100;
    const mean = (pick: (s: SourceQuality) => number): number =>
      sources.reduce((sum, s) => sum + pick(s), 0) / total;

    // Source diversity = unique domains / total sources
    const uniqueDomains = new Set(sources.map(s => s.domain));

    return {
      source_diversity: round2(uniqueDomains.size / total),
      average_authority: round2(mean(s => s.authority_score)),
      content_freshness: round2(mean(s => s.recency_score)),
      total_sources: total
    };
  }

  /** Hostname of `url` without a leading `www.`; empty string if the URL cannot be parsed. */
  private extractDomain(url: string): string {
    try {
      return new URL(url).hostname.replace(/^www\./, '');
    } catch {
      // Best effort: callers treat '' as "unknown domain".
      return '';
    }
  }

  /** Lower-cased path of `url`; empty string if the URL cannot be parsed. */
  private extractPath(url: string): string {
    try {
      return new URL(url).pathname.toLowerCase();
    } catch {
      return '';
    }
  }

  /** True when `domain` is `root` itself or one of its subdomains. */
  private isSameOrSubdomain(domain: string, root: string): boolean {
    return domain === root || domain.endsWith('.' + root);
  }

  /**
   * Classify the type of source from its domain (and, for the github.com and
   * microsoft.com documentation check, its path). Categories are tested in a
   * fixed order and the first match wins.
   */
  private classifySourceType(url: string, domain: string): SourceType {
    const domainLower = domain.toLowerCase();

    // Academic
    if (/\.edu$|scholar|arxiv|ieee|acm\.org|pubmed|sciencedirect/.test(domainLower)) {
      return SourceType.ACADEMIC;
    }

    // Official documentation: by hostname, or a /docs path on github.com / microsoft.com.
    // (The path has to be read from the URL; a hostname never contains "/docs".)
    if (/docs\.|documentation|developer|python\.org|mozilla\.org|w3\.org/.test(domainLower)) {
      return SourceType.OFFICIAL_DOCS;
    }
    const onDocsHost =
      this.isSameOrSubdomain(domainLower, 'github.com') ||
      this.isSameOrSubdomain(domainLower, 'microsoft.com');
    if (onDocsHost && /^\/docs(?:\/|$)/.test(this.extractPath(url))) {
      return SourceType.OFFICIAL_DOCS;
    }

    // News. "times"/"post" must end a hostname label (nytimes.com, washingtonpost.com)
    // so that names such as postgresql.org or timescale.com are not treated as news.
    if (
      /\.news|(?:times|post)\.|reuters|(?:^|\.)ap\.org$|bbc\.|cnn\.|npr\.org|guardian|wsj|bloomberg/.test(
        domainLower
      )
    ) {
      return SourceType.NEWS;
    }

    // Forums/Community
    if (/stackoverflow|reddit|forum|discuss|community|hackernews|news\.ycombinator/.test(domainLower)) {
      return SourceType.FORUM;
    }

    // Social media. x.com is anchored so hosts like netflix.com do not match.
    if (/twitter|linkedin|facebook|instagram|tiktok|(?:^|\.)x\.com$/.test(domainLower)) {
      return SourceType.SOCIAL;
    }

    // Blogs
    if (/blog|medium\.com|dev\.to|hashnode|substack/.test(domainLower)) {
      return SourceType.BLOG;
    }

    // Commercial
    if (/\.com$/.test(domainLower) && !/github|gitlab/.test(domainLower)) {
      return SourceType.COMMERCIAL;
    }

    return SourceType.UNKNOWN;
  }

  /**
   * Assess domain authority: a base score per source type, +0.1 for listed
   * high-authority domains (exact host or subdomain), and a further boost for
   * .gov / .edu / .org hosts. The result is capped at 1.0.
   */
  private assessAuthority(domain: string, sourceType: SourceType): number {
    let score = SourceQualityService.TYPE_BASE_SCORES[sourceType];

    // Suffix match on a label boundary, so "notgithub.com" is not boosted.
    const isHighAuthority = SourceQualityService.HIGH_AUTHORITY_DOMAINS.some(root =>
      this.isSameOrSubdomain(domain, root)
    );
    if (isHighAuthority) {
      score = Math.min(1.0, score + 0.1);
    }

    // Boost for .gov, .edu, .org domains
    if (/\.gov$/.test(domain)) {
      score = Math.min(1.0, score + 0.15);
    } else if (/\.edu$/.test(domain)) {
      score = Math.min(1.0, score + 0.10);
    } else if (/\.org$/.test(domain)) {
      score = Math.min(1.0, score + 0.05);
    }

    return score;
  }

  /** Plain text of `content`, or '' when there is none. */
  private contentText(content?: WebpageContent | string): string {
    if (!content) return '';
    const text = typeof content === 'string' ? content : content.content;
    return typeof text === 'string' ? text : '';
  }

  /**
   * Assess content recency from the most recent 20xx year mentioned in the text.
   * Years later than the current year are ignored (a forecast for "2099" says
   * nothing about when the page was written). Returns 0.5 when nothing usable
   * is found.
   */
  private assessRecency(content?: WebpageContent | string): number {
    const text = this.contentText(content);
    if (!text) return SourceQualityService.UNKNOWN_RECENCY;

    const currentYear = new Date().getFullYear();

    // Plain loop rather than Math.max(...years): spreading a very large match
    // list can exceed the engine's argument limit.
    let latestYear = -Infinity;
    for (const raw of text.match(/\b20\d{2}\b/g) ?? []) {
      const year = parseInt(raw, 10);
      if (year <= currentYear && year > latestYear) {
        latestYear = year;
      }
    }
    if (latestYear === -Infinity) return SourceQualityService.UNKNOWN_RECENCY;

    const age = currentYear - latestYear;
    if (age === 0) return 1.0; // Current year
    if (age === 1) return 0.9; // Last year
    if (age <= 2) return 0.7; // 2 years old
    if (age <= 3) return 0.5; // 3 years old
    if (age <= 5) return 0.3; // Up to 5 years old
    return 0.1; // Older than 5 years
  }

  /**
   * Extract an author name: first a byline in the leading part of the text,
   * then, for `WebpageContent`, the `author` meta tag. Heuristic only.
   */
  private extractAuthor(content?: WebpageContent | string): string | undefined {
    if (!content) return undefined;

    const head = this.contentText(content).slice(0, SourceQualityService.SCAN_LIMIT);
    for (const pattern of SourceQualityService.AUTHOR_PATTERNS) {
      const match = pattern.exec(head);
      if (match) {
        return match[1];
      }
    }

    // Check meta tags if content is WebpageContent
    if (typeof content !== 'string' && content.meta_tags?.author) {
      return content.meta_tags.author;
    }

    return undefined;
  }

  /**
   * Extract a publication date from the leading part of the text.
   *
   * @returns The date as `YYYY-MM-DD`, or `undefined` if no valid calendar
   *   date is found.
   */
  private extractPublicationDate(content?: WebpageContent | string): string | undefined {
    const head = this.contentText(content).slice(0, SourceQualityService.SCAN_LIMIT);
    if (!head) return undefined;

    for (const rule of SourceQualityService.DATE_RULES) {
      const match = rule.pattern.exec(head);
      if (!match) continue;

      const [, a = '', b = '', c = ''] = match;
      const [year, month, day] = rule.toYmd(a, b, c);
      const iso = SourceQualityService.toIsoDate(year, month, day);
      if (iso !== undefined) {
        return iso;
      }
      // Matched text was not a real calendar date; fall through to the next rule.
    }

    return undefined;
  }

  /** Month number (1-12) for an English month name or abbreviation; 0 if unrecognised. */
  private static monthNumber(name: string): number {
    return SourceQualityService.MONTH_PREFIXES.indexOf(name.slice(0, 3).toLowerCase()) + 1;
  }

  /** Formats a calendar date as `YYYY-MM-DD`; `undefined` if the date does not exist. */
  private static toIsoDate(year: number, month: number, day: number): string | undefined {
    if (!(month >= 1 && month <= 12) || !(day >= 1 && day <= 31)) return undefined;

    // Round-trip through UTC to reject dates such as February 31.
    const probe = new Date(Date.UTC(year, month - 1, day));
    const exists =
      probe.getUTCFullYear() === year &&
      probe.getUTCMonth() === month - 1 &&
      probe.getUTCDate() === day;
    if (!exists) return undefined;

    const pad = (value: number, width: number): string => String(value).padStart(width, '0');
    return `${pad(year, 4)}-${pad(month, 2)}-${pad(day, 2)}`;
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mixelpixx/Google-Search-MCP-Server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.