Web Research Assistant

Overview Schema Related Servers Score Discussions

comparison.py•16.1 KiB

"""Technology comparison and analysis.""" from __future__ import annotations import asyncio import re from dataclasses import dataclass, field from typing import Any from .config import clamp_text @dataclass class TechInfo: """Information about a technology.""" name: str category: str performance: str | None = None learning_curve: str | None = None ecosystem: str | None = None popularity: str | None = None features: list[str] = field(default_factory=list) use_cases: str | None = None maintenance: str | None = None sources: list[str] = field(default_factory=list) # Default aspects by category CATEGORY_ASPECTS = { "framework": [ "performance", "learning_curve", "ecosystem", "popularity", "features", ], "library": ["performance", "features", "ecosystem", "popularity"], "database": ["performance", "data_model", "scaling", "use_cases", "ecosystem"], "language": ["performance", "learning_curve", "ecosystem", "use_cases"], "tool": ["performance", "features", "ecosystem"], } DEFAULT_ASPECTS = ["performance", "features", "ecosystem", "popularity"] class TechComparator: """Compare technologies based on gathered data.""" def __init__(self, searcher, github_client, registry_client): """Initialize with existing clients. Args: searcher: SearxSearcher instance github_client: GitHubClient instance registry_client: PackageRegistryClient instance """ self.searcher = searcher self.github_client = github_client self.registry_client = registry_client async def gather_info(self, technology: str, category: str, aspects: list[str]) -> TechInfo: """Gather information about a single technology. Uses multiple sources: - Web search for general info and comparisons - GitHub for popularity metrics (if applicable) - Package registries for ecosystem data (if applicable) Args: technology: Name of the technology (e.g., "React") category: Category (framework, database, etc.) aspects: List of aspects to gather (performance, ecosystem, etc.) 
Returns: TechInfo object with gathered data """ info = TechInfo(name=technology, category=category) # Gather all data in parallel for speed tasks = [] # Task 1: Overview search tasks.append(self._search_overview(info, technology, category)) # Task 2: Search for all aspects in parallel for aspect in aspects: tasks.append(self._search_aspect(info, aspect, technology, category)) # Task 3: GitHub metrics tasks.append(self._gather_github_metrics(info)) # Task 4: Package metrics tasks.append(self._gather_package_metrics(info)) # Run all in parallel await asyncio.gather(*tasks, return_exceptions=True) return info async def _search_overview(self, info: TechInfo, technology: str, category: str) -> None: """Search for overview information.""" overview_query = f"{technology} {category} overview features" try: overview_results = await self.searcher.search( overview_query, category="general", max_results=3 ) if overview_results: for result in overview_results: if result.snippet: if result.url not in info.sources: info.sources.append(result.url) # Extract features features = self._extract_features(result.snippet) info.features.extend(features) except Exception: pass async def _search_aspect( self, info: TechInfo, aspect: str, technology: str, category: str ) -> None: """Search for a specific aspect.""" search_term = self._aspect_to_search_term(aspect, technology, category) try: results = await self.searcher.search(search_term, category="general", max_results=2) if results: aspect_info = self._extract_aspect_info(results, aspect) if aspect_info: setattr(info, aspect, aspect_info) for result in results: if result.url not in info.sources: info.sources.append(result.url) except Exception: pass def _aspect_to_search_term(self, aspect: str, technology: str, category: str) -> str: """Convert aspect to search term. 
Args: aspect: The aspect to search for technology: Technology name category: Category Returns: Search query string """ # Improved search queries with "vs" to find comparison articles aspect_map = { "performance": f"{technology} {category} performance fast slow", "learning_curve": f"{technology} easy hard learn beginner", "ecosystem": f"{technology} packages libraries plugins ecosystem size", "popularity": f"{technology} popular usage market share", "features": f"{technology} features advantages benefits", "use_cases": f"{technology} best for use cases when to use", "maintenance": f"{technology} active maintained updates", "data_model": f"{technology} data model relational document", "scaling": f"{technology} scale scalability horizontal vertical", } return aspect_map.get(aspect, f"{technology} {aspect} {category}") def _extract_features(self, text: str) -> list[str]: """Extract feature mentions from text. Args: text: Text to extract from Returns: List of feature strings """ features = [] # Look for common feature patterns # "features include:", "key features:", "supports X, Y, Z" patterns = [ r"features?[:\s]+([^.!?]+)", r"supports?[:\s]+([^.!?]+)", r"capabilities?[:\s]+([^.!?]+)", ] for pattern in patterns: matches = re.findall(pattern, text, re.IGNORECASE) for match in matches: # Split on common delimiters items = re.split(r"[,;]", match) for item in items: item = item.strip() if item and len(item) < 100: # Reasonable feature length features.append(item) return features[:5] # Limit to 5 features def _extract_aspect_info(self, results, aspect: str) -> str | None: """Extract information about a specific aspect from search results. 
Args: results: Search results aspect: The aspect to extract Returns: Extracted information or None """ if not results: return None # Combine snippets from all results snippets = [r.snippet for r in results if r.snippet] if not snippets: return None # Try to extract key sentences related to the aspect aspect_keywords = { "performance": ["fast", "slow", "speed", "performance", "benchmark"], "learning_curve": [ "easy", "hard", "learn", "beginner", "simple", "complex", ], "ecosystem": ["packages", "libraries", "plugins", "ecosystem", "community"], "popularity": ["popular", "used", "adoption", "market"], "features": ["features", "supports", "includes", "offers"], "use_cases": ["best for", "use for", "ideal for", "suited"], "maintenance": ["maintained", "updated", "active", "development"], "data_model": ["relational", "document", "key-value", "graph"], "scaling": ["scale", "scalability", "horizontal", "vertical"], } keywords = aspect_keywords.get(aspect, []) # Find sentences mentioning the aspect relevant_sentences = [] for snippet in snippets: sentences = re.split(r"[.!?]+", snippet) for sentence in sentences: sentence = sentence.strip() if any(keyword in sentence.lower() for keyword in keywords): relevant_sentences.append(sentence) if relevant_sentences: # Join top 2 most relevant sentences text = ". ".join(relevant_sentences[:2]) text = " ".join(text.split()) # Normalize whitespace text = clamp_text(text, 250) # Slightly longer for context return text # Fallback: use first snippet text = snippets[0] text = " ".join(text.split()) text = clamp_text(text, 200) return text if text else None async def _gather_github_metrics(self, info: TechInfo) -> None: """Try to gather GitHub metrics for the technology. 
Args: info: TechInfo object to update """ name_lower = info.name.lower() # Expanded GitHub repo patterns to try repo_patterns = [ f"{name_lower}/{name_lower}", # react/react f"{name_lower}js/{name_lower}", # vuejs/vue f"{name_lower}js/{name_lower}js", # vite/vitejs f"facebook/{name_lower}", # facebook/react f"{name_lower}org/{name_lower}", # svelte/svelte (svelteorg?) f"{name_lower}-lang/{name_lower}", # rust-lang/rust f"python/{name_lower}", # python/cpython f"golang/{name_lower}", # golang/go f"{name_lower}/{name_lower}-core", f"{name_lower}/{name_lower}.js", ] for pattern in repo_patterns: try: # Split pattern into owner/repo if "/" not in pattern: continue owner, repo = pattern.split("/", 1) repo_info = await self.github_client.get_repo_info(owner, repo) if repo_info and repo_info.stars: # Must have stars to be valid # Add popularity metrics stars = f"{repo_info.stars:,}" forks = f"{repo_info.forks:,}" if repo_info.forks else "0" # Append to existing popularity or create new github_pop = f"GitHub: {stars} stars, {forks} forks" if info.popularity: info.popularity = f"{info.popularity}; {github_pop}" else: info.popularity = github_pop # Add maintenance info if repo_info.last_commit_date: info.maintenance = f"Last commit: {repo_info.last_commit_date}" # Add source if repo_info.url and repo_info.url not in info.sources: info.sources.append(repo_info.url) break # Found it! except Exception: continue async def _gather_package_metrics(self, info: TechInfo) -> None: """Try to gather package registry metrics. 
Args: info: TechInfo object to update """ # Try npm first (most common for web frameworks) registries = ["npm", "pypi"] for registry in registries: try: pkg_info = None if registry == "npm": pkg_info = await self.registry_client.search_npm(info.name.lower()) elif registry == "pypi": pkg_info = await self.registry_client.search_pypi(info.name.lower()) if pkg_info: # Add download metrics to popularity if pkg_info.downloads: downloads = pkg_info.downloads current_pop = info.popularity or "" info.popularity = f"{current_pop}, {registry.upper()}: {downloads}".strip( ", " ) # Add ecosystem info if pkg_info.dependencies: dep_count = len(pkg_info.dependencies) current_eco = info.ecosystem or "" info.ecosystem = f"{current_eco}, {dep_count} dependencies".strip(", ") break # Found it! except Exception: continue def compare(self, tech_infos: list[TechInfo], aspects: list[str]) -> dict[str, Any]: """Structure comparison from gathered info. Args: tech_infos: List of TechInfo objects aspects: List of aspects to compare Returns: Structured comparison dictionary """ comparison: dict[str, Any] = { "technologies": [t.name for t in tech_infos], "category": tech_infos[0].category if tech_infos else "unknown", "aspects": {}, "summary": {}, "sources": [], } # Build aspect comparison for aspect in aspects: comparison["aspects"][aspect] = {} for tech in tech_infos: value = getattr(tech, aspect, None) if value: if isinstance(value, list): # Join list items comparison["aspects"][aspect][tech.name] = ", ".join(value[:3]) # Max 3 else: comparison["aspects"][aspect][tech.name] = value else: comparison["aspects"][aspect][tech.name] = "Information not found" # Build summaries for tech in tech_infos: summary = self._generate_summary(tech) comparison["summary"][tech.name] = summary # Collect unique sources all_sources = [] for tech in tech_infos: if tech.sources: for source in tech.sources[:2]: # Max 2 sources per tech if source not in all_sources: all_sources.append(source) comparison["sources"] 
= all_sources[:10] # Max 10 total sources return comparison def _generate_summary(self, tech: TechInfo) -> str: """Generate a best-for summary for a technology. Args: tech: TechInfo object Returns: Summary string """ summary_parts = [] # Analyze gathered data if tech.use_cases: summary_parts.append(tech.use_cases[:80]) # Add features if we have them if tech.features: feature_str = ", ".join(tech.features[:2]) summary_parts.append(f"Key features: {feature_str}") # Add popularity if significant if tech.popularity and "stars" in tech.popularity: summary_parts.append("Popular choice") if summary_parts: return "; ".join(summary_parts)[:150] return "General-purpose solution" def detect_category(technologies: list[str]) -> str: """Auto-detect category from technology names. Args: technologies: List of technology names Returns: Detected category or "auto" """ # Common frameworks frameworks = { "react", "vue", "angular", "svelte", "nextjs", "nuxt", "gatsby", "remix", } # Common databases databases = { "postgresql", "postgres", "mysql", "mongodb", "redis", "sqlite", "cassandra", } # Common languages languages = {"python", "javascript", "typescript", "go", "rust", "java", "c++"} tech_lower = [t.lower() for t in technologies] # Check for frameworks if any(t in frameworks for t in tech_lower): return "framework" # Check for databases if any(t in databases for t in tech_lower): return "database" # Check for languages if any(t in languages for t in tech_lower): return "language" return "library" # Default to library

Loading blob content...

Implementation Reference

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/elad12390/web-research-assistant'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

comparison.py•16.1 KiB