"""
Core service layer for LangChain documentation operations.
This module contains the shared business logic that can be used by both
the FastAPI server and the MCP server implementations.
"""
# pylint: disable=too-few-public-methods,too-many-arguments,too-many-positional-arguments
# pylint: disable=too-many-instance-attributes,line-too-long,use-maxsplit-arg,too-many-nested-blocks
import logging
import re
from datetime import datetime
from typing import Dict, List, Optional, Any
from urllib.parse import urljoin, quote
import httpx
from bs4 import BeautifulSoup
logger = logging.getLogger(__name__)

# Configuration constants
LANGCHAIN_DOCS_BASE = "https://python.langchain.com"
GITHUB_API_BASE = "https://api.github.com"
GITHUB_REPO = "langchain-ai/langchain"
REQUEST_TIMEOUT = 30
class DocSearchResult:
"""Model for documentation search results."""
def __init__(self, title: str, url: str, summary: str, category: str, last_updated: Optional[str] = None):
self.title = title
self.url = url
self.summary = summary
self.category = category
self.last_updated = last_updated
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary for JSON serialization."""
return {
"title": self.title,
"url": self.url,
"summary": self.summary,
"category": self.category,
"last_updated": self.last_updated
}
class APIReference:
"""Model for API reference information."""
def __init__(self, class_name: str, module_path: str, description: str,
methods: List[str], parameters: Dict[str, Any],
examples: List[str], source_url: str):
self.class_name = class_name
self.module_path = module_path
self.description = description
self.methods = methods
self.parameters = parameters
self.examples = examples
self.source_url = source_url
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary for JSON serialization."""
return {
"class_name": self.class_name,
"module_path": self.module_path,
"description": self.description,
"methods": self.methods,
"parameters": self.parameters,
"examples": self.examples,
"source_url": self.source_url
}
class GitHubExample:
"""Model for GitHub code examples."""
def __init__(self, filename: str, content: str, url: str, description: str):
self.filename = filename
self.content = content
self.url = url
self.description = description
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary for JSON serialization."""
return {
"filename": self.filename,
"content": self.content,
"url": self.url,
"description": self.description
}
class TutorialInfo:
"""Model for tutorial information."""
def __init__(self, title: str, description: str, url: str, category: str, topics: List[str]):
self.title = title
self.description = description
self.url = url
self.category = category
self.topics = topics
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary for JSON serialization."""
return {
"title": self.title,
"description": self.description,
"url": self.url,
"category": self.category,
"topics": self.topics
}
class VersionInfo:
"""Model for version information."""
def __init__(self, latest_version: str, description: str, author: str,
homepage: str, release_date: Optional[str], python_requires: str,
pypi_url: str, documentation_url: str):
self.latest_version = latest_version
self.description = description
self.author = author
self.homepage = homepage
self.release_date = release_date
self.python_requires = python_requires
self.pypi_url = pypi_url
self.documentation_url = documentation_url
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary for JSON serialization."""
return {
"latest_version": self.latest_version,
"description": self.description,
"author": self.author,
"homepage": self.homepage,
"release_date": self.release_date,
"python_requires": self.python_requires,
"pypi_url": self.pypi_url,
"documentation_url": self.documentation_url
}
class LangChainDocumentationService:
"""Core service for LangChain documentation operations."""
def __init__(self):
self.timeout = REQUEST_TIMEOUT
    async def fetch_url(self, url: str, timeout: Optional[int] = None) -> Optional[str]:
"""
Fetch content from a URL with error handling.
Args:
url: The URL to fetch
timeout: Request timeout in seconds
Returns:
The response text or None if failed
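        Example (illustrative sketch; requires network access):
            import asyncio
            service = LangChainDocumentationService()
            html = asyncio.run(service.fetch_url(LANGCHAIN_DOCS_BASE))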
"""
if timeout is None:
timeout = self.timeout
try:
async with httpx.AsyncClient(timeout=timeout) as client:
response = await client.get(url)
response.raise_for_status()
return response.text
        except (httpx.RequestError, httpx.HTTPStatusError) as error:
            # Log rather than print: stdout must stay clean when this module
            # backs an MCP server that speaks over stdio
            logger.warning("Error fetching %s: %s", url, error)
            return None
    async def fetch_json(self, url: str, timeout: Optional[int] = None) -> Optional[Dict]:
"""
Fetch JSON content from a URL with error handling.
Args:
url: The URL to fetch JSON from
timeout: Request timeout in seconds
Returns:
The parsed JSON data or None if failed
"""
if timeout is None:
timeout = self.timeout
try:
async with httpx.AsyncClient(timeout=timeout) as client:
response = await client.get(url)
response.raise_for_status()
return response.json()
        except (httpx.RequestError, httpx.HTTPStatusError, ValueError) as error:
            # ValueError also covers malformed JSON bodies raised by response.json()
            logger.warning("Error fetching JSON from %s: %s", url, error)
            return None
def extract_text_content(self, html: str, max_length: int = 200) -> str:
"""
Extract clean text content from HTML.
Args:
html: The HTML content to parse
max_length: Maximum length of extracted text
Returns:
Clean text content, truncated if necessary
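        Example (illustrative):
            service.extract_text_content("<p>Hello <b>world</b></p>")
            # -> "Hello world"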
"""
if not html:
return ""
soup = BeautifulSoup(html, 'html.parser')
# Remove script and style elements
for script in soup(["script", "style"]):
script.decompose()
        text = soup.get_text()
        # Collapse all runs of whitespace (spaces, tabs, newlines) into single spaces
        text = ' '.join(text.split())
        return text[:max_length] + "..." if len(text) > max_length else text
def extract_class_info(self, file_content: str, class_name: str) -> tuple[str, List[str]]:
"""
Extract class information from Python source code.
Args:
file_content: The Python source code
class_name: Name of the class to extract info for
Returns:
Tuple of (description, methods list)
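        Example (illustrative; regex-based and best-effort, where `source_code`
        is any previously fetched Python file):
            description, methods = service.extract_class_info(source_code, "ChatOpenAI")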
"""
description = ""
methods = []
        # Extract class definition and methods using regex; the class header
        # may appear with or without base classes
        class_pattern = rf'class {re.escape(class_name)}\b[^:\n]*:'
        class_match = re.search(class_pattern, file_content)
if class_match:
            # Heuristic: treat the first triple-quoted string after the
            # class header as the class docstring
            docstring_pattern = r'"""(.*?)"""'
docstring_match = re.search(
docstring_pattern, file_content[class_match.end():], re.DOTALL)
if docstring_match:
description = docstring_match.group(1).strip()
            # Extract public method names (sync and async); this scans from the
            # class header to the end of the file, so it is a best-effort match
            method_pattern = r'(?:async\s+)?def (\w+)\(\s*self'
method_matches = re.findall(
method_pattern, file_content[class_match.start():])
methods = [
method for method in method_matches if not method.startswith('_')]
return description, methods
def determine_category_from_path(self, path: str) -> str:
"""
Determine content category based on URL path.
Args:
path: The URL path
Returns:
The determined category
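        Example (illustrative):
            service.determine_category_from_path("/docs/how_to/streaming/")
            # -> "How-To Guides"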
"""
category_map = {
"introduction": "Introduction",
"tutorials": "Tutorials",
"how_to": "How-To Guides",
"concepts": "Concepts",
"integrations": "Integrations",
"providers": "Providers",
"api_reference": "API Reference",
"chat": "Chat Models",
"llms": "LLMs",
"chains": "Chains",
"agents": "Agents",
"memory": "Memory",
"retrievers": "Retrievers",
"embeddings": "Embeddings"
}
for keyword, category in category_map.items():
if keyword in path.lower():
return category
return "General"
async def search_documentation(self, query: str, limit: int = 10) -> List[DocSearchResult]:
"""
        Search real LangChain documentation by scanning key documentation sections.
Args:
query: The search term or phrase
limit: Maximum number of results to return
Returns:
List of documentation search results from the official LangChain docs
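        Example (illustrative; requires network access):
            results = asyncio.run(service.search_documentation("agents", limit=3))
            for result in results:
                print(result.title, result.url)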
"""
# Define sections to search through (updated with latest LangChain structure)
sections_to_search = [
"/docs/introduction/",
"/docs/tutorials/",
"/docs/how_to/",
"/docs/concepts/",
"/docs/integrations/providers/",
"/api_reference/"
]
results = []
for section_path in sections_to_search:
if len(results) >= limit:
break
url = urljoin(LANGCHAIN_DOCS_BASE, section_path)
content = await self.fetch_url(url)
if content and query.lower() in content.lower():
soup = BeautifulSoup(content, 'html.parser')
title_tag = soup.find('title')
                if title_tag:
                    title = title_tag.text
                else:
                    # Strip the trailing slash first; otherwise split('/')[-1] is ''
                    title = section_path.strip('/').rsplit('/', maxsplit=1)[-1].replace('_', ' ').title()
# Extract description from meta description or first paragraph
meta_desc = soup.find('meta', attrs={'name': 'description'})
if meta_desc:
description = meta_desc.get('content', '')
else:
first_p = soup.find('p')
description = self.extract_text_content(
str(first_p)) if first_p else ""
category = self.determine_category_from_path(section_path)
results.append(DocSearchResult(
title=title,
url=url,
summary=description,
category=category,
                    # Fetch date: the docs site does not expose a true
                    # last-modified timestamp
                    last_updated=datetime.now().strftime("%Y-%m-%d")
))
return results[:limit]
async def get_api_reference(self, class_name: str) -> APIReference:
"""
Get real API reference for a LangChain class from GitHub source.
Args:
class_name: Name of the LangChain class (e.g., 'ChatOpenAI', 'LLMChain')
Returns:
            Real API reference extracted from LangChain source code on GitHub
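        Example (illustrative; the GitHub search API is rate-limited and may
        require authentication):
            ref = asyncio.run(service.get_api_reference("ChatOpenAI"))
            print(ref.module_path, ref.methods)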
"""
        # Search for the class via GitHub's code-search endpoint, which lives
        # at the API root (not under /repos) and is rate-limited; unauthenticated
        # requests may be rejected
        search_query = f"{class_name} repo:{GITHUB_REPO} language:python"
        search_url = f"{GITHUB_API_BASE}/search/code?q={quote(search_query)}"
search_results = await self.fetch_json(search_url)
if not search_results or not search_results.get('items'):
raise ValueError(
f"Class '{class_name}' not found in LangChain repository")
# Get the first relevant file
file_info = search_results['items'][0]
file_url = file_info['html_url']
# Get raw file content
raw_url = file_url.replace(
'github.com', 'raw.githubusercontent.com').replace('/blob/', '/')
file_content = await self.fetch_url(raw_url)
if not file_content:
raise ValueError("Could not fetch source code")
# Parse the Python file to extract class information
description, methods = self.extract_class_info(
file_content, class_name)
        # Strip the .py suffix before converting the file path to a dotted module path
        module_path = file_info['path'].removesuffix('.py').replace('/', '.')
return APIReference(
class_name=class_name,
module_path=module_path,
description=description or f"LangChain {class_name} class",
methods=methods,
parameters={},
examples=[],
source_url=file_url
)
async def get_github_examples(self, query: Optional[str] = None, limit: int = 5) -> List[GitHubExample]:
"""
Get real code examples from the LangChain GitHub repository.
Args:
query: Optional search term to filter examples
limit: Maximum number of examples to return
Returns:
List of real code examples from the LangChain repository
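        Example (illustrative; subject to GitHub API rate limits):
            examples = asyncio.run(service.get_github_examples("retriever", limit=2))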
"""
        # Search for Python example files in the LangChain repository via the
        # GitHub code-search endpoint (rate-limited; may require authentication)
        search_query = f"repo:{GITHUB_REPO} language:python {query or 'example'}"
        search_url = f"{GITHUB_API_BASE}/search/code?q={quote(search_query)}"
search_results = await self.fetch_json(search_url)
if not search_results or not search_results.get('items'):
return []
examples = []
for item in search_results['items'][:limit]:
# Get raw file content
raw_url = item['html_url'].replace(
'github.com', 'raw.githubusercontent.com').replace('/blob/', '/')
content = await self.fetch_url(raw_url)
# Only include reasonably sized files
if content and len(content) < 5000:
examples.append(GitHubExample(
filename=item['name'],
content=content,
url=item['html_url'],
description=f"Example from {item['path']}"
))
return examples
async def get_tutorials(self) -> List[TutorialInfo]:
"""
Get real tutorials and guides from LangChain documentation.
Returns:
List of tutorials scraped from the official documentation
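        Example (illustrative; requires network access):
            tutorials = asyncio.run(service.get_tutorials())
            print([t.title for t in tutorials])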
"""
# Fetch the main tutorials page
tutorials_url = f"{LANGCHAIN_DOCS_BASE}/docs/tutorials/"
content = await self.fetch_url(tutorials_url)
if not content:
raise ValueError("Could not fetch tutorials page")
soup = BeautifulSoup(content, 'html.parser')
tutorials = []
# Find tutorial links (updated for new structure)
for link in soup.find_all('a', href=True):
href = link['href']
if href.startswith('/docs/') and any(
keyword in href for keyword in ['tutorials', 'concepts', 'introduction', 'how_to', 'integrations']
):
title = link.text.strip()
if title and len(title) > 3:
full_url = urljoin(LANGCHAIN_DOCS_BASE, href)
# Determine category from URL (updated categories)
category = "General"
if "introduction" in href:
category = "Introduction"
elif "tutorial" in href:
category = "Tutorials"
elif "how_to" in href:
category = "How-To Guides"
elif "concepts" in href:
category = "Concepts"
elif "integrations" in href:
category = "Integrations"
tutorials.append(TutorialInfo(
title=title,
description=f"LangChain tutorial: {title}",
url=full_url,
category=category,
topics=[category.lower().replace(" ", "_")]
))
# Remove duplicates
seen_urls = set()
unique_tutorials = []
for tutorial in tutorials:
if tutorial.url not in seen_urls:
seen_urls.add(tutorial.url)
unique_tutorials.append(tutorial)
return unique_tutorials[:10] # Limit to 10 tutorials
async def get_latest_version(self) -> VersionInfo:
"""
Get the latest LangChain version information from PyPI.
Returns:
Latest version information from the official PyPI repository
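        Example (illustrative; requires network access):
            version = asyncio.run(service.get_latest_version())
            print(version.latest_version, version.release_date)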
"""
pypi_url = "https://pypi.org/pypi/langchain/json"
data = await self.fetch_json(pypi_url)
if not data:
raise ValueError("Could not fetch version information")
info = data.get('info', {})
releases = data.get('releases', {})
# Get latest version
latest_version = info.get('version', 'Unknown')
# Get release information
latest_release_info = releases.get(latest_version, [])
upload_time = None
if latest_release_info:
upload_time = latest_release_info[0].get('upload_time_iso_8601')
return VersionInfo(
latest_version=latest_version,
description=info.get('summary', ''),
author=info.get('author', ''),
            # Newer PyPI metadata often leaves home_page empty in favor of project_urls
            homepage=info.get('home_page') or (info.get('project_urls') or {}).get('Homepage', ''),
release_date=upload_time,
python_requires=info.get('requires_python', ''),
pypi_url="https://pypi.org/project/langchain/",
documentation_url=LANGCHAIN_DOCS_BASE
)
async def search_api_reference(self, query: str, limit: int = 5) -> List[DocSearchResult]:
"""
Search through LangChain API reference using the official search.
Args:
query: The search term or phrase for API reference
limit: Maximum number of results to return
Returns:
List of API reference search results
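        Example (illustrative; requires network access):
            hits = asyncio.run(service.search_api_reference("Runnable", limit=3))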
"""
        # The API reference search page is rendered client-side, so the raw
        # HTML may contain no results; the fallback below scans the API
        # reference index page instead
        search_url = f"{LANGCHAIN_DOCS_BASE}/api_reference/search.html?q={quote(query)}"
content = await self.fetch_url(search_url)
if not content:
# Fallback to general API reference page
api_url = f"{LANGCHAIN_DOCS_BASE}/api_reference/"
content = await self.fetch_url(api_url)
results = []
if content:
soup = BeautifulSoup(content, 'html.parser')
# Look for API reference links and items
for link in soup.find_all('a', href=True):
href = link['href']
text = link.text.strip()
if (href.startswith('/api_reference/') or 'api_reference' in href) and text:
if query.lower() in text.lower() or query.lower() in href.lower():
full_url = urljoin(LANGCHAIN_DOCS_BASE, href)
# Extract parent element for context
parent = link.parent
description = ""
if parent:
desc_text = parent.get_text().strip()
if len(desc_text) > len(text):
description = self.extract_text_content(
desc_text, 150)
results.append(DocSearchResult(
title=text,
url=full_url,
summary=description or f"API reference for {text}",
category="API Reference",
last_updated=datetime.now().strftime("%Y-%m-%d")
))
if len(results) >= limit:
break
return results
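
if __name__ == "__main__":
    # Minimal usage sketch, not part of the service API: exercises two of the
    # read-only operations end to end. Requires network access; availability
    # and rate limits of the upstream services are assumptions.
    import asyncio

    async def _demo() -> None:
        service = LangChainDocumentationService()
        version = await service.get_latest_version()
        print(f"Latest langchain release: {version.latest_version}")
        for result in await service.search_documentation("agents", limit=3):
            print(f"- {result.title}: {result.url}")

    asyncio.run(_demo())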