MCP Localization Project

Overview Schema Related Servers Score Discussions

context7_docs.py•22.8 kB

#!/usr/bin/env python3
"""
Context7 MCP localization - library documentation retrieval.

Looks up and serves library documentation for a Context7-compatible
library ID.

Key features:
- get_library_docs: look up and return library documentation
- Multiple data sources (local files, web pages, registry APIs)
- Caching and per-format document processing
- Token limiting
"""

import asyncio
import json
import logging
import re
import time
import xml.etree.ElementTree as ET
from dataclasses import dataclass
from html import unescape as _html_unescape
from pathlib import Path
from typing import Dict, List, Optional, Any, Union
from urllib.parse import urlparse, urljoin

import aiofiles
import aiohttp

from context7_base import Context7Database, LibraryInfo, LibraryDocs, Context7Utils

logger = logging.getLogger(__name__)

# Rough words-to-tokens ratio used for every token estimate in this module.
_TOKENS_PER_WORD = 1.3

# Patterns used by the HTML text extractor; compiled once at import time.
_SCRIPT_RE = re.compile(r'<script.*?</script>', re.DOTALL | re.IGNORECASE)
_STYLE_RE = re.compile(r'<style.*?</style>', re.DOTALL | re.IGNORECASE)
_COMMENT_RE = re.compile(r'<!--.*?-->', re.DOTALL)
_TAG_RE = re.compile(r'<[^>]+>')
_WHITESPACE_RE = re.compile(r'\s+')

# Leading distribution name of a PyPI ``requires_dist`` entry.
_REQUIREMENT_NAME_RE = re.compile(r'[A-Za-z0-9][A-Za-z0-9._-]*')
_EXTRA_MARKER_RE = re.compile(r'extra\s*==')


def _estimate_tokens(text: str) -> int:
    """Return an approximate token count for *text* (word count * 1.3)."""
    return int(len(text.split()) * _TOKENS_PER_WORD)


@dataclass
class DocSource:
    """A single location that documentation can be fetched from."""
    source_type: str  # local, github, npm, pypi, web
    url: str
    priority: int  # lower value = tried earlier
    format: str  # markdown, html, json, xml


@dataclass
class DocResult:
    """Outcome of a documentation lookup."""
    library_id: str
    content: str
    topic: str
    tokens: int
    source: str
    metadata: Dict[str, Any]
    success: bool


class LibraryDocsProvider:
    """Fetches library documentation from cache, local files, or the web."""

    def __init__(self, db: Context7Database):
        """
        Initialize the documentation provider.

        Creates the ``local_docs`` directory (relative to the current working
        directory) if it does not exist yet.

        Args:
            db: Context7 database instance used for caching and fallback info
        """
        self.db = db
        self.session = None
        self.doc_sources = self._initialize_doc_sources()
        self.local_docs_path = Path("local_docs")
        self.local_docs_path.mkdir(exist_ok=True)

    def _initialize_doc_sources(self) -> Dict[str, List[DocSource]]:
        """Return the built-in table of documentation sources per library ID."""
        return {
            # JavaScript/TypeScript libraries
            "/axios/axios": [
                DocSource("github", "https://raw.githubusercontent.com/axios/axios/main/README.md", 1, "markdown"),
                DocSource("npm", "https://registry.npmjs.org/axios", 2, "json"),
                DocSource("web", "https://axios-http.com/docs/intro", 3, "html"),
            ],
            "/facebook/react": [
                DocSource("github", "https://raw.githubusercontent.com/facebook/react/main/README.md", 1, "markdown"),
                DocSource("web", "https://react.dev/learn", 2, "html"),
                DocSource("npm", "https://registry.npmjs.org/react", 3, "json"),
            ],
            "/vuejs/vue": [
                DocSource("github", "https://raw.githubusercontent.com/vuejs/core/main/README.md", 1, "markdown"),
                DocSource("web", "https://vuejs.org/guide/", 2, "html"),
                DocSource("npm", "https://registry.npmjs.org/vue", 3, "json"),
            ],
            "/expressjs/express": [
                DocSource("github", "https://raw.githubusercontent.com/expressjs/express/master/Readme.md", 1, "markdown"),
                DocSource("web", "https://expressjs.com/en/starter/installing.html", 2, "html"),
                DocSource("npm", "https://registry.npmjs.org/express", 3, "json"),
            ],
            # Python libraries
            "/psf/requests": [
                DocSource("github", "https://raw.githubusercontent.com/psf/requests/main/README.md", 1, "markdown"),
                DocSource("web", "https://requests.readthedocs.io/en/latest/", 2, "html"),
                DocSource("pypi", "https://pypi.org/pypi/requests/json", 3, "json"),
            ],
            "/numpy/numpy": [
                DocSource("github", "https://raw.githubusercontent.com/numpy/numpy/main/README.md", 1, "markdown"),
                DocSource("web", "https://numpy.org/doc/stable/", 2, "html"),
                DocSource("pypi", "https://pypi.org/pypi/numpy/json", 3, "json"),
            ],
            "/pallets/flask": [
                DocSource("github", "https://raw.githubusercontent.com/pallets/flask/main/README.md", 1, "markdown"),
                DocSource("web", "https://flask.palletsprojects.com/en/3.0.x/", 2, "html"),
                DocSource("pypi", "https://pypi.org/pypi/flask/json", 3, "json"),
            ],
            # Go libraries
            "/gin-gonic/gin": [
                DocSource("github", "https://raw.githubusercontent.com/gin-gonic/gin/master/README.md", 1, "markdown"),
                DocSource("web", "https://gin-gonic.com/docs/", 2, "html"),
            ],
            # Rust libraries
            "/serde-rs/serde": [
                DocSource("github", "https://raw.githubusercontent.com/serde-rs/serde/master/README.md", 1, "markdown"),
                DocSource("web", "https://serde.rs/", 2, "html"),
            ],
        }

    async def get_session(self) -> aiohttp.ClientSession:
        """Return the shared HTTP session, creating it if missing or closed."""
        if self.session is None or self.session.closed:
            timeout = aiohttp.ClientTimeout(total=30)
            headers = {
                'User-Agent': 'Context7-Local/1.0 (Library Documentation Fetcher)'
            }
            self.session = aiohttp.ClientSession(timeout=timeout, headers=headers)
        return self.session

    async def close_session(self):
        """Close the shared HTTP session if one is open."""
        if self.session and not self.session.closed:
            await self.session.close()

    async def get_library_docs(
        self,
        library_id: str,
        topic: str = "",
        tokens: int = 10000
    ) -> DocResult:
        """
        Fetch documentation for a library.

        Sources are tried in order: cache, local file, web, and finally a
        summary generated from the database.

        Args:
            library_id: Context7-compatible library ID
            topic: optional topic to narrow the documentation to
            tokens: maximum number of tokens to return

        Returns:
            DocResult: the lookup result; ``success`` is False when the ID is
            invalid or no source produced anything
        """
        logger.info(f"📚 라이브러리 문서 검색 시작: {library_id} (topic: {topic})")

        # 1. Validate the ID before touching the cache or the filesystem.
        if not Context7Utils.validate_library_id(library_id):
            return DocResult(
                library_id=library_id,
                content="",
                topic=topic,
                tokens=0,
                source="error",
                metadata={"error": "Invalid library ID format"},
                success=False
            )

        # 2. Cached documentation
        cached_docs = await self.db.get_cached_docs(library_id, topic)
        if cached_docs:
            logger.info(f"📄 캐시된 문서 사용: {library_id}")
            return DocResult(
                library_id=library_id,
                content=cached_docs.content,
                topic=topic,
                tokens=cached_docs.tokens,
                source=f"cache ({cached_docs.source})",
                metadata={"cached": True, "timestamp": cached_docs.timestamp},
                success=True
            )

        # 3. Local documentation
        local_result = await self._get_local_docs(library_id, topic, tokens)
        if local_result.success:
            await self._cache_result(local_result)
            return local_result

        # 4. Documentation from the web
        web_result = await self._get_web_docs(library_id, topic, tokens)
        if web_result.success:
            await self._cache_result(web_result)
            # The local file is keyed by library ID only, so persisting
            # topic-filtered content would later be served as the library's
            # full documentation. Only store unfiltered results.
            if not topic:
                await self._save_local_docs(web_result)
            return web_result

        # 5. Generated fallback information
        return await self._generate_fallback_docs(library_id, topic)

    def _local_doc_path(self, library_id: str) -> Path:
        """Map a library ID to its markdown file inside ``local_docs_path``."""
        # Path separators and colons are replaced so the ID cannot address
        # anything outside the local docs directory.
        safe_id = library_id.replace("/", "_").replace(":", "_").replace("\\", "_")
        return self.local_docs_path / f"{safe_id}.md"

    async def _get_local_docs(
        self,
        library_id: str,
        topic: str,
        max_tokens: Optional[int] = None
    ) -> DocResult:
        """
        Read documentation from the local docs directory.

        Args:
            library_id: Context7-compatible library ID
            topic: optional topic used to filter the content
            max_tokens: token limit forwarded to
                ``Context7Utils.sanitize_content``; when None the sanitizer is
                called without a limit

        Returns:
            DocResult: successful when a non-empty local file was read
        """
        local_file = self._local_doc_path(library_id)
        try:
            if local_file.exists():
                async with aiofiles.open(local_file, 'r', encoding='utf-8') as f:
                    content = await f.read()

                # Topic filtering
                if topic:
                    content = self._filter_content_by_topic(content, topic)

                # Apply the token limit
                if max_tokens is None:
                    content = Context7Utils.sanitize_content(content)
                else:
                    content = Context7Utils.sanitize_content(content, max_tokens)

                # An empty file is treated as a miss so the web sources still
                # get a chance.
                if content:
                    logger.info(f"📁 로컬 문서 발견: {library_id}")
                    return DocResult(
                        library_id=library_id,
                        content=content,
                        topic=topic,
                        tokens=_estimate_tokens(content),
                        source="local",
                        metadata={"file_path": str(local_file)},
                        success=True
                    )
        except Exception as e:
            # Best effort: an unreadable local file must not abort the lookup.
            logger.warning(f"⚠️ 로컬 문서 읽기 실패: {e}")

        return DocResult(library_id, "", topic, 0, "local", {}, False)

    async def _get_web_docs(self, library_id: str, topic: str, max_tokens: int) -> DocResult:
        """
        Fetch documentation from the configured web sources.

        Sources are tried in ascending ``priority`` order; the first one that
        yields non-empty processed content wins.

        Args:
            library_id: Context7-compatible library ID
            topic: optional topic used to filter the content
            max_tokens: maximum number of tokens to return

        Returns:
            DocResult: successful when any source produced content
        """
        sources = self.doc_sources.get(library_id, [])

        if not sources:
            # Unknown library: guess conventional source URLs from the ID.
            sources = self._generate_default_sources(library_id)

        session = await self.get_session()

        for source in sorted(sources, key=lambda x: x.priority):
            try:
                logger.info(f"🌐 문서 소스 시도: {source.url}")

                async with session.get(source.url) as response:
                    if response.status != 200:
                        logger.debug(
                            "Skipping %s: HTTP status %s", source.url, response.status
                        )
                        continue

                    content = await response.text()

                    # Format-specific processing
                    processed_content = await self._process_content(
                        content, source.format, topic, max_tokens
                    )

                    if processed_content:
                        logger.info(f"✅ 웹 문서 가져오기 성공: {source.url}")
                        return DocResult(
                            library_id=library_id,
                            content=processed_content,
                            topic=topic,
                            tokens=_estimate_tokens(processed_content),
                            source=f"web ({source.source_type})",
                            metadata={
                                "url": source.url,
                                "format": source.format,
                                "status": response.status
                            },
                            success=True
                        )
            except Exception as e:
                # Best effort: a failing source just moves on to the next one.
                logger.warning(f"⚠️ 웹 문서 가져오기 실패 ({source.url}): {e}")
                continue

        return DocResult(library_id, "", topic, 0, "web", {}, False)

    async def _process_content(
        self,
        content: str,
        format: str,
        topic: str,
        max_tokens: int
    ) -> str:
        """
        Convert fetched content to text according to its format.

        Args:
            content: raw response body
            format: one of "json", "html", "markdown", "xml"; anything else
                is passed through as plain text
            topic: optional topic used to filter the result
            max_tokens: token limit forwarded to
                ``Context7Utils.sanitize_content``

        Returns:
            str: processed text, or "" when processing failed
        """
        try:
            if format == "json":
                # Pull the useful fields out of registry metadata
                data = json.loads(content)
                processed = self._extract_from_json(data, topic)
            elif format == "html":
                processed = self._extract_from_html(content, topic)
            elif format == "markdown":
                processed = self._process_markdown(content, topic)
            elif format == "xml":
                processed = self._extract_from_xml(content, topic)
            else:
                # Plain text
                processed = content

            # Topic filtering
            if topic and processed:
                processed = self._filter_content_by_topic(processed, topic)

            # Apply the token limit
            processed = Context7Utils.sanitize_content(processed, max_tokens)

            return processed

        except Exception as e:
            logger.error(f"❌ 콘텐츠 처리 실패: {e}")
            return ""

    def _extract_from_json(self, data: Dict[str, Any], topic: str) -> str:
        """
        Build a markdown summary from package-registry JSON.

        Two shapes are recognised: PyPI responses, whose fields live under a
        top-level ``info`` object, and npm-style metadata with top-level
        ``name``/``description``/``readme`` keys.

        Args:
            data: decoded JSON document
            topic: unused here; filtering happens in ``_process_content``

        Returns:
            str: markdown summary, or "" when nothing usable was found
        """
        if not isinstance(data, dict):
            return ""

        info = data.get("info")
        if isinstance(info, dict):
            return self._summarize_pypi_metadata(info)
        return self._summarize_npm_metadata(data)

    def _summarize_pypi_metadata(self, info: Dict[str, Any]) -> str:
        """Summarize the ``info`` object of a PyPI JSON API response."""
        raw_keywords = info.get("keywords")
        if isinstance(raw_keywords, str):
            # PyPI stores keywords as one comma- or space-separated string.
            keywords = [kw for kw in re.split(r'[,\s]+', raw_keywords) if kw]
        elif isinstance(raw_keywords, list):
            keywords = [str(kw) for kw in raw_keywords]
        else:
            keywords = []

        dependencies: List[str] = []
        for requirement in info.get("requires_dist") or []:
            # Entries guarded by an "extra == ..." marker are optional extras,
            # not main dependencies.
            if not isinstance(requirement, str) or _EXTRA_MARKER_RE.search(requirement):
                continue
            match = _REQUIREMENT_NAME_RE.match(requirement.strip())
            if match and match.group(0) not in dependencies:
                dependencies.append(match.group(0))

        return self._format_package_summary(
            name=info.get("name"),
            description=info.get("summary"),
            version=info.get("version"),
            install_command="pip install",
            readme=info.get("description"),
            keywords=keywords,
            dependencies=dependencies,
        )

    def _summarize_npm_metadata(self, data: Dict[str, Any]) -> str:
        """Summarize npm-style package metadata."""
        dist_tags = data.get("dist-tags")
        latest = dist_tags.get("latest") if isinstance(dist_tags, dict) else None

        # Full registry documents keep the version under "dist-tags" and the
        # dependencies under "versions[<latest>]"; single-version documents
        # have both at the top level.
        dependencies = data.get("dependencies")
        if not isinstance(dependencies, dict):
            versions = data.get("versions")
            latest_meta = versions.get(latest) if isinstance(versions, dict) and latest else None
            dependencies = latest_meta.get("dependencies") if isinstance(latest_meta, dict) else None
        dependency_names = list(dependencies) if isinstance(dependencies, dict) else []

        raw_keywords = data.get("keywords")
        keywords = [str(kw) for kw in raw_keywords] if isinstance(raw_keywords, list) else []

        return self._format_package_summary(
            name=data.get("name"),
            description=data.get("description"),
            version=data.get("version") or latest,
            install_command="npm install",
            readme=data.get("readme"),
            keywords=keywords,
            dependencies=dependency_names,
        )

    @staticmethod
    def _format_package_summary(
        name: Optional[str],
        description: Optional[str],
        version: Optional[str],
        install_command: str,
        readme: Any,
        keywords: List[str],
        dependencies: List[str],
    ) -> str:
        """Render package metadata as the markdown summary used for JSON sources."""
        parts = []

        # Basic information
        if name:
            parts.append(f"# {name}")
        if description:
            parts.append(f"\n## Description\n{description}")
        if version:
            parts.append(f"\n**Version:** {version}")

        # Installation
        if name:
            parts.append(f"\n## Installation\n```bash\n{install_command} {name}\n```")

        # README body, capped in length
        if isinstance(readme, str) and readme:
            parts.append(f"\n## Documentation\n{readme[:5000]}")

        if keywords:
            parts.append(f"\n**Keywords:** {', '.join(keywords[:10])}")

        if dependencies:
            parts.append(f"\n**Main Dependencies:** {', '.join(dependencies[:5])}")

        return "\n".join(parts)

    def _extract_from_html(self, html: str, topic: str) -> str:
        """
        Extract visible text from HTML (simple regex-based processing).

        Args:
            html: HTML document
            topic: unused here; filtering happens in ``_process_content``

        Returns:
            str: text with tags removed, entities decoded and whitespace
            collapsed to single spaces
        """
        # Drop script/style bodies and comments before stripping tags.
        text = _SCRIPT_RE.sub(' ', html)
        text = _STYLE_RE.sub(' ', text)
        text = _COMMENT_RE.sub(' ', text)

        # Replace tags with a space so that adjacent elements such as
        # "<p>a</p><p>b</p>" do not get glued together as "ab".
        text = _TAG_RE.sub(' ', text)

        # Decode entities (&amp;, &lt;, &#39;, ...) after the tags are gone so
        # that an escaped "<" is not mistaken for markup.
        text = _html_unescape(text)

        # Collapse runs of whitespace
        return _WHITESPACE_RE.sub(' ', text).strip()

    def _process_markdown(self, markdown: str, topic: str) -> str:
        """Return markdown essentially unchanged (outer whitespace stripped)."""
        return markdown.strip()

    def _extract_from_xml(self, xml: str, topic: str) -> str:
        """
        Extract the text content of an XML document.

        Args:
            xml: XML document
            topic: unused here; filtering happens in ``_process_content``

        Returns:
            str: all text nodes in document order joined by single spaces, or
            the original string when the document cannot be parsed
        """
        # NOTE(review): the input comes from remote servers and
        # xml.etree.ElementTree is not hardened against maliciously
        # constructed data (e.g. entity expansion); consider defusedxml if
        # XML sources are ever enabled.
        try:
            root = ET.fromstring(xml)
        except ET.ParseError:
            return xml  # fall back to the raw input when parsing fails

        pieces = (piece.strip() for piece in root.itertext())
        return " ".join(piece for piece in pieces if piece)

    def _filter_content_by_topic(self, content: str, topic: str) -> str:
        """
        Keep only the parts of markdown content related to a topic.

        A heading that mentions the topic opens a relevant section, which
        runs until the next heading of the same or a higher level. Outside
        such sections, individual lines mentioning the topic are kept.

        Args:
            content: markdown (or plain) text
            topic: case-insensitive topic to look for

        Returns:
            str: filtered content, or the first 5000 characters of the
            original when fewer than 100 characters matched
        """
        if not topic:
            return content

        topic_lower = topic.lower()
        filtered_lines = []

        in_relevant_section = False
        section_level = 0
        in_code_fence = False

        for line in content.split('\n'):
            is_fence = line.lstrip().startswith(('```', '~~~'))
            if is_fence:
                in_code_fence = not in_code_fence

            # Lines starting with '#' inside a fenced code block are code
            # comments (e.g. shell), not headings.
            is_heading = line.startswith('#') and not in_code_fence and not is_fence
            mentions_topic = topic_lower in line.lower()

            if is_heading:
                current_level = len(line) - len(line.lstrip('#'))

                if mentions_topic:
                    in_relevant_section = True
                    section_level = current_level
                    filtered_lines.append(line)
                elif in_relevant_section and current_level <= section_level:
                    # A heading of the same or a higher level closes the section.
                    in_relevant_section = False
                elif in_relevant_section:
                    filtered_lines.append(line)
            elif in_relevant_section or mentions_topic:
                filtered_lines.append(line)

        # Too little matched: fall back to the beginning of the original.
        filtered_content = '\n'.join(filtered_lines)
        if len(filtered_content) < 100:
            return content[:5000]

        return filtered_content

    def _generate_default_sources(self, library_id: str) -> List[DocSource]:
        """
        Guess documentation sources for a library without configured ones.

        Args:
            library_id: ID of the form ``/org/project``

        Returns:
            List[DocSource]: GitHub README (main, then master), npm and PyPI
            candidates, or an empty list when the ID has fewer than two parts
        """
        parts = library_id.strip('/').split('/')
        if len(parts) < 2:
            return []

        org, project = parts[0], parts[1]
        return [
            DocSource("github", f"https://raw.githubusercontent.com/{org}/{project}/main/README.md", 1, "markdown"),
            DocSource("github", f"https://raw.githubusercontent.com/{org}/{project}/master/README.md", 2, "markdown"),
            DocSource("npm", f"https://registry.npmjs.org/{project}", 3, "json"),
            DocSource("pypi", f"https://pypi.org/pypi/{project}/json", 4, "json"),
        ]

    async def _cache_result(self, result: DocResult):
        """Store a successful, non-empty result in the database cache."""
        if result.success and result.content:
            docs = LibraryDocs(
                library_id=result.library_id,
                content=result.content,
                topic=result.topic,
                tokens=result.tokens,
                timestamp=time.time(),
                source=result.source
            )
            await self.db.cache_docs(docs)

    async def _save_local_docs(self, result: DocResult):
        """Write a successful, non-empty result to the local docs directory."""
        if result.success and result.content:
            try:
                local_file = self._local_doc_path(result.library_id)

                async with aiofiles.open(local_file, 'w', encoding='utf-8') as f:
                    await f.write(result.content)

                logger.info(f"💾 로컬 문서 저장: {local_file}")
            except Exception as e:
                # Best effort: failing to persist must not fail the lookup.
                logger.warning(f"⚠️ 로컬 문서 저장 실패: {e}")

    async def _generate_fallback_docs(self, library_id: str, topic: str) -> DocResult:
        """
        Build a minimal document from database information.

        Args:
            library_id: Context7-compatible library ID
            topic: requested topic, mentioned in the generated text

        Returns:
            DocResult: a "fallback" result when the database knows the
            library, otherwise an unsuccessful "minimal" result
        """
        # Search by the last path component; stripping first keeps a trailing
        # slash from producing an empty search term.
        library_name = library_id.strip('/').split('/')[-1]
        library_info = await self.db.search_libraries(library_name, limit=1)

        if library_info:
            lib = library_info[0]
            content_parts = [
                f"# {lib.name}",
                f"\n**Library ID:** {library_id}",
                f"\n**Description:** {lib.description}",
                f"\n**Version:** {lib.version}",
                f"\n**Trust Score:** {lib.trust_score}/10",
                f"\n**Code Snippets Available:** {lib.code_snippets}",
                "\n\n*Note: This is basic information from local database. For complete documentation, please check the official sources.*"
            ]

            if topic:
                content_parts.append(f"\n\n**Requested Topic:** {topic}")
                content_parts.append("\n*Topic-specific documentation was not found in available sources.*")

            content = "".join(content_parts)

            return DocResult(
                library_id=library_id,
                content=content,
                topic=topic,
                tokens=_estimate_tokens(content),
                source="fallback",
                metadata={"generated": True},
                success=True
            )

        # Bare minimum when the database knows nothing about the library
        content = f"# Library Documentation\n\n**Library ID:** {library_id}\n\n*Documentation not available in local sources.*"

        return DocResult(
            library_id=library_id,
            content=content,
            topic=topic,
            tokens=_estimate_tokens(content),
            source="minimal",
            metadata={"error": "No documentation sources available"},
            success=False
        )


# Manual test entry point
async def test_library_docs():
    """Exercise the documentation provider against a few sample libraries."""
    print("🧪 라이브러리 문서 제공자 테스트 시작...")

    # Set up the database and the provider
    db = Context7Database("test_docs.db")
    docs_provider = LibraryDocsProvider(db)

    try:
        # Register a sample library
        test_library = LibraryInfo(
            library_id="/axios/axios",
            name="axios",
            description="Promise based HTTP client for the browser and node.js",
            version="1.6.2",
            trust_score=9.5,
            code_snippets=200
        )
        await db.add_library(test_library)

        test_cases = [
            ("/axios/axios", ""),           # default documentation
            ("/axios/axios", "usage"),      # specific topic
            ("/facebook/react", ""),        # another library
            ("/nonexistent/lib", ""),       # library that does not exist
        ]

        for library_id, topic in test_cases:
            print(f"\n📚 테스트: {library_id} (topic: {topic or 'none'})")

            result = await docs_provider.get_library_docs(library_id, topic, tokens=5000)

            print(f"✅ 성공: {result.success}")
            print(f"📄 소스: {result.source}")
            print(f"🔢 토큰: {result.tokens}")
            print(f"📝 내용 길이: {len(result.content)} 문자")

            if result.content:
                # Show the first 200 characters of the content
                preview = result.content[:200].replace('\n', ' ')
                print(f"👀 미리보기: {preview}...")

            if result.metadata:
                print(f"📊 메타데이터: {result.metadata}")
    finally:
        # Release the HTTP session even when a test case raises.
        await docs_provider.close_session()

    print("\n🎯 라이브러리 문서 제공자 테스트 완료!")


if __name__ == "__main__":
    asyncio.run(test_library_docs())

Latest Blog Posts

What Is Context Bloat in MCP?
By Om-Shree-0709 on December 16, 2025.
mcp
Context Bloat
MCP Moves to the Linux Foundation: Neutral Stewardship for Agentic Infrastructure
By Om-Shree-0709 on December 15, 2025.
mcp
anthropic
Linux Foundation
Code Execution with MCP: Architecting Agentic Efficiency
By Om-Shree-0709 on December 14, 2025.
mcp
Token bloat

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Skynotdie/mky'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.