Skip to main content
Glama
Skynotdie

MCP Localization Project

by Skynotdie
real_context7_wrapper.py (17.2 kB)
#!/usr/bin/env python3
"""
Real Context7 wrapper implementation.

Fetches up-to-date library documentation from the internet on demand and
stores it in a local SQLite cache.
"""

import aiohttp
import asyncio
import json
import sqlite3
import time
import hashlib
from contextlib import closing
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass
import logging
from pathlib import Path
import re
from urllib.parse import quote, quote_plus, urljoin, urlparse
from bs4 import BeautifulSoup


@dataclass
class LibraryInfo:
    """Metadata describing a library and where its documentation lives."""
    name: str
    version: str
    docs_url: str
    github_url: Optional[str] = None
    npm_url: Optional[str] = None
    pypi_url: Optional[str] = None


def _normalize_github_url(repo_url: Any) -> Optional[str]:
    """Turn a package-registry repository URL into a browsable GitHub URL.

    Returns ``None`` when *repo_url* is not a string or does not mention
    ``github.com``. Only a leading ``git+`` and a trailing ``.git`` are
    stripped, so names such as ``user.github.io`` are left intact.
    """
    if not isinstance(repo_url, str) or 'github.com' not in repo_url:
        return None

    url = repo_url.strip()
    if url.startswith('git+'):
        url = url[len('git+'):]
    if url.endswith('.git'):
        url = url[:-len('.git')]

    # Rewrite the non-HTTP transports to the equivalent https URL.
    scp_prefix = 'git@github.com:'
    ssh_prefix = 'ssh://git@github.com/'
    git_prefix = 'git://'
    if url.startswith(scp_prefix):
        url = 'https://github.com/' + url[len(scp_prefix):]
    elif url.startswith(ssh_prefix):
        url = 'https://github.com/' + url[len(ssh_prefix):]
    elif url.startswith(git_prefix):
        url = 'https://' + url[len(git_prefix):]
    return url


class RealContext7Wrapper:
    """Wrapper that implements Context7-style live documentation lookup.

    Documentation pages are downloaded over HTTP, reduced to plain text and
    cached in a SQLite database. The instance can be used as an async
    context manager so the HTTP session is always closed.
    """

    # How long a cached document stays valid (12 hours).
    CACHE_TTL_SECONDS = 12 * 60 * 60
    # Maximum number of text lines kept from a documentation page.
    MAX_DOC_LINES = 200

    def __init__(self, cache_db: str = "real_context7_cache.db"):
        """Create the wrapper and make sure the cache tables exist.

        Args:
            cache_db: Path of the SQLite database file used as cache.
        """
        self.cache_db = cache_db
        self.logger = logging.getLogger("RealContext7Wrapper")
        self.session: Optional[aiohttp.ClientSession] = None
        self._init_cache_db()

        # Known documentation URLs, grouped by language.
        self.doc_patterns = {
            'javascript': {
                'react': 'https://reactjs.org/docs/',
                'vue': 'https://vuejs.org/guide/',
                'angular': 'https://angular.io/docs',
                'express': 'https://expressjs.com/en/api.html',
                'axios': 'https://axios-http.com/docs/intro',
                'lodash': 'https://lodash.com/docs/',
                'moment': 'https://momentjs.com/docs/',
                'jquery': 'https://api.jquery.com/',
            },
            'python': {
                'django': 'https://docs.djangoproject.com/en/stable/',
                'flask': 'https://flask.palletsprojects.com/en/latest/',
                'fastapi': 'https://fastapi.tiangolo.com/',
                'requests': 'https://requests.readthedocs.io/en/latest/',
                'numpy': 'https://numpy.org/doc/stable/',
                'pandas': 'https://pandas.pydata.org/docs/',
                'matplotlib': 'https://matplotlib.org/stable/contents.html',
                'tensorflow': 'https://www.tensorflow.org/api_docs/python',
                'pytorch': 'https://pytorch.org/docs/stable/',
            },
            'java': {
                'spring': 'https://docs.spring.io/spring-framework/docs/current/reference/html/',
                'hibernate': 'https://hibernate.org/orm/documentation/',
                'junit': 'https://junit.org/junit5/docs/current/user-guide/',
            },
            'go': {
                'gin': 'https://gin-gonic.com/docs/',
                'echo': 'https://echo.labstack.com/guide/',
                'gorm': 'https://gorm.io/docs/',
            }
        }

    async def __aenter__(self) -> "RealContext7Wrapper":
        """Enter an ``async with`` block; returns the wrapper itself."""
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        """Close the HTTP session when leaving an ``async with`` block."""
        await self.close()

    def _init_cache_db(self):
        """Create the cache tables if they do not exist yet."""
        # closing() guarantees the connection is released even when a
        # statement fails; the inner `with conn` commits or rolls back.
        with closing(sqlite3.connect(self.cache_db)) as conn:
            with conn:
                # Library documentation cache table.
                conn.execute('''
                    CREATE TABLE IF NOT EXISTS library_docs (
                        library_name TEXT PRIMARY KEY,
                        version TEXT,
                        docs_content TEXT NOT NULL,
                        docs_url TEXT,
                        github_url TEXT,
                        cached_at REAL NOT NULL,
                        expires_at REAL NOT NULL,
                        content_hash TEXT
                    )
                ''')

                # API response cache table.
                conn.execute('''
                    CREATE TABLE IF NOT EXISTS api_cache (
                        url_hash TEXT PRIMARY KEY,
                        url TEXT NOT NULL,
                        response_content TEXT NOT NULL,
                        cached_at REAL NOT NULL,
                        expires_at REAL NOT NULL
                    )
                ''')

    async def _get_session(self) -> aiohttp.ClientSession:
        """Return the shared HTTP session, creating it when missing or closed."""
        if self.session is None or self.session.closed:
            # Use a browser-like User-Agent.
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
            timeout = aiohttp.ClientTimeout(total=30)
            self.session = aiohttp.ClientSession(
                headers=headers,
                timeout=timeout,
                connector=aiohttp.TCPConnector(limit=10)
            )
        return self.session

    async def search_library_docs(self, library_name: str, version: Optional[str] = None,
                                  force_refresh: bool = False) -> Dict[str, Any]:
        """Look up documentation for a library.

        Args:
            library_name: Name of the library to look up.
            version: Optional version to report for the library.
            force_refresh: When true, skip the cache and fetch live.

        Returns:
            A dict with ``success``, ``library`` and ``source`` keys. On
            success it also carries ``version``, ``content`` and ``urls``;
            on failure it carries ``error``. Exceptions are logged and
            reported through the dict rather than raised.
        """
        try:
            # Check the cache first.
            if not force_refresh:
                cached_docs = self._get_cached_docs(library_name)
                if cached_docs:
                    return {
                        'success': True,
                        'library': library_name,
                        'version': cached_docs.get('version', 'unknown'),
                        'content': cached_docs['content'],
                        'source': 'cache',
                        'urls': {
                            'docs': cached_docs.get('docs_url'),
                            'github': cached_docs.get('github_url')
                        }
                    }

            # Fetch the documentation live.
            library_info = await self._fetch_library_info(library_name, version)
            docs_content, fetched_ok = await self._download_docs(library_info)

            # Only cache real content: storing a failure message would hide
            # the problem behind the cache until the entry expires.
            if fetched_ok:
                self._cache_docs(library_name, library_info, docs_content)

            return {
                'success': True,
                'library': library_name,
                'version': library_info.version,
                'content': docs_content,
                'source': 'live',
                'urls': {
                    'docs': library_info.docs_url,
                    'github': library_info.github_url
                }
            }

        except Exception as e:
            self.logger.error(f"라이브러리 문서 검색 실패: {library_name}, 오류: {str(e)}")
            return {
                'success': False,
                'library': library_name,
                'error': str(e),
                'source': 'error'
            }

    async def _fetch_library_info(self, library_name: str, version: Optional[str] = None) -> LibraryInfo:
        """Resolve library metadata from the known table or the registries."""
        # 1. Search the known documentation URLs first.
        key = library_name.lower()
        for patterns in self.doc_patterns.values():
            if key in patterns:
                docs_url = patterns[key]

                # Guess the GitHub URL.
                github_url = await self._find_github_url(library_name)

                # Look up version information.
                if not version:
                    version = await self._get_latest_version(library_name)

                return LibraryInfo(
                    name=library_name,
                    version=version or 'latest',
                    docs_url=docs_url,
                    github_url=github_url
                )

        # 2. Dynamic lookup (npm, PyPI, ...).
        return await self._dynamic_library_search(library_name, version)

    async def _dynamic_library_search(self, library_name: str, version: Optional[str] = None) -> LibraryInfo:
        """Look the library up on npm, then PyPI, then fall back to a search URL.

        Registry failures are logged at debug level and the next source is
        tried; this method always returns a ``LibraryInfo``.
        """
        session = await self._get_session()

        # Try npm. '@' stays literal so scoped names become '@scope%2Fname'.
        try:
            npm_url = f"https://registry.npmjs.org/{quote(library_name, safe='@')}"
            async with session.get(npm_url) as response:
                if response.status == 200:
                    data = await response.json()

                    dist_tags = data.get('dist-tags') or {}
                    latest_version = dist_tags.get('latest', version or 'latest')
                    homepage = data.get('homepage', '')
                    repository = data.get('repository', {})

                    # The repository field may be an object or a plain string.
                    if isinstance(repository, dict):
                        github_url = _normalize_github_url(repository.get('url', ''))
                    else:
                        github_url = _normalize_github_url(repository)

                    # Guess the documentation URL.
                    docs_url = homepage if homepage else f"https://www.npmjs.com/package/{library_name}"

                    return LibraryInfo(
                        name=library_name,
                        version=latest_version,
                        docs_url=docs_url,
                        github_url=github_url,
                        npm_url=npm_url
                    )
        except Exception as e:
            self.logger.debug(f"npm 검색 실패: {library_name}, 오류: {str(e)}")

        # Try PyPI.
        try:
            pypi_url = f"https://pypi.org/pypi/{quote(library_name, safe='')}/json"
            async with session.get(pypi_url) as response:
                if response.status == 200:
                    data = await response.json()
                    # PyPI sends JSON null for absent fields, so `.get(key, default)`
                    # alone would hand back None here.
                    info = data.get('info') or {}

                    latest_version = info.get('version', version or 'latest')
                    homepage = info.get('home_page') or ''
                    project_urls = info.get('project_urls') or {}

                    # Find the documentation URL.
                    docs_url = (project_urls.get('Documentation') or
                                project_urls.get('Homepage') or
                                homepage or
                                f"https://pypi.org/project/{library_name}/")

                    github_url = None
                    for url in project_urls.values():
                        if url and 'github.com' in url.lower():
                            github_url = url
                            break

                    return LibraryInfo(
                        name=library_name,
                        version=latest_version,
                        docs_url=docs_url,
                        github_url=github_url,
                        pypi_url=pypi_url
                    )
        except Exception as e:
            self.logger.debug(f"PyPI 검색 실패: {library_name}, 오류: {str(e)}")

        # Fallback when no registry knows the library.
        return LibraryInfo(
            name=library_name,
            version=version or 'latest',
            docs_url=f"https://www.google.com/search?q={quote_plus(library_name)}+documentation"
        )

    async def _fetch_docs_content(self, library_info: LibraryInfo) -> str:
        """Return the documentation text, or a failure message on error."""
        content, _ = await self._download_docs(library_info)
        return content

    async def _download_docs(self, library_info: LibraryInfo) -> Tuple[str, bool]:
        """Download and extract the documentation page.

        Returns:
            ``(text, ok)``. ``ok`` is true only when non-empty text was
            extracted from an HTTP 200 response; otherwise ``text`` holds
            the failure message (or an empty string) and must not be cached.
        """
        session = await self._get_session()

        try:
            # Fetch the documentation page.
            async with session.get(library_info.docs_url) as response:
                if response.status != 200:
                    return f"문서 페이지 접근 실패: HTTP {response.status}", False
                html_content = await response.text()

            extracted = self._extract_main_text(html_content)
            if extracted is None:
                return "문서 내용을 추출할 수 없습니다.", False
            return extracted, bool(extracted)

        except Exception as e:
            self.logger.error(f"문서 내용 가져오기 실패: {library_info.docs_url}, 오류: {str(e)}")
            return f"문서 내용 가져오기 실패: {str(e)}", False

    def _extract_main_text(self, html_content: str) -> Optional[str]:
        """Reduce an HTML page to the text of its main content area.

        Returns ``None`` when the page has neither a known content
        container nor a ``<body>``.
        """
        soup = BeautifulSoup(html_content, 'html.parser')

        # Drop elements that do not carry documentation text.
        for tag in soup(['script', 'style', 'nav', 'footer', 'header', 'aside']):
            tag.decompose()

        # Candidate containers for the main content, tried in order.
        content_selectors = [
            'main', 'article', '.content', '.documentation',
            '#content', '.main-content', '.docs-content'
        ]

        main_content = None
        for selector in content_selectors:
            main_content = soup.select_one(selector)
            if main_content is not None:
                break

        if main_content is None:
            main_content = soup.find('body')
        if main_content is None:
            return None

        # Tidy up the text.
        text = main_content.get_text(separator='\n', strip=True)
        lines = [line.strip() for line in text.split('\n') if line.strip()]

        # Cap the length so very long pages stay manageable.
        if len(lines) > self.MAX_DOC_LINES:
            lines = lines[:self.MAX_DOC_LINES]
            lines.append("... (내용이 길어서 생략됨)")

        return '\n'.join(lines)

    async def _find_github_url(self, library_name: str) -> Optional[str]:
        """Find a GitHub repository URL via the GitHub search API.

        Prefers a repository whose name equals *library_name* (ignoring
        case); otherwise returns the first result. Returns ``None`` when
        nothing is found or the request fails.
        """
        session = await self._get_session()

        try:
            # Passing the query via `params` lets aiohttp escape the name.
            search_url = "https://api.github.com/search/repositories"
            params = {
                'q': library_name,
                'sort': 'stars',
                'order': 'desc',
                'per_page': '5',
            }

            async with session.get(search_url, params=params) as response:
                if response.status == 200:
                    data = await response.json()
                    items = data.get('items', [])

                    wanted = library_name.lower()
                    for item in items:
                        if item['name'].lower() == wanted:
                            return item['html_url']

                    # No exact match: return the first result.
                    if items:
                        return items[0]['html_url']
        except Exception as e:
            self.logger.debug(f"GitHub URL 찾기 실패: {library_name}, 오류: {str(e)}")

        return None

    async def _get_latest_version(self, library_name: str) -> Optional[str]:
        """Return the latest version of a library; currently always ``None``.

        Version lookup is handled in ``_dynamic_library_search``, so this
        is kept as a simplified placeholder.
        """
        return None

    def _get_cached_docs(self, library_name: str) -> Optional[Dict[str, Any]]:
        """Return the unexpired cache entry for *library_name*, or ``None``."""
        with closing(sqlite3.connect(self.cache_db)) as conn:
            row = conn.execute('''
                SELECT version, docs_content, docs_url, github_url
                FROM library_docs
                WHERE library_name = ? AND expires_at > ?
            ''', (library_name, time.time())).fetchone()

        if row is None:
            return None

        return {
            'version': row[0],
            'content': row[1],
            'docs_url': row[2],
            'github_url': row[3]
        }

    def _cache_docs(self, library_name: str, library_info: LibraryInfo, content: str):
        """Store documentation text in the cache, replacing any older entry."""
        now = time.time()
        expires_at = now + self.CACHE_TTL_SECONDS
        content_hash = hashlib.md5(content.encode()).hexdigest()

        with closing(sqlite3.connect(self.cache_db)) as conn:
            with conn:
                conn.execute('''
                    INSERT OR REPLACE INTO library_docs
                    (library_name, version, docs_content, docs_url, github_url,
                     cached_at, expires_at, content_hash)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    library_name,
                    library_info.version,
                    content,
                    library_info.docs_url,
                    library_info.github_url,
                    now,
                    expires_at,
                    content_hash
                ))

    async def close(self):
        """Release resources by closing the HTTP session if it is open."""
        if self.session and not self.session.closed:
            await self.session.close()


# Usage example
async def demo_real_context7():
    """Demonstrate the wrapper by looking up a few well-known libraries."""
    # `async with` closes the HTTP session even if a lookup raises.
    async with RealContext7Wrapper() as context7:
        print("=== 실제 Context7 래퍼 테스트 ===")

        # Look up documentation for real libraries.
        libraries = ['fastapi', 'react', 'django', 'express']

        for lib in libraries:
            print(f"\n🔍 {lib} 문서 검색 중...")
            result = await context7.search_library_docs(lib)

            if result['success']:
                print(f"✅ 성공: {lib} v{result['version']}")
                print(f"📚 문서 URL: {result['urls']['docs']}")
                print(f"🐙 GitHub: {result['urls']['github']}")
                print(f"📄 내용 미리보기: {result['content'][:200]}...")
            else:
                print(f"❌ 실패: {result['error']}")


if __name__ == "__main__":
    # Run the Context7 demo.
    asyncio.run(demo_real_context7())

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Skynotdie/mky'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.