#!/usr/bin/env python3
"""
# 실제 Context7 래퍼 구현
# 실시간으로 최신 라이브러리 문서를 인터넷에서 가져와서 로컬 캐시에 저장
"""
import asyncio
import hashlib
import json
import logging
import re
import sqlite3
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Any, Optional
from urllib.parse import quote, quote_plus, urljoin, urlparse

import aiohttp
from bs4 import BeautifulSoup
@dataclass
class LibraryInfo:
    """Metadata describing a library and where its documentation lives.

    Attributes:
        name: Library name.
        version: Version string of the library.
        docs_url: URL of the documentation page.
        github_url: URL of the GitHub repository, if one is known.
        npm_url: npm-related URL for the library, if one is known.
        pypi_url: PyPI-related URL for the library, if one is known.
    """

    # Required fields.
    name: str
    version: str
    docs_url: str

    # Optional source locations; each defaults to "unknown" (None).
    github_url: Optional[str] = None
    npm_url: Optional[str] = None
    pypi_url: Optional[str] = None
class RealContext7Wrapper:
    """Wrapper that fetches library documentation live and caches it locally.

    A documentation URL is resolved either from a built-in table of
    well-known libraries or by querying the npm and PyPI registries. The
    page is downloaded over HTTP, reduced to plain text, and stored in a
    SQLite cache so that repeated lookups do not hit the network.
    """

    # Lifetime of a cached documentation entry (12 hours).
    _CACHE_TTL_SECONDS = 12 * 60 * 60
    # Maximum number of text lines kept from a documentation page.
    _MAX_DOC_LINES = 200

    # Placeholder texts that _fetch_docs_content returns when no real
    # documentation could be obtained. They are runtime strings handed back
    # to the caller, and are also used to keep failures out of the cache.
    _MSG_HTTP_FAILURE = "문서 페이지 접근 실패: HTTP "
    _MSG_FETCH_FAILURE = "문서 내용 가져오기 실패: "
    _MSG_NO_CONTENT = "문서 내용을 추출할 수 없습니다."
    _FAILURE_PREFIXES = (_MSG_HTTP_FAILURE, _MSG_FETCH_FAILURE, _MSG_NO_CONTENT)

    def __init__(self, cache_db: str = "real_context7_cache.db"):
        """Create the wrapper and make sure the cache tables exist.

        Args:
            cache_db: Path of the SQLite file used as the local cache.
        """
        self.cache_db = cache_db
        self.logger = logging.getLogger("RealContext7Wrapper")
        # The HTTP session is created lazily by _get_session().
        self.session: Optional[aiohttp.ClientSession] = None
        self._init_cache_db()

        # Documentation URL per library, grouped by language.
        self.doc_patterns = {
            'javascript': {
                'react': 'https://reactjs.org/docs/',
                'vue': 'https://vuejs.org/guide/',
                'angular': 'https://angular.io/docs',
                'express': 'https://expressjs.com/en/api.html',
                'axios': 'https://axios-http.com/docs/intro',
                'lodash': 'https://lodash.com/docs/',
                'moment': 'https://momentjs.com/docs/',
                'jquery': 'https://api.jquery.com/',
            },
            'python': {
                'django': 'https://docs.djangoproject.com/en/stable/',
                'flask': 'https://flask.palletsprojects.com/en/latest/',
                'fastapi': 'https://fastapi.tiangolo.com/',
                'requests': 'https://requests.readthedocs.io/en/latest/',
                'numpy': 'https://numpy.org/doc/stable/',
                'pandas': 'https://pandas.pydata.org/docs/',
                'matplotlib': 'https://matplotlib.org/stable/contents.html',
                'tensorflow': 'https://www.tensorflow.org/api_docs/python',
                'pytorch': 'https://pytorch.org/docs/stable/',
            },
            'java': {
                'spring': 'https://docs.spring.io/spring-framework/docs/current/reference/html/',
                'hibernate': 'https://hibernate.org/orm/documentation/',
                'junit': 'https://junit.org/junit5/docs/current/user-guide/',
            },
            'go': {
                'gin': 'https://gin-gonic.com/docs/',
                'echo': 'https://echo.labstack.com/guide/',
                'gorm': 'https://gorm.io/docs/',
            },
        }

    def _init_cache_db(self):
        """Create the cache tables in the SQLite file if they are missing."""
        conn = sqlite3.connect(self.cache_db)
        try:
            cursor = conn.cursor()
            # Cache of extracted library documentation.
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS library_docs (
                    library_name TEXT PRIMARY KEY,
                    version TEXT,
                    docs_content TEXT NOT NULL,
                    docs_url TEXT,
                    github_url TEXT,
                    cached_at REAL NOT NULL,
                    expires_at REAL NOT NULL,
                    content_hash TEXT
                )
            ''')
            # Cache of raw API responses.
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS api_cache (
                    url_hash TEXT PRIMARY KEY,
                    url TEXT NOT NULL,
                    response_content TEXT NOT NULL,
                    cached_at REAL NOT NULL,
                    expires_at REAL NOT NULL
                )
            ''')
            conn.commit()
        finally:
            # Release the connection even when a statement fails.
            conn.close()

    async def _get_session(self) -> "aiohttp.ClientSession":
        """Return the shared HTTP session, creating it when absent or closed."""
        if self.session is None or self.session.closed:
            # Send a browser-like User-Agent with every request.
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
            timeout = aiohttp.ClientTimeout(total=30)
            self.session = aiohttp.ClientSession(
                headers=headers,
                timeout=timeout,
                connector=aiohttp.TCPConnector(limit=10)
            )
        return self.session

    async def search_library_docs(self,
                                  library_name: str,
                                  version: Optional[str] = None,
                                  force_refresh: bool = False) -> Dict[str, Any]:
        """Look up documentation for a library, using the cache when possible.

        Args:
            library_name: Name of the library to look up.
            version: Preferred version; passed on to the library lookup.
            force_refresh: When true, skip the cache and fetch live.

        Returns:
            A dict with ``success``, ``library`` and ``source`` keys. On
            success it also carries ``version``, ``content`` and ``urls``
            (``docs`` / ``github``); ``source`` is ``'cache'`` or ``'live'``.
            On failure it carries ``error`` and ``source`` is ``'error'``.
            Exceptions are not propagated.
        """
        try:
            if not force_refresh:
                cached_docs = self._get_cached_docs(library_name)
                if cached_docs:
                    return {
                        'success': True,
                        'library': library_name,
                        'version': cached_docs.get('version', 'unknown'),
                        'content': cached_docs['content'],
                        'source': 'cache',
                        'urls': {
                            'docs': cached_docs.get('docs_url'),
                            'github': cached_docs.get('github_url')
                        }
                    }

            # Nothing usable in the cache: fetch live.
            library_info = await self._fetch_library_info(library_name, version)
            docs_content = await self._fetch_docs_content(library_info)

            # A failure placeholder must not be stored, otherwise one
            # transient error would be served from the cache until expiry.
            if self._is_cacheable(docs_content):
                self._cache_docs(library_name, library_info, docs_content)

            return {
                'success': True,
                'library': library_name,
                'version': library_info.version,
                'content': docs_content,
                'source': 'live',
                'urls': {
                    'docs': library_info.docs_url,
                    'github': library_info.github_url
                }
            }
        except Exception as e:
            # Public boundary: report the failure in the result dict.
            self.logger.error("라이브러리 문서 검색 실패: %s, 오류: %s", library_name, e)
            return {
                'success': False,
                'library': library_name,
                'error': str(e),
                'source': 'error'
            }

    def _is_cacheable(self, content: str) -> bool:
        """Return False when ``content`` is a fetch-failure placeholder."""
        return not content.startswith(self._FAILURE_PREFIXES)

    async def _fetch_library_info(self, library_name: str, version: Optional[str] = None) -> "LibraryInfo":
        """Resolve documentation and repository URLs for a library.

        The built-in ``doc_patterns`` table is consulted first (matching
        on the lower-cased name); unknown libraries go through the
        registry-based search.
        """
        key = library_name.lower()
        for patterns in self.doc_patterns.values():
            if key in patterns:
                github_url = await self._find_github_url(library_name)
                if not version:
                    version = await self._get_latest_version(library_name)
                return LibraryInfo(
                    name=library_name,
                    version=version or 'latest',
                    docs_url=patterns[key],
                    github_url=github_url
                )
        return await self._dynamic_library_search(library_name, version)

    @staticmethod
    def _normalize_repo_url(repo_url: Any) -> Optional[str]:
        """Turn a registry repository URL into a plain GitHub URL.

        Returns None when ``repo_url`` is not a string or does not mention
        github.com. Only a leading ``git+`` and a trailing ``.git`` are
        removed, so names that merely contain ``.git`` (for example
        ``user.github.io``) stay intact. ``git://`` and ``ssh://git@``
        prefixes are rewritten to ``https://``.
        """
        if not isinstance(repo_url, str) or 'github.com' not in repo_url.lower():
            return None
        url = repo_url.strip()
        if url.startswith('git+'):
            url = url[len('git+'):]
        for scheme in ('git://', 'ssh://git@'):
            if url.startswith(scheme):
                url = 'https://' + url[len(scheme):]
                break
        if url.endswith('.git'):
            url = url[:-len('.git')]
        return url

    async def _dynamic_library_search(self, library_name: str, version: Optional[str] = None) -> "LibraryInfo":
        """Discover library metadata via the npm registry, then PyPI.

        Each registry is tried in turn; any error while querying one is
        logged at debug level and the next source is tried. If neither
        yields a result, a Google search URL is returned as the docs URL.
        """
        session = await self._get_session()

        # npm registry lookup.
        try:
            # Keep '@' and '/' so scoped names (@scope/name) pass through.
            npm_url = f"https://registry.npmjs.org/{quote(library_name, safe='@/')}"
            async with session.get(npm_url) as response:
                if response.status == 200:
                    data = await response.json()
                    dist_tags = data.get('dist-tags') or {}
                    latest_version = dist_tags.get('latest', version or 'latest')
                    homepage = data.get('homepage', '')

                    # "repository" may be an object with a "url" or a string.
                    repository = data.get('repository')
                    if isinstance(repository, dict):
                        repo_url = repository.get('url', '')
                    else:
                        repo_url = repository
                    github_url = self._normalize_repo_url(repo_url)

                    # Fall back to the npm package page when no homepage is given.
                    docs_url = homepage or f"https://www.npmjs.com/package/{library_name}"
                    return LibraryInfo(
                        name=library_name,
                        version=latest_version,
                        docs_url=docs_url,
                        github_url=github_url,
                        npm_url=npm_url
                    )
        except Exception as e:
            self.logger.debug("npm 검색 실패: %s, 오류: %s", library_name, e)

        # PyPI lookup.
        try:
            pypi_url = f"https://pypi.org/pypi/{quote(library_name, safe='')}/json"
            async with session.get(pypi_url) as response:
                if response.status == 200:
                    data = await response.json()
                    info = data.get('info') or {}
                    latest_version = info.get('version', version or 'latest')
                    homepage = info.get('home_page', '')
                    # project_urls can be null in the response; treat as empty.
                    project_urls = info.get('project_urls') or {}

                    docs_url = (project_urls.get('Documentation') or
                                project_urls.get('Homepage') or
                                homepage or
                                f"https://pypi.org/project/{library_name}/")

                    # First project URL that points at GitHub, if any.
                    github_url = next(
                        (url for url in project_urls.values()
                         if isinstance(url, str) and 'github.com' in url.lower()),
                        None
                    )
                    return LibraryInfo(
                        name=library_name,
                        version=latest_version,
                        docs_url=docs_url,
                        github_url=github_url,
                        pypi_url=pypi_url
                    )
        except Exception as e:
            self.logger.debug("PyPI 검색 실패: %s, 오류: %s", library_name, e)

        # Neither registry knew the library.
        return LibraryInfo(
            name=library_name,
            version=version or 'latest',
            docs_url=f"https://www.google.com/search?q={quote_plus(library_name)}+documentation"
        )

    async def _fetch_docs_content(self, library_info: "LibraryInfo") -> str:
        """Download the documentation page and reduce it to plain text.

        Returns:
            The extracted text (at most ``_MAX_DOC_LINES`` lines plus a
            truncation marker), or a placeholder message when the page
            could not be fetched or had no extractable content. This
            method does not raise; failures are logged and reported
            through the returned placeholder.
        """
        session = await self._get_session()
        try:
            async with session.get(library_info.docs_url) as response:
                if response.status != 200:
                    return f"{self._MSG_HTTP_FAILURE}{response.status}"

                html_content = await response.text()
                soup = BeautifulSoup(html_content, 'html.parser')

                # Drop page chrome and non-content elements.
                for tag in soup(['script', 'style', 'nav', 'footer', 'header', 'aside']):
                    tag.decompose()

                # Use the first selector that matches as the main content.
                content_selectors = [
                    'main', 'article', '.content', '.documentation',
                    '#content', '.main-content', '.docs-content'
                ]
                main_content = None
                for selector in content_selectors:
                    main_content = soup.select_one(selector)
                    if main_content:
                        break
                if not main_content:
                    main_content = soup.find('body')
                if not main_content:
                    return self._MSG_NO_CONTENT

                text = main_content.get_text(separator='\n', strip=True)
                lines = [line.strip() for line in text.split('\n') if line.strip()]

                # Cap the length so very long pages do not bloat the result.
                if len(lines) > self._MAX_DOC_LINES:
                    lines = lines[:self._MAX_DOC_LINES]
                    lines.append("... (내용이 길어서 생략됨)")
                return '\n'.join(lines)
        except Exception as e:
            self.logger.error("문서 내용 가져오기 실패: %s, 오류: %s", library_info.docs_url, e)
            return f"{self._MSG_FETCH_FAILURE}{e}"

    async def _find_github_url(self, library_name: str) -> Optional[str]:
        """Find a GitHub repository URL for the library via the search API.

        Returns the first of the top five star-sorted results whose name
        equals ``library_name`` (case-insensitive), else the first result,
        else None. Errors are logged at debug level and yield None.
        """
        session = await self._get_session()
        try:
            # Pass the query through ``params`` so it is URL-encoded.
            search_params = {
                'q': library_name,
                'sort': 'stars',
                'order': 'desc',
                'per_page': '5',
            }
            async with session.get("https://api.github.com/search/repositories",
                                   params=search_params) as response:
                if response.status == 200:
                    data = await response.json()
                    items = data.get('items', [])
                    wanted = library_name.lower()
                    for item in items:
                        if item['name'].lower() == wanted:
                            return item['html_url']
                    # No exact name match: fall back to the top result.
                    if items:
                        return items[0]['html_url']
        except Exception as e:
            self.logger.debug("GitHub URL 찾기 실패: %s, 오류: %s", library_name, e)
        return None

    async def _get_latest_version(self, library_name: str) -> Optional[str]:
        """Return the latest version of a library; currently always None.

        Version discovery is handled in _dynamic_library_search, so this
        is a simplified stub.
        """
        return None

    def _get_cached_docs(self, library_name: str) -> Optional[Dict[str, Any]]:
        """Return the unexpired cache entry for a library, or None.

        The result dict has the keys ``version``, ``content``,
        ``docs_url`` and ``github_url``.
        """
        conn = sqlite3.connect(self.cache_db)
        try:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT version, docs_content, docs_url, github_url
                FROM library_docs
                WHERE library_name = ? AND expires_at > ?
            ''', (library_name, time.time()))
            row = cursor.fetchone()
        finally:
            conn.close()

        if row:
            return {
                'version': row[0],
                'content': row[1],
                'docs_url': row[2],
                'github_url': row[3]
            }
        return None

    def _cache_docs(self, library_name: str, library_info: "LibraryInfo", content: str):
        """Insert or replace the cache entry for a library.

        The entry expires ``_CACHE_TTL_SECONDS`` after it is written, and
        an MD5 hex digest of the content is stored alongside it.
        """
        # Read the clock once so cached_at and expires_at are consistent.
        now = time.time()
        expires_at = now + self._CACHE_TTL_SECONDS
        content_hash = hashlib.md5(content.encode()).hexdigest()

        conn = sqlite3.connect(self.cache_db)
        try:
            cursor = conn.cursor()
            cursor.execute('''
                INSERT OR REPLACE INTO library_docs
                (library_name, version, docs_content, docs_url, github_url, cached_at, expires_at, content_hash)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                library_name, library_info.version, content, library_info.docs_url,
                library_info.github_url, now, expires_at, content_hash
            ))
            conn.commit()
        finally:
            conn.close()

    async def close(self):
        """Close the HTTP session if one is open."""
        if self.session and not self.session.closed:
            await self.session.close()
# Usage example
async def demo_real_context7():
    """Run a small demo that looks up documentation for a few libraries.

    Prints the outcome of each lookup to stdout. The wrapper is closed
    in a ``finally`` block so its HTTP session is released even if a
    lookup or print raises.
    """
    context7 = RealContext7Wrapper()
    try:
        print("=== 실제 Context7 래퍼 테스트 ===")
        # Look up documentation for a handful of real libraries.
        libraries = ['fastapi', 'react', 'django', 'express']
        for lib in libraries:
            print(f"\n🔍 {lib} 문서 검색 중...")
            result = await context7.search_library_docs(lib)
            if result['success']:
                print(f"✅ 성공: {lib} v{result['version']}")
                print(f"📚 문서 URL: {result['urls']['docs']}")
                print(f"🐙 GitHub: {result['urls']['github']}")
                print(f"📄 내용 미리보기: {result['content'][:200]}...")
            else:
                print(f"❌ 실패: {result['error']}")
    finally:
        await context7.close()
if __name__ == "__main__":
    # Script entry point: run the live Context7 demo to completion.
    demo_coroutine = demo_real_context7()
    asyncio.run(demo_coroutine)