# tools_optimized.py (19.3 kB)
"""
FDA Taiwan Drug Search Tools - Optimized Version
台灣FDA藥品查詢工具 - 優化版本
移除重複邏輯,合併相似功能,提升代碼可維護性
"""
import re
import os
import time
import urllib.parse
import urllib.request
from typing import List, Dict, Optional, Union
import requests
from bs4 import BeautifulSoup
from PIL import Image, ImageDraw, ImageFont
import io
import json
import tempfile
from functools import wraps
# =============================================================================
# 公共配置和常數 - 提取重複的配置
# =============================================================================
class Config:
    """Endpoint URLs and default HTTP headers shared by the FDA client."""

    # Site root; every endpoint below is built on top of it.
    BASE_URL = "https://mcp.fda.gov.tw"

    SEARCH_URL = BASE_URL + "/q_insert/qcase_01A1.asp"
    CAPTCHA_URL = BASE_URL + "/CheckCode.aspx"
    # "{guid}" stays a literal placeholder, to be filled in with str.format().
    INSERT_URL_TEMPLATE = BASE_URL + "/insert/pdfcasefile/{guid}"

    # Default headers installed on the client's requests session.
    HEADERS = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/91.0.4472.124 Safari/537.36"
        ),
        "Referer": BASE_URL,
        "Content-Type": "application/x-www-form-urlencoded",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "zh-TW,zh;q=0.8,en-US;q=0.5,en;q=0.3",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
    }
# =============================================================================
# 演示數據庫 - 統一管理演示數據
# =============================================================================
class DemoData:
    """Canned drug records used as demo search results."""

    # Demo drug "database", keyed by a lower-case English search keyword.
    DRUG_DATABASE = {
        "aspirin": [
            {
                "license_no": "02005881",
                "cname": "阿斯匹林腸溶膜衣錠100毫克",
                "ename": "Aspirin Enteric-Coated Tablets 100mg",
                "agent_name": "台灣拜耳股份有限公司",
                "insert_guid": "demo-aspirin-001-abc123",
            },
            {
                "license_no": "02005882",
                "cname": "阿斯匹林腸溶膜衣錠300毫克",
                "ename": "Aspirin Enteric-Coated Tablets 300mg",
                "agent_name": "台灣拜耳股份有限公司",
                "insert_guid": "demo-aspirin-002-def456",
            },
        ],
        "metformin": [
            {
                "license_no": "02012345",
                "cname": "美弗明膜衣錠500毫克",
                "ename": "Metformin HCl Tablets 500mg",
                "agent_name": "美商默沙東藥廠股份有限公司台灣分公司",
                "insert_guid": "demo-metformin-001-ghi789",
            },
        ],
        "insulin": [
            {
                "license_no": "02011111",
                "cname": "胰島素注射液",
                "ename": "Insulin Injection",
                "agent_name": "諾和諾德藥品股份有限公司",
                "insert_guid": "demo-insulin-001-jkl012",
            },
        ],
    }

    @classmethod
    def get_demo_results(cls, search_term: str) -> List[Dict]:
        """Return demo records matching *search_term* (case-insensitive).

        Matching order:
          1. exact match of the lower-cased term against a database key;
          2. substring match in either direction between the term and each
             word of a key (an empty term is a substring of every key, so it
             matches every record);
          3. otherwise a single generic placeholder record built from the
             term.

        The returned list and the dicts inside it are fresh copies, so callers
        may mutate them without altering ``DRUG_DATABASE``.
        """
        term = search_term.lower()

        # 1. Exact key match.
        exact = cls.DRUG_DATABASE.get(term)
        if exact is not None:
            return [dict(drug) for drug in exact]

        # 2. Partial match: key word inside the term, or the term inside a key word.
        matches: List[Dict] = []
        for key, drugs in cls.DRUG_DATABASE.items():
            if any(word in term or term in word for word in key.split()):
                matches.extend(dict(drug) for drug in drugs)
        if matches:
            return matches

        # 3. Nothing matched: fabricate one generic demo record.
        return [
            {
                "license_no": "DEMO-001",
                "cname": f"{search_term.title()} 藥品演示",
                "ename": f"{search_term.title()} Drug Demo",
                "agent_name": "演示製藥公司",
                "insert_guid": f"demo-{search_term.lower()}-001-mno345",
            }
        ]
# =============================================================================
# 工具函數 - 統一的公用函數
# =============================================================================
def create_demo_image(text: str = "1234", width: int = 100, height: int = 30) -> tuple[bytes, str]:
    """Render *text* in black on a white PNG to serve as a demo CAPTCHA image.

    Args:
        text: Characters to draw on the image.
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        ``(png_bytes, message)`` on success. If the image cannot be produced,
        ``(b"", message)`` where the message tells the user to type *text*
        manually.
    """
    try:
        image = Image.new('RGB', (width, height), 'white')
        draw = ImageDraw.Draw(image)
        try:
            font = ImageFont.load_default()
        except Exception:
            # No loadable default font: let draw.text() pick its own.
            font = None
        draw.text((25, 5), text, fill='black', font=font)

        buffer = io.BytesIO()
        image.save(buffer, format='PNG')
        return buffer.getvalue(), "演示驗證碼圖片已生成"
    except Exception:
        # Best effort: the caller still gets a usable message without an image.
        return b"", f"無法生成驗證碼圖片,請手動輸入: {text}"


def validate_captcha(captcha: str) -> bool:
    """Return True when *captcha* is exactly four ASCII digits.

    Surrounding whitespace is ignored. Non-string input returns False rather
    than raising. Non-ASCII digits (full-width digits, superscripts, ...) are
    rejected even though ``str.isdigit()`` accepts them, because the value is
    sent to the server unchanged.
    """
    if not isinstance(captcha, str):
        return False
    code = captcha.strip()
    return len(code) == 4 and code.isascii() and code.isdigit()


def validate_drug_name(name: str) -> bool:
    """Return True when *name* contains at least one non-whitespace character."""
    trimmed = name.strip()
    return trimmed != ""


def create_demo_pdf_content(guid: str) -> str:
    """Return the text of a one-page placeholder PDF that mentions *guid*.

    The GUID is written inside a PDF literal string, which is delimited by
    parentheses, so backslashes and parentheses in *guid* are escaped before
    interpolation to keep the string well formed.

    NOTE(review): the ``/Length`` value, the xref offsets and ``startxref``
    are fixed literals and are not recomputed from the actual content, and no
    font resource is declared for ``/F1``. Strict PDF readers may therefore
    reject or mis-render the file -- confirm before relying on it beyond demo
    use.
    """
    # Backslash, "(" and ")" are the characters that must be escaped inside a
    # PDF literal string.
    escaped_guid = guid.translate(
        str.maketrans({"\\": "\\\\", "(": "\\(", ")": "\\)"})
    )
    return f"""%PDF-1.4
1 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj
<<
/Type /Pages
/Kids [3 0 R]
/Count 1
>>
endobj
3 0 obj
<<
/Type /Page
/Parent 2 0 R
/MediaBox [0 0 612 792]
/Contents 4 0 R
>>
endobj
4 0 obj
<<
/Length 100
>>
stream
BT
/F1 12 Tf
50 700 Td
(台灣FDA藥品仿單 - 演示文件) Tj
0 -20 Td
(GUID: {escaped_guid}) Tj
0 -20 Td
(這是一個演示PDF文件) Tj
0 -20 Td
(實際使用時將下載真實仿單) Tj
ET
endstream
endobj
xref
0 5
0000000000 65535 f
0000000010 00000 n
0000000053 00000 n
0000000109 00000 n
0000000271 00000 n
trailer
<<
/Size 5
/Root 1 0 R
>>
startxref
430
%%EOF"""


# =============================================================================
# 錯誤處理裝飾器 - 統一錯誤處理
# =============================================================================
def handle_api_errors(operation_name: str):
    """Build a decorator that turns exceptions into an error dict.

    The wrapped function's return value is passed through untouched. If it
    raises, the wrapper returns ``{"status": "error", "error_message": ...}``
    where the message starts with *operation_name*. Request timeouts and
    connection errors get dedicated messages; any other ``Exception`` is
    reported with its string form.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            try:
                outcome = func(*args, **kwargs)
            except Exception as exc:
                # Timeout is tested first, matching the precedence of the
                # dedicated messages.
                if isinstance(exc, requests.exceptions.Timeout):
                    message = f"{operation_name}超時,請檢查網路連接"
                elif isinstance(exc, requests.exceptions.ConnectionError):
                    message = f"{operation_name}連接失敗,請檢查網路設定"
                else:
                    message = f"{operation_name}失敗: {str(exc)}"
                return {"status": "error", "error_message": message}
            return outcome

        return wrapper

    return decorator


def validate_input(required_fields: List[str]):
    """Build a decorator that rejects calls missing any of *required_fields*.

    A field counts as supplied when it is passed by keyword or positionally
    and its value is truthy. Positional arguments are mapped to parameter
    names through the wrapped function's signature, so both ``f("x", "y")``
    and ``f(a="x", b="y")`` are accepted.

    Raises:
        ValueError: from the wrapper, when a required field is absent or falsy.
    """
    import inspect  # local import keeps this block self-contained

    def decorator(func):
        try:
            signature = inspect.signature(func)
        except (TypeError, ValueError):
            # Some callables expose no signature; only keywords can be checked.
            signature = None

        def _supplied_arguments(args, kwargs) -> dict:
            """Map the call's arguments to parameter names."""
            if signature is None:
                return dict(kwargs)
            try:
                bound = signature.bind_partial(*args, **kwargs)
            except TypeError:
                # Malformed call; the real call below raises the proper error.
                return dict(kwargs)
            supplied = {}
            for name, value in bound.arguments.items():
                if signature.parameters[name].kind is inspect.Parameter.VAR_KEYWORD:
                    # Flatten **kwargs so its keys can be required fields too.
                    supplied.update(value)
                else:
                    supplied[name] = value
            return supplied

        @wraps(func)
        def wrapper(*args, **kwargs):
            supplied = _supplied_arguments(args, kwargs)
            for field in required_fields:
                if not supplied.get(field):
                    raise ValueError(f"缺少必填參數: {field}")
            return func(*args, **kwargs)

        return wrapper

    return decorator
# =============================================================================
# 優化的FDA客戶端
# =============================================================================
class FDATaiwanClient:
    """Client for the Taiwan FDA drug-search site, with demo fallbacks.

    Every public operation first tries the real site through one shared
    ``requests`` session. When the request fails or returns something
    unusable, it falls back to demo data (a generated CAPTCHA image, canned
    drug records, or a placeholder PDF) instead of raising.
    """

    def __init__(self):
        # A single session carries the default headers (and any cookies the
        # site sets) across all requests made by this client.
        self.session = requests.Session()
        self.session.headers.update(Config.HEADERS)

    @handle_api_errors("獲取驗證碼圖片")
    def get_captcha_image(self) -> tuple[bytes, str]:
        """Fetch the CAPTCHA image, falling back to a generated demo image.

        Returns:
            ``(image_bytes, message)``. If the first response is HTML rather
            than an image, the site root is requested once and the CAPTCHA is
            retried. Any failure yields the demo image instead.
        """
        try:
            response = self.session.get(Config.CAPTCHA_URL, timeout=10)
            if response.status_code == 200:
                content_type = response.headers.get('Content-Type', '').lower()
                if 'image' in content_type:
                    return response.content, "驗證碼圖片已獲取"
                if 'text/html' in content_type:
                    # Got an HTML page instead of an image: visit the site
                    # root first, then retry the CAPTCHA once.
                    self.session.get(Config.BASE_URL, timeout=10)
                    response = self.session.get(Config.CAPTCHA_URL, timeout=10)
                    retry_type = response.headers.get('Content-Type', '').lower()
                    if response.status_code == 200 and 'image' in retry_type:
                        return response.content, "驗證碼圖片已獲取(重新嘗試)"
            # No real CAPTCHA could be obtained: use the demo image.
            return self._get_fallback_captcha()
        except Exception:
            # Deliberate best effort: any failure degrades to the demo image.
            return self._get_fallback_captcha()

    def _get_fallback_captcha(self) -> tuple[bytes, str]:
        """Return the demo CAPTCHA image (fixed text "1234")."""
        return create_demo_image("1234")

    @validate_input(['english_name', 'captcha'])
    @handle_api_errors("藥品搜索")
    def search_drugs_by_english_name(self, english_name: str, captcha: str) -> Union[List[Dict], Dict]:
        """Search drugs by English product name.

        Args:
            english_name: English product name; must not be blank.
            captcha: CAPTCHA text; must pass ``validate_captcha``.

        Returns:
            A list of drug-info dicts parsed from the site. If the request or
            the parsing fails, demo records for *english_name* are returned.
            If an argument fails the format checks below, the
            ``handle_api_errors`` wrapper returns an error dict instead.

        Raises:
            ValueError: from ``validate_input`` when a required argument is
                missing or empty.
        """
        if not validate_drug_name(english_name):
            raise ValueError("英文品名不能為空")
        if not validate_captcha(captcha):
            raise ValueError("驗證碼必須是4位數字")

        payload = {
            's_ename': english_name,
            'captcha': captcha,
            'search_type': 'ename',
        }
        try:
            response = self.session.post(Config.SEARCH_URL, data=payload, timeout=30)
            if response.status_code == 200:
                return self._parse_search_results(response.text)
            return self._get_fallback_results(english_name)
        except Exception:
            # Deliberate best effort: network or parse failures use demo data.
            return self._get_fallback_results(english_name)

    def _get_fallback_results(self, search_term: str) -> List[Dict]:
        """Return demo search results for *search_term*."""
        return DemoData.get_demo_results(search_term)

    @staticmethod
    def _ensure_parent_dir(output_path: str) -> None:
        """Create the directory that will contain *output_path*, if any."""
        parent = os.path.dirname(output_path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    @handle_api_errors("仿單下載")
    def download_insert(self, guid: str, output_path: str) -> Dict:
        """Download the package-insert PDF for *guid* to *output_path*.

        GUIDs starting with ``"demo-"`` produce the placeholder PDF directly.
        For other GUIDs the real file is downloaded. If that fails (network
        error, non-200 status, or a non-PDF body), the placeholder PDF is
        written instead.

        Returns:
            A dict with ``status``, and on success ``file_path``,
            ``file_size`` and ``message``. The message states whether the
            file is a demo document.
        """
        if guid.startswith("demo-"):
            return self._create_demo_pdf(guid, output_path)

        try:
            url = Config.INSERT_URL_TEMPLATE.format(guid=guid)
            response = self.session.get(url, timeout=60)
            content_type = response.headers.get('Content-Type', '')
            looks_like_pdf = (
                'application/pdf' in content_type
                or response.content.startswith(b'%PDF')
            )
            if response.status_code == 200 and looks_like_pdf:
                # Create the target directory first, so a missing folder does
                # not turn a good download into the demo fallback.
                self._ensure_parent_dir(output_path)
                with open(output_path, 'wb') as f:
                    f.write(response.content)
                return {
                    "status": "success",
                    "file_path": output_path,
                    "file_size": len(response.content),
                    "message": f"仿單已成功下載到: {output_path}",
                }
        except Exception:
            # Deliberate best effort: fall through to the demo PDF below.
            pass
        # NOTE(review): a failed real download still reports "success" with a
        # demo file; only the message text distinguishes it -- confirm that
        # callers check it.
        return self._create_demo_pdf(guid, output_path)

    def _create_demo_pdf(self, guid: str, output_path: str) -> Dict:
        """Write the placeholder PDF for *guid* to *output_path*.

        Returns a success dict with the path and byte size, or an error dict
        if the file cannot be written.
        """
        try:
            self._ensure_parent_dir(output_path)
            payload = create_demo_pdf_content(guid).encode('utf-8')
            with open(output_path, 'wb') as f:
                f.write(payload)
            return {
                "status": "success",
                "file_path": output_path,
                "file_size": len(payload),
                "message": f"演示仿單已創建: {output_path} (這是演示文件,實際使用時將下載真實仿單)",
            }
        except Exception as e:
            return {
                "status": "error",
                "error_message": f"創建演示PDF時發生錯誤: {str(e)}",
            }

    @validate_input(['captcha'])
    @handle_api_errors("進階搜索")
    def advanced_search(self, **kwargs) -> Union[List[Dict], Dict]:
        """Search by any combination of criteria.

        Keyword Args:
            captcha: CAPTCHA text (required).
            license_no, cname, ename, agent_name, ingredient: search
                criteria; at least one must be non-empty.

        Returns:
            A list of drug-info dicts parsed from the site. If the request or
            the parsing fails, demo results are returned. If the CAPTCHA
            format is invalid or no criterion is given, the
            ``handle_api_errors`` wrapper returns an error dict instead.

        Raises:
            ValueError: from ``validate_input`` when ``captcha`` is missing
                or empty.
        """
        if not validate_captcha(kwargs.get('captcha', '')):
            raise ValueError("驗證碼必須是4位數字")

        search_conditions = {
            key: kwargs.get(key, '')
            for key in ('license_no', 'cname', 'ename', 'agent_name', 'ingredient')
        }
        if not any(search_conditions.values()):
            raise ValueError("請至少提供一個查詢條件")

        try:
            # Only non-empty criteria are sent to the server.
            data = {k: v for k, v in search_conditions.items() if v}
            data['captcha'] = kwargs['captcha']
            response = self.session.post(Config.SEARCH_URL, data=data, timeout=30)
            if response.status_code == 200:
                return self._parse_search_results(response.text)
            return self._get_advanced_demo_results(kwargs)
        except Exception:
            # Deliberate best effort: network or parse failures use demo data.
            return self._get_advanced_demo_results(kwargs)

    def _get_advanced_demo_results(self, kwargs: Dict) -> List[Dict]:
        """Build demo results for an advanced search.

        Records are looked up by ``ename`` and ``cname`` (a record matched by
        both appears once), then filtered by ``agent_name`` when given. If
        nothing remains, one generic placeholder record is returned.
        """
        results: List[Dict] = []
        for condition_key in ('ename', 'cname'):
            term = kwargs.get(condition_key)
            if not term:
                continue
            for drug in DemoData.get_demo_results(term):
                if drug not in results:  # skip records matched by both terms
                    results.append(drug)

        if kwargs.get('agent_name'):
            agent_filter = kwargs['agent_name'].lower()
            results = [r for r in results if agent_filter in r.get('agent_name', '').lower()]

        if not results:
            results = [{
                "license_no": "ADV-DEMO-001",
                "cname": "進階查詢演示藥品",
                "ename": "Advanced Search Demo Drug",
                "agent_name": "演示製藥公司",
                "insert_guid": "demo-advanced-001-pqr678",
            }]
        return results

    def _parse_search_results(self, html_content: str) -> List[Dict]:
        """Parse the search-result HTML into a list of drug-info dicts.

        The first ``<table>`` is used. Failing that, the first element whose
        class contains "result" or "table" is used. If neither exists but the
        page contains a "no data" phrase, an empty list is returned. The
        first row is treated as the header, and rows with fewer than four
        cells are skipped.

        Raises:
            Exception: when no result container is found or parsing fails.
                The original error is chained as the cause.
        """
        try:
            soup = BeautifulSoup(html_content, 'html.parser')

            table = soup.find('table')
            if table is None:
                candidates = soup.find_all(
                    ['table', 'div'],
                    class_=re.compile(r'.*result.*|.*table.*', re.I),
                )
                if candidates:
                    table = candidates[0]
                elif soup.find(string=re.compile(r'查無資料|no data|無結果', re.I)):
                    return []
                else:
                    raise Exception("無法找到搜索結果表格")

            results = []
            for row in table.find_all('tr')[1:]:  # skip the header row
                cells = row.find_all(['td', 'th'])
                if len(cells) >= 4:
                    drug_info = self._extract_drug_info(cells)
                    if drug_info:
                        results.append(drug_info)
            return results
        except Exception as e:
            raise Exception(f"解析搜索結果時發生錯誤: {str(e)}") from e

    def _extract_drug_info(self, cells: List) -> Optional[Dict]:
        """Turn one table row's cells into a drug-info dict.

        The first four cells map, in order, to ``license_no``, ``cname``,
        ``ename`` and ``agent_name``. ``insert_guid`` is added when the first
        cell holds a link whose href contains ``pdfcasefile/<guid>``.

        Returns:
            The dict, or None when *cells* is empty or extraction fails.
        """
        try:
            field_names = ('license_no', 'cname', 'ename', 'agent_name')
            # zip() stops at the shorter input, so short rows fill fewer fields.
            drug_info = {
                name: cell.get_text(strip=True)
                for name, cell in zip(field_names, cells)
            }

            if cells:
                for link in cells[0].find_all('a', href=True):
                    guid_match = re.search(r'pdfcasefile/([^"\']+)', link.get('href', ''))
                    if guid_match:
                        drug_info['insert_guid'] = guid_match.group(1)
                        break

            # Only an empty row yields an empty dict here.
            return drug_info or None
        except Exception:
            return None
# =============================================================================
# 格式化工具函數
# =============================================================================
def format_search_results(results: List[Dict]) -> str:
    """Render search results as readable multi-line text.

    Returns a fixed "nothing found" sentence for an empty result set.
    Otherwise returns a count header followed by one section per drug, each
    listing only the fields present in that drug's dict.
    """
    if not results:
        return "未找到符合條件的藥品資訊。"

    # (dict key, printed label) pairs in output order.
    labelled_fields = (
        ("license_no", "許可證號:"),
        ("cname", "中文品名:"),
        ("ename", "英文品名:"),
        ("agent_name", "申請商:"),
        ("insert_guid", "仿單GUID:"),
    )

    pieces = [f"找到 {len(results)} 筆藥品資訊:\n\n"]
    for position, drug in enumerate(results, start=1):
        pieces.append(f"=== 藥品 {position} ===\n")
        for key, label in labelled_fields:
            if key in drug:
                pieces.append(f"{label}{drug[key]}\n")
        pieces.append("\n")  # blank line between drug sections
    return "".join(pieces)