try:
from transformers import T5Tokenizer, T5ForConditionalGeneration
TRANSFORMERS_AVAILABLE = True
except ImportError:
    TRANSFORMERS_AVAILABLE = False

from ..utils.base_tool import BaseTool
from ..utils.errors import ChemMCPToolInitError
from ..utils.mcp_app import ChemMCPManager, run_mcp_server


@ChemMCPManager.register_tool
class MoleculeGenerator(BaseTool):
__version__ = "0.1.0"
name = "MoleculeGenerator"
func_name = "generate_molecule_from_description"
description = "Generate a molecule represented in SMILES with MolT5 that matches the given textual description."
implementation_description = "Uses [the MolT5-large model](laituan245/molt5-large-caption2smiles), a transformer-based neural network trained on molecule-text pairs, to generate SMILES representations from natural language descriptions."
categories = ["Molecule"]
tags = ["Molecular Information", "Text", "Neural Networks", "SMILES"]
required_envs = []
text_input_sig = [('description', 'str', 'N/A', 'Textual description of the molecule.')]
code_input_sig = [('description', 'str', 'N/A', 'Textual description of the molecule.')]
output_sig = [('smiles', 'str', 'SMILES representation of the molecule.')]
examples = [
{'code_input': {'description': 'The molecule is an ether in which the oxygen atom is linked to two ethyl groups. It has a role as an inhalation anaesthetic, a non-polar solvent and a refrigerant. It is a volatile organic compound and an ether.'}, 'text_input': {'description': 'The molecule is an ether in which the oxygen atom is linked to two ethyl groups. It has a role as an inhalation anaesthetic, a non-polar solvent and a refrigerant. It is a volatile organic compound and an ether.'}, 'output': {'smiles': 'CCOCC'}},
]
oss_dependencies = [
("MolT5", "https://github.com/blender-nlp/MolT5", "BSD 3-Clause")
]
    services_and_software = []

    def __init__(self, init: bool = True, interface: str = 'code'):
        # This tool needs the 'transformers' library, so module initialization is
        # deferred until the tool is actually used; this avoids requiring
        # 'transformers' at tool-registration time.
        super().__init__(init=False, interface=interface)

    def _init_modules(self):
if not TRANSFORMERS_AVAILABLE:
raise ChemMCPToolInitError("The 'transformers' library is required for MoleculeGenerator. Please install it with: pip install transformers")
        # Load the pre-trained MolT5 tokenizer and model; from_pretrained downloads
        # the checkpoint from the Hugging Face Hub on first use and caches it locally.
        model_name = "laituan245/molt5-large-caption2smiles"
self.tokenizer = T5Tokenizer.from_pretrained(model_name, model_max_length=512)
        self.model = T5ForConditionalGeneration.from_pretrained(model_name)

    def _run_base(self, description: str) -> str:
        # Lazily initialize the tokenizer and model on first use; _init_modules
        # raises ChemMCPToolInitError if 'transformers' is not installed.
        if not hasattr(self, 'tokenizer') or not hasattr(self, 'model'):
            self._init_modules()
        # Tokenize the description and decode a SMILES string via beam search
        # (5 beams, early stopping once all beams have finished).
        input_ids = self.tokenizer(description, return_tensors="pt").input_ids
        outputs = self.model.generate(input_ids, max_length=512, num_beams=5, early_stopping=True)
        smiles = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
return smiles
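

# Example (sketch): using the tool directly in Python, outside the MCP server.
# This assumes 'transformers' is installed and the model weights can be fetched
# from the Hugging Face Hub; the public call interface comes from BaseTool, so
# _run_base is used here purely for illustration.
#
#     tool = MoleculeGenerator(interface="code")
#     smiles = tool._run_base(
#         "The molecule is an ether in which the oxygen atom is linked to two ethyl groups."
#     )
#     print(smiles)  # expected to resemble "CCOCC"
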
if __name__ == "__main__":
run_mcp_server()