Rowan MCP Server

rowan-mcp
rowan_mcp
functions_v2

molecule_lookup.py•7.61 KiB

"""
Molecule name to SMILES converter using Chemical Identifier Resolver (CIR).
Enables natural language molecule input for Rowan workflows.
"""

from typing import List, Dict, Annotated
from urllib.request import urlopen
from urllib.parse import quote
import logging

logger = logging.getLogger(__name__)


def _looks_like_smiles(text: str) -> bool:
    """Return True when *text* is plausibly a SMILES string rather than a name.

    The input must contain at least one SMILES punctuation character AND
    consist solely of SMILES tokens, starting with an atom. Requiring the
    grammar match keeps CAS numbers ('50-78-2'), hyphenated IUPAC names and
    'InChI=...' strings from being mistaken for SMILES just because they
    contain '-', '(' or '='.
    """
    import re  # function-scope import keeps this block self-contained

    if not any(ch in text for ch in "=#()[]@+-"):
        return False

    # Bracket atoms, organic-subset atoms (two-letter halogens first),
    # wildcard, and aromatic atoms.
    atom = r"\[[^\[\]\s]+\]|Br|Cl|[BCNOPSFI*]|[bcnops]"
    # Bonds, dot-disconnection, branches and ring-closure digits.
    other = r"[-=#$:/\\.()]|%\d{2}|\d"
    return re.fullmatch(rf"(?:{atom})(?:{atom}|{other})*", text) is not None


def molecule_lookup(
    molecule_name: Annotated[str, "Common name, IUPAC name, or CAS number of molecule (e.g., 'aspirin', 'caffeine', '50-78-2')"],
    fallback_to_input: Annotated[bool, "If lookup fails, return the input string assuming it might be SMILES"] = False
) -> str:
    """Convert molecule names to SMILES using Chemical Identifier Resolver (CIR).

    Args:
        molecule_name: Common name, IUPAC name, or CAS number of molecule (e.g., 'aspirin', 'caffeine', '50-78-2')
        fallback_to_input: If lookup fails, return the input string assuming it might be SMILES

    This tool enables natural language input for molecules by converting common names,
    IUPAC names, CAS numbers, and other identifiers to SMILES strings that can be
    used with Rowan workflows.

    Supported Input Types:
    - Common names: 'aspirin', 'caffeine', 'benzene', 'glucose'
    - IUPAC names: '2-acetoxybenzoic acid', '1,3,7-trimethylpurine-2,6-dione'
    - CAS numbers: '50-78-2' (aspirin), '58-08-2' (caffeine)
    - InChI strings
    - Strings that already look like SMILES (returned unchanged, without a
      network call and without chemical validation)

    Returns:
        SMILES string if successful. On failure (blank input, network error,
        unknown name, empty response) returns the stripped input when
        ``fallback_to_input`` is true, otherwise a human-readable error
        message. This function does not raise for lookup failures.

    Examples:
        # Common drug name
        result = molecule_lookup("aspirin")
        # Returns: "CC(=O)Oc1ccccc1C(=O)O"

        # IUPAC name
        result = molecule_lookup("2-acetoxybenzoic acid")
        # Returns: "CC(=O)Oc1ccccc1C(=O)O"

        # CAS number
        result = molecule_lookup("50-78-2")
        # Returns: "CC(=O)Oc1ccccc1C(=O)O"

        # Complex molecule
        result = molecule_lookup("paracetamol")
        # Returns: "CC(=O)Nc1ccc(O)cc1"
    """
    try:
        # Clean input
        molecule_name = molecule_name.strip()

        # A blank name can never resolve; fail before touching the network.
        if not molecule_name:
            raise ValueError("empty molecule name")

        # Short-circuit only for inputs that actually parse as SMILES tokens.
        if _looks_like_smiles(molecule_name):
            logger.info(f"Input '{molecule_name}' appears to be SMILES, returning as-is")
            return molecule_name

        # Query CIR service
        logger.info(f"Looking up molecule: {molecule_name}")
        url = f'http://cactus.nci.nih.gov/chemical/structure/{quote(molecule_name)}/smiles'

        # Context manager guarantees the HTTP connection is released.
        with urlopen(url, timeout=10) as response:
            body = response.read().decode('utf8').strip()

        # CIR may return multiple SMILES for some queries, take the first one
        smiles = body.split('\n', 1)[0].strip()
        if not smiles:
            raise ValueError("CIR returned an empty response")

        logger.info(f"Successfully converted '{molecule_name}' to SMILES: {smiles}")
        return smiles

    except Exception as e:
        # Best-effort tool boundary: every failure is reported via the return value.
        logger.warning(f"Failed to lookup '{molecule_name}': {e}")

        if fallback_to_input:
            logger.info(f"Returning original input as fallback: {molecule_name}")
            return molecule_name
        else:
            return f"Could not find SMILES for '{molecule_name}'. Please check the name or provide a valid SMILES string."


def batch_molecule_lookup(
    molecule_names: Annotated[List[str], "List of molecule names to convert to SMILES"],
    skip_failures: Annotated[bool, "Skip molecules that fail lookup instead of stopping"] = True
) -> Dict[str, str]:
    """Convert multiple molecule names to SMILES in batch.

    Args:
        molecule_names: List of molecule names to convert to SMILES
        skip_failures: Skip molecules that fail lookup instead of stopping

    Useful for preparing multiple molecules for workflows or screening.

    ``molecule_lookup`` reports failures through its return value (a message
    starting with "Could not find SMILES for ") rather than by raising, so
    failures are detected from that message as well as from exceptions.

    Returns:
        Dictionary mapping input names to SMILES strings (or error messages
        for names that could not be resolved when ``skip_failures`` is true)

    Raises:
        ValueError: If a lookup fails and ``skip_failures`` is false.

    Examples:
        # Drug screening set
        result = batch_molecule_lookup([
            "aspirin",
            "ibuprofen",
            "paracetamol",
            "caffeine"
        ])
        # Returns: {
        #     "aspirin": "CC(=O)Oc1ccccc1C(=O)O",
        #     "ibuprofen": "CC(C)Cc1ccc(C(C)C(=O)O)cc1",
        #     "paracetamol": "CC(=O)Nc1ccc(O)cc1",
        #     "caffeine": "CN1C=NC2=C1C(=O)N(C(=O)N2C)C"
        # }

        # Mixed input types
        result = batch_molecule_lookup([
            "benzene",           # Common name
            "50-78-2",          # CAS number
            "ethanoic acid"     # IUPAC name
        ])
    """
    # Prefix of the message molecule_lookup returns when it cannot resolve a name.
    failure_prefix = "Could not find SMILES for "
    results: Dict[str, str] = {}

    for name in molecule_names:
        cause = None
        try:
            outcome = molecule_lookup(name, fallback_to_input=False)
        except Exception as e:
            # Defensive: molecule_lookup normally swallows its own errors.
            cause = e
            failed = True
            message = f"Lookup failed: {str(e)}"
        else:
            failed = isinstance(outcome, str) and outcome.startswith(failure_prefix)
            message = outcome

        if not failed:
            results[name] = outcome
            continue

        if not skip_failures:
            raise ValueError(f"Failed to lookup '{name}': {message}") from cause

        logger.warning(f"Skipping {name}: {message}")
        results[name] = message

    return results


def validate_smiles(
    smiles: Annotated[str, "SMILES string to validate"]
) -> Dict[str, object]:
    """Validate a SMILES string and return basic molecular properties.

    Args:
        smiles: SMILES string to validate

    Uses RDKit to validate SMILES and extract basic properties. Empty,
    whitespace-only, or non-string input is rejected up front so it is never
    handed to the parser and reported as a valid molecule.

    Returns:
        Dictionary with validation status and properties if valid:
        - valid input: "valid" is True plus "canonical_smiles",
          "molecular_formula", "molecular_weight", "num_atoms", "num_bonds"
        - invalid input or parser error: "valid" is False plus "error"
        - RDKit not installed: "valid" is the string "unknown" plus "error"

    Examples:
        result = validate_smiles("CC(=O)O")
        # Returns: {
        #     "valid": True,
        #     "canonical_smiles": "CC(=O)O",
        #     "molecular_formula": "C2H4O2",
        #     "molecular_weight": 60.05
        # }
    """
    # Reject degenerate input before importing or calling RDKit.
    if not isinstance(smiles, str) or not smiles.strip():
        return {
            "valid": False,
            "error": "SMILES must be a non-empty string"
        }

    try:
        from rdkit import Chem
        # Import rdMolDescriptors explicitly instead of relying on it being
        # loaded as a side effect of another rdkit import.
        from rdkit.Chem import Descriptors, rdMolDescriptors

        mol = Chem.MolFromSmiles(smiles)

        if mol is None:
            return {
                "valid": False,
                "error": "Invalid SMILES string"
            }

        return {
            "valid": True,
            "canonical_smiles": Chem.MolToSmiles(mol),
            "molecular_formula": rdMolDescriptors.CalcMolFormula(mol),
            "molecular_weight": round(Descriptors.MolWt(mol), 2),
            "num_atoms": mol.GetNumAtoms(),
            "num_bonds": mol.GetNumBonds()
        }

    except ImportError:
        return {
            "valid": "unknown",
            "error": "RDKit not available for validation"
        }
    except Exception as e:
        return {
            "valid": False,
            "error": str(e)
        }


# Reference table of well-known molecules and their SMILES strings
# (documentation aid). Built from an ordered list of (name, SMILES) pairs;
# keys are lowercase names and insertion order follows the groups below.
COMMON_MOLECULES = dict(
    [
        # --- Drugs ---
        ("aspirin", "CC(=O)Oc1ccccc1C(=O)O"),
        ("paracetamol", "CC(=O)Nc1ccc(O)cc1"),
        ("acetaminophen", "CC(=O)Nc1ccc(O)cc1"),  # synonym of paracetamol
        ("ibuprofen", "CC(C)Cc1ccc(C(C)C(=O)O)cc1"),
        ("caffeine", "CN1C=NC2=C1C(=O)N(C(=O)N2C)C"),
        ("penicillin", "CC1(C)SC2C(NC(=O)Cc3ccccc3)C(=O)N2C1C(=O)O"),
        # --- Solvents ---
        ("water", "O"),
        ("ethanol", "CCO"),
        ("methanol", "CO"),
        ("acetone", "CC(=O)C"),
        ("dmso", "CS(=O)C"),
        ("chloroform", "C(Cl)(Cl)Cl"),
        ("benzene", "c1ccccc1"),
        ("toluene", "Cc1ccccc1"),
        # --- Organic compounds ---
        ("glucose", "C(C1C(C(C(C(O1)O)O)O)O)O"),
        ("acetic acid", "CC(=O)O"),
        ("ethanoic acid", "CC(=O)O"),  # IUPAC name for acetic acid
        ("phenol", "Oc1ccccc1"),
        ("aniline", "Nc1ccccc1"),
        ("naphthalene", "c1ccc2c(c1)cccc2"),
        # --- Amino acids ---
        ("glycine", "C(C(=O)O)N"),
        ("alanine", "CC(C(=O)O)N"),
        ("valine", "CC(C)C(C(=O)O)N"),
        ("leucine", "CC(C)CC(C(=O)O)N"),
        ("lysine", "C(CCN)CC(C(=O)O)N"),
    ]
)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/k-yenko/rowan-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

molecule_lookup.py•7.61 KiB

"""
Molecule name to SMILES converter using Chemical Identifier Resolver (CIR).
Enables natural language molecule input for Rowan workflows.
"""

from typing import List, Dict, Annotated
from urllib.request import urlopen
from urllib.parse import quote
import logging

logger = logging.getLogger(__name__)


def _looks_like_smiles(text: str) -> bool:
    """Return True when *text* is plausibly a SMILES string rather than a name.

    The input must contain at least one SMILES punctuation character AND
    consist solely of SMILES tokens, starting with an atom. Requiring the
    grammar match keeps CAS numbers ('50-78-2'), hyphenated IUPAC names and
    'InChI=...' strings from being mistaken for SMILES just because they
    contain '-', '(' or '='.
    """
    import re  # function-scope import keeps this block self-contained

    if not any(ch in text for ch in "=#()[]@+-"):
        return False

    # Bracket atoms, organic-subset atoms (two-letter halogens first),
    # wildcard, and aromatic atoms.
    atom = r"\[[^\[\]\s]+\]|Br|Cl|[BCNOPSFI*]|[bcnops]"
    # Bonds, dot-disconnection, branches and ring-closure digits.
    other = r"[-=#$:/\\.()]|%\d{2}|\d"
    return re.fullmatch(rf"(?:{atom})(?:{atom}|{other})*", text) is not None


def molecule_lookup(
    molecule_name: Annotated[str, "Common name, IUPAC name, or CAS number of molecule (e.g., 'aspirin', 'caffeine', '50-78-2')"],
    fallback_to_input: Annotated[bool, "If lookup fails, return the input string assuming it might be SMILES"] = False
) -> str:
    """Convert molecule names to SMILES using Chemical Identifier Resolver (CIR).

    Args:
        molecule_name: Common name, IUPAC name, or CAS number of molecule (e.g., 'aspirin', 'caffeine', '50-78-2')
        fallback_to_input: If lookup fails, return the input string assuming it might be SMILES

    This tool enables natural language input for molecules by converting common names,
    IUPAC names, CAS numbers, and other identifiers to SMILES strings that can be
    used with Rowan workflows.

    Supported Input Types:
    - Common names: 'aspirin', 'caffeine', 'benzene', 'glucose'
    - IUPAC names: '2-acetoxybenzoic acid', '1,3,7-trimethylpurine-2,6-dione'
    - CAS numbers: '50-78-2' (aspirin), '58-08-2' (caffeine)
    - InChI strings
    - Strings that already look like SMILES (returned unchanged, without a
      network call and without chemical validation)

    Returns:
        SMILES string if successful. On failure (blank input, network error,
        unknown name, empty response) returns the stripped input when
        ``fallback_to_input`` is true, otherwise a human-readable error
        message. This function does not raise for lookup failures.

    Examples:
        # Common drug name
        result = molecule_lookup("aspirin")
        # Returns: "CC(=O)Oc1ccccc1C(=O)O"

        # IUPAC name
        result = molecule_lookup("2-acetoxybenzoic acid")
        # Returns: "CC(=O)Oc1ccccc1C(=O)O"

        # CAS number
        result = molecule_lookup("50-78-2")
        # Returns: "CC(=O)Oc1ccccc1C(=O)O"

        # Complex molecule
        result = molecule_lookup("paracetamol")
        # Returns: "CC(=O)Nc1ccc(O)cc1"
    """
    try:
        # Clean input
        molecule_name = molecule_name.strip()

        # A blank name can never resolve; fail before touching the network.
        if not molecule_name:
            raise ValueError("empty molecule name")

        # Short-circuit only for inputs that actually parse as SMILES tokens.
        if _looks_like_smiles(molecule_name):
            logger.info(f"Input '{molecule_name}' appears to be SMILES, returning as-is")
            return molecule_name

        # Query CIR service
        logger.info(f"Looking up molecule: {molecule_name}")
        url = f'http://cactus.nci.nih.gov/chemical/structure/{quote(molecule_name)}/smiles'

        # Context manager guarantees the HTTP connection is released.
        with urlopen(url, timeout=10) as response:
            body = response.read().decode('utf8').strip()

        # CIR may return multiple SMILES for some queries, take the first one
        smiles = body.split('\n', 1)[0].strip()
        if not smiles:
            raise ValueError("CIR returned an empty response")

        logger.info(f"Successfully converted '{molecule_name}' to SMILES: {smiles}")
        return smiles

    except Exception as e:
        # Best-effort tool boundary: every failure is reported via the return value.
        logger.warning(f"Failed to lookup '{molecule_name}': {e}")

        if fallback_to_input:
            logger.info(f"Returning original input as fallback: {molecule_name}")
            return molecule_name
        else:
            return f"Could not find SMILES for '{molecule_name}'. Please check the name or provide a valid SMILES string."


def batch_molecule_lookup(
    molecule_names: Annotated[List[str], "List of molecule names to convert to SMILES"],
    skip_failures: Annotated[bool, "Skip molecules that fail lookup instead of stopping"] = True
) -> Dict[str, str]:
    """Convert multiple molecule names to SMILES in batch.

    Args:
        molecule_names: List of molecule names to convert to SMILES
        skip_failures: Skip molecules that fail lookup instead of stopping

    Useful for preparing multiple molecules for workflows or screening.

    ``molecule_lookup`` reports failures through its return value (a message
    starting with "Could not find SMILES for ") rather than by raising, so
    failures are detected from that message as well as from exceptions.

    Returns:
        Dictionary mapping input names to SMILES strings (or error messages
        for names that could not be resolved when ``skip_failures`` is true)

    Raises:
        ValueError: If a lookup fails and ``skip_failures`` is false.

    Examples:
        # Drug screening set
        result = batch_molecule_lookup([
            "aspirin",
            "ibuprofen",
            "paracetamol",
            "caffeine"
        ])
        # Returns: {
        #     "aspirin": "CC(=O)Oc1ccccc1C(=O)O",
        #     "ibuprofen": "CC(C)Cc1ccc(C(C)C(=O)O)cc1",
        #     "paracetamol": "CC(=O)Nc1ccc(O)cc1",
        #     "caffeine": "CN1C=NC2=C1C(=O)N(C(=O)N2C)C"
        # }

        # Mixed input types
        result = batch_molecule_lookup([
            "benzene",           # Common name
            "50-78-2",          # CAS number
            "ethanoic acid"     # IUPAC name
        ])
    """
    # Prefix of the message molecule_lookup returns when it cannot resolve a name.
    failure_prefix = "Could not find SMILES for "
    results: Dict[str, str] = {}

    for name in molecule_names:
        cause = None
        try:
            outcome = molecule_lookup(name, fallback_to_input=False)
        except Exception as e:
            # Defensive: molecule_lookup normally swallows its own errors.
            cause = e
            failed = True
            message = f"Lookup failed: {str(e)}"
        else:
            failed = isinstance(outcome, str) and outcome.startswith(failure_prefix)
            message = outcome

        if not failed:
            results[name] = outcome
            continue

        if not skip_failures:
            raise ValueError(f"Failed to lookup '{name}': {message}") from cause

        logger.warning(f"Skipping {name}: {message}")
        results[name] = message

    return results


def validate_smiles(
    smiles: Annotated[str, "SMILES string to validate"]
) -> Dict[str, object]:
    """Validate a SMILES string and return basic molecular properties.

    Args:
        smiles: SMILES string to validate

    Uses RDKit to validate SMILES and extract basic properties. Empty,
    whitespace-only, or non-string input is rejected up front so it is never
    handed to the parser and reported as a valid molecule.

    Returns:
        Dictionary with validation status and properties if valid:
        - valid input: "valid" is True plus "canonical_smiles",
          "molecular_formula", "molecular_weight", "num_atoms", "num_bonds"
        - invalid input or parser error: "valid" is False plus "error"
        - RDKit not installed: "valid" is the string "unknown" plus "error"

    Examples:
        result = validate_smiles("CC(=O)O")
        # Returns: {
        #     "valid": True,
        #     "canonical_smiles": "CC(=O)O",
        #     "molecular_formula": "C2H4O2",
        #     "molecular_weight": 60.05
        # }
    """
    # Reject degenerate input before importing or calling RDKit.
    if not isinstance(smiles, str) or not smiles.strip():
        return {
            "valid": False,
            "error": "SMILES must be a non-empty string"
        }

    try:
        from rdkit import Chem
        # Import rdMolDescriptors explicitly instead of relying on it being
        # loaded as a side effect of another rdkit import.
        from rdkit.Chem import Descriptors, rdMolDescriptors

        mol = Chem.MolFromSmiles(smiles)

        if mol is None:
            return {
                "valid": False,
                "error": "Invalid SMILES string"
            }

        return {
            "valid": True,
            "canonical_smiles": Chem.MolToSmiles(mol),
            "molecular_formula": rdMolDescriptors.CalcMolFormula(mol),
            "molecular_weight": round(Descriptors.MolWt(mol), 2),
            "num_atoms": mol.GetNumAtoms(),
            "num_bonds": mol.GetNumBonds()
        }

    except ImportError:
        return {
            "valid": "unknown",
            "error": "RDKit not available for validation"
        }
    except Exception as e:
        return {
            "valid": False,
            "error": str(e)
        }


# Reference table of well-known molecules and their SMILES strings
# (documentation aid). Built from an ordered list of (name, SMILES) pairs;
# keys are lowercase names and insertion order follows the groups below.
COMMON_MOLECULES = dict(
    [
        # --- Drugs ---
        ("aspirin", "CC(=O)Oc1ccccc1C(=O)O"),
        ("paracetamol", "CC(=O)Nc1ccc(O)cc1"),
        ("acetaminophen", "CC(=O)Nc1ccc(O)cc1"),  # synonym of paracetamol
        ("ibuprofen", "CC(C)Cc1ccc(C(C)C(=O)O)cc1"),
        ("caffeine", "CN1C=NC2=C1C(=O)N(C(=O)N2C)C"),
        ("penicillin", "CC1(C)SC2C(NC(=O)Cc3ccccc3)C(=O)N2C1C(=O)O"),
        # --- Solvents ---
        ("water", "O"),
        ("ethanol", "CCO"),
        ("methanol", "CO"),
        ("acetone", "CC(=O)C"),
        ("dmso", "CS(=O)C"),
        ("chloroform", "C(Cl)(Cl)Cl"),
        ("benzene", "c1ccccc1"),
        ("toluene", "Cc1ccccc1"),
        # --- Organic compounds ---
        ("glucose", "C(C1C(C(C(C(O1)O)O)O)O)O"),
        ("acetic acid", "CC(=O)O"),
        ("ethanoic acid", "CC(=O)O"),  # IUPAC name for acetic acid
        ("phenol", "Oc1ccccc1"),
        ("aniline", "Nc1ccccc1"),
        ("naphthalene", "c1ccc2c(c1)cccc2"),
        # --- Amino acids ---
        ("glycine", "C(C(=O)O)N"),
        ("alanine", "CC(C(=O)O)N"),
        ("valine", "CC(C)C(C(=O)O)N"),
        ("leucine", "CC(C)CC(C(=O)O)N"),
        ("lysine", "C(CCN)CC(C(=O)O)N"),
    ]
)