Skip to main content
Glama
gscfwid

NCCN Guidelines MCP Server

by gscfwid

download_pdf

Download PDF files from URLs with optional NCCN authentication for accessing clinical guidelines.

Instructions

Download a PDF file from the specified URL, with optional NCCN login credentials.

Args:
    url: The URL of the PDF file to download

The saved filename is derived from the URL, and NCCN credentials are taken from the NCCN_USERNAME and NCCN_PASSWORD environment variables; filename, username and password are not tool arguments (the input schema below accepts only url).

Returns:
    String indicating success/failure and the path to the downloaded file

Input Schema

Table | JSON Schema

| Name | Required | Description | Default |
|------|----------|-------------|---------|
| url  | Yes      |             |         |

Implementation Reference

  • MCP tool handler for 'download_pdf': orchestrates authentication, directory setup, and delegates download to NCCNDownloader instance.
    @mcp.tool()
    async def download_pdf(url: str) -> str:
        """
        Download a PDF file from the specified URL, using NCCN login credentials when configured.

        The saved filename is chosen by the downloader, and authentication uses
        the module-level NCCN_USERNAME / NCCN_PASSWORD configuration. Neither is
        a tool argument: the only input is the URL.

        Args:
            url: The URL of the PDF file to download

        Returns:
            String indicating success/failure and the path to the downloaded file
        """
        try:
            # Ensure download directory exists; parents=True so a nested
            # DOWNLOAD_DIR does not fail when its parent is missing.
            download_path = current_dir / DOWNLOAD_DIR
            download_path.mkdir(parents=True, exist_ok=True)
            
            # Credentials come only from the global configuration; this tool
            # exposes no per-call credential parameters.
            auth_username = NCCN_USERNAME
            auth_password = NCCN_PASSWORD
            
            # Create downloader instance with credentials if available,
            # otherwise reuse the shared module-level downloader.
            if auth_username and auth_password:
                downloader_instance = NCCNDownloader(auth_username, auth_password)
                logger.info(f"Using NCCN authentication for user: {auth_username}")
            else:
                downloader_instance = downloader
                logger.info("No NCCN authentication configured - attempting anonymous download")
            
            # Download the PDF
            success, actual_filename = await downloader_instance.download_pdf(
                pdf_url=url,
                download_dir=str(download_path),
                username=auth_username,
                password=auth_password,
                skip_if_exists=True
            )
            
            if success:
                # Build the full path from the filename the downloader actually used
                actual_full_path = download_path / actual_filename
                logger.info(f"PDF downloaded successfully: {actual_full_path}")
                return f"PDF downloaded successfully: {actual_full_path} (filename: {actual_filename})"
            
            error_msg = f"Failed to download PDF from {url} (attempted filename: {actual_filename})."
            if not (auth_username and auth_password):
                # Only point at the configuration mechanism that really exists.
                error_msg += " You may need to provide NCCN login credentials via environment variables (NCCN_USERNAME, NCCN_PASSWORD)."
            logger.error(error_msg)
            return error_msg
        
        except Exception as e:
            # Tool boundary: report the failure as text instead of raising.
            logger.error(f"Error downloading PDF: {str(e)}")
            return f"Error downloading PDF: {str(e)}"
  • Core implementation of PDF downloading in NCCNDownloader class, including caching check, HTTP requests, automatic login detection and handling, and recursive re-download after login.
    async def download_pdf(self, pdf_url, download_dir=None, username=None, password=None, skip_if_exists=True, max_cache_age_days=PDF_CACHE_MAX_AGE_DAYS, _login_attempted=False):
        """
        Downloads a PDF file, automatically logging in if required.
        
        Args:
            pdf_url (str): URL of the PDF file.
            download_dir (str, optional): Directory to save the PDF. Defaults to current directory.
            username (str, optional): Username (email address), required if not already logged in.
            password (str, optional): Password, required if not already logged in.
            skip_if_exists (bool): Whether to skip download if the file already exists. Defaults to True.
            max_cache_age_days (int): Maximum cache file validity period (days). Defaults to PDF_CACHE_MAX_AGE_DAYS.
            _login_attempted (bool): Internal flag set on the retry that follows a
                login; callers should leave it at its default. It limits the
                login-and-retry cycle to a single attempt.
        
        Returns:
            tuple: (success (bool), saved_filename (str))
        """
        # Function-scope import so this method does not rely on the file's import list.
        from urllib.parse import urlsplit

        # Bind the fallback name before the try block so the except handler can
        # always return a filename, even when URL parsing itself raises.
        filename = 'nccn_guideline.pdf'
        try:
            # Take the filename from the URL *path* only: a query string or
            # fragment must not hide the ".pdf" suffix and force every such URL
            # onto the same fallback cache file.
            url_basename = os.path.basename(urlsplit(pdf_url).path)
            if url_basename.endswith('.pdf'):
                filename = url_basename
            
            if download_dir:
                os.makedirs(download_dir, exist_ok=True)
            else:
                download_dir = os.getcwd() # Use current working directory if not specified
            
            save_path = os.path.join(download_dir, filename)
            
            # Check if file already exists and is still valid (not too old)
            if skip_if_exists:
                cache_info = check_pdf_cache_age(save_path, max_cache_age_days)
                if cache_info['exists']:
                    if cache_info['is_valid']:
                        logger.info(f"Using valid cached PDF: {save_path}")
                        logger.info(f"File size: {cache_info['size']} bytes, age: {cache_info['age_days']} days")
                        return True, filename
                    else:
                        logger.info(f"PDF cache expired ({cache_info['age_days']} days > {max_cache_age_days} days) or corrupted, re-downloading...")
                else:
                    logger.info(f"PDF not found in cache, downloading: {save_path}")
            
            logger.info(f"Downloading PDF: {pdf_url}")
            
            # Set request headers for PDF download
            pdf_headers = {
                'Accept': 'application/pdf,*/*',
                'Referer': 'https://www.nccn.org/',
            }
            
            # First, make a regular GET request to check the response
            response = await self.session.get(pdf_url, headers=pdf_headers, follow_redirects=True)
            
            logger.info(f"Response status: {response.status_code}")
            logger.info(f"Final URL: {response.url}")
            
            if response.status_code == 200:
                content_type = response.headers.get('Content-Type', '')
                logger.info(f"Content-Type: {content_type}")
                
                # Check if this is actually a PDF
                if 'application/pdf' in content_type:
                    # This is a PDF, save it directly
                    with open(save_path, 'wb') as f:
                        f.write(response.content)
                    
                    file_size = os.path.getsize(save_path)
                    logger.info(f"PDF file saved to: {save_path}")
                    logger.info(f"File size: {file_size} bytes")
                    return True, filename
                
                elif 'text/html' in content_type:
                    # This is HTML, likely a login page
                    response_text = response.text
                    lowered_text = response_text.lower()
                    
                    if 'login' in lowered_text or 'log in' in lowered_text:
                        logger.info("Login required detected, attempting automatic login...")
                        return await self._login_and_retry(
                            pdf_url, filename, download_dir, username, password,
                            skip_if_exists, max_cache_age_days, _login_attempted,
                        )
                    else:
                        logger.warning("Received HTML response but no login form detected.")
                        logger.debug(f"Response preview: {response_text[:500]}...")
                        return False, filename
                else:
                    logger.warning(f"Unexpected content type: {content_type}")
                    return False, filename
            
            elif response.status_code == 302:
                # Handle redirect manually if needed.
                # NOTE(review): the request above sets follow_redirects=True, so
                # this branch is presumably rarely reached - confirm.
                redirect_url = response.headers.get('Location')
                logger.info(f"Received redirect to: {redirect_url}")
                
                # Check if redirect is to login page
                if redirect_url and 'login' in redirect_url.lower():
                    logger.info("Redirected to login page, attempting automatic login...")
                    return await self._login_and_retry(
                        pdf_url, filename, download_dir, username, password,
                        skip_if_exists, max_cache_age_days, _login_attempted,
                    )
                else:
                    logger.error(f"Unexpected redirect to: {redirect_url}")
                    return False, filename
            
            else:
                logger.error(f"Download failed, status code: {response.status_code}")
                return False, filename
                
        except Exception as e:
            logger.error(f"An error occurred during download: {str(e)}")
            return False, filename

    async def _login_and_retry(self, pdf_url, filename, download_dir, username, password, skip_if_exists, max_cache_age_days, login_attempted):
        """
        Log in with the available credentials and retry the download once.

        Args:
            pdf_url (str): URL of the PDF file being downloaded.
            filename (str): Filename to report if the login or retry cannot proceed.
            download_dir (str): Directory passed through to the retried download.
            username (str, optional): Explicit username; falls back to self.username.
            password (str, optional): Explicit password; falls back to self.password.
            skip_if_exists (bool): Passed through to the retried download.
            max_cache_age_days (int): Passed through to the retried download.
            login_attempted (bool): True when this download is already the retry
                after a login, in which case no further login is attempted.

        Returns:
            tuple: (success (bool), saved_filename (str))
        """
        # Function-scope import so this helper does not rely on the file's import list.
        import asyncio

        # Omitting the credentials on the retry is not enough to stop the
        # recursion, because self.username / self.password are still used as a
        # fallback. The explicit flag guarantees at most one login per download.
        if login_attempted:
            logger.error("Login was already attempted but the server still requires authentication.")
            return False, filename

        login_username = username or self.username
        login_password = password or self.password

        if not (login_username and login_password):
            logger.error("Login required but username and password not provided.")
            return False, filename

        if not await self.login(login_username, login_password, pdf_url):
            logger.error("Automatic login failed.")
            return False, filename

        logger.info("Login successful, re-downloading PDF...")
        # Wait for login state to stabilize without blocking the event loop
        # (time.sleep would stall every other coroutine for the full second).
        await asyncio.sleep(1)
        return await self.download_pdf(
            pdf_url,
            download_dir=download_dir,
            skip_if_exists=skip_if_exists,
            max_cache_age_days=max_cache_age_days,
            _login_attempted=True,
        )
  • server.py:167-167 (registration)
    The @mcp.tool() decorator registers the download_pdf function as an MCP tool.
    @mcp.tool()
Install Server

Other Tools

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/gscfwid/NCCN_guidelines_MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server