MedAdapt Content Server

by ryoureddy
Verified
import os
import xml.etree.ElementTree as ET

import requests
from dotenv import load_dotenv

# Load environment variables for API key
load_dotenv()
API_KEY = os.getenv('NCBI_API_KEY', '')

# Shared endpoint root for every E-utilities call made by this module.
_EUTILS_BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

# Seconds to wait for the server before giving up. Without an explicit
# timeout, requests.get() may block indefinitely on a stalled connection.
_REQUEST_TIMEOUT = 30


def _eutils_get(endpoint, params):
    """
    Send a GET request to one E-utilities endpoint.

    Query parameters are handed to ``requests`` via ``params=`` so that they
    are percent-encoded; building the URL by string interpolation would let
    characters such as spaces, ``&`` or ``#`` in a search term corrupt the
    request. The API key is appended only when one is configured.

    Args:
        endpoint: Script name relative to the base URL (e.g. "esearch.fcgi")
        params: Mapping of query parameters for the request

    Returns:
        The ``requests.Response`` object for a successful request

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx HTTP status
    """
    query_params = dict(params)  # copy so the caller's mapping is not mutated
    if API_KEY:
        query_params['api_key'] = API_KEY

    response = requests.get(
        f"{_EUTILS_BASE_URL}{endpoint}",
        params=query_params,
        timeout=_REQUEST_TIMEOUT,
    )
    # Fail fast on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    return response


def _fetch_books_xml(record_id):
    """
    Fetch a record from the ``books`` database and parse it as XML.

    The raw bytes (``response.content``) are parsed rather than
    ``response.text`` so that decoding is left to the XML parser instead of
    depending on the character set requests infers from the HTTP headers.

    Args:
        record_id: Value for the ``id`` query parameter

    Returns:
        Root ``xml.etree.ElementTree.Element`` of the parsed response

    Raises:
        requests.RequestException: If the HTTP request fails
        xml.etree.ElementTree.ParseError: If the body is not well-formed XML
    """
    response = _eutils_get(
        'efetch.fcgi',
        {'db': 'books', 'id': record_id, 'retmode': 'xml'},
    )
    return ET.fromstring(response.content)


def _text_or(elem, default):
    """Return ``elem.text`` when the element exists, otherwise ``default``."""
    return elem.text if elem is not None else default


def search_bookshelf(query, max_results=10):
    """
    Search NCBI Bookshelf for textbook content

    Runs an ``esearch`` request for matching IDs, then an ``efetch`` request
    for those IDs, and builds one metadata dict per ``Book`` element found.
    Any failure (network, HTTP status, JSON or XML parsing) is printed and
    reported as an empty list; this function does not raise.

    Args:
        query: Search terms
        max_results: Maximum number of results to return

    Returns:
        List of book metadata
    """
    try:
        # Step 1: Search for book IDs
        search_response = _eutils_get(
            'esearch.fcgi',
            {
                'db': 'books',
                'term': query,
                'retmode': 'json',
                'retmax': max_results,
            },
        )
        data = search_response.json()

        if 'esearchresult' not in data or 'idlist' not in data['esearchresult']:
            return []

        book_ids = data['esearchresult']['idlist']
        if not book_ids:
            return []

        # Step 2: Fetch book details for all IDs in a single request
        root = _fetch_books_xml(','.join(book_ids))

        books = []
        for book in root.findall('.//Book'):
            book_id_elem = book.find('.//BookId')
            if book_id_elem is None:
                # Without an ID there is nothing to link to; skip the entry.
                continue

            book_id = book_id_elem.text
            books.append({
                'id': f"bookshelf-{book_id}",
                'title': _text_or(book.find('.//BookTitle'), "Unknown Title"),
                'publisher': _text_or(
                    book.find('.//Publisher/PublisherName'), None
                ),
                'source_type': 'bookshelf',
                'source_id': book_id,
                'content_type': 'textbook',
                'url': f"https://www.ncbi.nlm.nih.gov/books/{book_id}/"
            })

        return books

    except Exception as e:
        # Deliberate best-effort boundary: callers receive [] on any failure.
        print(f"Error searching Bookshelf: {e}")
        return []


def fetch_bookshelf_content(book_id):
    """
    Fetch complete book details and chapter list

    Any failure (network, HTTP status, XML parsing) is printed and reported
    through an ``{"error": ...}`` dict; this function does not raise.

    Args:
        book_id: Bookshelf ID

    Returns:
        Book details with chapter list, or a dict with a single "error" key
        when the book is missing or the request fails
    """
    try:
        root = _fetch_books_xml(book_id)

        book_elem = root.find('.//Book')
        if book_elem is None:
            return {"error": "Book not found"}

        # Get chapter information; entries lacking a title or an ID are
        # skipped.
        chapters = []
        for chapter_elem in book_elem.findall('.//Chapter'):
            chapter_title = chapter_elem.find('.//ChapterTitle')
            chapter_id = chapter_elem.find('.//ChapterId')
            if chapter_title is not None and chapter_id is not None:
                chapters.append({
                    'title': chapter_title.text,
                    'id': chapter_id.text
                })

        # Get authors as "ForeName LastName" (or just the last name when no
        # fore name is present); authors without a last name are skipped.
        authors = []
        for author_elem in book_elem.findall('.//AuthorList/Author'):
            last_name = author_elem.find('.//LastName')
            if last_name is None:
                continue
            author_name = last_name.text
            fore_name = author_elem.find('.//ForeName')
            if fore_name is not None:
                author_name = f"{fore_name.text} {author_name}"
            authors.append(author_name)

        return {
            'id': f"bookshelf-{book_id}",
            'title': _text_or(book_elem.find('.//BookTitle'), "Unknown Title"),
            'publisher': _text_or(
                book_elem.find('.//Publisher/PublisherName'),
                "Unknown Publisher",
            ),
            'authors': authors,
            'publication_year': _text_or(
                book_elem.find('.//PubDate/Year'), None
            ),
            'chapters': chapters,
            'source_type': 'bookshelf',
            'source_id': book_id,
            'content_type': 'textbook',
            'url': f"https://www.ncbi.nlm.nih.gov/books/{book_id}/"
        }

    except Exception as e:
        # Deliberate best-effort boundary: failures become an error dict.
        print(f"Error fetching book details: {e}")
        return {"error": f"Error fetching book: {e}"}


def fetch_chapter_content(book_id, chapter_id):
    """
    Fetch specific chapter content

    Any failure (network, HTTP status, XML parsing) is printed and reported
    through an ``{"error": ...}`` dict; this function does not raise.

    Args:
        book_id: Bookshelf book ID
        chapter_id: Chapter ID

    Returns:
        Chapter content and metadata, or a dict with a single "error" key
        when the chapter is missing or the request fails
    """
    try:
        # The chapter is addressed as "<book_id>.<chapter_id>".
        root = _fetch_books_xml(f"{book_id}.{chapter_id}")

        chapter_elem = root.find('.//Chapter')
        if chapter_elem is None:
            return {"error": "Chapter not found"}

        # Extract text content (simplified)
        sections = []
        for section_elem in chapter_elem.findall('.//Section'):
            paragraphs = []
            for para in section_elem.findall('.//Para'):
                # itertext() gathers text from inline child elements as
                # well; para.text alone stops at the first child element
                # and would truncate paragraphs containing inline markup.
                para_text = ''.join(para.itertext())
                if para_text:
                    paragraphs.append(para_text)

            sections.append({
                'title': _text_or(section_elem.find('.//SectionTitle'), None),
                'content': '\n\n'.join(paragraphs)
            })

        return {
            'id': f"bookshelf-{book_id}-{chapter_id}",
            'book_id': book_id,
            'chapter_id': chapter_id,
            'title': _text_or(
                chapter_elem.find('.//ChapterTitle'), "Unknown Chapter"
            ),
            'sections': sections,
            'content_type': 'chapter',
            'source_type': 'bookshelf',
            'url': f"https://www.ncbi.nlm.nih.gov/books/{book_id}/{chapter_id}/"
        }

    except Exception as e:
        # Deliberate best-effort boundary: failures become an error dict.
        print(f"Error fetching chapter: {e}")
        return {"error": f"Error fetching chapter: {e}"}
ID: kf16vbveq1