MCP Server Box

by box-community
Verified
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

import json
import logging
import os
from dataclasses import dataclass
from typing import Any, Callable, Iterable, Iterator, List, Union

import dotenv
import requests
from box_sdk_gen import (
    AiExtractResponse,
    AiItemBase,
    AiItemBaseTypeField,
    BoxCCGAuth,
    BoxClient,
    BoxSDKError,
    CCGConfig,
    CreateAiAskMode,
    CreateAiExtractStructuredFields,
    CreateAiExtractStructuredFieldsOptionsField,
    File,
    Folder,
    FolderMini,
    SearchForContentContentTypes,
    SearchForContentType,
    ByteStream,
    AiSingleAgentResponseFull,
    AiAgentAsk,
    AiAgentAskTypeField,
    AiAgentLongTextTool,
    AiAgentBasicTextTool,
    AiAgentExtract,
    AiAgentExtractTypeField,
)

logger = logging.getLogger(__name__)

# Upper bound, in seconds, for the raw HTTP calls made by ``_do_request``.
# Without a timeout ``requests`` waits forever on a stalled connection.
_REQUEST_TIMEOUT_SECONDS = 60

# Model identifier used by the pre-configured Claude agents below.
_CLAUDE_MODEL = "aws__claude_3_7_sonnet"


@dataclass
class BoxFileExtended:
    """A Box file paired with the per-file result produced for it."""

    # The file object as returned by ``box_file_get_by_id``.
    file: File
    # Result of the per-file operation (extracted text, AI answer, ...);
    # the folder helpers store an empty string when extraction is bypassed.
    text_representation: str


def _do_request(
    box_client: BoxClient, url: str, timeout: float = _REQUEST_TIMEOUT_SECONDS
):
    """
    Performs a GET request to a Box API endpoint using the provided Box client.

    This is an internal helper function and should not be called directly.

    Args:
        box_client (BoxClient): An authenticated Box client object.
        url (str): The URL of the Box API endpoint to make the request to.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        bytes: The content of the response from the Box API.

    Raises:
        BoxSDKError: If an error occurs while retrieving the access token.
        requests.exceptions.RequestException: If the request fails
            (e.g., network error, timeout, 4XX or 5XX status code).
    """
    # Any error raised while fetching the token propagates unchanged.
    access_token = box_client.auth.retrieve_token().access_token
    resp = requests.get(
        url,
        headers={"Authorization": f"Bearer {access_token}"},
        timeout=timeout,
    )
    resp.raise_for_status()
    return resp.content


def _iter_folder_items(
    client: BoxClient, folder_id: str
) -> Iterator[Union[File, FolderMini]]:
    """Yield every item of a folder, following marker-based pagination.

    A single ``get_folder_items`` call returns one page only, so reading just
    ``.entries`` silently drops the remaining items of larger folders.
    """
    marker = None
    while True:
        page = client.folders.get_folder_items(
            folder_id, usemarker=True, marker=marker
        )
        yield from page.entries or []
        marker = getattr(page, "next_marker", None)
        # Only a non-empty string marker denotes a further page.
        if not isinstance(marker, str) or not marker:
            return


def _walk_folder_files(
    client: BoxClient,
    folder_id: str,
    is_recursive: bool,
    by_pass_text_extraction: bool,
    per_file: Callable[[str], Any],
) -> Iterator[BoxFileExtended]:
    """Yield a ``BoxFileExtended`` for each file in a folder.

    ``per_file`` is called with the file id to compute the text
    representation, unless ``by_pass_text_extraction`` is set, in which case
    an empty string is stored. Sub-folders are visited only when
    ``is_recursive`` is true; other item types are ignored.
    """
    for item in _iter_folder_items(client, folder_id):
        if item.type == "file":
            file = box_file_get_by_id(client=client, file_id=item.id)
            text_representation = "" if by_pass_text_extraction else per_file(item.id)
            yield BoxFileExtended(file=file, text_representation=text_representation)
        elif item.type == "folder" and is_recursive:
            yield from _walk_folder_files(
                client=client,
                folder_id=item.id,
                is_recursive=is_recursive,
                by_pass_text_extraction=by_pass_text_extraction,
                per_file=per_file,
            )


def box_file_get_by_id(client: BoxClient, file_id: str) -> File:
    """Return the Box file object for ``file_id``."""
    return client.files.get_file_by_id(file_id=file_id)


def box_file_text_extract(client: BoxClient, file_id: str) -> str:
    """Return the ``extracted_text`` representation of a file.

    Args:
        client (BoxClient): An authenticated Box client object.
        file_id (str): Id of the file to extract text from.

    Returns:
        str: The decoded text, or an empty string when the file has no
            representations or no ``extracted_text`` entry.

    Raises:
        BoxSDKError: If the access token cannot be retrieved.
        requests.exceptions.RequestException: If a representation request fails.
    """
    # Request the file with the "extracted_text" representation hint
    file_text_representation = client.files.get_file_by_id(
        file_id,
        x_rep_hints="[extracted_text]",
        fields=["name", "representations"],
    )

    # Check if any representations exist
    representations = file_text_representation.representations
    if representations is None or not representations.entries:
        logger.debug("No representation for file %s", file_text_representation.id)
        return ""

    # Find the "extracted_text" representation
    extracted_text_entry = next(
        (
            entry
            for entry in representations.entries
            if entry.representation == "extracted_text"
        ),
        None,
    )
    if not extracted_text_entry:
        return ""

    # Handle cases where the extracted text needs generation
    if extracted_text_entry.status.state == "none":
        # Trigger text generation; the helper needs the client for the token.
        _do_request(client, extracted_text_entry.info.url)

    # Build the download URL by dropping the asset-path placeholder
    url = extracted_text_entry.content.url_template.replace("{+asset_path}", "")

    # Download the raw content
    raw_content = _do_request(client, url)

    # The helper returns bytes; decode them, pass anything else through as is
    if isinstance(raw_content, bytes):
        return raw_content.decode("utf-8")
    return raw_content


def box_file_ai_ask(
    client: BoxClient, file_id: str, prompt: str, ai_agent: AiAgentAsk | None = None
) -> str:
    """Ask Box AI a question about a single file and return the answer.

    Args:
        client (BoxClient): An authenticated Box client object.
        file_id (str): Id of the file the question is about.
        prompt (str): The question to ask.
        ai_agent (AiAgentAsk | None): Optional agent configuration, forwarded
            unchanged to the SDK.

    Returns:
        str: The ``answer`` attribute of the SDK response.
    """
    ai_item = AiItemBase(id=file_id, type=AiItemBaseTypeField.FILE)
    response = client.ai.create_ai_ask(
        mode=CreateAiAskMode.SINGLE_ITEM_QA,
        prompt=prompt,
        items=[ai_item],
        ai_agent=ai_agent,
    )
    return response.answer


def box_file_ai_extract(
    client: BoxClient,
    file_id: str,
    prompt: str,
    ai_agent: AiAgentExtract | None = None,
) -> dict:
    """Run a Box AI extract on a single file and parse the JSON answer.

    Args:
        client (BoxClient): An authenticated Box client object.
        file_id (str): Id of the file to extract from.
        prompt (str): The extraction prompt.
        ai_agent (AiAgentExtract | None): Optional agent configuration,
            forwarded unchanged to the SDK.

    Returns:
        dict: The response answer parsed with ``json.loads``.

    Raises:
        json.JSONDecodeError: If the answer is not valid JSON.
    """
    ai_item = AiItemBase(id=file_id, type=AiItemBaseTypeField.FILE)
    response = client.ai.create_ai_extract(
        prompt=prompt, items=[ai_item], ai_agent=ai_agent
    )
    # Return a dictionary from the json answer
    return json.loads(response.answer)


def box_file_ai_extract_structured(
    client: BoxClient, file_id: str, fields_json_str: str
) -> str:
    """Run a structured Box AI extract on a single file.

    Args:
        client (BoxClient): An authenticated Box client object.
        file_id (str): Id of the file to extract from.
        fields_json_str (str): JSON array of field objects. The keys ``key``,
            ``description``, ``display_name``, ``prompt``, ``type`` and
            ``options`` (a list of objects with a ``key``) are read from each.

    Returns:
        str: The SDK response serialised as indented JSON.

    Raises:
        json.JSONDecodeError: If ``fields_json_str`` is not valid JSON.
    """
    ai_item = AiItemBase(id=file_id, type=AiItemBaseTypeField.FILE)

    ai_fields = []
    for field in json.loads(fields_json_str):
        # Options are rebuilt for every field so that one field's choices
        # never leak into the fields that follow it.
        options = [
            CreateAiExtractStructuredFieldsOptionsField(key=option.get("key"))
            for option in field.get("options") or []
        ]
        ai_fields.append(
            CreateAiExtractStructuredFields(
                key=field.get("key"),
                description=field.get("description"),
                display_name=field.get("display_name"),
                prompt=field.get("prompt"),
                type=field.get("type"),
                options=options or None,
            )
        )

    response: AiExtractResponse = client.ai.create_ai_extract_structured(
        items=[ai_item], fields=ai_fields
    )
    return json.dumps(response.to_dict(), indent=2)


def box_folder_text_representation(
    client: BoxClient,
    folder_id: str,
    is_recursive: bool = False,
    by_pass_text_extraction: bool = False,
) -> Iterable[BoxFileExtended]:
    """Yield each file of a folder together with its extracted text.

    Args:
        client (BoxClient): An authenticated Box client object.
        folder_id (str): Id of the folder to walk.
        is_recursive (bool): Also walk sub-folders when true.
        by_pass_text_extraction (bool): Skip extraction and store ``""``.
    """
    yield from _walk_folder_files(
        client=client,
        folder_id=folder_id,
        is_recursive=is_recursive,
        by_pass_text_extraction=by_pass_text_extraction,
        per_file=lambda file_id: box_file_text_extract(
            client=client, file_id=file_id
        ),
    )


def box_folder_ai_ask(
    client: BoxClient,
    folder_id: str,
    prompt: str,
    is_recursive: bool = False,
    by_pass_text_extraction: bool = False,
) -> Iterable[BoxFileExtended]:
    """Yield each file of a folder together with the AI answer to ``prompt``.

    Args:
        client (BoxClient): An authenticated Box client object.
        folder_id (str): Id of the folder to walk.
        prompt (str): Question asked about every file.
        is_recursive (bool): Also walk sub-folders when true.
        by_pass_text_extraction (bool): Skip the AI call and store ``""``.
    """
    yield from _walk_folder_files(
        client=client,
        folder_id=folder_id,
        is_recursive=is_recursive,
        by_pass_text_extraction=by_pass_text_extraction,
        per_file=lambda file_id: box_file_ai_ask(
            client=client, file_id=file_id, prompt=prompt
        ),
    )


def box_folder_ai_extract(
    client: BoxClient,
    folder_id: str,
    prompt: str,
    is_recursive: bool = False,
    by_pass_text_extraction: bool = False,
) -> Iterable[BoxFileExtended]:
    """Yield each file of a folder together with its AI extract result.

    Note that ``text_representation`` holds the parsed dictionary returned by
    ``box_file_ai_extract`` here, not a string.

    Args:
        client (BoxClient): An authenticated Box client object.
        folder_id (str): Id of the folder to walk.
        prompt (str): Extraction prompt applied to every file.
        is_recursive (bool): Also walk sub-folders when true.
        by_pass_text_extraction (bool): Skip the AI call and store ``""``.
    """
    yield from _walk_folder_files(
        client=client,
        folder_id=folder_id,
        is_recursive=is_recursive,
        by_pass_text_extraction=by_pass_text_extraction,
        per_file=lambda file_id: box_file_ai_extract(
            client=client, file_id=file_id, prompt=prompt
        ),
    )


def box_folder_ai_extract_structured(
    client: BoxClient,
    folder_id: str,
    fields_json_str: str,
    is_recursive: bool = False,
    by_pass_text_extraction: bool = False,
) -> Iterable[BoxFileExtended]:
    """Yield each file of a folder together with its structured extract.

    Args:
        client (BoxClient): An authenticated Box client object.
        folder_id (str): Id of the folder to walk.
        fields_json_str (str): JSON field definitions, as accepted by
            ``box_file_ai_extract_structured``.
        is_recursive (bool): Also walk sub-folders when true.
        by_pass_text_extraction (bool): Skip the AI call and store ``""``.
    """
    yield from _walk_folder_files(
        client=client,
        folder_id=folder_id,
        is_recursive=is_recursive,
        by_pass_text_extraction=by_pass_text_extraction,
        per_file=lambda file_id: box_file_ai_extract_structured(
            client=client, file_id=file_id, fields_json_str=fields_json_str
        ),
    )


def box_search(
    client: BoxClient,
    query: str,
    file_extensions: List[str] | None = None,
    content_types: List[SearchForContentContentTypes] | None = None,
    ancestor_folder_ids: List[str] | None = None,
) -> List[File]:
    """Search Box for files matching ``query``.

    Args:
        client (BoxClient): An authenticated Box client object.
        query (str): The search string.
        file_extensions (List[str] | None): Optional extension filter.
        content_types (List[SearchForContentContentTypes] | None): Optional
            list of item parts to search in; forwarded unchanged.
        ancestor_folder_ids (List[str] | None): Optional folder-scope filter.

    Returns:
        List[File]: The ``entries`` of the search response (a single page).
    """
    item_types = [SearchForContentType.FILE]
    fields: List[str] = ["id", "name", "type", "size", "description"]

    search_results = client.search.search_for_content(
        query=query,
        file_extensions=file_extensions,
        ancestor_folder_ids=ancestor_folder_ids,
        content_types=content_types,
        type=item_types,
        fields=fields,
    )
    return search_results.entries


def box_locate_folder_by_name(
    client: BoxClient, folder_name: str, parent_folder_id: str = "0"
) -> List[Folder]:
    """Search for folders by name below a parent folder.

    Args:
        client (BoxClient): An authenticated Box client object.
        folder_name (str): Name to search for (matched against item names).
        parent_folder_id (str): Id of the folder to search under.

    Returns:
        List[Folder]: The ``entries`` of the search response (a single page).
    """
    item_types = [SearchForContentType.FOLDER]
    fields: List[str] = ["id", "name", "type"]
    content_types: List[SearchForContentContentTypes] = [
        SearchForContentContentTypes.NAME,
    ]

    search_results = client.search.search_for_content(
        query=folder_name,
        # The SDK parameter is a list of ids, so wrap the single parent id.
        ancestor_folder_ids=[parent_folder_id],
        content_types=content_types,
        type=item_types,
        fields=fields,
    )
    return search_results.entries


def box_folder_list_content(
    client: BoxClient, folder_id: str, is_recursive: bool = False
) -> List[Union[File, Folder]]:
    """List the files and folders contained in a folder.

    Web links are skipped. When ``is_recursive`` is true, the contents of a
    sub-folder are placed in the result immediately before the sub-folder
    itself.

    Args:
        client (BoxClient): An authenticated Box client object.
        folder_id (str): Id of the folder to list.
        is_recursive (bool): Also list the contents of sub-folders.

    Returns:
        List[Union[File, Folder]]: The collected items.
    """
    result: List[Union[File, FolderMini]] = []
    for item in _iter_folder_items(client, folder_id):
        if item.type == "web_link":
            continue
        if item.type == "folder" and is_recursive:
            result.extend(box_folder_list_content(client, item.id, is_recursive))
        result.append(item)
    return result


def box_file_download(client: BoxClient, file_id: str) -> ByteStream:
    """Return the download stream for the file ``file_id``."""
    return client.downloads.download_file(file_id=file_id)


def box_available_ai_agents(client: BoxClient) -> List[AiSingleAgentResponseFull]:
    """Return the ``entries`` of the AI agents listing from AI Studio."""
    return client.ai_studio.get_ai_agents().entries


def box_claude_ai_agent_ask() -> AiAgentAsk:
    """Build an ask-agent configuration that uses the Claude model for every tool."""
    return AiAgentAsk(
        type=AiAgentAskTypeField.AI_AGENT_ASK,
        long_text=AiAgentLongTextTool(model=_CLAUDE_MODEL),
        basic_text=AiAgentBasicTextTool(model=_CLAUDE_MODEL),
        long_text_multi=AiAgentLongTextTool(model=_CLAUDE_MODEL),
        basic_text_multi=AiAgentBasicTextTool(model=_CLAUDE_MODEL),
    )


def box_claude_ai_agent_extract() -> AiAgentExtract:
    """Build an extract-agent configuration that uses the Claude model."""
    return AiAgentExtract(
        type=AiAgentExtractTypeField.AI_AGENT_EXTRACT,
        long_text=AiAgentLongTextTool(model=_CLAUDE_MODEL),
        basic_text=AiAgentBasicTextTool(model=_CLAUDE_MODEL),
    )