Aucterra MCP Server

Overview Schema Related Servers Score Discussions

aucterra-mcp
aucterra_mcp

server.py•5.36 KiB

import os
import base64
import re
from typing import List, Dict, Any, Union, Type, Optional
import httpx
from pydantic import BaseModel, Field, root_validator
from mcp.server.fastmcp import FastMCP

# Credential sent as the "x-api-key" header on every parser request.
# Evaluates to None when the environment variable is not set.
API_KEY = os.environ.get("AUCTERRA_API_KEY")

# Endpoints of the remote parsing service used by the two tools below.
EXTRACTION_URL = "https://5gn4u7v34c2pabqoqudprz4szq0ygrdt.lambda-url.us-east-1.on.aws/parser/document-extract"
CLASSIFICATION_URL = "https://5gn4u7v34c2pabqoqudprz4szq0ygrdt.lambda-url.us-east-1.on.aws/parser/document-classify"

# Server instance; tool functions register themselves on it via @mcp.tool().
mcp = FastMCP("aucterra")


def is_base64(s: str) -> bool:
    """Return True if *s* is a non-empty, canonically encoded base64 string.

    A string qualifies only when it decodes under strict validation and
    re-encoding the decoded bytes reproduces *s* exactly.

    Args:
        s: Candidate string.

    Returns:
        True for canonical, non-empty base64 text; False for anything else,
        including the empty string and non-string values.
    """
    # The empty string trivially round-trips, but it carries no document and
    # must not be mistaken for valid payload data.
    if not isinstance(s, str) or not s:
        return False
    try:
        decoded = base64.b64decode(s, validate=True)
    except ValueError:
        # binascii.Error (bad alphabet / padding) is a ValueError subclass;
        # non-ASCII text raises a plain ValueError.
        return False
    # Round-trip comparison rejects non-canonical encodings.
    return base64.b64encode(decoded).decode("ascii") == s


def encode_file_to_base64(file_path: str) -> str:
    """Read the file at *file_path* in binary mode and return its base64 text."""
    with open(file_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")


async def prepare_input_data(input_data: str) -> str:
    """Normalize a document reference into base64 text.

    The input is tried, in this order, as:
      1. a path that exists on the local filesystem (read and encoded),
      2. a string that is already base64 (returned unchanged),
      3. an http:// or https:// URL (body downloaded and encoded).

    Args:
        input_data: Local file path, base64 string, or http(s) URL.

    Returns:
        The document content as a base64 string.

    Raises:
        ValueError: If the input is empty, the download fails, or the input
            matches none of the supported forms.
        OSError: If an existing local path cannot be opened or read.
    """
    # Reject empty input up front so it is never forwarded as a "document".
    if not input_data:
        raise ValueError("input_data is empty: expected a local path, base64 string, or a valid URL.")
    if os.path.exists(input_data):
        return encode_file_to_base64(input_data)
    if is_base64(input_data):
        return input_data
    if re.match(r"^https?://", input_data):
        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(input_data)
                response.raise_for_status()
                return base64.b64encode(response.content).decode("utf-8")
        except Exception as e:
            # Chain the cause so the underlying network/HTTP error stays visible.
            raise ValueError(f"Failed to download document from URL: {e}") from e
    raise ValueError("Invalid input_data: must be a local path, base64 string, or a valid URL.")


# -----------------------------------
# 🔍 Field Definition (Recursive)
# -----------------------------------
class FieldDefinition(BaseModel):
    # One field to extract. The model refers to itself through `fields`,
    # so definitions can be nested.

    # Required attributes.
    field_key: str = Field(default=..., description="Unique identifier for the field.")
    field_name: str = Field(default=..., description="Display name for the field.")

    # Optional attributes; each defaults to None.
    field_type: Optional[str] = Field(
        default=None,
        description="Data type of the field, e.g., 'float', 'string'.",
    )
    format_instruction: Optional[str] = Field(
        default=None,
        description="Instruction to format the extracted value.",
    )
    fields: Optional[List["FieldDefinition"]] = Field(
        default=None,
        description="Nested fields for structured data.",
    )


# -------------------------------
# 🛠 Document Extraction Tool
# -------------------------------
class ExtractionInput(BaseModel):
    # Argument schema mirroring the parameters of document_extraction_tool.

    # Required arguments.
    input_data: str = Field(
        default=...,
        description="Document to extract from. Accepts a local file path, URL, or base64 string.",
    )
    fields: List[FieldDefinition] = Field(
        default=...,
        description="List of field definitions to extract. Can be nested.",
    )

    # Optional arguments with their defaults.
    document_id: str = Field(default="123", description="Unique identifier for the document.")
    extraction_type: str = Field(
        default="generic",
        description="Extraction type: 'generic' or 'specific'.",
    )

@mcp.tool()
async def document_extraction_tool(
    input_data: str,
    fields: List[FieldDefinition],
    document_id: str = "123",
    extraction_type: str = "generic"
) -> Union[Dict[str, Any], str]:
    """Extracts structured data from documents using a parsing service. Supports nested fields and multiple input types.

    Args:
        input_data: Document to extract from. Accepts a local file path, URL, or base64 string.
        fields: List of field definitions to extract. Can be nested.
        document_id: Unique identifier for the document.
        extraction_type: Extraction type: 'generic' or 'specific'.

    Returns:
        The decoded JSON response of the extraction endpoint on success,
        otherwise a human-readable error string (errors are returned, not raised).
    """
    # Fail early with a clear message instead of an opaque header error from
    # the HTTP client when the key is missing.
    if not API_KEY:
        return "Extraction failed: AUCTERRA_API_KEY environment variable is not set."

    try:
        encoded_document = await prepare_input_data(input_data)
    except Exception as e:
        return f"Invalid input_data: {str(e)}"

    payload = {
        "input_data": encoded_document,
        # Prefer model_dump() where available (pydantic v2); fall back to the
        # v1-style dict() so either major version works.
        "fields": [
            field.model_dump() if hasattr(field, "model_dump") else field.dict()
            for field in fields
        ],
        "document_id": document_id,
        "extraction_type": extraction_type,
        "advanced_ocr": "disable"
    }

    headers = {
        "accept": "application/json",
        "x-api-key": API_KEY,
        "Content-Type": "application/json"
    }

    async with httpx.AsyncClient() as client:
        try:
            response = await client.post(EXTRACTION_URL, headers=headers, json=payload, timeout=60)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return f"Extraction failed: {str(e)}"


# -------------------------------
# 🧠 Document Classification Tool
# -------------------------------
class ClassificationInput(BaseModel):
    # Argument schema mirroring the parameters of document_classification_tool.

    # Required arguments.
    input_data: str = Field(
        default=...,
        description="Document to classify. Accepts a local file path, URL, or base64 string.",
    )
    classes: List[str] = Field(default=..., description="List of possible document classes.")

    # Optional argument with its default.
    document_id: str = Field(default="123", description="Unique identifier for the document.")

@mcp.tool()
async def document_classification_tool(
    input_data: str,
    classes: List[str],
    document_id: str = "123"
) -> Union[Dict[str, Any], str]:
    """Classifies the document into a predefined category using the classification service.

    Args:
        input_data: Document to classify. Accepts a local file path, URL, or base64 string.
        classes: List of possible document classes.
        document_id: Unique identifier for the document.

    Returns:
        The decoded JSON response of the classification endpoint on success,
        otherwise a human-readable error string (errors are returned, not raised).
    """
    # Fail early with a clear message instead of an opaque header error from
    # the HTTP client when the key is missing.
    if not API_KEY:
        return "Classification failed: AUCTERRA_API_KEY environment variable is not set."

    try:
        encoded_document = await prepare_input_data(input_data)
    except Exception as e:
        return f"Invalid input_data: {str(e)}"

    payload = {
        "input_data": encoded_document,
        "classes": classes,
        "document_id": document_id
    }

    headers = {
        "accept": "application/json",
        "x-api-key": API_KEY,
        "Content-Type": "application/json"
    }

    async with httpx.AsyncClient() as client:
        try:
            response = await client.post(CLASSIFICATION_URL, headers=headers, json=payload, timeout=60)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return f"Classification failed: {str(e)}"

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/japisuru/aucterra-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

server.py•5.36 KiB

import os
import base64
import re
from typing import List, Dict, Any, Union, Type, Optional
import httpx
from pydantic import BaseModel, Field, root_validator
from mcp.server.fastmcp import FastMCP

# Credential sent as the "x-api-key" header on every parser request.
# Evaluates to None when the environment variable is not set.
API_KEY = os.environ.get("AUCTERRA_API_KEY")

# Endpoints of the remote parsing service used by the two tools below.
EXTRACTION_URL = "https://5gn4u7v34c2pabqoqudprz4szq0ygrdt.lambda-url.us-east-1.on.aws/parser/document-extract"
CLASSIFICATION_URL = "https://5gn4u7v34c2pabqoqudprz4szq0ygrdt.lambda-url.us-east-1.on.aws/parser/document-classify"

# Server instance; tool functions register themselves on it via @mcp.tool().
mcp = FastMCP("aucterra")


def is_base64(s: str) -> bool:
    """Return True if *s* is a non-empty, canonically encoded base64 string.

    A string qualifies only when it decodes under strict validation and
    re-encoding the decoded bytes reproduces *s* exactly.

    Args:
        s: Candidate string.

    Returns:
        True for canonical, non-empty base64 text; False for anything else,
        including the empty string and non-string values.
    """
    # The empty string trivially round-trips, but it carries no document and
    # must not be mistaken for valid payload data.
    if not isinstance(s, str) or not s:
        return False
    try:
        decoded = base64.b64decode(s, validate=True)
    except ValueError:
        # binascii.Error (bad alphabet / padding) is a ValueError subclass;
        # non-ASCII text raises a plain ValueError.
        return False
    # Round-trip comparison rejects non-canonical encodings.
    return base64.b64encode(decoded).decode("ascii") == s


def encode_file_to_base64(file_path: str) -> str:
    """Read the file at *file_path* in binary mode and return its base64 text."""
    with open(file_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")


async def prepare_input_data(input_data: str) -> str:
    """Normalize a document reference into base64 text.

    The input is tried, in this order, as:
      1. a path that exists on the local filesystem (read and encoded),
      2. a string that is already base64 (returned unchanged),
      3. an http:// or https:// URL (body downloaded and encoded).

    Args:
        input_data: Local file path, base64 string, or http(s) URL.

    Returns:
        The document content as a base64 string.

    Raises:
        ValueError: If the input is empty, the download fails, or the input
            matches none of the supported forms.
        OSError: If an existing local path cannot be opened or read.
    """
    # Reject empty input up front so it is never forwarded as a "document".
    if not input_data:
        raise ValueError("input_data is empty: expected a local path, base64 string, or a valid URL.")
    if os.path.exists(input_data):
        return encode_file_to_base64(input_data)
    if is_base64(input_data):
        return input_data
    if re.match(r"^https?://", input_data):
        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(input_data)
                response.raise_for_status()
                return base64.b64encode(response.content).decode("utf-8")
        except Exception as e:
            # Chain the cause so the underlying network/HTTP error stays visible.
            raise ValueError(f"Failed to download document from URL: {e}") from e
    raise ValueError("Invalid input_data: must be a local path, base64 string, or a valid URL.")


# -----------------------------------
# 🔍 Field Definition (Recursive)
# -----------------------------------
class FieldDefinition(BaseModel):
    # One field to extract. The model refers to itself through `fields`,
    # so definitions can be nested.

    # Required attributes.
    field_key: str = Field(default=..., description="Unique identifier for the field.")
    field_name: str = Field(default=..., description="Display name for the field.")

    # Optional attributes; each defaults to None.
    field_type: Optional[str] = Field(
        default=None,
        description="Data type of the field, e.g., 'float', 'string'.",
    )
    format_instruction: Optional[str] = Field(
        default=None,
        description="Instruction to format the extracted value.",
    )
    fields: Optional[List["FieldDefinition"]] = Field(
        default=None,
        description="Nested fields for structured data.",
    )


# -------------------------------
# 🛠 Document Extraction Tool
# -------------------------------
class ExtractionInput(BaseModel):
    # Argument schema mirroring the parameters of document_extraction_tool.

    # Required arguments.
    input_data: str = Field(
        default=...,
        description="Document to extract from. Accepts a local file path, URL, or base64 string.",
    )
    fields: List[FieldDefinition] = Field(
        default=...,
        description="List of field definitions to extract. Can be nested.",
    )

    # Optional arguments with their defaults.
    document_id: str = Field(default="123", description="Unique identifier for the document.")
    extraction_type: str = Field(
        default="generic",
        description="Extraction type: 'generic' or 'specific'.",
    )

@mcp.tool()
async def document_extraction_tool(
    input_data: str,
    fields: List[FieldDefinition],
    document_id: str = "123",
    extraction_type: str = "generic"
) -> Union[Dict[str, Any], str]:
    """Extracts structured data from documents using a parsing service. Supports nested fields and multiple input types.

    Args:
        input_data: Document to extract from. Accepts a local file path, URL, or base64 string.
        fields: List of field definitions to extract. Can be nested.
        document_id: Unique identifier for the document.
        extraction_type: Extraction type: 'generic' or 'specific'.

    Returns:
        The decoded JSON response of the extraction endpoint on success,
        otherwise a human-readable error string (errors are returned, not raised).
    """
    # Fail early with a clear message instead of an opaque header error from
    # the HTTP client when the key is missing.
    if not API_KEY:
        return "Extraction failed: AUCTERRA_API_KEY environment variable is not set."

    try:
        encoded_document = await prepare_input_data(input_data)
    except Exception as e:
        return f"Invalid input_data: {str(e)}"

    payload = {
        "input_data": encoded_document,
        # Prefer model_dump() where available (pydantic v2); fall back to the
        # v1-style dict() so either major version works.
        "fields": [
            field.model_dump() if hasattr(field, "model_dump") else field.dict()
            for field in fields
        ],
        "document_id": document_id,
        "extraction_type": extraction_type,
        "advanced_ocr": "disable"
    }

    headers = {
        "accept": "application/json",
        "x-api-key": API_KEY,
        "Content-Type": "application/json"
    }

    async with httpx.AsyncClient() as client:
        try:
            response = await client.post(EXTRACTION_URL, headers=headers, json=payload, timeout=60)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return f"Extraction failed: {str(e)}"


# -------------------------------
# 🧠 Document Classification Tool
# -------------------------------
class ClassificationInput(BaseModel):
    # Argument schema mirroring the parameters of document_classification_tool.

    # Required arguments.
    input_data: str = Field(
        default=...,
        description="Document to classify. Accepts a local file path, URL, or base64 string.",
    )
    classes: List[str] = Field(default=..., description="List of possible document classes.")

    # Optional argument with its default.
    document_id: str = Field(default="123", description="Unique identifier for the document.")

@mcp.tool()
async def document_classification_tool(
    input_data: str,
    classes: List[str],
    document_id: str = "123"
) -> Union[Dict[str, Any], str]:
    """Classifies the document into a predefined category using the classification service.

    Args:
        input_data: Document to classify. Accepts a local file path, URL, or base64 string.
        classes: List of possible document classes.
        document_id: Unique identifier for the document.

    Returns:
        The decoded JSON response of the classification endpoint on success,
        otherwise a human-readable error string (errors are returned, not raised).
    """
    # Fail early with a clear message instead of an opaque header error from
    # the HTTP client when the key is missing.
    if not API_KEY:
        return "Classification failed: AUCTERRA_API_KEY environment variable is not set."

    try:
        encoded_document = await prepare_input_data(input_data)
    except Exception as e:
        return f"Invalid input_data: {str(e)}"

    payload = {
        "input_data": encoded_document,
        "classes": classes,
        "document_id": document_id
    }

    headers = {
        "accept": "application/json",
        "x-api-key": API_KEY,
        "Content-Type": "application/json"
    }

    async with httpx.AsyncClient() as client:
        try:
            response = await client.post(CLASSIFICATION_URL, headers=headers, json=payload, timeout=60)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return f"Classification failed: {str(e)}"