Registry Review MCP Server

test_upload_tools.py•43.5 KiB

"""Tests for file upload tools."""

import pytest
import base64
from pathlib import Path

from registry_review_mcp.tools import upload_tools, session_tools
from registry_review_mcp.models.errors import SessionNotFoundError


@pytest.fixture
def sample_pdf_base64():
    """Create a minimal test PDF encoded as base64."""
    # Minimal valid PDF structure
    pdf_content = b"%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n>>\nendobj\n2 0 obj\n<<\n/Type /Pages\n/Kids [3 0 R]\n/Count 1\n>>\nendobj\n3 0 obj\n<<\n/Type /Page\n/Parent 2 0 R\n/Resources <<\n/Font <<\n/F1 <<\n/Type /Font\n/Subtype /Type1\n/BaseFont /Times-Roman\n>>\n>>\n>>\n/MediaBox [0 0 612 792]\n>>\nendobj\nxref\n0 4\n0000000000 65535 f\n0000000009 00000 n\n0000000074 00000 n\n0000000120 00000 n\ntrailer\n<<\n/Size 4\n/Root 1 0 R\n>>\nstartxref\n149\n%%EOF"
    return base64.b64encode(pdf_content).decode('utf-8')


@pytest.fixture
def sample_text_base64():
    """Create a simple text file encoded as base64."""
    text_content = b"This is a test document for the registry review system."
    return base64.b64encode(text_content).decode('utf-8')


@pytest.fixture
def sample_pdf2_base64():
    """Create a different minimal PDF encoded as base64 (with different content)."""
    # Different PDF with unique content to avoid deduplication
    pdf_content = b"%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n>>\nendobj\n2 0 obj\n<<\n/Type /Pages\n/Kids [3 0 R]\n/Count 1\n>>\nendobj\n3 0 obj\n<<\n/Type /Page\n/Parent 2 0 R\n/Resources <<\n/Font <<\n/F1 <<\n/Type /Font\n/Subtype /Type1\n/BaseFont /Helvetica\n>>\n>>\n>>\n/MediaBox [0 0 612 792]\n/Contents 4 0 R\n>>\nendobj\n4 0 obj\n<<\n/Length 44\n>>\nstream\nBT\n/F1 12 Tf\n100 700 Td\n(Different Content) Tj\nET\nendstream\nendobj\nxref\n0 5\n0000000000 65535 f\n0000000009 00000 n\n0000000074 00000 n\n0000000120 00000 n\n0000000298 00000 n\ntrailer\n<<\n/Size 5\n/Root 1 0 R\n>>\nstartxref\n393\n%%EOF"
    return base64.b64encode(pdf_content).decode('utf-8')


@pytest.fixture
def sample_pdf3_base64():
    """Create a third different minimal PDF encoded as base64."""
    # Another unique PDF content
    pdf_content = b"%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n>>\nendobj\n2 0 obj\n<<\n/Type /Pages\n/Kids [3 0 R]\n/Count 1\n>>\nendobj\n3 0 obj\n<<\n/Type /Page\n/Parent 2 0 R\n/Resources <<\n/Font <<\n/F1 <<\n/Type /Font\n/Subtype /Type1\n/BaseFont /Courier\n>>\n>>\n>>\n/MediaBox [0 0 612 792]\n/Contents 4 0 R\n>>\nendobj\n4 0 obj\n<<\n/Length 44\n>>\nstream\nBT\n/F1 12 Tf\n100 700 Td\n(Third File Content) Tj\nET\nendstream\nendobj\nxref\n0 5\n0000000000 65535 f\n0000000009 00000 n\n0000000074 00000 n\n0000000120 00000 n\n0000000296 00000 n\ntrailer\n<<\n/Size 5\n/Root 1 0 R\n>>\nstartxref\n391\n%%EOF"
    return base64.b64encode(pdf_content).decode('utf-8')


class TestSanitizeProjectName:
    """Test project name sanitization."""

    def test_sanitize_basic_name(self):
        """Test basic project name sanitization."""
        result = upload_tools._sanitize_project_name("Botany Farm 2022")
        assert result == "botany-farm-2022"

    def test_sanitize_special_chars(self):
        """Test sanitization removes special characters."""
        result = upload_tools._sanitize_project_name("Project @#$% Name!")
        assert result == "project-name"

    def test_sanitize_multiple_spaces(self):
        """Test multiple spaces become single hyphens."""
        result = upload_tools._sanitize_project_name("Project    Name")
        assert result == "project-name"

    def test_sanitize_leading_trailing_hyphens(self):
        """Test leading/trailing hyphens are removed."""
        result = upload_tools._sanitize_project_name("  Project Name  ")
        assert result == "project-name"


class TestCreateSessionFromUploads:
    """Test create_session_from_uploads business logic."""

    @pytest.fixture(autouse=True)
    def _cleanup(self, cleanup_sessions):
        pass

    @pytest.mark.asyncio
    async def test_create_session_success(self, test_settings, sample_pdf_base64):
        """Test successful session creation from uploads."""
        result = await upload_tools.create_session_from_uploads(
            project_name="Test Project",
            files=[
                {
                    "filename": "test.pdf",
                    "content_base64": sample_pdf_base64,
                    "mime_type": "application/pdf"
                }
            ],
            methodology="soil-carbon-v1.2.2"
        )

        # Verify result structure
        assert result["success"] is True
        assert "session_id" in result
        assert result["session_id"].startswith("session-")
        assert result["files_saved"] == ["test.pdf"]
        assert result["documents_found"] >= 1
        assert "documents_directory" in result

        # Verify documents directory was created and contains file
        temp_dir = Path(result["documents_directory"])
        assert temp_dir.exists()
        assert (temp_dir / "test.pdf").exists()

        # Cleanup
        try:
            await session_tools.delete_session(result["session_id"])
        except Exception:
            pass  # Ignore cleanup errors

    @pytest.mark.asyncio
    async def test_create_session_multiple_files(
        self, test_settings, sample_pdf_base64, sample_pdf2_base64, sample_text_base64
    ):
        """Test creating session with multiple files."""
        result = await upload_tools.create_session_from_uploads(
            project_name="Multi File Test",
            files=[
                {"filename": "file1.pdf", "content_base64": sample_pdf_base64},
                {"filename": "file2.txt", "content_base64": sample_text_base64},
                {"filename": "file3.pdf", "content_base64": sample_pdf2_base64},  # Use different PDF
            ],
        )

        assert result["success"] is True
        assert len(result["files_saved"]) == 3
        assert "file1.pdf" in result["files_saved"]
        assert "file2.txt" in result["files_saved"]
        assert "file3.pdf" in result["files_saved"]

        # Cleanup
        try:
            await session_tools.delete_session(result["session_id"])
        except Exception:
            pass  # Ignore cleanup errors

    @pytest.mark.asyncio
    async def test_create_session_missing_project_name(self, test_settings):
        """Test error when project_name is missing."""
        with pytest.raises(ValueError, match="project_name is required"):
            await upload_tools.create_session_from_uploads(
                project_name="",
                files=[{"filename": "test.pdf", "content_base64": "abc"}]
            )

    @pytest.mark.asyncio
    async def test_create_session_missing_project_name_whitespace(self, test_settings):
        """Test error when project_name is only whitespace."""
        with pytest.raises(ValueError, match="project_name is required"):
            await upload_tools.create_session_from_uploads(
                project_name="   ",
                files=[{"filename": "test.pdf", "content_base64": "abc"}]
            )

    @pytest.mark.asyncio
    async def test_create_session_no_files(self, test_settings):
        """Test error when files array is empty."""
        with pytest.raises(ValueError, match="At least one file is required"):
            await upload_tools.create_session_from_uploads(
                project_name="Test",
                files=[]
            )

    @pytest.mark.asyncio
    async def test_create_session_missing_filename(self, test_settings, sample_pdf_base64):
        """Test error when file is missing filename."""
        with pytest.raises(ValueError, match="missing 'filename'"):
            await upload_tools.create_session_from_uploads(
                project_name="Test",
                files=[{"content_base64": sample_pdf_base64}]
            )

    @pytest.mark.asyncio
    async def test_create_session_missing_content(self, test_settings):
        """Test error when file is missing both content_base64 and path."""
        with pytest.raises(ValueError, match="must have either 'content_base64' or 'path' field"):
            await upload_tools.create_session_from_uploads(
                project_name="Test",
                files=[{"filename": "test.pdf"}]
            )

    @pytest.mark.asyncio
    async def test_create_session_invalid_base64(self, test_settings):
        """Test error when base64 content is invalid."""
        with pytest.raises(ValueError, match="Failed to decode base64"):
            await upload_tools.create_session_from_uploads(
                project_name="Test",
                files=[{"filename": "test.pdf", "content_base64": "not-valid-base64!!!"}]
            )

    @pytest.mark.asyncio
    async def test_create_session_with_all_metadata(self, test_settings, sample_pdf_base64):
        """Test session creation with all optional metadata."""
        result = await upload_tools.create_session_from_uploads(
            project_name="Full Metadata Test",
            files=[{"filename": "test.pdf", "content_base64": sample_pdf_base64}],
            methodology="soil-carbon-v1.2.2",
            project_id="C06-1234",
            proponent="Test Proponent Inc.",
            crediting_period="2022-2032",
        )

        assert result["success"] is True

        # Load session and verify metadata
        session_data = await session_tools.load_session(result["session_id"])
        assert session_data["project_metadata"]["project_id"] == "C06-1234"
        assert session_data["project_metadata"]["proponent"] == "Test Proponent Inc."
        assert session_data["project_metadata"]["crediting_period"] == "2022-2032"

        # Cleanup
        try:
            await session_tools.delete_session(result["session_id"])
        except Exception:
            pass  # Ignore cleanup errors


class TestUploadAdditionalFiles:
    """Test upload_additional_files business logic."""

    @pytest.fixture(autouse=True)
    def _cleanup(self, cleanup_sessions):
        pass

    @pytest.mark.asyncio
    async def test_upload_additional_files_success(self, test_settings, sample_pdf_base64, sample_pdf2_base64):
        """Test adding files to existing session."""
        # First create a session
        session_result = await upload_tools.create_session_from_uploads(
            project_name="Test Project",
            files=[{"filename": "file1.pdf", "content_base64": sample_pdf_base64}]
        )

        session_id = session_result["session_id"]

        try:
            # Add another file with different content
            result = await upload_tools.upload_additional_files(
                session_id=session_id,
                files=[{"filename": "file2.pdf", "content_base64": sample_pdf2_base64}]
            )

            assert result["success"] is True
            assert result["session_id"] == session_id
            assert result["files_added"] == ["file2.pdf"]
            assert result["documents_found"] >= 2

            # Verify file was written
            session_data = await session_tools.load_session(session_id)
            docs_path = Path(session_data["project_metadata"]["documents_path"])
            assert (docs_path / "file2.pdf").exists()

        finally:
            await session_tools.delete_session(session_id)

    @pytest.mark.asyncio
    async def test_upload_additional_multiple_files(self, test_settings, sample_pdf_base64, sample_pdf2_base64, sample_pdf3_base64):
        """Test adding multiple files at once."""
        # Create session
        session_result = await upload_tools.create_session_from_uploads(
            project_name="Test",
            files=[{"filename": "file1.pdf", "content_base64": sample_pdf_base64}]
        )

        session_id = session_result["session_id"]

        try:
            # Add multiple files with different content
            result = await upload_tools.upload_additional_files(
                session_id=session_id,
                files=[
                    {"filename": "file2.pdf", "content_base64": sample_pdf2_base64},
                    {"filename": "file3.pdf", "content_base64": sample_pdf3_base64},
                ]
            )

            assert len(result["files_added"]) == 2
            assert "file2.pdf" in result["files_added"]
            assert "file3.pdf" in result["files_added"]
            assert result["documents_found"] >= 3

        finally:
            await session_tools.delete_session(session_id)

    @pytest.mark.asyncio
    async def test_upload_additional_files_duplicate_filename(self, test_settings, sample_pdf_base64):
        """Test error when uploading file with duplicate filename."""
        # Create session with file1.pdf
        session_result = await upload_tools.create_session_from_uploads(
            project_name="Test",
            files=[{"filename": "file1.pdf", "content_base64": sample_pdf_base64}]
        )

        session_id = session_result["session_id"]

        try:
            # Try to add another file1.pdf
            with pytest.raises(ValueError, match="File already exists"):
                await upload_tools.upload_additional_files(
                    session_id=session_id,
                    files=[{"filename": "file1.pdf", "content_base64": sample_pdf_base64}]
                )

        finally:
            await session_tools.delete_session(session_id)

    @pytest.mark.asyncio
    async def test_upload_additional_files_session_not_found(self, test_settings, sample_pdf_base64):
        """Test error when session doesn't exist."""
        with pytest.raises(SessionNotFoundError):
            await upload_tools.upload_additional_files(
                session_id="session-000000000000",
                files=[{"filename": "test.pdf", "content_base64": sample_pdf_base64}]
            )

    @pytest.mark.asyncio
    async def test_upload_additional_files_no_files(self, test_settings, sample_pdf_base64):
        """Test error when files array is empty."""
        # Create session
        session_result = await upload_tools.create_session_from_uploads(
            project_name="Test",
            files=[{"filename": "file1.pdf", "content_base64": sample_pdf_base64}]
        )

        session_id = session_result["session_id"]

        try:
            with pytest.raises(ValueError, match="At least one file is required"):
                await upload_tools.upload_additional_files(
                    session_id=session_id,
                    files=[]
                )

        finally:
            await session_tools.delete_session(session_id)


class TestStartReviewFromUploads:
    """Test start_review_from_uploads business logic."""

    @pytest.fixture(autouse=True)
    def _cleanup(self, cleanup_sessions):
        pass

    @pytest.mark.asyncio
    async def test_start_review_full_workflow(self, test_settings, sample_pdf_base64, sample_pdf2_base64):
        """Test complete review workflow with auto-extraction."""
        result = await upload_tools.start_review_from_uploads(
            project_name="Full Test",
            files=[
                {"filename": "ProjectPlan.pdf", "content_base64": sample_pdf_base64},
                {"filename": "BaselineReport.pdf", "content_base64": sample_pdf2_base64}  # Use different PDF
            ],
            auto_extract=True
        )

        assert "session_creation" in result
        assert "evidence_extraction" in result

        session_result = result["session_creation"]
        assert session_result["success"] is True
        assert len(session_result["files_saved"]) == 2
        assert "ProjectPlan.pdf" in session_result["files_saved"]
        assert "BaselineReport.pdf" in session_result["files_saved"]

        # Evidence extraction may succeed or fail gracefully
        evidence = result["evidence_extraction"]
        assert evidence is not None

        # Cleanup
        try:
            await session_tools.delete_session(session_result["session_id"])
        except Exception:
            pass  # Ignore cleanup errors

    @pytest.mark.asyncio
    async def test_start_review_no_auto_extract(self, test_settings, sample_pdf_base64):
        """Test workflow without auto-extraction."""
        result = await upload_tools.start_review_from_uploads(
            project_name="No Extract Test",
            files=[{"filename": "test.pdf", "content_base64": sample_pdf_base64}],
            auto_extract=False
        )

        assert "session_creation" in result
        assert "evidence_extraction" not in result  # Should not extract

        # Cleanup
        try:
            await session_tools.delete_session(result["session_creation"]["session_id"])
        except Exception:
            pass  # Ignore cleanup errors

    @pytest.mark.asyncio
    async def test_start_review_with_metadata(self, test_settings, sample_pdf_base64):
        """Test start review with all metadata fields."""
        result = await upload_tools.start_review_from_uploads(
            project_name="Metadata Test",
            files=[{"filename": "test.pdf", "content_base64": sample_pdf_base64}],
            methodology="soil-carbon-v1.2.2",
            project_id="C06-5678",
            proponent="Test Org",
            crediting_period="2023-2033",
            auto_extract=False
        )

        session_result = result["session_creation"]
        assert session_result["success"] is True

        # Verify metadata was passed through
        session_data = await session_tools.load_session(session_result["session_id"])
        assert session_data["project_metadata"]["project_id"] == "C06-5678"
        assert session_data["project_metadata"]["proponent"] == "Test Org"
        assert session_data["project_metadata"]["crediting_period"] == "2023-2033"

        # Cleanup
        try:
            await session_tools.delete_session(session_result["session_id"])
        except Exception:
            pass  # Ignore cleanup errors

    @pytest.mark.asyncio
    async def test_start_review_invalid_inputs(self, test_settings):
        """Test error handling for invalid inputs."""
        with pytest.raises(ValueError, match="project_name is required"):
            await upload_tools.start_review_from_uploads(
                project_name="",
                files=[{"filename": "test.pdf", "content_base64": "abc"}]
            )

        with pytest.raises(ValueError, match="At least one file is required"):
            await upload_tools.start_review_from_uploads(
                project_name="Test",
                files=[]
            )


class TestDeduplication:
    """Test file deduplication functionality."""

    @pytest.fixture(autouse=True)
    def _cleanup(self, cleanup_sessions):
        pass

    def test_deduplicate_by_filename_basic(self):
        """Test filename deduplication."""
        files = [
            {"filename": "file1.pdf", "content_base64": "AAAA"},
            {"filename": "file2.pdf", "content_base64": "BBBB"},
            {"filename": "file1.pdf", "content_base64": "CCCC"},  # Duplicate filename
        ]

        unique, duplicates = upload_tools.deduplicate_by_filename(files)

        assert len(unique) == 2
        assert len(duplicates) == 1
        assert duplicates[0] == "file1.pdf"
        assert unique[0]["filename"] == "file1.pdf"
        assert unique[1]["filename"] == "file2.pdf"

    def test_deduplicate_by_filename_with_existing(self):
        """Test filename deduplication with existing files."""
        files = [
            {"filename": "file1.pdf", "content_base64": "AAAA"},
            {"filename": "file2.pdf", "content_base64": "BBBB"},
        ]
        existing = {"file2.pdf", "file3.pdf"}

        unique, duplicates = upload_tools.deduplicate_by_filename(files, existing)

        assert len(unique) == 1
        assert len(duplicates) == 1
        assert duplicates[0] == "file2.pdf"
        assert unique[0]["filename"] == "file1.pdf"

    def test_deduplicate_by_content(self, sample_pdf_base64):
        """Test content-based deduplication."""
        files = [
            {"filename": "file1.pdf", "content_base64": sample_pdf_base64},
            {"filename": "file2.pdf", "content_base64": sample_pdf_base64},  # Same content
            {"filename": "file3.pdf", "content_base64": "ZGlmZmVyZW50"},  # Different
        ]

        unique, duplicates_map = upload_tools.deduplicate_by_content(files)

        assert len(unique) == 2
        assert "file2.pdf" in duplicates_map
        assert duplicates_map["file2.pdf"] == "file1.pdf"
        assert unique[0]["filename"] == "file1.pdf"
        assert unique[1]["filename"] == "file3.pdf"

    def test_deduplicate_by_content_all_unique(self, sample_pdf_base64, sample_pdf2_base64):
        """Test content deduplication when all files are unique."""
        files = [
            {"filename": "file1.pdf", "content_base64": sample_pdf_base64},
            {"filename": "file2.pdf", "content_base64": sample_pdf2_base64},
        ]

        unique, duplicates_map = upload_tools.deduplicate_by_content(files)

        assert len(unique) == 2
        assert len(duplicates_map) == 0

    def test_deduplicate_by_content_invalid_base64(self):
        """Test content deduplication with invalid base64."""
        files = [
            {"filename": "file1.pdf", "content_base64": "invalid!!!"},
            {"filename": "file2.pdf", "content_base64": "ZGlmZmVyZW50"},
        ]

        unique, duplicates_map = upload_tools.deduplicate_by_content(files)

        # Invalid base64 is treated as unique
        assert len(unique) == 2
        assert len(duplicates_map) == 0

    @pytest.mark.asyncio
    async def test_create_session_with_duplicates(self, test_settings, sample_pdf_base64):
        """Test that duplicates are automatically removed."""
        result = await upload_tools.create_session_from_uploads(
            project_name="Dedup Test",
            files=[
                {"filename": "file1.pdf", "content_base64": sample_pdf_base64},
                {"filename": "file1.pdf", "content_base64": sample_pdf_base64},  # Dup filename
                {"filename": "file2.pdf", "content_base64": sample_pdf_base64},  # Dup content
            ],
            deduplicate=True
        )

        assert result["success"] is True
        assert result["files_uploaded"] == 3
        assert len(result["files_saved"]) == 1  # Only 1 unique file
        assert result["deduplication"]["enabled"] is True
        assert result["deduplication"]["total_duplicates_removed"] == 2
        assert "file1.pdf" in result["deduplication"]["duplicate_filenames_skipped"]
        assert "file2.pdf" in result["deduplication"]["duplicate_content_detected"]

        # Cleanup
        try:
            await session_tools.delete_session(result["session_id"])
        except Exception:
            pass  # Ignore cleanup errors

    @pytest.mark.asyncio
    async def test_create_session_deduplication_disabled(self, test_settings, sample_pdf_base64):
        """Test creating session with deduplication disabled."""
        result = await upload_tools.create_session_from_uploads(
            project_name="No Dedup Test",
            files=[
                {"filename": "file1.pdf", "content_base64": sample_pdf_base64},
                {"filename": "file2.pdf", "content_base64": sample_pdf_base64},
            ],
            deduplicate=False
        )

        assert result["success"] is True
        assert len(result["files_saved"]) == 2  # Both files saved
        assert result["deduplication"]["enabled"] is False
        assert result["deduplication"]["total_duplicates_removed"] == 0

        # Cleanup
        try:
            await session_tools.delete_session(result["session_id"])
        except Exception:
            pass  # Ignore cleanup errors

    @pytest.mark.asyncio
    async def test_create_session_all_same_content(self, test_settings, sample_pdf_base64):
        """Test that same content with different filenames keeps one file."""
        result = await upload_tools.create_session_from_uploads(
            project_name="Same Content Test",
            files=[
                {"filename": "file1.pdf", "content_base64": sample_pdf_base64},
                {"filename": "file2.pdf", "content_base64": sample_pdf_base64},  # Same content
                {"filename": "file3.pdf", "content_base64": sample_pdf_base64},  # Same content
            ],
            deduplicate=True
        )

        # Should keep 1 file (the first one) and remove 2 duplicates
        assert result["success"] is True
        assert result["files_uploaded"] == 3
        assert len(result["files_saved"]) == 1
        assert result["deduplication"]["total_duplicates_removed"] == 2

        # Cleanup
        try:
            await session_tools.delete_session(result["session_id"])
        except Exception:
            pass  # Ignore cleanup errors

    @pytest.mark.asyncio
    async def test_start_review_with_deduplication(
        self, test_settings, sample_pdf_base64, sample_pdf2_base64
    ):
        """Test start_review_from_uploads with deduplication."""
        result = await upload_tools.start_review_from_uploads(
            project_name="Review Dedup Test",
            files=[
                {"filename": "file1.pdf", "content_base64": sample_pdf_base64},
                {"filename": "file2.pdf", "content_base64": sample_pdf2_base64},
                {"filename": "file1.pdf", "content_base64": sample_pdf_base64},  # Dup
            ],
            deduplicate=True,
            auto_extract=False
        )

        session_result = result["session_creation"]
        assert session_result["success"] is True
        assert session_result["files_uploaded"] == 3
        assert len(session_result["files_saved"]) == 2
        assert session_result["deduplication"]["total_duplicates_removed"] == 1

        # Cleanup
        try:
            await session_tools.delete_session(session_result["session_id"])
        except Exception:
            pass  # Ignore cleanup errors



class TestSessionDetection:
    """Test automatic session detection (Phase 2)."""

    @pytest.fixture(autouse=True)
    def _cleanup(self, cleanup_sessions):
        pass

    @pytest.mark.asyncio
    async def test_detect_existing_session_basic(self, test_settings, sample_pdf_base64):
        """Test that existing session is detected for same files."""
        # Create initial session
        result1 = await upload_tools.create_session_from_uploads(
            project_name="Test Project",
            files=[{"filename": "test.pdf", "content_base64": sample_pdf_base64}]
        )
        session1_id = result1["session_id"]

        try:
            # Try to create again with same files - should detect existing
            result2 = await upload_tools.create_session_from_uploads(
                project_name="Test Project",  # Same name
                files=[{"filename": "test.pdf", "content_base64": sample_pdf_base64}],  # Same file
                force_new_session=False  # Default: detect existing
            )

            assert result2["existing_session_detected"] is True
            assert result2["session_id"] == session1_id
            assert "Found existing session" in result2["message"]

        finally:
            await session_tools.delete_session(session1_id)

    @pytest.mark.asyncio
    async def test_force_new_session_override(self, test_settings, sample_pdf_base64):
        """Test that force_new_session creates new session despite existing match."""
        # Create initial session
        result1 = await upload_tools.create_session_from_uploads(
            project_name="Force Test",
            files=[{"filename": "test.pdf", "content_base64": sample_pdf_base64}]
        )
        session1_id = result1["session_id"]

        try:
            # Force create new session with same files
            result2 = await upload_tools.create_session_from_uploads(
                project_name="Force Test",
                files=[{"filename": "test.pdf", "content_base64": sample_pdf_base64}],
                force_new_session=True  # Override detection
            )

            # Should create NEW session, not return existing
            assert "existing_session_detected" not in result2 or result2.get("existing_session_detected") is False
            assert result2["session_id"] != session1_id
            assert result2["success"] is True

            # Cleanup second session too
            await session_tools.delete_session(result2["session_id"])

        finally:
            await session_tools.delete_session(session1_id)



class TestPathBasedUploads:
    """Test path-based file upload functionality (Phase 3)."""

    @pytest.fixture(autouse=True)
    def _cleanup(self, cleanup_sessions):
        pass

    @pytest.fixture
    def temp_pdf_file(self, tmp_path, sample_pdf_base64):
        """Create a temporary PDF file for testing path-based uploads."""
        pdf_path = tmp_path / "test_document.pdf"
        pdf_content = base64.b64decode(sample_pdf_base64)
        pdf_path.write_bytes(pdf_content)
        return pdf_path

    @pytest.fixture
    def temp_pdf_file2(self, tmp_path, sample_pdf2_base64):
        """Create a second temporary PDF file."""
        pdf_path = tmp_path / "test_document2.pdf"
        pdf_content = base64.b64decode(sample_pdf2_base64)
        pdf_path.write_bytes(pdf_content)
        return pdf_path

    def test_process_file_input_base64_format(self, sample_pdf_base64):
        """Test process_file_input with base64 format."""
        file_obj = {
            "filename": "test.pdf",
            "content_base64": sample_pdf_base64
        }

        result = upload_tools.process_file_input(file_obj, 0)

        assert result["filename"] == "test.pdf"
        assert result["content_base64"] == sample_pdf_base64

    def test_process_file_input_path_format(self, temp_pdf_file, sample_pdf_base64):
        """Test process_file_input with path format."""
        file_obj = {
            "filename": "test.pdf",
            "path": str(temp_pdf_file)
        }

        result = upload_tools.process_file_input(file_obj, 0)

        assert result["filename"] == "test.pdf"
        assert result["content_base64"] == sample_pdf_base64

    def test_process_file_input_name_field_compatibility(self, temp_pdf_file):
        """Test that 'name' field works as alternative to 'filename' (ElizaOS compatibility)."""
        file_obj = {
            "name": "test.pdf",
            "path": str(temp_pdf_file)
        }

        result = upload_tools.process_file_input(file_obj, 0)

        assert result["filename"] == "test.pdf"

    def test_process_file_input_missing_filename(self):
        """Test error when both filename and name are missing."""
        file_obj = {
            "content_base64": "abc123"
        }

        with pytest.raises(ValueError, match="missing 'filename' or 'name' field"):
            upload_tools.process_file_input(file_obj, 0)

    def test_process_file_input_missing_content_and_path(self):
        """Test error when neither content_base64 nor path is provided."""
        file_obj = {
            "filename": "test.pdf"
        }

        with pytest.raises(ValueError, match="must have either 'content_base64' or 'path' field"):
            upload_tools.process_file_input(file_obj, 0)

    def test_process_file_input_relative_path_rejected(self, tmp_path):
        """Test that relative paths are rejected for security."""
        file_obj = {
            "filename": "test.pdf",
            "path": "relative/path/to/file.pdf"
        }

        with pytest.raises(ValueError, match="Only absolute paths are allowed"):
            upload_tools.process_file_input(file_obj, 0)

    def test_process_file_input_nonexistent_path(self):
        """Test error when path doesn't exist."""
        file_obj = {
            "filename": "test.pdf",
            "path": "/nonexistent/path/to/file.pdf"
        }

        with pytest.raises(ValueError, match="path does not exist"):
            upload_tools.process_file_input(file_obj, 0)

    def test_process_file_input_directory_path_rejected(self, tmp_path):
        """Test that directory paths are rejected."""
        file_obj = {
            "filename": "test.pdf",
            "path": str(tmp_path)  # Directory, not file
        }

        with pytest.raises(ValueError, match="path is not a file"):
            upload_tools.process_file_input(file_obj, 0)

    def test_process_file_input_empty_base64(self):
        """Test error when base64 content is empty."""
        file_obj = {
            "filename": "test.pdf",
            "content_base64": "   "  # Empty/whitespace
        }

        with pytest.raises(ValueError, match="empty content_base64"):
            upload_tools.process_file_input(file_obj, 0)

    @pytest.mark.asyncio
    async def test_create_session_with_path_format(self, test_settings, temp_pdf_file):
        """Test creating session with path-based file input."""
        files = [
            {
                "filename": "test.pdf",
                "path": str(temp_pdf_file)
            }
        ]

        result = await upload_tools.create_session_from_uploads(
            project_name="Path Test Project",
            files=files
        )

        try:
            assert result["success"] is True
            assert result["files_saved"] == ["test.pdf"]
            assert result["documents_found"] == 1

        finally:
            await session_tools.delete_session(result["session_id"])

    @pytest.mark.asyncio
    async def test_create_session_mixed_formats(self, test_settings, temp_pdf_file, sample_pdf2_base64):
        """Test creating session with mix of path and base64 files."""
        files = [
            {
                "filename": "path_file.pdf",
                "path": str(temp_pdf_file)
            },
            {
                "filename": "base64_file.pdf",
                "content_base64": sample_pdf2_base64
            }
        ]

        result = await upload_tools.create_session_from_uploads(
            project_name="Mixed Format Project",
            files=files
        )

        try:
            assert result["success"] is True
            assert result["files_saved"] == ["path_file.pdf", "base64_file.pdf"]
            assert result["documents_found"] == 2

        finally:
            await session_tools.delete_session(result["session_id"])

    @pytest.mark.asyncio
    async def test_upload_additional_files_path_format(self, test_settings, temp_pdf_file, temp_pdf_file2, sample_pdf_base64):
        """Test uploading additional files using path format."""
        # Create initial session with base64
        initial_result = await upload_tools.create_session_from_uploads(
            project_name="Additional Files Test",
            files=[{"filename": "initial.pdf", "content_base64": sample_pdf_base64}]
        )

        session_id = initial_result["session_id"]

        try:
            # Add files using path format
            additional_result = await upload_tools.upload_additional_files(
                session_id=session_id,
                files=[
                    {"filename": "additional1.pdf", "path": str(temp_pdf_file2)}
                ]
            )

            assert additional_result["success"] is True
            assert "additional1.pdf" in additional_result["files_added"]
            assert additional_result["documents_found"] == 2

        finally:
            await session_tools.delete_session(session_id)

    @pytest.mark.asyncio
    async def test_start_review_with_path_format(self, test_settings, temp_pdf_file, temp_pdf_file2):
        """Test complete workflow with path-based uploads."""
        files = [
            {"name": "ProjectPlan.pdf", "path": str(temp_pdf_file)},
            {"name": "Baseline.pdf", "path": str(temp_pdf_file2)}
        ]

        result = await upload_tools.start_review_from_uploads(
            project_name="Full Workflow Path Test",
            files=files,
            auto_extract=False  # Skip evidence extraction for speed
        )

        session_result = result["session_creation"]
        session_id = session_result["session_id"]

        try:
            assert session_result["success"] is True
            assert session_result["documents_found"] == 2

        finally:
            await session_tools.delete_session(session_id)

    @pytest.mark.asyncio
    async def test_path_deduplication_works(self, test_settings, temp_pdf_file):
        """Test that deduplication works with path-based uploads."""
        # Same file uploaded twice via path
        files = [
            {"filename": "file1.pdf", "path": str(temp_pdf_file)},
            {"filename": "file2.pdf", "path": str(temp_pdf_file)}  # Same file, different name
        ]

        result = await upload_tools.create_session_from_uploads(
            project_name="Path Deduplication Test",
            files=files,
            deduplicate=True
        )

        try:
            # Should detect content duplication
            assert result["deduplication"]["total_duplicates_removed"] == 1
            assert len(result["files_saved"]) == 1

        finally:
            await session_tools.delete_session(result["session_id"])

    def test_process_file_input_path_without_filename(self, temp_pdf_file):
        """Test that filename is extracted from path when not provided (ElizaOS compatibility)."""
        file_obj = {
            "path": str(temp_pdf_file)
            # No 'filename' or 'name' field
        }

        result = upload_tools.process_file_input(file_obj, 0)

        assert result["filename"] == temp_pdf_file.name
        assert "content_base64" in result

    def test_process_file_input_explicit_filename_takes_precedence(self, temp_pdf_file):
        """Test that explicit filename takes precedence over path extraction."""
        file_obj = {
            "filename": "custom_name.pdf",
            "path": str(temp_pdf_file)  # Has different name
        }

        result = upload_tools.process_file_input(file_obj, 0)

        # Should use explicit filename, not extracted from path
        assert result["filename"] == "custom_name.pdf"
        assert result["filename"] != temp_pdf_file.name

    @pytest.mark.asyncio
    async def test_create_session_path_only_format(self, test_settings, temp_pdf_file):
        """Test creating session with path-only format (no filename field)."""
        files = [
            {
                "path": str(temp_pdf_file)
                # No filename - should extract from path
            }
        ]

        result = await upload_tools.create_session_from_uploads(
            project_name="Path Only Test",
            files=files
        )

        try:
            assert result["success"] is True
            assert temp_pdf_file.name in result["files_saved"]
            assert result["documents_found"] == 1

        finally:
            await session_tools.delete_session(result["session_id"])


class TestPathResolution:
    """Test ElizaOS path resolution functionality."""

    @pytest.fixture(autouse=True)
    def _cleanup(self, cleanup_sessions):
        pass

    def test_resolve_absolute_path_exists(self, tmp_path, sample_pdf_base64):
        """Test that absolute paths that exist are used as-is."""
        test_file = tmp_path / "test.pdf"
        test_file.write_bytes(base64.b64decode(sample_pdf_base64))

        resolved = upload_tools._resolve_file_path(str(test_file))
        assert resolved == test_file
        assert resolved.exists()

    def test_resolve_eliza_media_url_cwd(self, tmp_path, sample_pdf_base64):
        """Test resolving ElizaOS /media/uploads/ URL from current directory."""
        # Create mock ElizaOS structure in temp directory
        eliza_root = tmp_path / "eliza"
        uploads_dir = eliza_root / "packages/cli/.eliza/data/uploads/agents/abc123"
        uploads_dir.mkdir(parents=True)

        test_file = uploads_dir / "test.pdf"
        test_file.write_bytes(base64.b64decode(sample_pdf_base64))

        # Save current directory and change to eliza_root
        import os
        original_cwd = os.getcwd()
        try:
            os.chdir(eliza_root)

            # Try to resolve ElizaOS URL
            url_path = "/media/uploads/agents/abc123/test.pdf"
            resolved = upload_tools._resolve_file_path(url_path)

            assert resolved.exists()
            assert resolved == test_file

        finally:
            os.chdir(original_cwd)

    def test_resolve_eliza_media_url_env_var(self, tmp_path, sample_pdf_base64, monkeypatch):
        """Test resolving ElizaOS URL using ELIZA_ROOT environment variable."""
        # Create mock ElizaOS structure
        eliza_root = tmp_path / "eliza"
        uploads_dir = eliza_root / "packages/cli/.eliza/data/uploads/agents/abc123"
        uploads_dir.mkdir(parents=True)

        test_file = uploads_dir / "test.pdf"
        test_file.write_bytes(base64.b64decode(sample_pdf_base64))

        # Set ELIZA_ROOT environment variable
        monkeypatch.setenv('ELIZA_ROOT', str(eliza_root))

        # Try to resolve ElizaOS URL
        url_path = "/media/uploads/agents/abc123/test.pdf"
        resolved = upload_tools._resolve_file_path(url_path)

        assert resolved.exists()
        assert resolved == test_file

    def test_resolve_eliza_media_url_alternative_structure(self, tmp_path, sample_pdf_base64, monkeypatch):
        """Test resolving ElizaOS URL with alternative directory structure (no packages/cli)."""
        # Create alternative structure: .eliza/data/ directly under root
        eliza_root = tmp_path / "eliza"
        uploads_dir = eliza_root / ".eliza/data/uploads/agents/abc123"
        uploads_dir.mkdir(parents=True)

        test_file = uploads_dir / "test.pdf"
        test_file.write_bytes(base64.b64decode(sample_pdf_base64))

        monkeypatch.setenv('ELIZA_ROOT', str(eliza_root))

        url_path = "/media/uploads/agents/abc123/test.pdf"
        resolved = upload_tools._resolve_file_path(url_path)

        assert resolved.exists()
        assert resolved == test_file

    def test_resolve_nonexistent_path_returns_original(self):
        """Test that nonexistent paths are returned as-is (will fail later validation)."""
        fake_path = "/nonexistent/path/to/file.pdf"
        resolved = upload_tools._resolve_file_path(fake_path)

        # Should return original path (Path object)
        assert str(resolved) == fake_path
        assert not resolved.exists()

    def test_resolve_relative_path_from_cwd(self, tmp_path, sample_pdf_base64):
        """Test resolving relative path from current working directory."""
        # Create file in temp directory
        test_file = tmp_path / "test.pdf"
        test_file.write_bytes(base64.b64decode(sample_pdf_base64))

        import os
        original_cwd = os.getcwd()
        try:
            os.chdir(tmp_path)

            # Relative path should resolve from cwd
            resolved = upload_tools._resolve_file_path("test.pdf")

            assert resolved.exists()
            assert resolved == test_file

        finally:
            os.chdir(original_cwd)

    @pytest.mark.asyncio
    async def test_create_session_with_eliza_url_path(self, tmp_path, test_settings, sample_pdf_base64, monkeypatch):
        """Test creating session with ElizaOS /media/uploads/ URL path."""
        # Create mock ElizaOS structure
        eliza_root = tmp_path / "eliza"
        uploads_dir = eliza_root / "packages/cli/.eliza/data/uploads/agents/abc123"
        uploads_dir.mkdir(parents=True)

        test_file = uploads_dir / "ProjectPlan.pdf"
        test_file.write_bytes(base64.b64decode(sample_pdf_base64))

        # Set ELIZA_ROOT environment variable
        monkeypatch.setenv('ELIZA_ROOT', str(eliza_root))

        # Use ElizaOS URL format
        files = [
            {
                "path": "/media/uploads/agents/abc123/ProjectPlan.pdf"
            }
        ]

        result = await upload_tools.create_session_from_uploads(
            project_name="ElizaOS URL Test",
            files=files
        )

        try:
            assert result["success"] is True
            assert "ProjectPlan.pdf" in result["files_saved"]
            assert result["documents_found"] == 1

        finally:
            await session_tools.delete_session(result["session_id"])

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/gaiaaiagent/regen-registry-review-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

test_upload_tools.py•43.5 KiB

"""Tests for file upload tools."""

import pytest
import base64
from pathlib import Path

from registry_review_mcp.tools import upload_tools, session_tools
from registry_review_mcp.models.errors import SessionNotFoundError


@pytest.fixture
def sample_pdf_base64():
    """Create a minimal test PDF encoded as base64."""
    # Minimal valid PDF structure
    pdf_content = b"%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n>>\nendobj\n2 0 obj\n<<\n/Type /Pages\n/Kids [3 0 R]\n/Count 1\n>>\nendobj\n3 0 obj\n<<\n/Type /Page\n/Parent 2 0 R\n/Resources <<\n/Font <<\n/F1 <<\n/Type /Font\n/Subtype /Type1\n/BaseFont /Times-Roman\n>>\n>>\n>>\n/MediaBox [0 0 612 792]\n>>\nendobj\nxref\n0 4\n0000000000 65535 f\n0000000009 00000 n\n0000000074 00000 n\n0000000120 00000 n\ntrailer\n<<\n/Size 4\n/Root 1 0 R\n>>\nstartxref\n149\n%%EOF"
    return base64.b64encode(pdf_content).decode('utf-8')


@pytest.fixture
def sample_text_base64():
    """Create a simple text file encoded as base64."""
    text_content = b"This is a test document for the registry review system."
    return base64.b64encode(text_content).decode('utf-8')


@pytest.fixture
def sample_pdf2_base64():
    """Create a different minimal PDF encoded as base64 (with different content)."""
    # Different PDF with unique content to avoid deduplication
    pdf_content = b"%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n>>\nendobj\n2 0 obj\n<<\n/Type /Pages\n/Kids [3 0 R]\n/Count 1\n>>\nendobj\n3 0 obj\n<<\n/Type /Page\n/Parent 2 0 R\n/Resources <<\n/Font <<\n/F1 <<\n/Type /Font\n/Subtype /Type1\n/BaseFont /Helvetica\n>>\n>>\n>>\n/MediaBox [0 0 612 792]\n/Contents 4 0 R\n>>\nendobj\n4 0 obj\n<<\n/Length 44\n>>\nstream\nBT\n/F1 12 Tf\n100 700 Td\n(Different Content) Tj\nET\nendstream\nendobj\nxref\n0 5\n0000000000 65535 f\n0000000009 00000 n\n0000000074 00000 n\n0000000120 00000 n\n0000000298 00000 n\ntrailer\n<<\n/Size 5\n/Root 1 0 R\n>>\nstartxref\n393\n%%EOF"
    return base64.b64encode(pdf_content).decode('utf-8')


@pytest.fixture
def sample_pdf3_base64():
    """Create a third different minimal PDF encoded as base64."""
    # Another unique PDF content
    pdf_content = b"%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n>>\nendobj\n2 0 obj\n<<\n/Type /Pages\n/Kids [3 0 R]\n/Count 1\n>>\nendobj\n3 0 obj\n<<\n/Type /Page\n/Parent 2 0 R\n/Resources <<\n/Font <<\n/F1 <<\n/Type /Font\n/Subtype /Type1\n/BaseFont /Courier\n>>\n>>\n>>\n/MediaBox [0 0 612 792]\n/Contents 4 0 R\n>>\nendobj\n4 0 obj\n<<\n/Length 44\n>>\nstream\nBT\n/F1 12 Tf\n100 700 Td\n(Third File Content) Tj\nET\nendstream\nendobj\nxref\n0 5\n0000000000 65535 f\n0000000009 00000 n\n0000000074 00000 n\n0000000120 00000 n\n0000000296 00000 n\ntrailer\n<<\n/Size 5\n/Root 1 0 R\n>>\nstartxref\n391\n%%EOF"
    return base64.b64encode(pdf_content).decode('utf-8')


class TestSanitizeProjectName:
    """Test project name sanitization."""

    def test_sanitize_basic_name(self):
        """Test basic project name sanitization."""
        result = upload_tools._sanitize_project_name("Botany Farm 2022")
        assert result == "botany-farm-2022"

    def test_sanitize_special_chars(self):
        """Test sanitization removes special characters."""
        result = upload_tools._sanitize_project_name("Project @#$% Name!")
        assert result == "project-name"

    def test_sanitize_multiple_spaces(self):
        """Test multiple spaces become single hyphens."""
        result = upload_tools._sanitize_project_name("Project    Name")
        assert result == "project-name"

    def test_sanitize_leading_trailing_hyphens(self):
        """Test leading/trailing hyphens are removed."""
        result = upload_tools._sanitize_project_name("  Project Name  ")
        assert result == "project-name"


class TestCreateSessionFromUploads:
    """Test create_session_from_uploads business logic."""

    @pytest.fixture(autouse=True)
    def _cleanup(self, cleanup_sessions):
        pass

    @pytest.mark.asyncio
    async def test_create_session_success(self, test_settings, sample_pdf_base64):
        """Test successful session creation from uploads."""
        result = await upload_tools.create_session_from_uploads(
            project_name="Test Project",
            files=[
                {
                    "filename": "test.pdf",
                    "content_base64": sample_pdf_base64,
                    "mime_type": "application/pdf"
                }
            ],
            methodology="soil-carbon-v1.2.2"
        )

        # Verify result structure
        assert result["success"] is True
        assert "session_id" in result
        assert result["session_id"].startswith("session-")
        assert result["files_saved"] == ["test.pdf"]
        assert result["documents_found"] >= 1
        assert "documents_directory" in result

        # Verify documents directory was created and contains file
        temp_dir = Path(result["documents_directory"])
        assert temp_dir.exists()
        assert (temp_dir / "test.pdf").exists()

        # Cleanup
        try:
            await session_tools.delete_session(result["session_id"])
        except Exception:
            pass  # Ignore cleanup errors

    @pytest.mark.asyncio
    async def test_create_session_multiple_files(
        self, test_settings, sample_pdf_base64, sample_pdf2_base64, sample_text_base64
    ):
        """Test creating session with multiple files."""
        result = await upload_tools.create_session_from_uploads(
            project_name="Multi File Test",
            files=[
                {"filename": "file1.pdf", "content_base64": sample_pdf_base64},
                {"filename": "file2.txt", "content_base64": sample_text_base64},
                {"filename": "file3.pdf", "content_base64": sample_pdf2_base64},  # Use different PDF
            ],
        )

        assert result["success"] is True
        assert len(result["files_saved"]) == 3
        assert "file1.pdf" in result["files_saved"]
        assert "file2.txt" in result["files_saved"]
        assert "file3.pdf" in result["files_saved"]

        # Cleanup
        try:
            await session_tools.delete_session(result["session_id"])
        except Exception:
            pass  # Ignore cleanup errors

    @pytest.mark.asyncio
    async def test_create_session_missing_project_name(self, test_settings):
        """Test error when project_name is missing."""
        with pytest.raises(ValueError, match="project_name is required"):
            await upload_tools.create_session_from_uploads(
                project_name="",
                files=[{"filename": "test.pdf", "content_base64": "abc"}]
            )

    @pytest.mark.asyncio
    async def test_create_session_missing_project_name_whitespace(self, test_settings):
        """Test error when project_name is only whitespace."""
        with pytest.raises(ValueError, match="project_name is required"):
            await upload_tools.create_session_from_uploads(
                project_name="   ",
                files=[{"filename": "test.pdf", "content_base64": "abc"}]
            )

    @pytest.mark.asyncio
    async def test_create_session_no_files(self, test_settings):
        """Test error when files array is empty."""
        with pytest.raises(ValueError, match="At least one file is required"):
            await upload_tools.create_session_from_uploads(
                project_name="Test",
                files=[]
            )

    @pytest.mark.asyncio
    async def test_create_session_missing_filename(self, test_settings, sample_pdf_base64):
        """Test error when file is missing filename."""
        with pytest.raises(ValueError, match="missing 'filename'"):
            await upload_tools.create_session_from_uploads(
                project_name="Test",
                files=[{"content_base64": sample_pdf_base64}]
            )

    @pytest.mark.asyncio
    async def test_create_session_missing_content(self, test_settings):
        """Test error when file is missing both content_base64 and path."""
        with pytest.raises(ValueError, match="must have either 'content_base64' or 'path' field"):
            await upload_tools.create_session_from_uploads(
                project_name="Test",
                files=[{"filename": "test.pdf"}]
            )

    @pytest.mark.asyncio
    async def test_create_session_invalid_base64(self, test_settings):
        """Test error when base64 content is invalid."""
        with pytest.raises(ValueError, match="Failed to decode base64"):
            await upload_tools.create_session_from_uploads(
                project_name="Test",
                files=[{"filename": "test.pdf", "content_base64": "not-valid-base64!!!"}]
            )

    @pytest.mark.asyncio
    async def test_create_session_with_all_metadata(self, test_settings, sample_pdf_base64):
        """Test session creation with all optional metadata."""
        result = await upload_tools.create_session_from_uploads(
            project_name="Full Metadata Test",
            files=[{"filename": "test.pdf", "content_base64": sample_pdf_base64}],
            methodology="soil-carbon-v1.2.2",
            project_id="C06-1234",
            proponent="Test Proponent Inc.",
            crediting_period="2022-2032",
        )

        assert result["success"] is True

        # Load session and verify metadata
        session_data = await session_tools.load_session(result["session_id"])
        assert session_data["project_metadata"]["project_id"] == "C06-1234"
        assert session_data["project_metadata"]["proponent"] == "Test Proponent Inc."
        assert session_data["project_metadata"]["crediting_period"] == "2022-2032"

        # Cleanup
        try:
            await session_tools.delete_session(result["session_id"])
        except Exception:
            pass  # Ignore cleanup errors


class TestUploadAdditionalFiles:
    """Test upload_additional_files business logic."""

    @pytest.fixture(autouse=True)
    def _cleanup(self, cleanup_sessions):
        pass

    @pytest.mark.asyncio
    async def test_upload_additional_files_success(self, test_settings, sample_pdf_base64, sample_pdf2_base64):
        """Test adding files to existing session."""
        # First create a session
        session_result = await upload_tools.create_session_from_uploads(
            project_name="Test Project",
            files=[{"filename": "file1.pdf", "content_base64": sample_pdf_base64}]
        )

        session_id = session_result["session_id"]

        try:
            # Add another file with different content
            result = await upload_tools.upload_additional_files(
                session_id=session_id,
                files=[{"filename": "file2.pdf", "content_base64": sample_pdf2_base64}]
            )

            assert result["success"] is True
            assert result["session_id"] == session_id
            assert result["files_added"] == ["file2.pdf"]
            assert result["documents_found"] >= 2

            # Verify file was written
            session_data = await session_tools.load_session(session_id)
            docs_path = Path(session_data["project_metadata"]["documents_path"])
            assert (docs_path / "file2.pdf").exists()

        finally:
            await session_tools.delete_session(session_id)

    @pytest.mark.asyncio
    async def test_upload_additional_multiple_files(self, test_settings, sample_pdf_base64, sample_pdf2_base64, sample_pdf3_base64):
        """Test adding multiple files at once."""
        # Create session
        session_result = await upload_tools.create_session_from_uploads(
            project_name="Test",
            files=[{"filename": "file1.pdf", "content_base64": sample_pdf_base64}]
        )

        session_id = session_result["session_id"]

        try:
            # Add multiple files with different content
            result = await upload_tools.upload_additional_files(
                session_id=session_id,
                files=[
                    {"filename": "file2.pdf", "content_base64": sample_pdf2_base64},
                    {"filename": "file3.pdf", "content_base64": sample_pdf3_base64},
                ]
            )

            assert len(result["files_added"]) == 2
            assert "file2.pdf" in result["files_added"]
            assert "file3.pdf" in result["files_added"]
            assert result["documents_found"] >= 3

        finally:
            await session_tools.delete_session(session_id)

    @pytest.mark.asyncio
    async def test_upload_additional_files_duplicate_filename(self, test_settings, sample_pdf_base64):
        """Test error when uploading file with duplicate filename."""
        # Create session with file1.pdf
        session_result = await upload_tools.create_session_from_uploads(
            project_name="Test",
            files=[{"filename": "file1.pdf", "content_base64": sample_pdf_base64}]
        )

        session_id = session_result["session_id"]

        try:
            # Try to add another file1.pdf
            with pytest.raises(ValueError, match="File already exists"):
                await upload_tools.upload_additional_files(
                    session_id=session_id,
                    files=[{"filename": "file1.pdf", "content_base64": sample_pdf_base64}]
                )

        finally:
            await session_tools.delete_session(session_id)

    @pytest.mark.asyncio
    async def test_upload_additional_files_session_not_found(self, test_settings, sample_pdf_base64):
        """Test error when session doesn't exist."""
        with pytest.raises(SessionNotFoundError):
            await upload_tools.upload_additional_files(
                session_id="session-000000000000",
                files=[{"filename": "test.pdf", "content_base64": sample_pdf_base64}]
            )

    @pytest.mark.asyncio
    async def test_upload_additional_files_no_files(self, test_settings, sample_pdf_base64):
        """Test error when files array is empty."""
        # Create session
        session_result = await upload_tools.create_session_from_uploads(
            project_name="Test",
            files=[{"filename": "file1.pdf", "content_base64": sample_pdf_base64}]
        )

        session_id = session_result["session_id"]

        try:
            with pytest.raises(ValueError, match="At least one file is required"):
                await upload_tools.upload_additional_files(
                    session_id=session_id,
                    files=[]
                )

        finally:
            await session_tools.delete_session(session_id)


class TestStartReviewFromUploads:
    """Test start_review_from_uploads business logic."""

    @pytest.fixture(autouse=True)
    def _cleanup(self, cleanup_sessions):
        pass

    @pytest.mark.asyncio
    async def test_start_review_full_workflow(self, test_settings, sample_pdf_base64, sample_pdf2_base64):
        """Test complete review workflow with auto-extraction."""
        result = await upload_tools.start_review_from_uploads(
            project_name="Full Test",
            files=[
                {"filename": "ProjectPlan.pdf", "content_base64": sample_pdf_base64},
                {"filename": "BaselineReport.pdf", "content_base64": sample_pdf2_base64}  # Use different PDF
            ],
            auto_extract=True
        )

        assert "session_creation" in result
        assert "evidence_extraction" in result

        session_result = result["session_creation"]
        assert session_result["success"] is True
        assert len(session_result["files_saved"]) == 2
        assert "ProjectPlan.pdf" in session_result["files_saved"]
        assert "BaselineReport.pdf" in session_result["files_saved"]

        # Evidence extraction may succeed or fail gracefully
        evidence = result["evidence_extraction"]
        assert evidence is not None

        # Cleanup
        try:
            await session_tools.delete_session(session_result["session_id"])
        except Exception:
            pass  # Ignore cleanup errors

    @pytest.mark.asyncio
    async def test_start_review_no_auto_extract(self, test_settings, sample_pdf_base64):
        """Test workflow without auto-extraction."""
        result = await upload_tools.start_review_from_uploads(
            project_name="No Extract Test",
            files=[{"filename": "test.pdf", "content_base64": sample_pdf_base64}],
            auto_extract=False
        )

        assert "session_creation" in result
        assert "evidence_extraction" not in result  # Should not extract

        # Cleanup
        try:
            await session_tools.delete_session(result["session_creation"]["session_id"])
        except Exception:
            pass  # Ignore cleanup errors

    @pytest.mark.asyncio
    async def test_start_review_with_metadata(self, test_settings, sample_pdf_base64):
        """Test start review with all metadata fields."""
        result = await upload_tools.start_review_from_uploads(
            project_name="Metadata Test",
            files=[{"filename": "test.pdf", "content_base64": sample_pdf_base64}],
            methodology="soil-carbon-v1.2.2",
            project_id="C06-5678",
            proponent="Test Org",
            crediting_period="2023-2033",
            auto_extract=False
        )

        session_result = result["session_creation"]
        assert session_result["success"] is True

        # Verify metadata was passed through
        session_data = await session_tools.load_session(session_result["session_id"])
        assert session_data["project_metadata"]["project_id"] == "C06-5678"
        assert session_data["project_metadata"]["proponent"] == "Test Org"
        assert session_data["project_metadata"]["crediting_period"] == "2023-2033"

        # Cleanup
        try:
            await session_tools.delete_session(session_result["session_id"])
        except Exception:
            pass  # Ignore cleanup errors

    @pytest.mark.asyncio
    async def test_start_review_invalid_inputs(self, test_settings):
        """Test error handling for invalid inputs."""
        with pytest.raises(ValueError, match="project_name is required"):
            await upload_tools.start_review_from_uploads(
                project_name="",
                files=[{"filename": "test.pdf", "content_base64": "abc"}]
            )

        with pytest.raises(ValueError, match="At least one file is required"):
            await upload_tools.start_review_from_uploads(
                project_name="Test",
                files=[]
            )


class TestDeduplication:
    """Test file deduplication functionality."""

    @pytest.fixture(autouse=True)
    def _cleanup(self, cleanup_sessions):
        pass

    def test_deduplicate_by_filename_basic(self):
        """Test filename deduplication."""
        files = [
            {"filename": "file1.pdf", "content_base64": "AAAA"},
            {"filename": "file2.pdf", "content_base64": "BBBB"},
            {"filename": "file1.pdf", "content_base64": "CCCC"},  # Duplicate filename
        ]

        unique, duplicates = upload_tools.deduplicate_by_filename(files)

        assert len(unique) == 2
        assert len(duplicates) == 1
        assert duplicates[0] == "file1.pdf"
        assert unique[0]["filename"] == "file1.pdf"
        assert unique[1]["filename"] == "file2.pdf"

    def test_deduplicate_by_filename_with_existing(self):
        """Test filename deduplication with existing files."""
        files = [
            {"filename": "file1.pdf", "content_base64": "AAAA"},
            {"filename": "file2.pdf", "content_base64": "BBBB"},
        ]
        existing = {"file2.pdf", "file3.pdf"}

        unique, duplicates = upload_tools.deduplicate_by_filename(files, existing)

        assert len(unique) == 1
        assert len(duplicates) == 1
        assert duplicates[0] == "file2.pdf"
        assert unique[0]["filename"] == "file1.pdf"

    def test_deduplicate_by_content(self, sample_pdf_base64):
        """Test content-based deduplication."""
        files = [
            {"filename": "file1.pdf", "content_base64": sample_pdf_base64},
            {"filename": "file2.pdf", "content_base64": sample_pdf_base64},  # Same content
            {"filename": "file3.pdf", "content_base64": "ZGlmZmVyZW50"},  # Different
        ]

        unique, duplicates_map = upload_tools.deduplicate_by_content(files)

        assert len(unique) == 2
        assert "file2.pdf" in duplicates_map
        assert duplicates_map["file2.pdf"] == "file1.pdf"
        assert unique[0]["filename"] == "file1.pdf"
        assert unique[1]["filename"] == "file3.pdf"

    def test_deduplicate_by_content_all_unique(self, sample_pdf_base64, sample_pdf2_base64):
        """Test content deduplication when all files are unique."""
        files = [
            {"filename": "file1.pdf", "content_base64": sample_pdf_base64},
            {"filename": "file2.pdf", "content_base64": sample_pdf2_base64},
        ]

        unique, duplicates_map = upload_tools.deduplicate_by_content(files)

        assert len(unique) == 2
        assert len(duplicates_map) == 0

    def test_deduplicate_by_content_invalid_base64(self):
        """Test content deduplication with invalid base64."""
        files = [
            {"filename": "file1.pdf", "content_base64": "invalid!!!"},
            {"filename": "file2.pdf", "content_base64": "ZGlmZmVyZW50"},
        ]

        unique, duplicates_map = upload_tools.deduplicate_by_content(files)

        # Invalid base64 is treated as unique
        assert len(unique) == 2
        assert len(duplicates_map) == 0

    @pytest.mark.asyncio
    async def test_create_session_with_duplicates(self, test_settings, sample_pdf_base64):
        """Test that duplicates are automatically removed."""
        result = await upload_tools.create_session_from_uploads(
            project_name="Dedup Test",
            files=[
                {"filename": "file1.pdf", "content_base64": sample_pdf_base64},
                {"filename": "file1.pdf", "content_base64": sample_pdf_base64},  # Dup filename
                {"filename": "file2.pdf", "content_base64": sample_pdf_base64},  # Dup content
            ],
            deduplicate=True
        )

        assert result["success"] is True
        assert result["files_uploaded"] == 3
        assert len(result["files_saved"]) == 1  # Only 1 unique file
        assert result["deduplication"]["enabled"] is True
        assert result["deduplication"]["total_duplicates_removed"] == 2
        assert "file1.pdf" in result["deduplication"]["duplicate_filenames_skipped"]
        assert "file2.pdf" in result["deduplication"]["duplicate_content_detected"]

        # Cleanup
        try:
            await session_tools.delete_session(result["session_id"])
        except Exception:
            pass  # Ignore cleanup errors

    @pytest.mark.asyncio
    async def test_create_session_deduplication_disabled(self, test_settings, sample_pdf_base64):
        """Test creating session with deduplication disabled."""
        result = await upload_tools.create_session_from_uploads(
            project_name="No Dedup Test",
            files=[
                {"filename": "file1.pdf", "content_base64": sample_pdf_base64},
                {"filename": "file2.pdf", "content_base64": sample_pdf_base64},
            ],
            deduplicate=False
        )

        assert result["success"] is True
        assert len(result["files_saved"]) == 2  # Both files saved
        assert result["deduplication"]["enabled"] is False
        assert result["deduplication"]["total_duplicates_removed"] == 0

        # Cleanup
        try:
            await session_tools.delete_session(result["session_id"])
        except Exception:
            pass  # Ignore cleanup errors

    @pytest.mark.asyncio
    async def test_create_session_all_same_content(self, test_settings, sample_pdf_base64):
        """Test that same content with different filenames keeps one file."""
        result = await upload_tools.create_session_from_uploads(
            project_name="Same Content Test",
            files=[
                {"filename": "file1.pdf", "content_base64": sample_pdf_base64},
                {"filename": "file2.pdf", "content_base64": sample_pdf_base64},  # Same content
                {"filename": "file3.pdf", "content_base64": sample_pdf_base64},  # Same content
            ],
            deduplicate=True
        )

        # Should keep 1 file (the first one) and remove 2 duplicates
        assert result["success"] is True
        assert result["files_uploaded"] == 3
        assert len(result["files_saved"]) == 1
        assert result["deduplication"]["total_duplicates_removed"] == 2

        # Cleanup
        try:
            await session_tools.delete_session(result["session_id"])
        except Exception:
            pass  # Ignore cleanup errors

    @pytest.mark.asyncio
    async def test_start_review_with_deduplication(
        self, test_settings, sample_pdf_base64, sample_pdf2_base64
    ):
        """Test start_review_from_uploads with deduplication."""
        result = await upload_tools.start_review_from_uploads(
            project_name="Review Dedup Test",
            files=[
                {"filename": "file1.pdf", "content_base64": sample_pdf_base64},
                {"filename": "file2.pdf", "content_base64": sample_pdf2_base64},
                {"filename": "file1.pdf", "content_base64": sample_pdf_base64},  # Dup
            ],
            deduplicate=True,
            auto_extract=False
        )

        session_result = result["session_creation"]
        assert session_result["success"] is True
        assert session_result["files_uploaded"] == 3
        assert len(session_result["files_saved"]) == 2
        assert session_result["deduplication"]["total_duplicates_removed"] == 1

        # Cleanup
        try:
            await session_tools.delete_session(session_result["session_id"])
        except Exception:
            pass  # Ignore cleanup errors



class TestSessionDetection:
    """Test automatic session detection (Phase 2)."""

    @pytest.fixture(autouse=True)
    def _cleanup(self, cleanup_sessions):
        pass

    @pytest.mark.asyncio
    async def test_detect_existing_session_basic(self, test_settings, sample_pdf_base64):
        """Test that existing session is detected for same files."""
        # Create initial session
        result1 = await upload_tools.create_session_from_uploads(
            project_name="Test Project",
            files=[{"filename": "test.pdf", "content_base64": sample_pdf_base64}]
        )
        session1_id = result1["session_id"]

        try:
            # Try to create again with same files - should detect existing
            result2 = await upload_tools.create_session_from_uploads(
                project_name="Test Project",  # Same name
                files=[{"filename": "test.pdf", "content_base64": sample_pdf_base64}],  # Same file
                force_new_session=False  # Default: detect existing
            )

            assert result2["existing_session_detected"] is True
            assert result2["session_id"] == session1_id
            assert "Found existing session" in result2["message"]

        finally:
            await session_tools.delete_session(session1_id)

    @pytest.mark.asyncio
    async def test_force_new_session_override(self, test_settings, sample_pdf_base64):
        """Test that force_new_session creates new session despite existing match."""
        # Create initial session
        result1 = await upload_tools.create_session_from_uploads(
            project_name="Force Test",
            files=[{"filename": "test.pdf", "content_base64": sample_pdf_base64}]
        )
        session1_id = result1["session_id"]

        try:
            # Force create new session with same files
            result2 = await upload_tools.create_session_from_uploads(
                project_name="Force Test",
                files=[{"filename": "test.pdf", "content_base64": sample_pdf_base64}],
                force_new_session=True  # Override detection
            )

            # Should create NEW session, not return existing
            assert "existing_session_detected" not in result2 or result2.get("existing_session_detected") is False
            assert result2["session_id"] != session1_id
            assert result2["success"] is True

            # Cleanup second session too
            await session_tools.delete_session(result2["session_id"])

        finally:
            await session_tools.delete_session(session1_id)



class TestPathBasedUploads:
    """Test path-based file upload functionality (Phase 3)."""

    @pytest.fixture(autouse=True)
    def _cleanup(self, cleanup_sessions):
        pass

    @pytest.fixture
    def temp_pdf_file(self, tmp_path, sample_pdf_base64):
        """Create a temporary PDF file for testing path-based uploads."""
        pdf_path = tmp_path / "test_document.pdf"
        pdf_content = base64.b64decode(sample_pdf_base64)
        pdf_path.write_bytes(pdf_content)
        return pdf_path

    @pytest.fixture
    def temp_pdf_file2(self, tmp_path, sample_pdf2_base64):
        """Create a second temporary PDF file."""
        pdf_path = tmp_path / "test_document2.pdf"
        pdf_content = base64.b64decode(sample_pdf2_base64)
        pdf_path.write_bytes(pdf_content)
        return pdf_path

    def test_process_file_input_base64_format(self, sample_pdf_base64):
        """Test process_file_input with base64 format."""
        file_obj = {
            "filename": "test.pdf",
            "content_base64": sample_pdf_base64
        }

        result = upload_tools.process_file_input(file_obj, 0)

        assert result["filename"] == "test.pdf"
        assert result["content_base64"] == sample_pdf_base64

    def test_process_file_input_path_format(self, temp_pdf_file, sample_pdf_base64):
        """Test process_file_input with path format."""
        file_obj = {
            "filename": "test.pdf",
            "path": str(temp_pdf_file)
        }

        result = upload_tools.process_file_input(file_obj, 0)

        assert result["filename"] == "test.pdf"
        assert result["content_base64"] == sample_pdf_base64

    def test_process_file_input_name_field_compatibility(self, temp_pdf_file):
        """Test that 'name' field works as alternative to 'filename' (ElizaOS compatibility)."""
        file_obj = {
            "name": "test.pdf",
            "path": str(temp_pdf_file)
        }

        result = upload_tools.process_file_input(file_obj, 0)

        assert result["filename"] == "test.pdf"

    def test_process_file_input_missing_filename(self):
        """Test error when both filename and name are missing."""
        file_obj = {
            "content_base64": "abc123"
        }

        with pytest.raises(ValueError, match="missing 'filename' or 'name' field"):
            upload_tools.process_file_input(file_obj, 0)

    def test_process_file_input_missing_content_and_path(self):
        """Test error when neither content_base64 nor path is provided."""
        file_obj = {
            "filename": "test.pdf"
        }

        with pytest.raises(ValueError, match="must have either 'content_base64' or 'path' field"):
            upload_tools.process_file_input(file_obj, 0)

    def test_process_file_input_relative_path_rejected(self, tmp_path):
        """Test that relative paths are rejected for security."""
        file_obj = {
            "filename": "test.pdf",
            "path": "relative/path/to/file.pdf"
        }

        with pytest.raises(ValueError, match="Only absolute paths are allowed"):
            upload_tools.process_file_input(file_obj, 0)

    def test_process_file_input_nonexistent_path(self):
        """Test error when path doesn't exist."""
        file_obj = {
            "filename": "test.pdf",
            "path": "/nonexistent/path/to/file.pdf"
        }

        with pytest.raises(ValueError, match="path does not exist"):
            upload_tools.process_file_input(file_obj, 0)

    def test_process_file_input_directory_path_rejected(self, tmp_path):
        """Test that directory paths are rejected."""
        file_obj = {
            "filename": "test.pdf",
            "path": str(tmp_path)  # Directory, not file
        }

        with pytest.raises(ValueError, match="path is not a file"):
            upload_tools.process_file_input(file_obj, 0)

    def test_process_file_input_empty_base64(self):
        """Test error when base64 content is empty."""
        file_obj = {
            "filename": "test.pdf",
            "content_base64": "   "  # Empty/whitespace
        }

        with pytest.raises(ValueError, match="empty content_base64"):
            upload_tools.process_file_input(file_obj, 0)

    @pytest.mark.asyncio
    async def test_create_session_with_path_format(self, test_settings, temp_pdf_file):
        """Test creating session with path-based file input."""
        files = [
            {
                "filename": "test.pdf",
                "path": str(temp_pdf_file)
            }
        ]

        result = await upload_tools.create_session_from_uploads(
            project_name="Path Test Project",
            files=files
        )

        try:
            assert result["success"] is True
            assert result["files_saved"] == ["test.pdf"]
            assert result["documents_found"] == 1

        finally:
            await session_tools.delete_session(result["session_id"])

    @pytest.mark.asyncio
    async def test_create_session_mixed_formats(self, test_settings, temp_pdf_file, sample_pdf2_base64):
        """Test creating session with mix of path and base64 files."""
        files = [
            {
                "filename": "path_file.pdf",
                "path": str(temp_pdf_file)
            },
            {
                "filename": "base64_file.pdf",
                "content_base64": sample_pdf2_base64
            }
        ]

        result = await upload_tools.create_session_from_uploads(
            project_name="Mixed Format Project",
            files=files
        )

        try:
            assert result["success"] is True
            assert result["files_saved"] == ["path_file.pdf", "base64_file.pdf"]
            assert result["documents_found"] == 2

        finally:
            await session_tools.delete_session(result["session_id"])

    @pytest.mark.asyncio
    async def test_upload_additional_files_path_format(self, test_settings, temp_pdf_file, temp_pdf_file2, sample_pdf_base64):
        """Test uploading additional files using path format."""
        # Create initial session with base64
        initial_result = await upload_tools.create_session_from_uploads(
            project_name="Additional Files Test",
            files=[{"filename": "initial.pdf", "content_base64": sample_pdf_base64}]
        )

        session_id = initial_result["session_id"]

        try:
            # Add files using path format
            additional_result = await upload_tools.upload_additional_files(
                session_id=session_id,
                files=[
                    {"filename": "additional1.pdf", "path": str(temp_pdf_file2)}
                ]
            )

            assert additional_result["success"] is True
            assert "additional1.pdf" in additional_result["files_added"]
            assert additional_result["documents_found"] == 2

        finally:
            await session_tools.delete_session(session_id)

    @pytest.mark.asyncio
    async def test_start_review_with_path_format(self, test_settings, temp_pdf_file, temp_pdf_file2):
        """Test complete workflow with path-based uploads."""
        files = [
            {"name": "ProjectPlan.pdf", "path": str(temp_pdf_file)},
            {"name": "Baseline.pdf", "path": str(temp_pdf_file2)}
        ]

        result = await upload_tools.start_review_from_uploads(
            project_name="Full Workflow Path Test",
            files=files,
            auto_extract=False  # Skip evidence extraction for speed
        )

        session_result = result["session_creation"]
        session_id = session_result["session_id"]

        try:
            assert session_result["success"] is True
            assert session_result["documents_found"] == 2

        finally:
            await session_tools.delete_session(session_id)

    @pytest.mark.asyncio
    async def test_path_deduplication_works(self, test_settings, temp_pdf_file):
        """Test that deduplication works with path-based uploads."""
        # Same file uploaded twice via path
        files = [
            {"filename": "file1.pdf", "path": str(temp_pdf_file)},
            {"filename": "file2.pdf", "path": str(temp_pdf_file)}  # Same file, different name
        ]

        result = await upload_tools.create_session_from_uploads(
            project_name="Path Deduplication Test",
            files=files,
            deduplicate=True
        )

        try:
            # Should detect content duplication
            assert result["deduplication"]["total_duplicates_removed"] == 1
            assert len(result["files_saved"]) == 1

        finally:
            await session_tools.delete_session(result["session_id"])

    def test_process_file_input_path_without_filename(self, temp_pdf_file):
        """Test that filename is extracted from path when not provided (ElizaOS compatibility)."""
        file_obj = {
            "path": str(temp_pdf_file)
            # No 'filename' or 'name' field
        }

        result = upload_tools.process_file_input(file_obj, 0)

        assert result["filename"] == temp_pdf_file.name
        assert "content_base64" in result

    def test_process_file_input_explicit_filename_takes_precedence(self, temp_pdf_file):
        """Test that explicit filename takes precedence over path extraction."""
        file_obj = {
            "filename": "custom_name.pdf",
            "path": str(temp_pdf_file)  # Has different name
        }

        result = upload_tools.process_file_input(file_obj, 0)

        # Should use explicit filename, not extracted from path
        assert result["filename"] == "custom_name.pdf"
        assert result["filename"] != temp_pdf_file.name

    @pytest.mark.asyncio
    async def test_create_session_path_only_format(self, test_settings, temp_pdf_file):
        """Test creating session with path-only format (no filename field)."""
        files = [
            {
                "path": str(temp_pdf_file)
                # No filename - should extract from path
            }
        ]

        result = await upload_tools.create_session_from_uploads(
            project_name="Path Only Test",
            files=files
        )

        try:
            assert result["success"] is True
            assert temp_pdf_file.name in result["files_saved"]
            assert result["documents_found"] == 1

        finally:
            await session_tools.delete_session(result["session_id"])


class TestPathResolution:
    """Test ElizaOS path resolution functionality."""

    @pytest.fixture(autouse=True)
    def _cleanup(self, cleanup_sessions):
        pass

    def test_resolve_absolute_path_exists(self, tmp_path, sample_pdf_base64):
        """Test that absolute paths that exist are used as-is."""
        test_file = tmp_path / "test.pdf"
        test_file.write_bytes(base64.b64decode(sample_pdf_base64))

        resolved = upload_tools._resolve_file_path(str(test_file))
        assert resolved == test_file
        assert resolved.exists()

    def test_resolve_eliza_media_url_cwd(self, tmp_path, sample_pdf_base64):
        """Test resolving ElizaOS /media/uploads/ URL from current directory."""
        # Create mock ElizaOS structure in temp directory
        eliza_root = tmp_path / "eliza"
        uploads_dir = eliza_root / "packages/cli/.eliza/data/uploads/agents/abc123"
        uploads_dir.mkdir(parents=True)

        test_file = uploads_dir / "test.pdf"
        test_file.write_bytes(base64.b64decode(sample_pdf_base64))

        # Save current directory and change to eliza_root
        import os
        original_cwd = os.getcwd()
        try:
            os.chdir(eliza_root)

            # Try to resolve ElizaOS URL
            url_path = "/media/uploads/agents/abc123/test.pdf"
            resolved = upload_tools._resolve_file_path(url_path)

            assert resolved.exists()
            assert resolved == test_file

        finally:
            os.chdir(original_cwd)

    def test_resolve_eliza_media_url_env_var(self, tmp_path, sample_pdf_base64, monkeypatch):
        """Test resolving ElizaOS URL using ELIZA_ROOT environment variable."""
        # Create mock ElizaOS structure
        eliza_root = tmp_path / "eliza"
        uploads_dir = eliza_root / "packages/cli/.eliza/data/uploads/agents/abc123"
        uploads_dir.mkdir(parents=True)

        test_file = uploads_dir / "test.pdf"
        test_file.write_bytes(base64.b64decode(sample_pdf_base64))

        # Set ELIZA_ROOT environment variable
        monkeypatch.setenv('ELIZA_ROOT', str(eliza_root))

        # Try to resolve ElizaOS URL
        url_path = "/media/uploads/agents/abc123/test.pdf"
        resolved = upload_tools._resolve_file_path(url_path)

        assert resolved.exists()
        assert resolved == test_file

    def test_resolve_eliza_media_url_alternative_structure(self, tmp_path, sample_pdf_base64, monkeypatch):
        """Test resolving ElizaOS URL with alternative directory structure (no packages/cli)."""
        # Create alternative structure: .eliza/data/ directly under root
        eliza_root = tmp_path / "eliza"
        uploads_dir = eliza_root / ".eliza/data/uploads/agents/abc123"
        uploads_dir.mkdir(parents=True)

        test_file = uploads_dir / "test.pdf"
        test_file.write_bytes(base64.b64decode(sample_pdf_base64))

        monkeypatch.setenv('ELIZA_ROOT', str(eliza_root))

        url_path = "/media/uploads/agents/abc123/test.pdf"
        resolved = upload_tools._resolve_file_path(url_path)

        assert resolved.exists()
        assert resolved == test_file

    def test_resolve_nonexistent_path_returns_original(self):
        """Test that nonexistent paths are returned as-is (will fail later validation)."""
        fake_path = "/nonexistent/path/to/file.pdf"
        resolved = upload_tools._resolve_file_path(fake_path)

        # Should return original path (Path object)
        assert str(resolved) == fake_path
        assert not resolved.exists()

    def test_resolve_relative_path_from_cwd(self, tmp_path, sample_pdf_base64):
        """Test resolving relative path from current working directory."""
        # Create file in temp directory
        test_file = tmp_path / "test.pdf"
        test_file.write_bytes(base64.b64decode(sample_pdf_base64))

        import os
        original_cwd = os.getcwd()
        try:
            os.chdir(tmp_path)

            # Relative path should resolve from cwd
            resolved = upload_tools._resolve_file_path("test.pdf")

            assert resolved.exists()
            assert resolved == test_file

        finally:
            os.chdir(original_cwd)

    @pytest.mark.asyncio
    async def test_create_session_with_eliza_url_path(self, tmp_path, test_settings, sample_pdf_base64, monkeypatch):
        """Test creating session with ElizaOS /media/uploads/ URL path."""
        # Create mock ElizaOS structure
        eliza_root = tmp_path / "eliza"
        uploads_dir = eliza_root / "packages/cli/.eliza/data/uploads/agents/abc123"
        uploads_dir.mkdir(parents=True)

        test_file = uploads_dir / "ProjectPlan.pdf"
        test_file.write_bytes(base64.b64decode(sample_pdf_base64))

        # Set ELIZA_ROOT environment variable
        monkeypatch.setenv('ELIZA_ROOT', str(eliza_root))

        # Use ElizaOS URL format
        files = [
            {
                "path": "/media/uploads/agents/abc123/ProjectPlan.pdf"
            }
        ]

        result = await upload_tools.create_session_from_uploads(
            project_name="ElizaOS URL Test",
            files=files
        )

        try:
            assert result["success"] is True
            assert "ProjectPlan.pdf" in result["files_saved"]
            assert result["documents_found"] == 1

        finally:
            await session_tools.delete_session(result["session_id"])