Skip to main content
Glama
test_documents_api.py18.8 kB
"""Tests for the document endpoints nested under ``/collections``."""

import json
from typing import Any, Optional
from uuid import UUID

from tests.unit_tests.fixtures import (
    get_async_test_client,
)

USER_1_HEADERS = {
    "Authorization": "Bearer user1",
}

USER_2_HEADERS = {
    "Authorization": "Bearer user2",
}

NO_SUCH_USER_HEADERS = {
    "Authorization": "Bearer no_such_user",
}

# A syntactically valid UUID that no test ever creates a collection for.
_MISSING_COLLECTION_ID = "12345678-1234-5678-1234-567812345678"


async def _create_collection(
    client: Any, name: str, metadata: Optional[dict] = None
) -> str:
    """Create a collection as USER_1 and return its ``uuid``.

    Args:
        client: The async test client yielded by ``get_async_test_client``.
        name: Name of the collection to create.
        metadata: Collection metadata; an empty dict is sent when omitted.

    Returns:
        The ``uuid`` field of the creation response.

    Raises:
        AssertionError: If the endpoint does not answer with 201.
    """
    response = await client.post(
        "/collections",
        json={"name": name, "metadata": {} if metadata is None else metadata},
        headers=USER_1_HEADERS,
    )
    assert response.status_code == 201
    return response.json()["uuid"]


async def test_documents_create_and_list_and_delete_and_search() -> None:
    """Test creating, listing, deleting, and searching documents."""
    async with get_async_test_client() as client:
        # Create a collection for documents
        collection_id = await _create_collection(
            client, "docs_test_col", {"purpose": "doc-test"}
        )

        # Prepare a simple text file
        file_content = b"Hello world. This is a test document."
        files = [("files", ("test.txt", file_content, "text/plain"))]

        # Create documents without metadata
        resp = await client.post(
            f"/collections/{collection_id}/documents",
            files=files,
            headers=USER_1_HEADERS,
        )
        assert resp.status_code == 200
        data = resp.json()
        assert data["success"] is True
        # added_chunk_ids should be a non-empty list of UUIDs
        ids = data["added_chunk_ids"]
        assert isinstance(ids, list)
        assert ids
        for chunk_id in ids:
            # UUID() raises ValueError on anything that is not a UUID string
            UUID(chunk_id)

        # List documents in collection, default limit 10
        list_resp = await client.get(
            f"/collections/{collection_id}/documents", headers=USER_1_HEADERS
        )
        assert list_resp.status_code == 200
        docs = list_resp.json()
        assert isinstance(docs, list)
        assert docs
        # Exactly one document comes back, exposing the uploaded text as "content"
        assert len(docs) == 1
        assert docs[0]["content"] == "Hello world. This is a test document."

        # Search documents with a valid query
        search_payload = {"query": "test document", "limit": 5}
        search_resp = await client.post(
            f"/collections/{collection_id}/documents/search",
            json=search_payload,
            headers=USER_1_HEADERS,
        )
        assert search_resp.status_code == 200
        results = search_resp.json()
        assert isinstance(results, list)
        # The single hit must mirror the listed document. The score is compared
        # against itself on purpose: its key must exist, its value is not pinned.
        assert len(results) == 1
        assert results[0] == {
            "id": docs[0]["id"],
            "score": results[0]["score"],
            "page_content": "Hello world. This is a test document.",
            "metadata": {
                "file_id": docs[0]["metadata"]["file_id"],
                "source": None,
            },
        }

        # Delete a document
        doc_id = docs[0]["id"]
        del_resp = await client.delete(
            f"/collections/{collection_id}/documents/{doc_id}",
            headers=USER_1_HEADERS,
        )
        assert del_resp.status_code == 200
        assert del_resp.json() == {"success": True}

        # Delete non-existent document gracefully
        del_resp2 = await client.delete(
            f"/collections/{collection_id}/documents/{doc_id}",
            headers=USER_1_HEADERS,
        )
        # Should still return success True or 200/204; here assume 200
        assert del_resp2.status_code in (200, 204)


async def test_documents_create_with_invalid_metadata_json() -> None:
    """Test creating documents with invalid metadata JSON."""
    async with get_async_test_client() as client:
        # Create a collection
        collection_id = await _create_collection(client, "meta_test_col")

        # Prepare file
        file_content = b"Sample"
        files = [("files", ("a.txt", file_content, "text/plain"))]

        # Provide invalid JSON
        resp = await client.post(
            f"/collections/{collection_id}/documents",
            files=files,
            data={"metadatas_json": "not-a-json"},
            headers=USER_1_HEADERS,
        )
        assert resp.status_code == 400


async def test_documents_search_empty_query() -> None:
    """Test searching documents with an empty query."""
    async with get_async_test_client() as client:
        # Create a collection for search test
        collection_id = await _create_collection(client, "search_test_col")

        # Attempt search with empty query
        resp = await client.post(
            f"/collections/{collection_id}/documents/search",
            json={"query": "", "limit": 3},
            headers=USER_1_HEADERS,
        )
        assert resp.status_code == 400
        assert "Search query cannot be empty" in resp.json()["detail"]


async def test_documents_in_nonexistent_collection() -> None:
    """Test operations on documents in a non-existent collection."""
    async with get_async_test_client() as client:
        # Try listing documents in missing collection
        no_such_collection = _MISSING_COLLECTION_ID
        response = await client.get(
            f"/collections/{no_such_collection}/documents", headers=USER_1_HEADERS
        )
        assert response.status_code == 404

        # Try uploading to a non existent collection
        file_content = b"X"
        files = [("files", ("x.txt", file_content, "text/plain"))]
        upload_resp = await client.post(
            f"/collections/{no_such_collection}/documents",
            files=files,
            headers=USER_1_HEADERS,
        )
        assert upload_resp.status_code == 404
        assert "Collection not found" in upload_resp.json()["detail"]

        # Try deleting from missing collection/document
        del_resp = await client.delete(
            f"/collections/{no_such_collection}/documents/abcdef",
            headers=USER_1_HEADERS,
        )
        assert del_resp.status_code == 404

        # Try search in missing collection
        search_resp = await client.post(
            f"/collections/{no_such_collection}/documents/search",
            json={"query": "foo"},
            headers=USER_1_HEADERS,
        )
        # Not found or 404
        assert search_resp.status_code == 404


async def test_documents_create_with_valid_text_file_and_metadata() -> None:
    """Test creating documents with a valid text file and metadata."""
    async with get_async_test_client() as client:
        # Create a collection first
        collection_id = await _create_collection(client, "doc_test_with_metadata")

        # Prepare a text file with content
        file_content = b"This is a test document with metadata."
        files = [("files", ("metadata_test.txt", file_content, "text/plain"))]

        # Prepare metadata as JSON
        metadata = [{"source": "test", "author": "user1", "importance": "high"}]
        metadata_json = json.dumps(metadata)

        # Create document with metadata
        response = await client.post(
            f"/collections/{collection_id}/documents",
            files=files,
            data={"metadatas_json": metadata_json},
            headers=USER_1_HEADERS,
        )

        assert response.status_code == 200
        data = response.json()
        assert data["success"] is True
        assert "added_chunk_ids" in data
        ids = data["added_chunk_ids"]
        assert isinstance(ids, list)
        assert len(ids) > 0

        # Verify each ID is a valid UUID
        for chunk_id in ids:
            UUID(chunk_id)  # This will raise an exception if invalid

        # Verify document was added by listing documents
        list_response = await client.get(
            f"/collections/{collection_id}/documents",
            headers=USER_1_HEADERS,
        )
        assert list_response.status_code == 200
        documents = list_response.json()
        assert len(documents) == 1

        # Verify a metadata mapping was attached. Only the presence of the
        # "file_id" key is checked: its value is a new UUID, so the exact
        # value cannot be compared.
        doc = documents[0]
        assert "metadata" in doc
        assert "file_id" in doc["metadata"]


async def test_documents_create_with_valid_text_file_without_metadata() -> None:
    """Test creating documents with a valid text file without metadata."""
    async with get_async_test_client() as client:
        # Create a collection first
        collection_id = await _create_collection(client, "doc_test_without_metadata")

        # Prepare a text file with content
        file_content = b"This is a test document without metadata."
        files = [("files", ("no_metadata_test.txt", file_content, "text/plain"))]

        # Create document without metadata
        response = await client.post(
            f"/collections/{collection_id}/documents",
            files=files,
            headers=USER_1_HEADERS,
        )

        assert response.status_code == 200
        data = response.json()
        assert data["success"] is True
        assert "added_chunk_ids" in data
        ids = data["added_chunk_ids"]
        assert isinstance(ids, list)
        assert len(ids) > 0

        # Verify document was added by listing documents
        list_response = await client.get(
            f"/collections/{collection_id}/documents",
            headers=USER_1_HEADERS,
        )
        assert list_response.status_code == 200
        documents = list_response.json()
        assert len(documents) > 0

        # Verify content is in the document
        assert documents[0]["content"] == "This is a test document without metadata."


async def test_documents_create_with_empty_file() -> None:
    """Test creating documents with an empty file."""
    async with get_async_test_client() as client:
        # Create a collection first
        collection_id = await _create_collection(client, "doc_test_empty_file")

        # Prepare an empty file
        file_content = b""
        files = [("files", ("empty.txt", file_content, "text/plain"))]

        # Create document with empty file
        response = await client.post(
            f"/collections/{collection_id}/documents",
            files=files,
            headers=USER_1_HEADERS,
        )

        # Empty files should be rejected with 400 Bad Request
        assert response.status_code == 400
        data = response.json()
        assert "Failed to process any documents" in data["detail"]


async def test_documents_create_with_invalid_metadata_format() -> None:
    """Test creating documents with invalid metadata format."""
    async with get_async_test_client() as client:
        # Create a collection first
        collection_id = await _create_collection(client, "doc_test_invalid_metadata")

        # Prepare a text file with content
        file_content = b"This is a test document with invalid metadata."
        files = [("files", ("invalid_metadata.txt", file_content, "text/plain"))]

        # Invalid JSON format for metadata
        invalid_metadata = "not a json"

        # Create document with invalid metadata
        response = await client.post(
            f"/collections/{collection_id}/documents",
            files=files,
            data={"metadatas_json": invalid_metadata},
            headers=USER_1_HEADERS,
        )

        assert response.status_code == 400

        # Test with metadata that's not a list
        invalid_metadata_not_list = json.dumps({"key": "value"})
        response = await client.post(
            f"/collections/{collection_id}/documents",
            files=files,
            data={"metadatas_json": invalid_metadata_not_list},
            headers=USER_1_HEADERS,
        )

        assert response.status_code == 400


async def test_documents_create_with_non_existent_collection() -> None:
    """Test creating documents in a non-existent collection."""
    async with get_async_test_client() as client:
        # Prepare a text file with content
        file_content = b"This is a test document for a non-existent collection."
        files = [("files", ("nonexistent.txt", file_content, "text/plain"))]

        # Try to create document in a non-existent collection
        response = await client.post(
            f"/collections/{_MISSING_COLLECTION_ID}/documents",
            files=files,
            headers=USER_1_HEADERS,
        )

        assert response.status_code == 404
        data = response.json()
        assert "Collection not found" in data["detail"]


async def test_documents_create_with_multiple_files() -> None:
    """Test creating documents with multiple files."""
    async with get_async_test_client() as client:
        # Create a collection first
        collection_id = await _create_collection(client, "doc_test_multiple_files")

        # Prepare multiple files
        files = [
            ("files", ("file1.txt", b"Content of file 1", "text/plain")),
            ("files", ("file2.txt", b"Content of file 2", "text/plain")),
        ]

        # Create document with multiple files
        response = await client.post(
            f"/collections/{collection_id}/documents",
            files=files,
            headers=USER_1_HEADERS,
        )

        assert response.status_code == 200
        data = response.json()
        assert data["success"] is True
        assert "added_chunk_ids" in data
        ids = data["added_chunk_ids"]
        assert isinstance(ids, list)
        # We should have at least 2 chunks (one for each file)
        assert len(ids) >= 2

        # Verify documents were added by listing documents
        list_response = await client.get(
            f"/collections/{collection_id}/documents",
            headers=USER_1_HEADERS,
        )
        assert list_response.status_code == 200
        documents = list_response.json()
        # The number of documents returned might not match the number of files
        # exactly, as documents are chunked and only one chunk per file_id is returned
        assert len(documents) > 0


async def test_documents_create_with_mismatched_metadata() -> None:
    """Test creating documents with metadata count not matching files count."""
    async with get_async_test_client() as client:
        # Create a collection first
        collection_id = await _create_collection(
            client, "doc_test_mismatched_metadata"
        )

        # Prepare multiple files
        files = [
            ("files", ("file1.txt", b"Content of file 1", "text/plain")),
            ("files", ("file2.txt", b"Content of file 2", "text/plain")),
        ]

        # Metadata with only one entry for two files
        metadata = [{"source": "test"}]
        metadata_json = json.dumps(metadata)

        # Create document with mismatched metadata
        response = await client.post(
            f"/collections/{collection_id}/documents",
            files=files,
            data={"metadatas_json": metadata_json},
            headers=USER_1_HEADERS,
        )

        assert response.status_code == 400
        data = response.json()
        assert "does not match number of files" in data["detail"]


async def test_documents_create_ownership_validation() -> None:
    """Test creating documents with a different user than the collection owner."""
    async with get_async_test_client() as client:
        # Create a collection as USER_1
        collection_id = await _create_collection(client, "doc_test_ownership")

        # Prepare a file
        file_content = b"This is a test document for ownership validation."
        files = [("files", ("ownership.txt", file_content, "text/plain"))]

        # Try to create document as USER_2
        response = await client.post(
            f"/collections/{collection_id}/documents",
            files=files,
            headers=USER_2_HEADERS,
        )

        # Should return 404 as USER_2 can't see USER_1's collection
        assert response.status_code == 404
        data = response.json()
        assert "Collection not found" in data["detail"]

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/HyunjunJeon/vibecoding-lg-mcp-a2a'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.