Skip to main content
Glama

CEDAR MCP Server

test_processing.py (45.8 kB)
#!/usr/bin/env python3 import pytest from typing import Dict, Any from src.cedar_mcp.processing import ( _determine_datatype, _extract_controlled_term_values, _extract_default_value, _transform_field, clean_template_response, clean_template_instance_response, ) from src.cedar_mcp.model import ( ControlledTermValue, ControlledTermDefault, FieldDefinition, ) @pytest.mark.unit class TestDetermineDatatype: """Tests for _determine_datatype function.""" def test_string_datatype_default(self): """Test that string is returned as default datatype.""" field_data = {"properties": {"@value": {}}} result = _determine_datatype(field_data) assert result == "string" def test_integer_datatype(self): """Test integer datatype detection.""" field_data = {"properties": {"@value": {"type": "integer"}}} result = _determine_datatype(field_data) assert result == "integer" def test_decimal_datatype(self): """Test decimal datatype detection.""" field_data = {"properties": {"@value": {"type": "number"}}} result = _determine_datatype(field_data) assert result == "decimal" def test_boolean_datatype(self): """Test boolean datatype detection.""" field_data = {"properties": {"@value": {"type": "boolean"}}} result = _determine_datatype(field_data) assert result == "boolean" def test_list_type_integer(self): """Test integer datatype from list type.""" field_data = {"properties": {"@value": {"type": ["string", "integer"]}}} result = _determine_datatype(field_data) assert result == "integer" def test_list_type_number(self): """Test decimal datatype from list type.""" field_data = {"properties": {"@value": {"type": ["string", "number"]}}} result = _determine_datatype(field_data) assert result == "decimal" def test_empty_field_data(self): """Test handling of empty field data.""" result = _determine_datatype({}) assert result == "string" @pytest.mark.integration class TestExtractControlledTermValues: """Tests for _extract_controlled_term_values function.""" def test_extract_literal_values( self, 
bioportal_api_key: str, sample_field_data_with_literals: Dict[str, Any] ): """Test extraction of literal controlled term values.""" result = _extract_controlled_term_values( sample_field_data_with_literals, bioportal_api_key ) assert result is not None assert len(result) == 3 assert all(isinstance(value, ControlledTermValue) for value in result) assert all(value.iri is None for value in result) # Literals have no IRI labels = [value.label for value in result] assert "Option 1" in labels assert "Option 2" in labels assert "Option 3" in labels def test_extract_class_values( self, bioportal_api_key: str, sample_field_data_with_classes: Dict[str, Any] ): """Test extraction of class controlled term values.""" result = _extract_controlled_term_values( sample_field_data_with_classes, bioportal_api_key ) assert result is not None assert len(result) == 1 assert result[0].label == "Sample Class" assert result[0].iri == "http://example.org/sample-class" def test_extract_branch_values( self, bioportal_api_key: str, sample_field_data_with_branches: Dict[str, Any] ): """Test extraction of branch controlled term values using real BioPortal API.""" result = _extract_controlled_term_values( sample_field_data_with_branches, bioportal_api_key ) # Result depends on actual BioPortal data if result is not None: assert all(isinstance(value, ControlledTermValue) for value in result) assert all( value.iri is not None for value in result ) # Branch children have IRIs assert all(value.label.strip() != "" for value in result) def test_no_constraints_returns_none(self, bioportal_api_key: str): """Test that fields without constraints return None.""" field_data = {"schema:name": "Simple Field", "_valueConstraints": {}} result = _extract_controlled_term_values(field_data, bioportal_api_key) assert result is None def test_invalid_api_key_handles_gracefully( self, sample_field_data_with_branches: Dict[str, Any] ): """Test that invalid API key is handled gracefully for branches.""" result = 
_extract_controlled_term_values( sample_field_data_with_branches, "invalid-key" ) # Should not crash, may return empty or None assert result is None or isinstance(result, list) def test_mixed_constraints(self, bioportal_api_key: str): """Test field with both classes and branches.""" field_data = { "_valueConstraints": { "classes": [ {"prefLabel": "Test Class", "@id": "http://example.org/test"} ], "branches": [ { "name": "Test Branch", "uri": "http://purl.bioontology.org/ontology/HRAVS/HRAVS_0000225", "acronym": "HRAVS", } ], } } result = _extract_controlled_term_values(field_data, bioportal_api_key) if result is not None: # Should contain class value class_labels = [ v.label for v in result if v.iri == "http://example.org/test" ] assert len(class_labels) > 0 assert "Test Class" in class_labels @pytest.mark.unit class TestExtractDefaultValue: """Tests for _extract_default_value function.""" def test_extract_controlled_term_default(self): """Test extraction of controlled term default value.""" field_data = { "_valueConstraints": { "defaultValue": { "rdfs:label": "Default Term", "termUri": "http://example.org/default", } } } result = _extract_default_value(field_data) assert isinstance(result, ControlledTermDefault) assert result.label == "Default Term" assert result.iri == "http://example.org/default" def test_extract_simple_default(self): """Test extraction of simple default values.""" test_cases = [ ({"_valueConstraints": {"defaultValue": "test string"}}, "test string"), ({"_valueConstraints": {"defaultValue": 42}}, 42), ({"_valueConstraints": {"defaultValue": 3.14}}, 3.14), ({"_valueConstraints": {"defaultValue": True}}, True), ] for field_data, expected in test_cases: result = _extract_default_value(field_data) assert result == expected def test_extract_branch_default(self): """Test extraction of branch as default value.""" field_data = { "_valueConstraints": { "branches": [ {"name": "Default Branch", "uri": "http://example.org/branch"} ] } } result = 
_extract_default_value(field_data) assert isinstance(result, ControlledTermDefault) assert result.label == "Default Branch" assert result.iri == "http://example.org/branch" def test_no_default_returns_none(self): """Test that fields without defaults return None.""" field_data = {"_valueConstraints": {}} result = _extract_default_value(field_data) assert result is None @pytest.mark.integration class TestTransformField: """Tests for _transform_field function.""" def test_transform_simple_field(self, bioportal_api_key: str): """Test transformation of a simple field.""" field_data = { "schema:name": "Test Field", "schema:description": "A test field", "skos:prefLabel": "Test Field Label", "_valueConstraints": {"requiredValue": True}, "@type": "https://schema.metadatacenter.org/core/TemplateField", } result = _transform_field("test_field", field_data, bioportal_api_key) assert isinstance(result, FieldDefinition) assert result.name == "Test Field" assert result.description == "A test field" assert result.prefLabel == "Test Field Label" assert result.datatype == "string" assert result.configuration.required is True assert result.values is None assert result.default is None def test_transform_field_with_literals( self, bioportal_api_key: str, sample_field_data_with_literals: Dict[str, Any] ): """Test transformation of field with literal constraints.""" result = _transform_field( "literal_field", sample_field_data_with_literals, bioportal_api_key ) assert isinstance(result, FieldDefinition) assert result.values is not None assert len(result.values) == 3 assert all(v.iri is None for v in result.values) def test_transform_field_with_branches( self, bioportal_api_key: str, sample_field_data_with_branches: Dict[str, Any] ): """Test transformation of field with branch constraints.""" result = _transform_field( "branch_field", sample_field_data_with_branches, bioportal_api_key ) assert isinstance(result, FieldDefinition) # Values may or may not be present depending on BioPortal 
response if result.values is not None: assert all(isinstance(v, ControlledTermValue) for v in result.values) def test_transform_field_with_regex(self, bioportal_api_key: str): """Test transformation of field with regex constraint.""" field_data = { "schema:name": "Email Field", "schema:description": "Email address field", "skos:prefLabel": "Email", "_valueConstraints": { "requiredValue": True, "regex": r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$", }, "@type": "https://schema.metadatacenter.org/core/TemplateField", } result = _transform_field("email_field", field_data, bioportal_api_key) assert result.regex == r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$" @pytest.mark.integration class TestCleanTemplateResponse: """Tests for clean_template_response function.""" def test_clean_minimal_template( self, bioportal_api_key: str, sample_minimal_template_data: Dict[str, Any] ): """Test cleaning of minimal template data.""" result = clean_template_response( sample_minimal_template_data, bioportal_api_key ) assert isinstance(result, dict) assert result["type"] == "template" assert result["name"] == "Test Template" assert "children" in result assert len(result["children"]) == 2 # Verify field order is preserved field_names = [field["name"] for field in result["children"]] assert field_names == ["Field 1", "Field 2"] def test_clean_template_with_schema_name(self, bioportal_api_key: str): """Test template cleaning with schema:name preference.""" template_data = { "schema:name": "Proper Template Name", "title": "Template Title Schema", "_ui": {"order": []}, "properties": {}, } result = clean_template_response(template_data, bioportal_api_key) assert result["name"] == "Proper Template Name" def test_clean_template_fallback_title(self, bioportal_api_key: str): """Test template cleaning with title fallback.""" template_data = { "title": "Fallback template schema", "_ui": {"order": []}, "properties": {}, } result = clean_template_response(template_data, 
bioportal_api_key) assert result["name"] == "Fallback" def test_clean_template_empty_name(self, bioportal_api_key: str): """Test template cleaning with empty names.""" template_data = { "schema:name": "", "title": "", "_ui": {"order": []}, "properties": {}, } result = clean_template_response(template_data, bioportal_api_key) assert result["name"] == "Unnamed Template" @pytest.mark.integration class TestTransformElement: """Tests for _transform_element function.""" def test_transform_simple_element( self, bioportal_api_key: str, sample_nested_template_element: Dict[str, Any] ): """Test transformation of a simple template element with nested fields.""" from src.cedar_mcp.processing import _transform_element from src.cedar_mcp.model import ElementDefinition, FieldDefinition result = _transform_element( "resource_type", sample_nested_template_element, bioportal_api_key ) assert isinstance(result, ElementDefinition) assert result.name == "Resource Type" assert ( result.description == "Information about the type of the resource being described with metadata." 
) assert result.prefLabel == "Resource Type" assert result.datatype == "element" assert result.is_array is False assert len(result.children) == 2 # Check that children are fields child_names = [child.name for child in result.children] assert "Resource Type Category" in child_names assert "Resource Type Detail" in child_names assert all(isinstance(child, FieldDefinition) for child in result.children) def test_transform_array_element( self, bioportal_api_key: str, sample_array_template_element: Dict[str, Any] ): """Test transformation of an array template element.""" from src.cedar_mcp.processing import _transform_element from src.cedar_mcp.model import ElementDefinition, FieldDefinition result = _transform_element( "data_file_title", sample_array_template_element, bioportal_api_key ) assert isinstance(result, ElementDefinition) assert result.name == "Data File Title" assert result.datatype == "element" assert result.is_array is True assert len(result.children) == 2 # Check that children are fields from the items structure child_names = [child.name for child in result.children] assert "Data File Title" in child_names assert "Title Language" in child_names assert all(isinstance(child, FieldDefinition) for child in result.children) def test_process_element_children(self, bioportal_api_key: str): """Test processing of element children with mixed fields and elements.""" from src.cedar_mcp.processing import _process_element_children from src.cedar_mcp.model import ElementDefinition, FieldDefinition element_data = { "_ui": {"order": ["simple_field", "nested_element"]}, "properties": { "simple_field": { "@type": "https://schema.metadatacenter.org/core/TemplateField", "schema:name": "Simple Field", "schema:description": "A simple field", "skos:prefLabel": "Simple Field", "_valueConstraints": {"requiredValue": False}, }, "nested_element": { "@type": "https://schema.metadatacenter.org/core/TemplateElement", "schema:name": "Nested Element", "schema:description": "A nested 
element", "skos:prefLabel": "Nested Element", "_valueConstraints": {"requiredValue": False}, "_ui": {"order": []}, "properties": {}, }, }, } result = _process_element_children(element_data, bioportal_api_key) assert len(result) == 2 assert isinstance(result[0], FieldDefinition) assert isinstance(result[1], ElementDefinition) assert result[0].name == "Simple Field" assert result[1].name == "Nested Element" @pytest.mark.integration class TestCleanTemplateResponseNested: """Tests for clean_template_response function with nested structures.""" def test_clean_template_with_elements( self, bioportal_api_key: str, sample_nested_template_element: Dict[str, Any] ): """Test cleaning template with template elements.""" template_data = { "schema:name": "Template With Elements", "_ui": {"order": ["resource_type"]}, "properties": {"resource_type": sample_nested_template_element}, } result = clean_template_response(template_data, bioportal_api_key) assert result["type"] == "template" assert result["name"] == "Template With Elements" assert len(result["children"]) == 1 element = result["children"][0] assert element["name"] == "Resource Type" assert element["datatype"] == "element" assert element["is_array"] is False assert len(element["children"]) == 2 # Verify nested fields child_names = [child["name"] for child in element["children"]] assert "Resource Type Category" in child_names assert "Resource Type Detail" in child_names def test_clean_template_with_array_elements( self, bioportal_api_key: str, sample_array_template_element: Dict[str, Any] ): """Test cleaning template with array elements.""" template_data = { "schema:name": "Template With Arrays", "_ui": {"order": ["data_file_title"]}, "properties": {"data_file_title": sample_array_template_element}, } result = clean_template_response(template_data, bioportal_api_key) assert result["type"] == "template" assert result["name"] == "Template With Arrays" assert len(result["children"]) == 1 element = result["children"][0] assert 
element["name"] == "Data File Title" assert element["datatype"] == "element" assert element["is_array"] is True assert len(element["children"]) == 2 # Verify nested fields from array items child_names = [child["name"] for child in element["children"]] assert "Data File Title" in child_names assert "Title Language" in child_names def test_clean_complex_nested_template( self, bioportal_api_key: str, sample_complex_nested_template: Dict[str, Any] ): """Test cleaning of complex template with multiple nesting levels.""" result = clean_template_response( sample_complex_nested_template, bioportal_api_key ) assert result["type"] == "template" assert result["name"] == "Complex Nested Template" assert len(result["children"]) == 4 # Check structure: Simple Field, Resource Type (element), Data File Title (array), Data File Spatial Coverage (nested array) children_by_name = {child["name"]: child for child in result["children"]} # Simple field simple_field = children_by_name["Simple Field"] assert simple_field["datatype"] == "string" assert "children" not in simple_field # Template element resource_type = children_by_name["Resource Type"] assert resource_type["datatype"] == "element" assert resource_type["is_array"] is False assert len(resource_type["children"]) == 2 # Array element title_array = children_by_name["Data File Title"] assert title_array["datatype"] == "element" assert title_array["is_array"] is True assert len(title_array["children"]) == 2 # Nested array element spatial_coverage = children_by_name["Data File Spatial Coverage"] assert spatial_coverage["datatype"] == "element" assert spatial_coverage["is_array"] is True assert len(spatial_coverage["children"]) == 3 # Check that nested coverage is also an array element nested_children_by_name = { child["name"]: child for child in spatial_coverage["children"] } nested_coverage = nested_children_by_name["Nested Coverage"] assert nested_coverage["datatype"] == "element" assert nested_coverage["is_array"] is True assert 
len(nested_coverage["children"]) == 2 def test_mixed_fields_and_elements(self, bioportal_api_key: str): """Test template with both simple fields and nested elements.""" template_data = { "schema:name": "Mixed Template", "_ui": {"order": ["simple_field", "complex_element"]}, "properties": { "simple_field": { "@type": "https://schema.metadatacenter.org/core/TemplateField", "schema:name": "Simple Field", "schema:description": "A simple field", "skos:prefLabel": "Simple Field", "_valueConstraints": {"requiredValue": False}, }, "complex_element": { "@type": "https://schema.metadatacenter.org/core/TemplateElement", "schema:name": "Complex Element", "schema:description": "A complex element", "skos:prefLabel": "Complex Element", "_valueConstraints": {"requiredValue": False}, "_ui": {"order": ["nested_field"]}, "properties": { "nested_field": { "@type": "https://schema.metadatacenter.org/core/TemplateField", "schema:name": "Nested Field", "schema:description": "A nested field", "skos:prefLabel": "Nested Field", "_valueConstraints": {"requiredValue": True}, } }, }, }, } result = clean_template_response(template_data, bioportal_api_key) assert len(result["children"]) == 2 # First child should be a simple field simple_child = result["children"][0] assert simple_child["name"] == "Simple Field" assert simple_child["datatype"] == "string" assert "children" not in simple_child # Second child should be an element with nested field complex_child = result["children"][1] assert complex_child["name"] == "Complex Element" assert complex_child["datatype"] == "element" assert complex_child["is_array"] is False assert len(complex_child["children"]) == 1 assert complex_child["children"][0]["name"] == "Nested Field" assert complex_child["children"][0]["configuration"]["required"] is True @pytest.mark.unit class TestCleanTemplateResponseArrayFields: """Tests for clean_template_response function with array fields (arrays of TemplateFields).""" def test_clean_template_with_array_field( self, 
bioportal_api_key: str, sample_template_with_array_field: Dict[str, Any] ): """Test cleaning template with array field (array of TemplateFields).""" result = clean_template_response( sample_template_with_array_field, bioportal_api_key ) assert result["type"] == "template" assert result["name"] == "Template with Array Field" assert len(result["children"]) == 2 # Check structure children_by_name = {child["name"]: child for child in result["children"]} # Simple field should not be array simple_field = children_by_name["Simple Field"] assert simple_field["datatype"] == "string" assert simple_field["is_array"] is False assert "children" not in simple_field # Array field should be marked as array array_field = children_by_name["Notes"] assert array_field["name"] == "Notes" assert array_field["datatype"] == "string" assert array_field["is_array"] is True assert "children" not in array_field assert ( array_field["description"] == "Additional notes or comments about the resource." ) def test_array_field_properties( self, bioportal_api_key: str, sample_array_template_field: Dict[str, Any] ): """Test that array field properties are correctly extracted from items structure.""" template_data = { "schema:name": "Array Field Test", "_ui": {"order": ["notes_array"]}, "properties": {"notes_array": sample_array_template_field}, } result = clean_template_response(template_data, bioportal_api_key) assert len(result["children"]) == 1 array_field = result["children"][0] # Verify properties extracted from items structure assert array_field["name"] == "Notes" assert ( array_field["description"] == "Additional notes or comments about the resource." 
) assert array_field["prefLabel"] == "Notes" assert array_field["datatype"] == "string" assert array_field["is_array"] is True assert array_field["configuration"]["required"] is False def test_mixed_array_fields_and_elements(self, bioportal_api_key: str): """Test template with both array fields and array elements.""" template_data = { "schema:name": "Mixed Arrays Template", "_ui": {"order": ["notes_array", "elements_array"]}, "properties": { "notes_array": { "type": "array", "minItems": 1, "items": { "@type": "https://schema.metadatacenter.org/core/TemplateField", "schema:name": "Notes", "schema:description": "Array of note fields", "skos:prefLabel": "Notes", "_valueConstraints": {"requiredValue": False}, }, }, "elements_array": { "type": "array", "minItems": 1, "items": { "@type": "https://schema.metadatacenter.org/core/TemplateElement", "schema:name": "Complex Item", "schema:description": "Array of complex elements", "skos:prefLabel": "Complex Item", "_ui": {"order": ["inner_field"]}, "properties": { "inner_field": { "@type": "https://schema.metadatacenter.org/core/TemplateField", "schema:name": "Inner Field", "schema:description": "Field inside element", "skos:prefLabel": "Inner Field", "_valueConstraints": {"requiredValue": True}, } }, }, }, }, } result = clean_template_response(template_data, bioportal_api_key) assert len(result["children"]) == 2 children_by_name = {child["name"]: child for child in result["children"]} # Array field should be a simple array field notes_array = children_by_name["Notes"] assert notes_array["datatype"] == "string" assert notes_array["is_array"] is True assert "children" not in notes_array # Array element should be an array element with children elements_array = children_by_name["Complex Item"] assert elements_array["datatype"] == "element" assert elements_array["is_array"] is True assert len(elements_array["children"]) == 1 assert elements_array["children"][0]["name"] == "Inner Field" @pytest.mark.unit class 
TestCleanTemplateInstanceResponse: """Tests for clean_template_instance_response function - core transformations.""" def test_metadata_removal_and_transformations(self): """Test metadata removal and all core transformations.""" sample_instance = { "@context": {"schema": "http://schema.org/"}, "@id": "https://repo.metadatacenter.org/template-instances/test-id", "schema:isBasedOn": "https://repo.metadatacenter.org/templates/test-template", "schema:name": "Test Instance", "schema:description": "A test instance for unit testing", "pav:createdOn": "2021-11-18T10:40:02-08:00", "pav:createdBy": "https://metadatacenter.org/users/test-user", "pav:derivedFrom": "https://repo.metadatacenter.org/template-instances/parent-instance", "pav:lastUpdatedOn": "2021-11-18T11:40:02-08:00", "oslc:modifiedBy": "https://metadatacenter.org/users/test-user", "cell_type": { "@id": "http://purl.obolibrary.org/obo/CL_1000412", "rdfs:label": "endothelial cell", }, "is_ftu": {"@value": "No"}, "doi": [ {"@value": "doi:10.1038/s41467-019-10861-2"}, {"@value": "doi:10.1038/s41586-020-2941-1"}, ], } cleaned = clean_template_instance_response(sample_instance) # Verify metadata fields removed metadata_fields = { "@context", "schema:isBasedOn", "schema:name", "schema:description", "pav:createdOn", "pav:createdBy", "pav:derivedFrom", "oslc:modifiedBy", "@id", } for field in metadata_fields: assert field not in cleaned # Verify fields that should be preserved (not metadata) assert ( "pav:lastUpdatedOn" in cleaned ) # This should be preserved as it's not in metadata_fields # Verify @id → iri transformation assert ( cleaned["cell_type"]["iri"] == "http://purl.obolibrary.org/obo/CL_1000412" ) assert "@id" not in cleaned["cell_type"] # Verify rdfs:label → label transformation assert cleaned["cell_type"]["label"] == "endothelial cell" assert "rdfs:label" not in cleaned["cell_type"] # Verify @value flattening assert cleaned["is_ftu"] == "No" assert cleaned["doi"] == [ "doi:10.1038/s41467-019-10861-2", 
"doi:10.1038/s41586-020-2941-1", ] def test_nested_context_removal(self): """Test that @context fields are removed from nested objects and arrays.""" sample_instance = { "Project Title": [ { "@context": { "title": "http://purl.org/dc/elements/1.1/title", "language": "http://def.isotc211.org/iso19115/2003/IdentificationInformation#MD_DataIdentification.language", }, "title": {"@value": "Test Project Title"}, "language": { "@id": "https://www.omg.org/spec/LCC/Languages/LaISO639-1-LanguageCodes/en", "rdfs:label": "en", }, } ], "Principal Investigator": { "@context": { "ORCID": "https://schema.metadatacenter.org/properties/ee24c19a-1bb5-4693-8775-52ab7716108c" }, "ORCID": {"@value": "https://orcid.org/0000-0003-1791-3626"}, }, } cleaned = clean_template_instance_response(sample_instance) # Verify nested @context fields are removed assert "@context" not in cleaned["Project Title"][0] assert "@context" not in cleaned["Principal Investigator"] # Verify other data is preserved and transformed correctly assert cleaned["Project Title"][0]["title"] == "Test Project Title" assert ( cleaned["Project Title"][0]["language"]["iri"] == "https://www.omg.org/spec/LCC/Languages/LaISO639-1-LanguageCodes/en" ) assert cleaned["Project Title"][0]["language"]["label"] == "en" assert ( cleaned["Principal Investigator"]["ORCID"] == "https://orcid.org/0000-0003-1791-3626" ) def test_template_element_instance_id_removal(self): """Test that @id fields containing template-element-instances are removed.""" sample_instance = { "Project Title": [ { "title": {"@value": "Test Project"}, "@id": "https://repo.metadatacenter.org/template-element-instances/8f727c0b-4033-49b7-92da-de12a7141550", } ], "Principal Investigator": { "ORCID": {"@value": "https://orcid.org/0000-0003-1791-3626"}, "@id": "https://repo.metadatacenter.org/template-element-instances/bcc6c4da-802a-4026-9718-8d439267febd", }, "Data Steward": [ { "Focus": [{"@value": "research oriented"}], "@id": 
"https://repo.metadatacenter.org/template-element-instances/75b76f22-5928-4ec4-a164-0b313afb2f4e", }, { "Focus": [{"@value": "infrastructure oriented"}] # Note: This one has no @id field, which should work fine }, ], } cleaned = clean_template_instance_response(sample_instance) # Verify template-element-instance @id fields are removed assert "@id" not in cleaned["Project Title"][0] assert "iri" not in cleaned["Project Title"][0] assert "@id" not in cleaned["Principal Investigator"] assert "iri" not in cleaned["Principal Investigator"] assert "@id" not in cleaned["Data Steward"][0] assert "iri" not in cleaned["Data Steward"][0] # Verify other data is preserved and transformed correctly assert cleaned["Project Title"][0]["title"] == "Test Project" assert ( cleaned["Principal Investigator"]["ORCID"] == "https://orcid.org/0000-0003-1791-3626" ) assert cleaned["Data Steward"][0]["Focus"] == ["research oriented"] assert cleaned["Data Steward"][1]["Focus"] == ["infrastructure oriented"] def test_non_template_element_instance_id_preservation(self): """Test that @id fields NOT containing template-element-instances are preserved as iri.""" sample_instance = { "Lead Institution": { "@id": "http://www.fair-data-collective.com/zonmw/projectadmin/MaastrichtUniversity", "rdfs:label": "Maastricht University", }, "Province": [ { "@id": "http://www.fair-data-collective.com/zonmw/projectadmin/Limburg", "rdfs:label": "Limburg", } ], "Country": [ { "@id": "http://purl.bioontology.org/ontology/MESH/D009426", "rdfs:label": "Netherlands", } ], # Root level template-instances (not template-element-instances) should also be removed by root cleanup "nested_data": { "sub_item": { "@id": "http://example.org/some-other-resource", "rdfs:label": "Some Resource", } }, } cleaned = clean_template_instance_response(sample_instance) # Verify non-template-element-instance @id fields are transformed to iri assert ( cleaned["Lead Institution"]["iri"] == 
"http://www.fair-data-collective.com/zonmw/projectadmin/MaastrichtUniversity"
        )
        assert cleaned["Lead Institution"]["label"] == "Maastricht University"
        assert "@id" not in cleaned["Lead Institution"]
        assert "rdfs:label" not in cleaned["Lead Institution"]

        assert (
            cleaned["Province"][0]["iri"]
            == "http://www.fair-data-collective.com/zonmw/projectadmin/Limburg"
        )
        assert cleaned["Province"][0]["label"] == "Limburg"
        assert "@id" not in cleaned["Province"][0]
        assert "rdfs:label" not in cleaned["Province"][0]

        assert (
            cleaned["Country"][0]["iri"]
            == "http://purl.bioontology.org/ontology/MESH/D009426"
        )
        assert cleaned["Country"][0]["label"] == "Netherlands"
        assert "@id" not in cleaned["Country"][0]
        assert "rdfs:label" not in cleaned["Country"][0]

        # Verify nested non-template-element-instance @id is also transformed
        assert (
            cleaned["nested_data"]["sub_item"]["iri"]
            == "http://example.org/some-other-resource"
        )
        assert cleaned["nested_data"]["sub_item"]["label"] == "Some Resource"
        assert "@id" not in cleaned["nested_data"]["sub_item"]
        assert "rdfs:label" not in cleaned["nested_data"]["sub_item"]

    def test_complex_nested_structure_with_context_and_ids(self):
        """Test @context removal and @id handling together in one nested instance.

        The sample contains two list-valued fields:

        * ``Funder Information`` - one element carrying a ``@context``, two
          ``@value`` wrapped fields and a template-element-instance ``@id``.
        * ``Project Partner Institution`` - one element carrying only an
          external ``@id``.

        The assertions check that the first element loses both ``@context``
        and ``@id`` (without gaining an ``iri`` key) while its ``@value``
        wrappers are flattened, and that the second element exposes its
        ``@id`` under the ``iri`` key instead.
        """
        sample_instance = {
            "Funder Information": [
                {
                    "@context": {
                        "funderName": "https://schema.metadatacenter.org/properties/0eb27432-1ede-44a1-87c2-bed2081cab5c",
                        "Funder GRID reference": "https://schema.metadatacenter.org/properties/41a72a33-f1c7-4c0b-8b26-129bc1793ea8",
                    },
                    "funderName": {"@value": "ZonMw"},
                    "Funder GRID reference": {"@value": "grid.438427.e"},
                    "@id": "https://repo.metadatacenter.org/template-element-instances/c6eeacfd-8d80-4742-8ba2-d707802dd6a4",
                }
            ],
            "Project Partner Institution": [
                {
                    "@id": "http://www.fair-data-collective.com/zonmw/projectadmin/MaastrichtUniversityMedicalCentre"
                }
            ],
        }

        cleaned = clean_template_instance_response(sample_instance)

        # Verify @context is removed from nested object
        assert "@context" not in cleaned["Funder Information"][0]

        # Verify template-element-instance @id is removed
        # (dropped outright: it must not reappear under the "iri" key either)
        assert "@id" not in cleaned["Funder Information"][0]
        assert "iri" not in cleaned["Funder Information"][0]

        # Verify non-template-element-instance @id is transformed to iri
        assert (
            cleaned["Project Partner Institution"][0]["iri"]
            == "http://www.fair-data-collective.com/zonmw/projectadmin/MaastrichtUniversityMedicalCentre"
        )
        assert "@id" not in cleaned["Project Partner Institution"][0]

        # Verify @value flattening still works
        assert cleaned["Funder Information"][0]["funderName"] == "ZonMw"
        assert (
            cleaned["Funder Information"][0]["Funder GRID reference"] == "grid.438427.e"
        )

    def test_value_type_conversion(self):
        """Test that typed ``@value`` objects are converted to native Python types.

        Expected conversions, as pinned by the assertions below:

        * ``xsd:date`` and ``xsd:string`` values stay ``str``.
        * ``xsd:decimal``, ``xsd:float`` and ``xsd:double`` values become ``float``.
        * ``xsd:integer``, ``xsd:int`` and ``xsd:long`` values become ``int``.
        * ``xsd:boolean`` values become ``bool``; ``"true"`` and ``"1"`` map to
          ``True``, ``"false"`` and ``"0"`` map to ``False``.
        * A ``@value`` without ``@type`` is returned unchanged.
        * Elements of a list are converted individually.
        """
        sample_instance = {
            # String values (should remain as string)
            "End date": {"@value": "2022-08-31", "@type": "xsd:date"},
            "Project Title": {"@value": "Test Project Title", "@type": "xsd:string"},
            "Description": {"@value": "This is a description", "@type": "xsd:string"},
            # Numeric values (should be converted to numbers)
            "Project duration": {"@value": "24", "@type": "xsd:decimal"},
            "Budget amount": {"@value": "100000.50", "@type": "xsd:float"},
            "Participant count": {"@value": "42", "@type": "xsd:integer"},
            "Priority level": {"@value": "5", "@type": "xsd:int"},
            "Max participants": {"@value": "1000", "@type": "xsd:long"},
            "Weight": {"@value": "98.76", "@type": "xsd:double"},
            # Boolean values
            "Is active": {"@value": "true", "@type": "xsd:boolean"},
            "Is completed": {"@value": "false", "@type": "xsd:boolean"},
            "Has funding": {"@value": "1", "@type": "xsd:boolean"},
            "Is public": {"@value": "0", "@type": "xsd:boolean"},
            # Single @value (no @type) should remain as-is
            "Simple field": {"@value": "simple value"},
            # Array with mixed types
            "Mixed values": [
                {"@value": "123", "@type": "xsd:integer"},
                {"@value": "test string", "@type": "xsd:string"},
                {"@value": "true", "@type": "xsd:boolean"},
            ],
        }

        cleaned = clean_template_instance_response(sample_instance)

        # Verify string types remain as strings
        assert cleaned["End date"] == "2022-08-31"
        assert isinstance(cleaned["End date"], str)
        assert cleaned["Project Title"] == "Test Project Title"
        assert isinstance(cleaned["Project Title"], str)
        assert cleaned["Description"] == "This is a description"
        assert isinstance(cleaned["Description"], str)

        # Verify numeric type conversions
        # (the isinstance checks matter: 24 == 24.0 would pass on value alone)
        assert cleaned["Project duration"] == 24.0
        assert isinstance(cleaned["Project duration"], float)
        assert cleaned["Budget amount"] == 100000.50
        assert isinstance(cleaned["Budget amount"], float)
        assert cleaned["Participant count"] == 42
        assert isinstance(cleaned["Participant count"], int)
        assert cleaned["Priority level"] == 5
        assert isinstance(cleaned["Priority level"], int)
        assert cleaned["Max participants"] == 1000
        assert isinstance(cleaned["Max participants"], int)
        assert cleaned["Weight"] == 98.76
        assert isinstance(cleaned["Weight"], float)

        # Verify boolean type conversions
        # (identity checks with `is` reject truthy/falsy non-bool results)
        assert cleaned["Is active"] is True
        assert isinstance(cleaned["Is active"], bool)
        assert cleaned["Is completed"] is False
        assert isinstance(cleaned["Is completed"], bool)
        assert cleaned["Has funding"] is True
        assert isinstance(cleaned["Has funding"], bool)
        assert cleaned["Is public"] is False
        assert isinstance(cleaned["Is public"], bool)

        # Verify single @value without @type
        assert cleaned["Simple field"] == "simple value"
        assert isinstance(cleaned["Simple field"], str)

        # Verify array with mixed types
        assert cleaned["Mixed values"] == [123, "test string", True]
        assert isinstance(cleaned["Mixed values"][0], int)
        assert isinstance(cleaned["Mixed values"][1], str)
        assert isinstance(cleaned["Mixed values"][2], bool)

    def test_value_type_conversion_edge_cases(self):
        """Test edge cases for @value and @type conversion.

        Covered cases, as pinned by the assertions below:

        * Numeric ``@type`` with a non-numeric ``@value`` yields the original
          string rather than raising.
        * ``xsd:boolean`` parsing ignores letter case (``"TRUE"``, ``"False"``).
        * An object holding keys beyond ``@value``/``@type`` is not collapsed
          to a scalar: ``@id`` and ``rdfs:label`` appear as ``iri`` and
          ``label``, and ``@value``/``@type`` are absent from the result.
        * An unrecognised ``@type`` yields the ``@value`` string unchanged.
        """
        sample_instance = {
            # Invalid numeric values should fall back to original string
            "Invalid decimal": {"@value": "not-a-number", "@type": "xsd:decimal"},
            "Invalid integer": {"@value": "abc123", "@type": "xsd:integer"},
            # Boolean edge cases
            "Boolean uppercase": {"@value": "TRUE", "@type": "xsd:boolean"},
            "Boolean mixed case": {"@value": "False", "@type": "xsd:boolean"},
            # Objects with more than @value and @type should be processed normally
            "Complex object": {
                "@value": "some value",
                "@type": "xsd:string",
                "@id": "http://example.org/resource",
                "rdfs:label": "Some Label",
            },
            # Unknown XSD types should return value as-is
            "Unknown type": {"@value": "custom value", "@type": "custom:unknownType"},
        }

        cleaned = clean_template_instance_response(sample_instance)

        # Invalid numeric conversions should fall back to original string
        assert cleaned["Invalid decimal"] == "not-a-number"
        assert isinstance(cleaned["Invalid decimal"], str)
        assert cleaned["Invalid integer"] == "abc123"
        assert isinstance(cleaned["Invalid integer"], str)

        # Boolean case insensitive
        assert cleaned["Boolean uppercase"] is True
        assert isinstance(cleaned["Boolean uppercase"], bool)
        assert cleaned["Boolean mixed case"] is False
        assert isinstance(cleaned["Boolean mixed case"], bool)

        # Complex objects should not be flattened - should keep all transformed fields
        complex_obj = cleaned["Complex object"]
        assert "@value" not in complex_obj
        assert "@type" not in complex_obj
        assert complex_obj["iri"] == "http://example.org/resource"
        assert complex_obj["label"] == "Some Label"

        # Unknown types should return value as string
        assert cleaned["Unknown type"] == "custom value"
        assert isinstance(cleaned["Unknown type"], str)

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/BACH-AI-Tools/cedar-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.