# hdf5_handler.py
import os
import logging
from typing import Dict, Any, List
logger = logging.getLogger(__name__)
class HDF5Handler:
"""
Handles operations on HDF5 files as part of MCP capability.
"""
def __init__(self, base_path: str = "/data/samples"):
"""
Initialize the HDF5 handler with a base path for sample data.
"""
self.base_path = base_path
async def list_available_resources(self) -> List[Dict[str, Any]]:
"""
List available HDF5 files as MCP resources.
Returns:
List of resource objects for MCP
"""
# Simulate finding HDF5 files
simulated_files = [
"sample1.h5",
"sample2.h5",
"data/weather.h5",
"data/measurements.h5"
]
resources = []
for file_path in simulated_files:
full_path = os.path.join(self.base_path, file_path)
resources.append({
"id": f"hdf5:{full_path}",
"name": os.path.basename(file_path),
"type": "hdf5",
"description": f"HDF5 data file: {file_path}",
"metadata": {
"path": full_path,
"simulated": True
}
})
return resources
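    # The list above is hard-coded for the simulation. A non-simulated
    # implementation could discover files under self.base_path instead,
    # roughly as sketched below (hedged sketch; assumes a recursive glob
    # and `import glob` at module level):
    #
    #     pattern = os.path.join(self.base_path, "**", "*.h5")
    #     h5_files = glob.glob(pattern, recursive=True)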
async def get_resource_details(self, file_path: str) -> Dict[str, Any]:
"""
Get detailed information about an HDF5 file resource.
Args:
file_path: Path to the HDF5 file
Returns:
Resource details including structure information
"""
if not self._simulate_file_exists(file_path):
raise FileNotFoundError(f"HDF5 file not found: {file_path}")
        simulated_structure = {
            "groups": ["group1", "group2", "group2/subgroup1", "measurements"],
            "datasets": ["metadata", "group1/temperature", "group1/pressure",
                         "group2/timestamps", "group2/subgroup1/data1",
                         "group2/subgroup1/data2", "measurements/sensor1",
                         "measurements/sensor2", "measurements/sensor3"]
        }
return {
"id": f"hdf5:{file_path}",
"name": os.path.basename(file_path),
"type": "hdf5",
"description": f"HDF5 data file: {file_path}",
"metadata": {
"path": file_path,
"structure": simulated_structure,
"simulated": True
}
}
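    # Sketch of how the structure could be derived from a real file with
    # h5py's visititems, which walks every group and dataset (assumes
    # `import h5py`; not used by the simulated path above):
    #
    #     groups, datasets = [], []
    #     def _collect(name, obj):
    #         (groups if isinstance(obj, h5py.Group) else datasets).append(name)
    #     with h5py.File(file_path, "r") as f:
    #         f.visititems(_collect)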
async def read_dataset(self, file_path: str, dataset_path: str) -> Dict[str, Any]:
"""
Read a dataset from an HDF5 file.
Args:
file_path: Path to the HDF5 file
dataset_path: Path to the dataset within the file
Returns:
Dictionary with simulated dataset information and data
"""
if not self._simulate_file_exists(file_path):
raise FileNotFoundError(f"HDF5 file not found: {file_path}")
if not self._simulate_dataset_exists(file_path, dataset_path):
raise ValueError(f"Dataset {dataset_path} not found in file")
if dataset_path == "metadata":
simulated_data = {"created": "2025-03-15", "author": "Researcher",
"version": "1.2"}
simulated_shape = None
simulated_dtype = "object"
elif dataset_path.endswith("temperature"):
simulated_data = [20.1, 20.3, 20.8, 21.2, 21.5]
simulated_shape = (5,)
simulated_dtype = "float64"
elif dataset_path.endswith("pressure"):
simulated_data = [101.3, 101.4, 101.3, 101.2, 101.3]
simulated_shape = (5,)
simulated_dtype = "float64"
elif dataset_path.endswith("timestamps"):
simulated_data = ["2025-04-01T12:00:00", "2025-04-01T12:15:00",
"2025-04-01T12:30:00"]
simulated_shape = (3,)
simulated_dtype = "object"
else:
simulated_data = [1, 2, 3, 4, 5]
simulated_shape = (5,)
simulated_dtype = "int64"
simulated_attrs = {"unit": "celsius" if "temperature" in dataset_path
else "hPa" if "pressure" in dataset_path else "none"}
return {
"data": simulated_data,
"shape": simulated_shape,
"dtype": simulated_dtype,
"attributes": simulated_attrs,
"simulated": True
}
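    # A non-simulated read might look roughly like this with h5py (sketch
    # only; assumes `import h5py` and that shape/dtype come from the file
    # rather than the canned values above):
    #
    #     with h5py.File(file_path, "r") as f:
    #         dset = f[dataset_path]
    #         data = dset[()]                # load the full dataset into memory
    #         return {"data": data.tolist() if hasattr(data, "tolist") else data,
    #                 "shape": dset.shape, "dtype": str(dset.dtype),
    #                 "attributes": dict(dset.attrs)}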
    async def list_contents(self, file_path: str, group_path: str = "/") -> Dict[str, Any]:
"""
List the contents (groups and datasets) within an HDF5 file.
Args:
file_path: Path to the HDF5 file
group_path: Path to the group within the file (default: root)
Returns:
Dictionary with lists of groups and datasets
"""
if not self._simulate_file_exists(file_path):
raise FileNotFoundError(f"HDF5 file not found: {file_path}")
if group_path != "/" and not self._simulate_group_exists(file_path, group_path):
raise ValueError(f"Group {group_path} not found in file")
simulated_structure = {
"/": {
"groups": ["group1", "group2", "measurements"],
"datasets": ["metadata"]
},
"group1": {
"groups": [],
"datasets": ["temperature", "pressure"]
},
"group2": {
"groups": ["subgroup1"],
"datasets": ["timestamps"]
},
"group2/subgroup1": {
"groups": [],
"datasets": ["data1", "data2"]
},
"measurements": {
"groups": [],
"datasets": ["sensor1", "sensor2", "sensor3"]
}
}
# Get the contents for the requested group
if group_path == "/":
contents = simulated_structure["/"]
else:
# Remove leading slash if present for lookup
lookup_path = group_path[1:] if group_path.startswith("/") else group_path
contents = simulated_structure.get(lookup_path, {"groups": [], "datasets": []})
return {
"groups": contents["groups"],
"datasets": contents["datasets"],
"simulated": True
}
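    # Against a real file, one level of contents could be listed with h5py
    # by partitioning a group's children (sketch; assumes `import h5py`):
    #
    #     with h5py.File(file_path, "r") as f:
    #         grp = f[group_path]
    #         groups = [k for k, v in grp.items() if isinstance(v, h5py.Group)]
    #         datasets = [k for k, v in grp.items() if isinstance(v, h5py.Dataset)]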
def _simulate_file_exists(self, file_path: str) -> bool:
"""
Simulate checking if an HDF5 file exists.
Args:
file_path: Path to the HDF5 file
Returns:
True if the file exists, False otherwise
"""
if "nonexistent" in file_path:
return False
return True
def _simulate_group_exists(self, file_path: str, group_path: str) -> bool:
"""
Simulate checking if a group exists in an HDF5 file.
Args:
file_path: Path to the HDF5 file
group_path: Path to the group within the file
Returns:
True if the group exists, False otherwise
"""
# Define valid groups for simulation
valid_groups = ["group1", "group2", "measurements", "group2/subgroup1"]
# Remove leading slash if present for comparison
group_path = group_path[1:] if group_path.startswith("/") else group_path
return group_path in valid_groups
def _simulate_dataset_exists(self, file_path: str, dataset_path: str) -> bool:
"""
Simulate checking if a dataset exists in an HDF5 file.
Args:
file_path: Path to the HDF5 file
dataset_path: Path to the dataset within the file
Returns:
True if the dataset exists, False otherwise
"""
# Define valid datasets for simulation
valid_datasets = [
"metadata",
"group1/temperature",
"group1/pressure",
"group2/timestamps",
"group2/subgroup1/data1",
"group2/subgroup1/data2",
"measurements/sensor1",
"measurements/sensor2",
"measurements/sensor3"
]
# Remove leading slash if present for comparison
dataset_path = dataset_path[1:] if dataset_path.startswith("/") else dataset_path
return dataset_path in valid_datasets
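
# Minimal usage sketch for the async API above. Because every method returns
# simulated data, this demo runs without any HDF5 files on disk; the file and
# dataset names below are purely illustrative.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        handler = HDF5Handler()
        resources = await handler.list_available_resources()
        print(f"{len(resources)} simulated resources available")
        contents = await handler.list_contents("sample1.h5", "/group1")
        print("group1 contents:", contents)
        reading = await handler.read_dataset("sample1.h5", "group1/temperature")
        print("temperature:", reading["data"], reading["attributes"])

    asyncio.run(_demo())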