Handle empty HDF5 datasets when inferring the dtype of string-like arrays.

An empty dataset cannot be indexed, so we cannot probe the first element to
tell datetimes from plain strings. In that case, return the annotated dtype
when it is a datetime or str, and fall back to str otherwise.

Debugging leftovers (`import pdb`, `pdb.set_trace()`) from the draft of this
patch are intentionally not included: they would halt every validation of a
string-typed dataset at an interactive prompt.

diff --git a/src/numpydantic/interface/hdf5.py b/src/numpydantic/interface/hdf5.py
--- a/src/numpydantic/interface/hdf5.py
+++ b/src/numpydantic/interface/hdf5.py
@@ -321,13 +321,21 @@ class H5Interface(Interface):
         """
         if h5py.h5t.check_string_dtype(array.dtype):
             # check for datetimes
             try:
                 if array[0].dtype.type is np.datetime64:
                     return np.datetime64
                 else:
                     return str
-            except (AttributeError, ValueError, TypeError):  # pragma: no cover
+            except (AttributeError, TypeError):  # pragma: no cover
+                # it's not a datetime, but it is some kind of string
                 return str
+            except (IndexError, ValueError):
+                # if the dataset is empty, we can't tell if something is a datetime
+                # or not, so we just tell the validation method what it wants to hear
+                if self.dtype in (np.datetime64, str):
+                    return self.dtype
+                else:
+                    return str
         else:
             return array.dtype
 
diff --git a/tests/test_interface/test_hdf5.py b/tests/test_interface/test_hdf5.py
index bf47a8d..9ca9e94 100644
--- a/tests/test_interface/test_hdf5.py
+++ b/tests/test_interface/test_hdf5.py
@@ -198,3 +198,23 @@ def test_datetime(hdf5_array, compound):
     assert instance.array[0, 0] == now
     instance.array[0] = now
     assert all(instance.array[0] == now)
+
+
+@pytest.mark.parametrize("dtype", [int, float, str, datetime])
+def test_empty_dataset(dtype, tmp_path):
+    """
+    Empty datasets shouldn't choke us during validation
+    """
+    array_path = tmp_path / "test.h5"
+    if dtype in (str, datetime):
+        np_dtype = "S32"
+    else:
+        np_dtype = dtype
+
+    with h5py.File(array_path, "w") as h5f:
+        _ = h5f.create_dataset(name="/data", dtype=np_dtype)
+
+    class MyModel(BaseModel):
+        array: NDArray[Any, dtype]
+
+    _ = MyModel(array=(array_path, "/data"))