From 47e5cb8da10947fddc62cbb64d12d11a7203dbd3 Mon Sep 17 00:00:00 2001
From: sneakers-the-rat
Date: Tue, 3 Sep 2024 17:41:34 -0700
Subject: [PATCH 1/2] bugfix - don't choke on empty datasets during validation

---
 src/numpydantic/interface/hdf5.py | 12 +++++++++++-
 tests/test_interface/test_hdf5.py | 20 ++++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/src/numpydantic/interface/hdf5.py b/src/numpydantic/interface/hdf5.py
index 4fc548d..e3edac4 100644
--- a/src/numpydantic/interface/hdf5.py
+++ b/src/numpydantic/interface/hdf5.py
@@ -39,6 +39,7 @@ as ``S32`` isoformatted byte strings (timezones optional) like:
 
 """
 
+import pdb
 import sys
 from datetime import datetime
 from pathlib import Path
@@ -321,13 +322,22 @@ class H5Interface(Interface):
         """
         if h5py.h5t.check_string_dtype(array.dtype):
             # check for datetimes
+            pdb.set_trace()
             try:
                 if array[0].dtype.type is np.datetime64:
                     return np.datetime64
                 else:
                     return str
-            except (AttributeError, ValueError, TypeError):  # pragma: no cover
+            except (AttributeError, TypeError):  # pragma: no cover
+                # it's not a datetime, but it is some kind of string
                 return str
+            except (IndexError, ValueError):
+                # if the dataset is empty, we can't tell if something is a datetime
+                # or not, so we just tell the validation method what it wants to hear
+                if self.dtype in (np.datetime64, str):
+                    return self.dtype
+                else:
+                    return str
         else:
             return array.dtype
 
diff --git a/tests/test_interface/test_hdf5.py b/tests/test_interface/test_hdf5.py
index bf47a8d..9ca9e94 100644
--- a/tests/test_interface/test_hdf5.py
+++ b/tests/test_interface/test_hdf5.py
@@ -198,3 +198,23 @@ def test_datetime(hdf5_array, compound):
         assert instance.array[0, 0] == now
         instance.array[0] = now
         assert all(instance.array[0] == now)
+
+
+@pytest.mark.parametrize("dtype", [int, float, str, datetime])
+def test_empty_dataset(dtype, tmp_path):
+    """
+    Empty datasets shouldn't choke us during validation
+    """
+    array_path = tmp_path / "test.h5"
+    if dtype in (str, datetime):
+        np_dtype = "S32"
+    else:
+        np_dtype = dtype
+
+    with h5py.File(array_path, "w") as h5f:
+        _ = h5f.create_dataset(name="/data", dtype=np_dtype)
+
+    class MyModel(BaseModel):
+        array: NDArray[Any, dtype]
+
+    _ = MyModel(array=(array_path, "/data"))

From 34e48b394a5365abfd4384039aa526248e3d6271 Mon Sep 17 00:00:00 2001
From: sneakers-the-rat
Date: Tue, 3 Sep 2024 17:43:11 -0700
Subject: [PATCH 2/2] rm pdb

---
 pyproject.toml                    | 4 ++--
 src/numpydantic/interface/hdf5.py | 2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 7c8b8ae..35bf0a4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -145,8 +145,8 @@ select = [
     "D210", "D211",
     # emptiness
     "D419",
-
-
+    # no pdb
+    "T100",
 ]
 ignore = [
     "ANN101", "ANN102", "ANN401", "ANN204",
diff --git a/src/numpydantic/interface/hdf5.py b/src/numpydantic/interface/hdf5.py
index e3edac4..20cec0d 100644
--- a/src/numpydantic/interface/hdf5.py
+++ b/src/numpydantic/interface/hdf5.py
@@ -39,7 +39,6 @@ as ``S32`` isoformatted byte strings (timezones optional) like:
 
 """
 
-import pdb
 import sys
 from datetime import datetime
 from pathlib import Path
@@ -322,7 +321,6 @@ class H5Interface(Interface):
         """
         if h5py.h5t.check_string_dtype(array.dtype):
             # check for datetimes
-            pdb.set_trace()
             try:
                 if array[0].dtype.type is np.datetime64:
                     return np.datetime64