bugfix - don't choke on empty datasets during validation

This commit is contained in:
sneakers-the-rat 2024-09-03 17:41:34 -07:00
parent 2c625e47ac
commit 47e5cb8da1
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
2 changed files with 31 additions and 1 deletions

View file

@ -39,6 +39,7 @@ as ``S32`` isoformatted byte strings (timezones optional) like:
"""
import pdb
import sys
from datetime import datetime
from pathlib import Path
@ -321,13 +322,22 @@ class H5Interface(Interface):
"""
if h5py.h5t.check_string_dtype(array.dtype):
# check for datetimes
pdb.set_trace()
try:
if array[0].dtype.type is np.datetime64:
return np.datetime64
else:
return str
except (AttributeError, ValueError, TypeError): # pragma: no cover
except (AttributeError, TypeError): # pragma: no cover
# it's not a datetime, but it is some kind of string
return str
except (IndexError, ValueError):
# if the dataset is empty, we can't tell if something is a datetime
# or not, so we just tell the validation method what it wants to hear
if self.dtype in (np.datetime64, str):
return self.dtype
else:
return str
else:
return array.dtype

View file

@ -198,3 +198,23 @@ def test_datetime(hdf5_array, compound):
assert instance.array[0, 0] == now
instance.array[0] = now
assert all(instance.array[0] == now)
@pytest.mark.parametrize("dtype", [int, float, str, datetime])
def test_empty_dataset(dtype, tmp_path):
    """
    A dataset created without any data should still pass model validation.

    One dataset is written per parametrized ``dtype`` and then handed to a
    model whose ``array`` field is annotated with that same dtype.
    """
    h5_file = tmp_path / "test.h5"

    # str and datetime are both written with the "S32" byte-string dtype;
    # every other dtype is passed straight through to h5py
    storage_dtype = "S32" if dtype in (str, datetime) else dtype

    # only a name and dtype are given: no shape and no data
    with h5py.File(h5_file, "w") as handle:
        handle.create_dataset(name="/data", dtype=storage_dtype)

    class MyModel(BaseModel):
        array: NDArray[Any, dtype]

    # constructing the model runs validation; it must not raise
    MyModel(array=(h5_file, "/data"))