Merge pull request #16 from p2p-ld/bugfix-empty-hdf5

bugfix - don't choke on empty HDF5 datasets during validation
This commit is contained in:
Jonny Saunders 2024-09-03 17:48:55 -07:00 committed by GitHub
commit c5a1e8ca74
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 31 additions and 3 deletions

View file

@ -145,8 +145,8 @@ select = [
"D210", "D211", "D210", "D211",
# emptiness # emptiness
"D419", "D419",
# no pdb
"T100",
] ]
ignore = [ ignore = [
"ANN101", "ANN102", "ANN401", "ANN204", "ANN101", "ANN102", "ANN401", "ANN204",

View file

@ -326,7 +326,15 @@ class H5Interface(Interface):
return np.datetime64 return np.datetime64
else: else:
return str return str
except (AttributeError, ValueError, TypeError): # pragma: no cover except (AttributeError, TypeError): # pragma: no cover
# it's not a datetime, but it is some kind of string
return str
except (IndexError, ValueError):
# if the dataset is empty, we can't tell if something is a datetime
# or not, so we just tell the validation method what it wants to hear
if self.dtype in (np.datetime64, str):
return self.dtype
else:
return str return str
else: else:
return array.dtype return array.dtype

View file

@ -198,3 +198,23 @@ def test_datetime(hdf5_array, compound):
assert instance.array[0, 0] == now assert instance.array[0, 0] == now
instance.array[0] = now instance.array[0] = now
assert all(instance.array[0] == now) assert all(instance.array[0] == now)
@pytest.mark.parametrize("dtype", [int, float, str, datetime])
def test_empty_dataset(dtype, tmp_path):
    """
    Validating a model against an empty HDF5 dataset should not raise.

    The dataset is created with only a dtype (no shape and no data).
    ``str`` and ``datetime`` cases are both stored as fixed-width ``"S32"``
    byte strings; every other dtype is passed to h5py unchanged.
    """
    h5_path = tmp_path / "test.h5"

    # str and datetime share the same on-disk representation in this test
    storage_dtype = "S32" if dtype in (str, datetime) else dtype

    with h5py.File(h5_path, "w") as h5f:
        h5f.create_dataset("/data", dtype=storage_dtype)

    class MyModel(BaseModel):
        array: NDArray[Any, dtype]

    # Instantiation runs validation; the test passes if nothing is raised.
    MyModel(array=(h5_path, "/data"))