From 0f1e0d0caf56f52d9a9bd0bde70a010c00b82454 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Mon, 2 Sep 2024 22:29:58 -0700 Subject: [PATCH] oop still have to do bytes conversion --- docs/changelog.md | 5 +++-- tests/fixtures.py | 6 +++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 5dfe547..6959982 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -9,7 +9,8 @@ but it can be persuaded to store data in ASCII or virtualized utf-8 under somewh This PR uses h5py's string methods to expose string datasets (compound or not) via the h5proxy with the `asstr()` view method. -This also allows us to set strings with normal python strings. +This also allows us to set strings with normal python strings, +although hdf5 datasets can only be created with `bytes` or other non-unicode encodings. Since numpydantic isn't necessarily a tool for *creating* hdf5 files (nobody should be doing that), but rather an interface to them, @@ -27,7 +28,7 @@ class MyModel(BaseModel): array: NDArray[Any, str] h5f = h5py.File('my_data.h5', 'w') -data = np.random.random((10,10)).astype(str) +data = np.random.random((10,10)).astype(bytes) _ = h5f.create_dataset('/dataset', data=data) instance = MyModel(array=('my_data.h5', '/dataset')) diff --git a/tests/fixtures.py b/tests/fixtures.py index cb5b59b..9d5bba6 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -122,8 +122,12 @@ def hdf5_array( compound: bool = False, ) -> H5ArrayPath: array_path = "/" + "_".join([str(s) for s in shape]) + "__" + dtype.__name__ + if not compound: - data = np.random.random(shape).astype(dtype) + if dtype is str: + data = np.random.random(shape).astype(bytes) + else: + data = np.random.random(shape).astype(dtype) _ = hdf5_file.create_dataset(array_path, data=data) return H5ArrayPath(Path(hdf5_file.filename), array_path) else: