oop still have to do bytes conversion

This commit is contained in:
sneakers-the-rat 2024-09-02 22:29:58 -07:00
parent 067ffa0342
commit 0f1e0d0caf
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
2 changed files with 8 additions and 3 deletions

View file

@@ -9,7 +9,8 @@ but it can be persuaded to store data in ASCII or virtualized utf-8 under somewh
 This PR uses h5py's string methods to expose string datasets (compound or not)
 via the h5proxy with the `asstr()` view method.
-This also allows us to set strings with normal python strings.
+This also allows us to set strings with normal python strings,
+although hdf5 datasets can only be created with `bytes` or other non-unicode encodings.
 Since numpydantic isn't necessarily a tool for *creating* hdf5 files
 (nobody should be doing that), but rather an interface to them,
@@ -27,7 +28,7 @@ class MyModel(BaseModel):
array: NDArray[Any, str] array: NDArray[Any, str]
h5f = h5py.File('my_data.h5', 'w') h5f = h5py.File('my_data.h5', 'w')
data = np.random.random((10,10)).astype(str) data = np.random.random((10,10)).astype(bytes)
_ = h5f.create_dataset('/dataset', data=data) _ = h5f.create_dataset('/dataset', data=data)
instance = MyModel(array=('my_data.h5', '/dataset')) instance = MyModel(array=('my_data.h5', '/dataset'))

View file

@@ -122,8 +122,12 @@ def hdf5_array(
compound: bool = False, compound: bool = False,
) -> H5ArrayPath: ) -> H5ArrayPath:
array_path = "/" + "_".join([str(s) for s in shape]) + "__" + dtype.__name__ array_path = "/" + "_".join([str(s) for s in shape]) + "__" + dtype.__name__
if not compound: if not compound:
data = np.random.random(shape).astype(dtype) if dtype is str:
data = np.random.random(shape).astype(bytes)
else:
data = np.random.random(shape).astype(dtype)
_ = hdf5_file.create_dataset(array_path, data=data) _ = hdf5_file.create_dataset(array_path, data=data)
return H5ArrayPath(Path(hdf5_file.filename), array_path) return H5ArrayPath(Path(hdf5_file.filename), array_path)
else: else: