continue removing NDArray type, since it's in numpydantic now.

sneakers-the-rat 2024-07-10 00:20:01 -07:00
parent d6750f8df1
commit 1920c9f292
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
7 changed files with 10 additions and 131 deletions
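The swap this commit continues is import-for-import: array fields that were annotated with the in-tree nwb_linkml.types.ndarray.NDArray are annotated with numpydantic's NDArray instead. A minimal sketch of the post-migration pattern, assuming numpydantic's NDArray and Shape exports and that the generic np.number dtype is accepted (the model name is illustrative):

    import numpy as np
    from numpydantic import NDArray, Shape
    from pydantic import BaseModel

    class ExampleModel(BaseModel):
        # shape- and dtype-constrained array field, validated by numpydantic
        array: NDArray[Shape["2 x, * y"], np.number]

    ExampleModel(array=np.zeros((2, 3)))  # first axis must be 2, dtype numeric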

View file

@@ -18,13 +18,13 @@ from pathlib import Path
from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Type, Union

import h5py
+from numpydantic.interface.hdf5 import H5ArrayPath
from pydantic import BaseModel, ConfigDict, Field

from nwb_linkml.annotations import unwrap_optional
from nwb_linkml.maps import Map
from nwb_linkml.maps.hdmf import dynamictable_to_model
from nwb_linkml.types.hdf5 import HDF5_Path
-from nwb_linkml.types.ndarray import NDArrayProxy

if sys.version_info.minor >= 11:
    from enum import StrEnum
@@ -241,7 +241,7 @@ class ResolveDynamicTable(HDF5Map):
    Dynamic tables are sort of odd in that their models don't include their fields
    (except as a list of strings in ``colnames``),
    so we need to create a new model that includes fields for each column,
-    and then we include the datasets as :class:`~.nwb_linkml.types.ndarray.NDArrayProxy`
+    and then we include the datasets as :class:`~numpydantic.interface.hdf5.H5ArrayPath`
    objects which lazy load the arrays in a thread/process safe way.

    This map also resolves the child elements,
@@ -386,7 +386,7 @@ class ResolveDatasetAsDict(HDF5Map):
    """
    Resolve datasets that do not have a ``neurodata_type`` of their own as a dictionary
    that will be packaged into a model in the next step. Grabs the array in an
-    :class:`~nwb_linkml.types.ndarray.NDArrayProxy`
+    :class:`~numpydantic.interface.hdf5.H5ArrayPath`
    under an ``array`` key, and then grabs any additional ``attrs`` as well.

    Mutually exclusive with :class:`.ResolveScalars` - this only applies to datasets that are larger
@@ -413,7 +413,7 @@ class ResolveDatasetAsDict(HDF5Map):
    ) -> H5ReadResult:
        res = {
-            "array": NDArrayProxy(h5f_file=src.h5f_path, path=src.path),
+            "array": H5ArrayPath(file=src.h5f_path, path=src.path),
            "hdf5_path": src.path,
            "name": src.parts[-1],
            **src.attrs,
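The replacement above is one-for-one apart from the keyword rename (h5f_file becomes file). A hedged sketch of the reference being built, assuming numpydantic's HDF5 interface defers reads until the validated field is actually indexed (the file and dataset paths are hypothetical):

    from numpydantic.interface.hdf5 import H5ArrayPath

    # a (file, path) pointer into an HDF5 file; nothing is read at construction
    array_ref = H5ArrayPath(file="sub-01_ses-01.nwb", path="/acquisition/raw/data")
    # assigning array_ref to an NDArray-annotated pydantic field keeps the
    # array lazy: slices are read from disk on access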

View file

@@ -7,11 +7,12 @@ from typing import Any, List, Optional, Type
import dask.array as da
import h5py
import numpy as np
+from numpydantic import NDArray
+from numpydantic.interface.hdf5 import H5ArrayPath
from pydantic import BaseModel, create_model

from nwb_linkml.maps.dtype import struct_from_dtype
from nwb_linkml.types.hdf5 import HDF5_Path
-from nwb_linkml.types.ndarray import NDArray, NDArrayProxy


def model_from_dynamictable(group: h5py.Group, base: Optional[BaseModel] = None) -> Type[BaseModel]:
@@ -61,7 +62,7 @@ def dynamictable_to_model(
        try:
            items[col] = da.from_array(group[col])
        except NotImplementedError:
-            items[col] = NDArrayProxy(h5f_file=group.file.filename, path=group[col].name)
+            items[col] = H5ArrayPath(file=group.file.filename, path=group[col].name)

    return model.model_construct(hdf5_path=group.name, name=group.name.split("/")[-1], **items)
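The changed branch in isolation: dask can wrap most numeric HDF5 datasets, but raises NotImplementedError on dtypes it cannot handle, and the column then falls back to a lazy H5ArrayPath reference. A sketch of just that fallback, with illustrative names:

    import dask.array as da
    import h5py
    from numpydantic.interface.hdf5 import H5ArrayPath

    def column_value(group: h5py.Group, col: str):
        try:
            # lazy dask wrapper when the dtype is supported
            return da.from_array(group[col])
        except NotImplementedError:
            # otherwise a lazy file/path reference resolved by numpydantic
            return H5ArrayPath(file=group.file.filename, path=group[col].name)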

View file

@@ -1,7 +1,3 @@
"""
Custom types (likely deprecated)
"""
-
-from nwb_linkml.types.ndarray import NDArray
-
-__all__ = ["NDArray"]

View file

@@ -13,10 +13,10 @@ from typing import Optional, TypedDict
import numpy as np
import pytest
+from numpydantic.ndarray import NDArrayMeta
from pydantic import BaseModel

from nwb_linkml.generators.pydantic import NWBPydanticGenerator, compile_python
-from nwb_linkml.types.ndarray import NDArrayMeta

from ..fixtures import (
    TestSchemas,

View file

@@ -8,6 +8,7 @@ from nwb_linkml.maps.hdmf import dynamictable_to_model, model_from_dynamictable

NWBFILE = "/Users/jonny/Dropbox/lab/p2p_ld/data/nwb/sub-738651046_ses-760693773.nwb"


+@pytest.mark.xfail()
@pytest.mark.parametrize("dataset", ["aibs.nwb"])
def test_make_dynamictable(data_dir, dataset):
    nwbfile = data_dir / dataset

View file

@@ -5,12 +5,12 @@ from typing import Optional

import pytest
from nptyping import Shape, UByte
+from numpydantic import NDArray

import nwb_linkml
from nwb_linkml.maps.naming import version_module_case
from nwb_linkml.providers import LinkMLProvider, PydanticProvider
from nwb_linkml.providers.git import DEFAULT_REPOS
-from nwb_linkml.types.ndarray import NDArray

CORE_MODULES = (
    "core.nwb.base",

View file

@@ -1,119 +0,0 @@
-import json
-
-from typing import Any, Optional, Union
-
-import h5py
-import numpy as np
-import pytest
-from nptyping import Number, Shape
-from pydantic import BaseModel, Field, ValidationError
-
-from nwb_linkml.types.ndarray import NDArray, NDArrayProxy
-
-
-def test_ndarray_type():
-    class Model(BaseModel):
-        array: NDArray[Shape["2 x, * y"], Number]
-        array_any: Optional[NDArray[Any, Any]] = None
-
-    schema = Model.model_json_schema()
-    assert schema["properties"]["array"]["items"] == {"items": {"type": "number"}, "type": "array"}
-    assert schema["properties"]["array"]["maxItems"] == 2
-    assert schema["properties"]["array"]["minItems"] == 2
-
-    # models should instantiate correctly!
-    instance = Model(array=np.zeros((2, 3)))
-
-    with pytest.raises(ValidationError):
-        instance = Model(array=np.zeros((4, 6)))
-
-    with pytest.raises(ValidationError):
-        instance = Model(array=np.ones((2, 3), dtype=bool))
-
-    instance = Model(array=np.zeros((2, 3)), array_any=np.ones((3, 4, 5)))
-
-
-def test_ndarray_union():
-    class Model(BaseModel):
-        array: Optional[
-            Union[
-                NDArray[Shape["* x, * y"], Number],
-                NDArray[Shape["* x, * y, 3 r_g_b"], Number],
-                NDArray[Shape["* x, * y, 3 r_g_b, 4 r_g_b_a"], Number],
-            ]
-        ] = Field(None)
-
-    instance = Model()
-    instance = Model(array=np.random.random((5, 10)))
-    instance = Model(array=np.random.random((5, 10, 3)))
-    instance = Model(array=np.random.random((5, 10, 3, 4)))
-
-    with pytest.raises(ValidationError):
-        instance = Model(array=np.random.random((5,)))
-
-    with pytest.raises(ValidationError):
-        instance = Model(array=np.random.random((5, 10, 4)))
-
-    with pytest.raises(ValidationError):
-        instance = Model(array=np.random.random((5, 10, 3, 6)))
-
-    with pytest.raises(ValidationError):
-        instance = Model(array=np.random.random((5, 10, 4, 6)))
-
-
-def test_ndarray_coercion():
-    """
-    Coerce lists to arrays
-    """
-
-    class Model(BaseModel):
-        array: NDArray[Shape["* x"], Number]
-
-    amod = Model(array=[1, 2, 3, 4.5])
-    assert np.allclose(amod.array, np.array([1, 2, 3, 4.5]))
-    with pytest.raises(ValidationError):
-        amod = Model(array=["a", "b", "c"])
-
-
-def test_ndarray_serialize():
-    """
-    Large arrays should get compressed with blosc, otherwise just to list
-    """
-
-    class Model(BaseModel):
-        large_array: NDArray[Any, Number]
-        small_array: NDArray[Any, Number]
-
-    mod = Model(large_array=np.random.random((1024, 1024)), small_array=np.random.random((3, 3)))
-    mod_str = mod.model_dump_json()
-    mod_json = json.loads(mod_str)
-    for a in ("array", "shape", "dtype", "unpack_fns"):
-        assert a in mod_json["large_array"]
-    assert isinstance(mod_json["large_array"]["array"], str)
-    assert isinstance(mod_json["small_array"], list)
-
-    # but when we just dump to a dict we don't compress
-    mod_dict = mod.model_dump()
-    assert isinstance(mod_dict["large_array"], np.ndarray)
-
-
-def test_ndarray_proxy(tmp_output_dir_func):
-    h5f_source = tmp_output_dir_func / "test.h5"
-    with h5py.File(h5f_source, "w") as h5f:
-        dset_good = h5f.create_dataset("/data", data=np.random.random((1024, 1024, 3)))
-        dset_bad = h5f.create_dataset("/data_bad", data=np.random.random((1024, 1024, 4)))
-
-    class Model(BaseModel):
-        array: NDArray[Shape["* x, * y, 3 z"], Number]
-
-    mod = Model(array=NDArrayProxy(h5f_file=h5f_source, path="/data"))
-    subarray = mod.array[0:5, 0:5, :]
-    assert isinstance(subarray, np.ndarray)
-    assert isinstance(subarray.sum(), float)
-    assert mod.array.name == "/data"
-
-    with pytest.raises(NotImplementedError):
-        mod.array[0] = 5
-
-    with pytest.raises(ValidationError):
-        mod = Model(array=NDArrayProxy(h5f_file=h5f_source, path="/data_bad"))
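These tests are dropped rather than ported because the behavior they exercise (shape and dtype validation, serialization, lazy proxying) now belongs to numpydantic and is tested there. For reference, a minimal sketch of the first case rewritten against numpydantic, assuming its NDArray and Shape exports; nptyping's Number is replaced here with numpy's generic numeric dtype:

    import numpy as np
    import pytest
    from numpydantic import NDArray, Shape
    from pydantic import BaseModel, ValidationError

    class Model(BaseModel):
        array: NDArray[Shape["2 x, * y"], np.number]

    def test_shape_validation():
        Model(array=np.zeros((2, 3)))  # matches "2 x, * y"
        with pytest.raises(ValidationError):
            Model(array=np.zeros((4, 6)))  # first axis must be exactly 2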