From 1920c9f2928a5453f8eaff0e4e3ce84c8ffab2c4 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Wed, 10 Jul 2024 00:20:01 -0700 Subject: [PATCH] continue removing NDArray type, since it's in numpydantic now. --- nwb_linkml/src/nwb_linkml/maps/hdf5.py | 8 +- nwb_linkml/src/nwb_linkml/maps/hdmf.py | 5 +- nwb_linkml/src/nwb_linkml/types/__init__.py | 4 - .../test_generator_pydantic.py | 2 +- nwb_linkml/tests/test_maps/test_hdmf.py | 1 + .../test_providers/test_provider_schema.py | 2 +- nwb_linkml/tests/test_types/test_ndarray.py | 119 ------------------ 7 files changed, 10 insertions(+), 131 deletions(-) delete mode 100644 nwb_linkml/tests/test_types/test_ndarray.py diff --git a/nwb_linkml/src/nwb_linkml/maps/hdf5.py b/nwb_linkml/src/nwb_linkml/maps/hdf5.py index 6e6dd2f..8ebfd85 100644 --- a/nwb_linkml/src/nwb_linkml/maps/hdf5.py +++ b/nwb_linkml/src/nwb_linkml/maps/hdf5.py @@ -18,13 +18,13 @@ from pathlib import Path from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Type, Union import h5py +from numpydantic.interface.hdf5 import H5ArrayPath from pydantic import BaseModel, ConfigDict, Field from nwb_linkml.annotations import unwrap_optional from nwb_linkml.maps import Map from nwb_linkml.maps.hdmf import dynamictable_to_model from nwb_linkml.types.hdf5 import HDF5_Path -from nwb_linkml.types.ndarray import NDArrayProxy if sys.version_info.minor >= 11: from enum import StrEnum @@ -241,7 +241,7 @@ class ResolveDynamicTable(HDF5Map): Dynamic tables are sort of odd in that their models don't include their fields (except as a list of strings in ``colnames`` ), so we need to create a new model that includes fields for each column, - and then we include the datasets as :class:`~.nwb_linkml.types.ndarray.NDArrayProxy` + and then we include the datasets as :class:`~numpydantic.interface.hdf5.H5ArrayPath` objects which lazy load the arrays in a thread/process safe way. This map also resolves the child elements, @@ -386,7 +386,7 @@ class ResolveDatasetAsDict(HDF5Map): """ Resolve datasets that do not have a ``neurodata_type`` of their own as a dictionary that will be packaged into a model in the next step. Grabs the array in an - :class:`~nwb_linkml.types.ndarray.NDArrayProxy` + :class:`~numpydantic.interface.hdf5.H5ArrayPath` under an ``array`` key, and then grabs any additional ``attrs`` as well. Mutually exclusive with :class:`.ResolveScalars` - this only applies to datasets that are larger @@ -413,7 +413,7 @@ class ResolveDatasetAsDict(HDF5Map): ) -> H5ReadResult: res = { - "array": NDArrayProxy(h5f_file=src.h5f_path, path=src.path), + "array": H5ArrayPath(file=src.h5f_path, path=src.path), "hdf5_path": src.path, "name": src.parts[-1], **src.attrs, diff --git a/nwb_linkml/src/nwb_linkml/maps/hdmf.py b/nwb_linkml/src/nwb_linkml/maps/hdmf.py index 2e1c43e..b2c552d 100644 --- a/nwb_linkml/src/nwb_linkml/maps/hdmf.py +++ b/nwb_linkml/src/nwb_linkml/maps/hdmf.py @@ -7,11 +7,12 @@ from typing import Any, List, Optional, Type import dask.array as da import h5py import numpy as np +from numpydantic import NDArray +from numpydantic.interface.hdf5 import H5ArrayPath from pydantic import BaseModel, create_model from nwb_linkml.maps.dtype import struct_from_dtype from nwb_linkml.types.hdf5 import HDF5_Path -from nwb_linkml.types.ndarray import NDArray, NDArrayProxy def model_from_dynamictable(group: h5py.Group, base: Optional[BaseModel] = None) -> Type[BaseModel]: @@ -61,7 +62,7 @@ def dynamictable_to_model( try: items[col] = da.from_array(group[col]) except NotImplementedError: - items[col] = NDArrayProxy(h5f_file=group.file.filename, path=group[col].name) + items[col] = H5ArrayPath(file=group.file.filename, path=group[col].name) return model.model_construct(hdf5_path=group.name, name=group.name.split("/")[-1], **items) diff --git a/nwb_linkml/src/nwb_linkml/types/__init__.py b/nwb_linkml/src/nwb_linkml/types/__init__.py index d531177..27a54e9 100644 --- a/nwb_linkml/src/nwb_linkml/types/__init__.py +++ b/nwb_linkml/src/nwb_linkml/types/__init__.py @@ -1,7 +1,3 @@ """ Custom types (likely deprecated) """ - -from nwb_linkml.types.ndarray import NDArray - -__all__ = ["NDArray"] diff --git a/nwb_linkml/tests/test_generators/test_generator_pydantic.py b/nwb_linkml/tests/test_generators/test_generator_pydantic.py index ce09cc7..d2ab023 100644 --- a/nwb_linkml/tests/test_generators/test_generator_pydantic.py +++ b/nwb_linkml/tests/test_generators/test_generator_pydantic.py @@ -13,10 +13,10 @@ from typing import Optional, TypedDict import numpy as np import pytest +from numpydantic.ndarray import NDArrayMeta from pydantic import BaseModel from nwb_linkml.generators.pydantic import NWBPydanticGenerator, compile_python -from nwb_linkml.types.ndarray import NDArrayMeta from ..fixtures import ( TestSchemas, diff --git a/nwb_linkml/tests/test_maps/test_hdmf.py b/nwb_linkml/tests/test_maps/test_hdmf.py index d95f61a..b6b66dc 100644 --- a/nwb_linkml/tests/test_maps/test_hdmf.py +++ b/nwb_linkml/tests/test_maps/test_hdmf.py @@ -8,6 +8,7 @@ from nwb_linkml.maps.hdmf import dynamictable_to_model, model_from_dynamictable NWBFILE = "/Users/jonny/Dropbox/lab/p2p_ld/data/nwb/sub-738651046_ses-760693773.nwb" +@pytest.mark.xfail() @pytest.mark.parametrize("dataset", ["aibs.nwb"]) def test_make_dynamictable(data_dir, dataset): nwbfile = data_dir / dataset diff --git a/nwb_linkml/tests/test_providers/test_provider_schema.py b/nwb_linkml/tests/test_providers/test_provider_schema.py index de7c67a..9072933 100644 --- a/nwb_linkml/tests/test_providers/test_provider_schema.py +++ b/nwb_linkml/tests/test_providers/test_provider_schema.py @@ -5,12 +5,12 @@ from typing import Optional import pytest from nptyping import Shape, UByte +from numpydantic import NDArray import nwb_linkml from nwb_linkml.maps.naming import version_module_case from nwb_linkml.providers import LinkMLProvider, PydanticProvider from nwb_linkml.providers.git import DEFAULT_REPOS -from nwb_linkml.types.ndarray import NDArray CORE_MODULES = ( "core.nwb.base", diff --git a/nwb_linkml/tests/test_types/test_ndarray.py b/nwb_linkml/tests/test_types/test_ndarray.py deleted file mode 100644 index f81a78e..0000000 --- a/nwb_linkml/tests/test_types/test_ndarray.py +++ /dev/null @@ -1,119 +0,0 @@ -import json -from typing import Any, Optional, Union - -import h5py -import numpy as np -import pytest -from nptyping import Number, Shape -from pydantic import BaseModel, Field, ValidationError - -from nwb_linkml.types.ndarray import NDArray, NDArrayProxy - - -def test_ndarray_type(): - - class Model(BaseModel): - array: NDArray[Shape["2 x, * y"], Number] - array_any: Optional[NDArray[Any, Any]] = None - - schema = Model.model_json_schema() - assert schema["properties"]["array"]["items"] == {"items": {"type": "number"}, "type": "array"} - assert schema["properties"]["array"]["maxItems"] == 2 - assert schema["properties"]["array"]["minItems"] == 2 - - # models should instantiate correctly! - instance = Model(array=np.zeros((2, 3))) - - with pytest.raises(ValidationError): - instance = Model(array=np.zeros((4, 6))) - - with pytest.raises(ValidationError): - instance = Model(array=np.ones((2, 3), dtype=bool)) - - instance = Model(array=np.zeros((2, 3)), array_any=np.ones((3, 4, 5))) - - -def test_ndarray_union(): - class Model(BaseModel): - array: Optional[ - Union[ - NDArray[Shape["* x, * y"], Number], - NDArray[Shape["* x, * y, 3 r_g_b"], Number], - NDArray[Shape["* x, * y, 3 r_g_b, 4 r_g_b_a"], Number], - ] - ] = Field(None) - - instance = Model() - instance = Model(array=np.random.random((5, 10))) - instance = Model(array=np.random.random((5, 10, 3))) - instance = Model(array=np.random.random((5, 10, 3, 4))) - - with pytest.raises(ValidationError): - instance = Model(array=np.random.random((5,))) - - with pytest.raises(ValidationError): - instance = Model(array=np.random.random((5, 10, 4))) - - with pytest.raises(ValidationError): - instance = Model(array=np.random.random((5, 10, 3, 6))) - - with pytest.raises(ValidationError): - instance = Model(array=np.random.random((5, 10, 4, 6))) - - -def test_ndarray_coercion(): - """ - Coerce lists to arrays - """ - - class Model(BaseModel): - array: NDArray[Shape["* x"], Number] - - amod = Model(array=[1, 2, 3, 4.5]) - assert np.allclose(amod.array, np.array([1, 2, 3, 4.5])) - with pytest.raises(ValidationError): - amod = Model(array=["a", "b", "c"]) - - -def test_ndarray_serialize(): - """ - Large arrays should get compressed with blosc, otherwise just to list - """ - - class Model(BaseModel): - large_array: NDArray[Any, Number] - small_array: NDArray[Any, Number] - - mod = Model(large_array=np.random.random((1024, 1024)), small_array=np.random.random((3, 3))) - mod_str = mod.model_dump_json() - mod_json = json.loads(mod_str) - for a in ("array", "shape", "dtype", "unpack_fns"): - assert a in mod_json["large_array"] - assert isinstance(mod_json["large_array"]["array"], str) - assert isinstance(mod_json["small_array"], list) - - # but when we just dump to a dict we don't compress - mod_dict = mod.model_dump() - assert isinstance(mod_dict["large_array"], np.ndarray) - - -def test_ndarray_proxy(tmp_output_dir_func): - h5f_source = tmp_output_dir_func / "test.h5" - with h5py.File(h5f_source, "w") as h5f: - dset_good = h5f.create_dataset("/data", data=np.random.random((1024, 1024, 3))) - dset_bad = h5f.create_dataset("/data_bad", data=np.random.random((1024, 1024, 4))) - - class Model(BaseModel): - array: NDArray[Shape["* x, * y, 3 z"], Number] - - mod = Model(array=NDArrayProxy(h5f_file=h5f_source, path="/data")) - subarray = mod.array[0:5, 0:5, :] - assert isinstance(subarray, np.ndarray) - assert isinstance(subarray.sum(), float) - assert mod.array.name == "/data" - - with pytest.raises(NotImplementedError): - mod.array[0] = 5 - - with pytest.raises(ValidationError): - mod = Model(array=NDArrayProxy(h5f_file=h5f_source, path="/data_bad"))