mirror of
https://github.com/p2p-ld/numpydantic.git
synced 2025-01-09 21:44:27 +00:00
Lint, fix olde array compression test
This commit is contained in:
parent
5b722bb6da
commit
3d1d029ab8
16 changed files with 109 additions and 33 deletions
2
.github/workflows/tests.yml
vendored
2
.github/workflows/tests.yml
vendored
|
@ -33,6 +33,8 @@ jobs:
|
||||||
|
|
||||||
lint:
|
lint:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
continue-on-error: true
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
|
- uses: chartboost/ruff-action@v1
|
||||||
- uses: psf/black@stable
|
- uses: psf/black@stable
|
|
@ -66,7 +66,7 @@ testpaths = [
|
||||||
|
|
||||||
[tool.ruff]
|
[tool.ruff]
|
||||||
target-version = "py311"
|
target-version = "py311"
|
||||||
include = ["numpydantic/**/*.py", "pyproject.toml"]
|
include = ["src/numpydantic/**/*.py", "pyproject.toml"]
|
||||||
exclude = ["tests"]
|
exclude = ["tests"]
|
||||||
|
|
||||||
[tool.ruff.lint]
|
[tool.ruff.lint]
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
# ruff: noqa: E402
|
# ruff: noqa: E402
|
||||||
# ruff: noqa: F401
|
# ruff: noqa: F401
|
||||||
# ruff: noqa: I001
|
# ruff: noqa: I001
|
||||||
|
# ruff: noqa: D104
|
||||||
from numpydantic.monkeypatch import apply_patches
|
from numpydantic.monkeypatch import apply_patches
|
||||||
|
|
||||||
apply_patches()
|
apply_patches()
|
||||||
|
|
|
@ -1,3 +1,8 @@
|
||||||
|
"""
|
||||||
|
Exceptions used within numpydantic
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
class DtypeError(TypeError):
|
class DtypeError(TypeError):
|
||||||
"""Exception raised for invalid dtypes"""
|
"""Exception raised for invalid dtypes"""
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
"""
|
||||||
|
Interfaces between nptyping types and array backends
|
||||||
|
"""
|
||||||
|
|
||||||
from numpydantic.interface.dask import DaskInterface
|
from numpydantic.interface.dask import DaskInterface
|
||||||
from numpydantic.interface.hdf5 import H5Interface
|
from numpydantic.interface.hdf5 import H5Interface
|
||||||
from numpydantic.interface.interface import Interface
|
from numpydantic.interface.interface import Interface
|
||||||
|
|
|
@ -1,5 +1,11 @@
|
||||||
|
"""
|
||||||
|
Interface for Dask arrays
|
||||||
|
"""
|
||||||
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from numpydantic.interface.interface import Interface
|
from numpydantic.interface.interface import Interface
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
import pdb
|
"""
|
||||||
|
Interfaces for HDF5 Datasets
|
||||||
|
"""
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, NamedTuple, Tuple, Union, TypeAlias
|
from typing import Any, NamedTuple, Tuple, TypeAlias, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
@ -28,15 +31,15 @@ class H5Proxy:
|
||||||
"""
|
"""
|
||||||
Proxy class to mimic numpy-like array behavior with an HDF5 array
|
Proxy class to mimic numpy-like array behavior with an HDF5 array
|
||||||
|
|
||||||
The attribute and item access methods only open the file for the duration of the method,
|
The attribute and item access methods only open the file for the duration of the
|
||||||
making it less perilous to share this object between threads and processes.
|
method, making it less perilous to share this object between threads and processes.
|
||||||
|
|
||||||
This class attempts to be a passthrough class to a :class:`h5py.Dataset` object,
|
This class attempts to be a passthrough class to a :class:`h5py.Dataset` object,
|
||||||
including its attributes and item getters/setters.
|
including its attributes and item getters/setters.
|
||||||
|
|
||||||
When using read-only methods, no locking is attempted (beyond the HDF5 defaults),
|
When using read-only methods, no locking is attempted (beyond the HDF5 defaults),
|
||||||
but when using the write methods (setting an array value), try and use the ``locking``
|
but when using the write methods (setting an array value), try and use the
|
||||||
methods of :class:`h5py.File` .
|
``locking`` methods of :class:`h5py.File` .
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file (pathlib.Path | str): Location of hdf5 file on filesystem
|
file (pathlib.Path | str): Location of hdf5 file on filesystem
|
||||||
|
@ -74,7 +77,7 @@ class H5Proxy:
|
||||||
obj = h5f.get(self.path)
|
obj = h5f.get(self.path)
|
||||||
obj[key] = value
|
obj[key] = value
|
||||||
|
|
||||||
def open(self, mode: str = "r"):
|
def open(self, mode: str = "r") -> "h5py.Dataset":
|
||||||
"""
|
"""
|
||||||
Return the opened :class:`h5py.Dataset` object
|
Return the opened :class:`h5py.Dataset` object
|
||||||
|
|
||||||
|
@ -84,7 +87,7 @@ class H5Proxy:
|
||||||
self._h5f = h5py.File(self.file, mode)
|
self._h5f = h5py.File(self.file, mode)
|
||||||
return self._h5f.get(self.path)
|
return self._h5f.get(self.path)
|
||||||
|
|
||||||
def close(self):
|
def close(self) -> None:
|
||||||
"""
|
"""
|
||||||
Close the :class:`h5py.File` object left open when returning the dataset with
|
Close the :class:`h5py.File` object left open when returning the dataset with
|
||||||
:meth:`.open`
|
:meth:`.open`
|
||||||
|
@ -116,7 +119,10 @@ class H5Interface(Interface):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def check(cls, array: Union[H5ArrayPath, Tuple[Union[Path, str], str]]) -> bool:
|
def check(cls, array: Union[H5ArrayPath, Tuple[Union[Path, str], str]]) -> bool:
|
||||||
"""Check that the given array is a :class:`.H5ArrayPath` or something that resembles one."""
|
"""
|
||||||
|
Check that the given array is a :class:`.H5ArrayPath` or something that
|
||||||
|
resembles one.
|
||||||
|
"""
|
||||||
if isinstance(array, H5ArrayPath):
|
if isinstance(array, H5ArrayPath):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -152,7 +158,8 @@ class H5Interface(Interface):
|
||||||
array = H5Proxy(file=array[0], path=array[1])
|
array = H5Proxy(file=array[0], path=array[1])
|
||||||
else:
|
else:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Need to specify a file and a path within an HDF5 file to use the HDF5 Interface"
|
"Need to specify a file and a path within an HDF5 file to use the HDF5 "
|
||||||
|
"Interface"
|
||||||
)
|
)
|
||||||
|
|
||||||
if not array.array_exists():
|
if not array.array_exists():
|
||||||
|
@ -165,6 +172,14 @@ class H5Interface(Interface):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def to_json(cls, array: H5Proxy) -> dict:
|
def to_json(cls, array: H5Proxy) -> dict:
|
||||||
|
"""
|
||||||
|
Dump to a dictionary containing
|
||||||
|
|
||||||
|
* ``file``: :attr:`.file`
|
||||||
|
* ``path``: :attr:`.path`
|
||||||
|
* ``attrs``: Any HDF5 attributes on the dataset
|
||||||
|
* ``array``: The array as a list of lists
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
dset = array.open()
|
dset = array.open()
|
||||||
meta = {
|
meta = {
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
"""
|
||||||
|
Base Interface abstract class
|
||||||
|
"""
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from operator import attrgetter
|
from operator import attrgetter
|
||||||
from typing import Any, Generic, Tuple, Type, TypeVar, Union
|
from typing import Any, Generic, Tuple, Type, TypeVar, Union
|
||||||
|
@ -67,13 +71,15 @@ class Interface(ABC, Generic[T]):
|
||||||
return array
|
return array
|
||||||
if not check_shape(array.shape, self.shape):
|
if not check_shape(array.shape, self.shape):
|
||||||
raise ShapeError(
|
raise ShapeError(
|
||||||
f"Invalid shape! expected shape {self.shape.prepared_args}, got shape {array.shape}"
|
f"Invalid shape! expected shape {self.shape.prepared_args}, "
|
||||||
|
f"got shape {array.shape}"
|
||||||
)
|
)
|
||||||
return array
|
return array
|
||||||
|
|
||||||
def after_validation(self, array: NDArrayType) -> T:
|
def after_validation(self, array: NDArrayType) -> T:
|
||||||
"""
|
"""
|
||||||
Optional step post-validation that coerces the intermediate array type into the return type
|
Optional step post-validation that coerces the intermediate array type into the
|
||||||
|
return type
|
||||||
|
|
||||||
Default method is a no-op
|
Default method is a no-op
|
||||||
"""
|
"""
|
||||||
|
@ -90,13 +96,15 @@ class Interface(ABC, Generic[T]):
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def enabled(cls) -> bool:
|
def enabled(cls) -> bool:
|
||||||
"""
|
"""
|
||||||
Check whether this array interface can be used (eg. its dependent packages are installed, etc.)
|
Check whether this array interface can be used (eg. its dependent packages are
|
||||||
|
installed, etc.)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def to_json(cls, array: Type[T]) -> Union[list, dict]:
|
def to_json(cls, array: Type[T]) -> Union[list, dict]:
|
||||||
"""
|
"""
|
||||||
Convert an array of :attr:`.return_type` to a JSON-compatible format using base python types
|
Convert an array of :attr:`.return_type` to a JSON-compatible format using
|
||||||
|
base python types
|
||||||
"""
|
"""
|
||||||
if not isinstance(array, np.ndarray):
|
if not isinstance(array, np.ndarray):
|
||||||
array = np.array(array)
|
array = np.array(array)
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
"""
|
||||||
|
Interface to numpy arrays
|
||||||
|
"""
|
||||||
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from numpydantic.interface.interface import Interface
|
from numpydantic.interface.interface import Interface
|
||||||
|
@ -22,7 +26,10 @@ class NumpyInterface(Interface):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def check(cls, array: Any) -> bool:
|
def check(cls, array: Any) -> bool:
|
||||||
"""Check that this is in fact a numpy ndarray or something that can be coerced to one"""
|
"""
|
||||||
|
Check that this is in fact a numpy ndarray or something that can be
|
||||||
|
coerced to one
|
||||||
|
"""
|
||||||
if isinstance(array, ndarray):
|
if isinstance(array, ndarray):
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
|
@ -34,7 +41,8 @@ class NumpyInterface(Interface):
|
||||||
|
|
||||||
def before_validation(self, array: Any) -> ndarray:
|
def before_validation(self, array: Any) -> ndarray:
|
||||||
"""
|
"""
|
||||||
Coerce to an ndarray. We have already checked if coercion is possible in :meth:`.check`
|
Coerce to an ndarray. We have already checked if coercion is possible
|
||||||
|
in :meth:`.check`
|
||||||
"""
|
"""
|
||||||
if not isinstance(array, ndarray):
|
if not isinstance(array, ndarray):
|
||||||
array = ndarray(array)
|
array = ndarray(array)
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
"""
|
||||||
|
Interface to xarray
|
||||||
|
|
||||||
|
(Not implemented)
|
||||||
|
"""
|
|
@ -0,0 +1,5 @@
|
||||||
|
"""
|
||||||
|
Interface to zarr arrays
|
||||||
|
|
||||||
|
(Not Implemented)
|
||||||
|
"""
|
|
@ -1,8 +1,12 @@
|
||||||
|
"""
|
||||||
|
Maps from one value system to another
|
||||||
|
"""
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from nptyping import Float, Int, String, Bool
|
from nptyping import Bool, Float, Int, String
|
||||||
|
|
||||||
np_to_python = {
|
np_to_python = {
|
||||||
Any: Any,
|
Any: Any,
|
||||||
|
@ -43,6 +47,7 @@ np_to_python = {
|
||||||
},
|
},
|
||||||
**{n: str for n in (np.character, np.str_, np.string_, np.unicode_)},
|
**{n: str for n in (np.character, np.str_, np.string_, np.unicode_)},
|
||||||
}
|
}
|
||||||
|
"""Map from numpy types to python types"""
|
||||||
|
|
||||||
|
|
||||||
flat_to_nptyping = {
|
flat_to_nptyping = {
|
||||||
|
@ -75,5 +80,7 @@ flat_to_nptyping = {
|
||||||
"AnyType": "Any",
|
"AnyType": "Any",
|
||||||
"object": "Object",
|
"object": "Object",
|
||||||
}
|
}
|
||||||
|
"""Map from NWB-style flat dtypes to nptyping types"""
|
||||||
|
|
||||||
python_to_nptyping = {float: Float, str: String, int: Int, bool: Bool}
|
python_to_nptyping = {float: Float, str: String, int: Int, bool: Bool}
|
||||||
|
"""Map from python types to nptyping types"""
|
||||||
|
|
|
@ -1,3 +1,10 @@
|
||||||
|
"""
|
||||||
|
Functions to monkeypatch dependent packages - most notably nptyping
|
||||||
|
"""
|
||||||
|
|
||||||
|
# ruff: noqa: ANN001
|
||||||
|
|
||||||
|
|
||||||
def patch_npytyping_perf() -> None:
|
def patch_npytyping_perf() -> None:
|
||||||
"""
|
"""
|
||||||
nptyping makes an expensive call to inspect.stack()
|
nptyping makes an expensive call to inspect.stack()
|
||||||
|
@ -14,6 +21,7 @@ def patch_npytyping_perf() -> None:
|
||||||
from nptyping.pandas_ import dataframe
|
from nptyping.pandas_ import dataframe
|
||||||
|
|
||||||
# make a new __module__ methods for the affected classes
|
# make a new __module__ methods for the affected classes
|
||||||
|
|
||||||
def new_module_ndarray(cls) -> str:
|
def new_module_ndarray(cls) -> str:
|
||||||
return cls._get_module(inspect.currentframe(), "nptyping.ndarray")
|
return cls._get_module(inspect.currentframe(), "nptyping.ndarray")
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@ Extension of nptyping NDArray for pydantic that allows for JSON-Schema serializa
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
from typing import Any, Tuple, Union
|
from typing import TYPE_CHECKING, Any, Tuple, Union
|
||||||
|
|
||||||
import nptyping.structure
|
import nptyping.structure
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
@ -21,6 +21,9 @@ from numpydantic.maps import np_to_python
|
||||||
# from numpydantic.proxy import NDArrayProxy
|
# from numpydantic.proxy import NDArrayProxy
|
||||||
from numpydantic.types import DtypeType, NDArrayType, ShapeType
|
from numpydantic.types import DtypeType, NDArrayType, ShapeType
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from pydantic import ValidationInfo
|
||||||
|
|
||||||
COMPRESSION_THRESHOLD = 16 * 1024
|
COMPRESSION_THRESHOLD = 16 * 1024
|
||||||
"""
|
"""
|
||||||
Arrays larger than this size (in bytes) will be compressed and b64 encoded when
|
Arrays larger than this size (in bytes) will be compressed and b64 encoded when
|
||||||
|
@ -62,10 +65,11 @@ def list_of_lists_schema(shape: Shape, array_type_handler: dict) -> ListSchema:
|
||||||
|
|
||||||
def _get_validate_interface(shape: ShapeType, dtype: DtypeType) -> Callable:
|
def _get_validate_interface(shape: ShapeType, dtype: DtypeType) -> Callable:
|
||||||
"""
|
"""
|
||||||
Validate using a matching :class:`.Interface` class using its :meth:`.Interface.validate` method
|
Validate using a matching :class:`.Interface` class using its
|
||||||
|
:meth:`.Interface.validate` method
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def validate_interface(value: Any, info) -> NDArrayType:
|
def validate_interface(value: Any, info: "ValidationInfo") -> NDArrayType:
|
||||||
interface_cls = Interface.match(value)
|
interface_cls = Interface.match(value)
|
||||||
interface = interface_cls(shape, dtype)
|
interface = interface_cls(shape, dtype)
|
||||||
value = interface.validate(value)
|
value = interface.validate(value)
|
||||||
|
@ -99,7 +103,8 @@ class NDArrayMeta(_NDArrayMeta, implementation="NDArray"):
|
||||||
|
|
||||||
class NDArray(NPTypingType, metaclass=NDArrayMeta):
|
class NDArray(NPTypingType, metaclass=NDArrayMeta):
|
||||||
"""
|
"""
|
||||||
Constrained array type allowing nptyping syntax for dtype and shape validation and serialization.
|
Constrained array type allowing nptyping syntax for dtype and shape validation
|
||||||
|
and serialization.
|
||||||
|
|
||||||
Integrates with pydantic such that
|
Integrates with pydantic such that
|
||||||
- JSON schema for list of list encoding
|
- JSON schema for list of list encoding
|
||||||
|
|
|
@ -4,6 +4,8 @@ Types for numpydantic
|
||||||
Note that these are types as in python typing types, not classes.
|
Note that these are types as in python typing types, not classes.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# ruff: noqa: D102
|
||||||
|
|
||||||
from typing import Any, Protocol, Tuple, runtime_checkable
|
from typing import Any, Protocol, Tuple, runtime_checkable
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
@ -15,6 +17,7 @@ DtypeType = np.dtype | str | type | Any | DType
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
class NDArrayType(Protocol):
|
class NDArrayType(Protocol):
|
||||||
|
"""A protocol for describing types that should be considered ndarrays"""
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def dtype(self) -> DtypeType: ...
|
def dtype(self) -> DtypeType: ...
|
||||||
|
|
|
@ -83,24 +83,18 @@ def test_ndarray_coercion():
|
||||||
|
|
||||||
def test_ndarray_serialize():
|
def test_ndarray_serialize():
|
||||||
"""
|
"""
|
||||||
Large arrays should get compressed with blosc, otherwise just to list
|
Arrays should be dumped to a list when using json, but kept as ndarray otherwise
|
||||||
"""
|
"""
|
||||||
|
|
||||||
class Model(BaseModel):
|
class Model(BaseModel):
|
||||||
large_array: NDArray[Any, Number]
|
array: NDArray[Any, Number]
|
||||||
small_array: NDArray[Any, Number]
|
|
||||||
|
|
||||||
mod = Model(
|
mod = Model(array=np.random.random((3, 3)))
|
||||||
large_array=np.random.random((1024, 1024)), small_array=np.random.random((3, 3))
|
|
||||||
)
|
|
||||||
mod_str = mod.model_dump_json()
|
mod_str = mod.model_dump_json()
|
||||||
mod_json = json.loads(mod_str)
|
mod_json = json.loads(mod_str)
|
||||||
for a in ("array", "shape", "dtype", "unpack_fns"):
|
assert isinstance(mod_json["array"], list)
|
||||||
assert a in mod_json["large_array"].keys()
|
|
||||||
assert isinstance(mod_json["large_array"]["array"], str)
|
|
||||||
assert isinstance(mod_json["small_array"], list)
|
|
||||||
|
|
||||||
# but when we just dump to a dict we don't compress
|
# but when we just dump to a dict we don't coerce
|
||||||
mod_dict = mod.model_dump()
|
mod_dict = mod.model_dump()
|
||||||
assert isinstance(mod_dict["large_array"], np.ndarray)
|
assert isinstance(mod_dict["large_array"], np.ndarray)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue