mirror of
https://github.com/p2p-ld/numpydantic.git
synced 2025-01-09 21:44:27 +00:00
Lint, fix olde array compression test
This commit is contained in:
parent
5b722bb6da
commit
3d1d029ab8
16 changed files with 109 additions and 33 deletions
2
.github/workflows/tests.yml
vendored
2
.github/workflows/tests.yml
vendored
|
@ -33,6 +33,8 @@ jobs:
|
|||
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: chartboost/ruff-action@v1
|
||||
- uses: psf/black@stable
|
|
@ -66,7 +66,7 @@ testpaths = [
|
|||
|
||||
[tool.ruff]
|
||||
target-version = "py311"
|
||||
include = ["numpydantic/**/*.py", "pyproject.toml"]
|
||||
include = ["src/numpydantic/**/*.py", "pyproject.toml"]
|
||||
exclude = ["tests"]
|
||||
|
||||
[tool.ruff.lint]
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# ruff: noqa: E402
|
||||
# ruff: noqa: F401
|
||||
# ruff: noqa: I001
|
||||
# ruff: noqa: D104
|
||||
from numpydantic.monkeypatch import apply_patches
|
||||
|
||||
apply_patches()
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
"""
|
||||
Exceptions used within numpydantic
|
||||
"""
|
||||
|
||||
|
||||
class DtypeError(TypeError):
|
||||
"""Exception raised for invalid dtypes"""
|
||||
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
"""
|
||||
Interfaces between nptyping types and array backends
|
||||
"""
|
||||
|
||||
from numpydantic.interface.dask import DaskInterface
|
||||
from numpydantic.interface.hdf5 import H5Interface
|
||||
from numpydantic.interface.interface import Interface
|
||||
|
|
|
@ -1,5 +1,11 @@
|
|||
"""
|
||||
Interface for Dask arrays
|
||||
"""
|
||||
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
from numpydantic.interface.interface import Interface
|
||||
|
||||
try:
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
import pdb
|
||||
"""
|
||||
Interfaces for HDF5 Datasets
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, NamedTuple, Tuple, Union, TypeAlias
|
||||
from typing import Any, NamedTuple, Tuple, TypeAlias, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
@ -28,15 +31,15 @@ class H5Proxy:
|
|||
"""
|
||||
Proxy class to mimic numpy-like array behavior with an HDF5 array
|
||||
|
||||
The attribute and item access methods only open the file for the duration of the method,
|
||||
making it less perilous to share this object between threads and processes.
|
||||
The attribute and item access methods only open the file for the duration of the
|
||||
method, making it less perilous to share this object between threads and processes.
|
||||
|
||||
This class attempts to be a passthrough class to a :class:`h5py.Dataset` object,
|
||||
including its attributes and item getters/setters.
|
||||
|
||||
When using read-only methods, no locking is attempted (beyond the HDF5 defaults),
|
||||
but when using the write methods (setting an array value), try and use the ``locking``
|
||||
methods of :class:`h5py.File` .
|
||||
but when using the write methods (setting an array value), try and use the
|
||||
``locking`` methods of :class:`h5py.File` .
|
||||
|
||||
Args:
|
||||
file (pathlib.Path | str): Location of hdf5 file on filesystem
|
||||
|
@ -74,7 +77,7 @@ class H5Proxy:
|
|||
obj = h5f.get(self.path)
|
||||
obj[key] = value
|
||||
|
||||
def open(self, mode: str = "r"):
|
||||
def open(self, mode: str = "r") -> "h5py.Dataset":
|
||||
"""
|
||||
Return the opened :class:`h5py.Dataset` object
|
||||
|
||||
|
@ -84,7 +87,7 @@ class H5Proxy:
|
|||
self._h5f = h5py.File(self.file, mode)
|
||||
return self._h5f.get(self.path)
|
||||
|
||||
def close(self):
|
||||
def close(self) -> None:
|
||||
"""
|
||||
Close the :class:`h5py.File` object left open when returning the dataset with
|
||||
:meth:`.open`
|
||||
|
@ -116,7 +119,10 @@ class H5Interface(Interface):
|
|||
|
||||
@classmethod
|
||||
def check(cls, array: Union[H5ArrayPath, Tuple[Union[Path, str], str]]) -> bool:
|
||||
"""Check that the given array is a :class:`.H5ArrayPath` or something that resembles one."""
|
||||
"""
|
||||
Check that the given array is a :class:`.H5ArrayPath` or something that
|
||||
resembles one.
|
||||
"""
|
||||
if isinstance(array, H5ArrayPath):
|
||||
return True
|
||||
|
||||
|
@ -152,7 +158,8 @@ class H5Interface(Interface):
|
|||
array = H5Proxy(file=array[0], path=array[1])
|
||||
else:
|
||||
raise ValueError(
|
||||
"Need to specify a file and a path within an HDF5 file to use the HDF5 Interface"
|
||||
"Need to specify a file and a path within an HDF5 file to use the HDF5 "
|
||||
"Interface"
|
||||
)
|
||||
|
||||
if not array.array_exists():
|
||||
|
@ -165,6 +172,14 @@ class H5Interface(Interface):
|
|||
|
||||
@classmethod
|
||||
def to_json(cls, array: H5Proxy) -> dict:
|
||||
"""
|
||||
Dump to a dictionary containing
|
||||
|
||||
* ``file``: :attr:`.file`
|
||||
* ``path``: :attr:`.path`
|
||||
* ``attrs``: Any HDF5 attributes on the dataset
|
||||
* ``array``: The array as a list of lists
|
||||
"""
|
||||
try:
|
||||
dset = array.open()
|
||||
meta = {
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
"""
|
||||
Base Interface abstract class
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from operator import attrgetter
|
||||
from typing import Any, Generic, Tuple, Type, TypeVar, Union
|
||||
|
@ -67,13 +71,15 @@ class Interface(ABC, Generic[T]):
|
|||
return array
|
||||
if not check_shape(array.shape, self.shape):
|
||||
raise ShapeError(
|
||||
f"Invalid shape! expected shape {self.shape.prepared_args}, got shape {array.shape}"
|
||||
f"Invalid shape! expected shape {self.shape.prepared_args}, "
|
||||
f"got shape {array.shape}"
|
||||
)
|
||||
return array
|
||||
|
||||
def after_validation(self, array: NDArrayType) -> T:
|
||||
"""
|
||||
Optional step post-validation that coerces the intermediate array type into the return type
|
||||
Optional step post-validation that coerces the intermediate array type into the
|
||||
return type
|
||||
|
||||
Default method is a no-op
|
||||
"""
|
||||
|
@ -90,13 +96,15 @@ class Interface(ABC, Generic[T]):
|
|||
@abstractmethod
|
||||
def enabled(cls) -> bool:
|
||||
"""
|
||||
Check whether this array interface can be used (eg. its dependent packages are installed, etc.)
|
||||
Check whether this array interface can be used (eg. its dependent packages are
|
||||
installed, etc.)
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def to_json(cls, array: Type[T]) -> Union[list, dict]:
|
||||
"""
|
||||
Convert an array of :attr:`.return_type` to a JSON-compatible format using base python types
|
||||
Convert an array of :attr:`.return_type` to a JSON-compatible format using
|
||||
base python types
|
||||
"""
|
||||
if not isinstance(array, np.ndarray):
|
||||
array = np.array(array)
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
"""
|
||||
Interface to numpy arrays
|
||||
"""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from numpydantic.interface.interface import Interface
|
||||
|
@ -22,7 +26,10 @@ class NumpyInterface(Interface):
|
|||
|
||||
@classmethod
|
||||
def check(cls, array: Any) -> bool:
|
||||
"""Check that this is in fact a numpy ndarray or something that can be coerced to one"""
|
||||
"""
|
||||
Check that this is in fact a numpy ndarray or something that can be
|
||||
coerced to one
|
||||
"""
|
||||
if isinstance(array, ndarray):
|
||||
return True
|
||||
else:
|
||||
|
@ -34,7 +41,8 @@ class NumpyInterface(Interface):
|
|||
|
||||
def before_validation(self, array: Any) -> ndarray:
|
||||
"""
|
||||
Coerce to an ndarray. We have already checked if coercion is possible in :meth:`.check`
|
||||
Coerce to an ndarray. We have already checked if coercion is possible
|
||||
in :meth:`.check`
|
||||
"""
|
||||
if not isinstance(array, ndarray):
|
||||
array = ndarray(array)
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
"""
|
||||
Interface to xarray
|
||||
|
||||
(Not implemented)
|
||||
"""
|
|
@ -0,0 +1,5 @@
|
|||
"""
|
||||
Interface to zarr arrays
|
||||
|
||||
(Not Implemented)
|
||||
"""
|
|
@ -1,8 +1,12 @@
|
|||
"""
|
||||
Maps from one value system to another
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
from nptyping import Float, Int, String, Bool
|
||||
from nptyping import Bool, Float, Int, String
|
||||
|
||||
np_to_python = {
|
||||
Any: Any,
|
||||
|
@ -43,6 +47,7 @@ np_to_python = {
|
|||
},
|
||||
**{n: str for n in (np.character, np.str_, np.string_, np.unicode_)},
|
||||
}
|
||||
"""Map from numpy types to python types"""
|
||||
|
||||
|
||||
flat_to_nptyping = {
|
||||
|
@ -75,5 +80,7 @@ flat_to_nptyping = {
|
|||
"AnyType": "Any",
|
||||
"object": "Object",
|
||||
}
|
||||
"""Map from NWB-style flat dtypes to nptyping types"""
|
||||
|
||||
python_to_nptyping = {float: Float, str: String, int: Int, bool: Bool}
|
||||
"""Map from python types to nptyping types"""
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
"""
|
||||
Functions to monkeypatch dependent packages - most notably nptyping
|
||||
"""
|
||||
|
||||
# ruff: noqa: ANN001
|
||||
|
||||
|
||||
def patch_npytyping_perf() -> None:
|
||||
"""
|
||||
nptyping makes an expensive call to inspect.stack()
|
||||
|
@ -14,6 +21,7 @@ def patch_npytyping_perf() -> None:
|
|||
from nptyping.pandas_ import dataframe
|
||||
|
||||
# make a new __module__ methods for the affected classes
|
||||
|
||||
def new_module_ndarray(cls) -> str:
|
||||
return cls._get_module(inspect.currentframe(), "nptyping.ndarray")
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@ Extension of nptyping NDArray for pydantic that allows for JSON-Schema serializa
|
|||
"""
|
||||
|
||||
from collections.abc import Callable
|
||||
from typing import Any, Tuple, Union
|
||||
from typing import TYPE_CHECKING, Any, Tuple, Union
|
||||
|
||||
import nptyping.structure
|
||||
import numpy as np
|
||||
|
@ -21,6 +21,9 @@ from numpydantic.maps import np_to_python
|
|||
# from numpydantic.proxy import NDArrayProxy
|
||||
from numpydantic.types import DtypeType, NDArrayType, ShapeType
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pydantic import ValidationInfo
|
||||
|
||||
COMPRESSION_THRESHOLD = 16 * 1024
|
||||
"""
|
||||
Arrays larger than this size (in bytes) will be compressed and b64 encoded when
|
||||
|
@ -62,10 +65,11 @@ def list_of_lists_schema(shape: Shape, array_type_handler: dict) -> ListSchema:
|
|||
|
||||
def _get_validate_interface(shape: ShapeType, dtype: DtypeType) -> Callable:
|
||||
"""
|
||||
Validate using a matching :class:`.Interface` class using its :meth:`.Interface.validate` method
|
||||
Validate using a matching :class:`.Interface` class using its
|
||||
:meth:`.Interface.validate` method
|
||||
"""
|
||||
|
||||
def validate_interface(value: Any, info) -> NDArrayType:
|
||||
def validate_interface(value: Any, info: "ValidationInfo") -> NDArrayType:
|
||||
interface_cls = Interface.match(value)
|
||||
interface = interface_cls(shape, dtype)
|
||||
value = interface.validate(value)
|
||||
|
@ -99,7 +103,8 @@ class NDArrayMeta(_NDArrayMeta, implementation="NDArray"):
|
|||
|
||||
class NDArray(NPTypingType, metaclass=NDArrayMeta):
|
||||
"""
|
||||
Constrained array type allowing npytyping syntax for dtype and shape validation and serialization.
|
||||
Constrained array type allowing nptyping syntax for dtype and shape validation
|
||||
and serialization.
|
||||
|
||||
Integrates with pydantic such that
|
||||
- JSON schema for list of list encoding
|
||||
|
|
|
@ -4,6 +4,8 @@ Types for numpydantic
|
|||
Note that these are types as in python typing types, not classes.
|
||||
"""
|
||||
|
||||
# ruff: noqa: D102
|
||||
|
||||
from typing import Any, Protocol, Tuple, runtime_checkable
|
||||
|
||||
import numpy as np
|
||||
|
@ -15,6 +17,7 @@ DtypeType = np.dtype | str | type | Any | DType
|
|||
|
||||
@runtime_checkable
|
||||
class NDArrayType(Protocol):
|
||||
"""A protocol for describing types that should be considered ndarrays"""
|
||||
|
||||
@property
|
||||
def dtype(self) -> DtypeType: ...
|
||||
|
|
|
@ -83,24 +83,18 @@ def test_ndarray_coercion():
|
|||
|
||||
def test_ndarray_serialize():
|
||||
"""
|
||||
Large arrays should get compressed with blosc, otherwise just to list
|
||||
Arrays should be dumped to a list when using json, but kept as ndarray otherwise
|
||||
"""
|
||||
|
||||
class Model(BaseModel):
|
||||
large_array: NDArray[Any, Number]
|
||||
small_array: NDArray[Any, Number]
|
||||
array: NDArray[Any, Number]
|
||||
|
||||
mod = Model(
|
||||
large_array=np.random.random((1024, 1024)), small_array=np.random.random((3, 3))
|
||||
)
|
||||
mod = Model(array=np.random.random((3, 3)))
|
||||
mod_str = mod.model_dump_json()
|
||||
mod_json = json.loads(mod_str)
|
||||
for a in ("array", "shape", "dtype", "unpack_fns"):
|
||||
assert a in mod_json["large_array"].keys()
|
||||
assert isinstance(mod_json["large_array"]["array"], str)
|
||||
assert isinstance(mod_json["small_array"], list)
|
||||
assert isinstance(mod_json["array"], list)
|
||||
|
||||
# but when we just dump to a dict we don't compress
|
||||
# but when we just dump to a dict we don't coerce
|
||||
mod_dict = mod.model_dump()
|
||||
assert isinstance(mod_dict["large_array"], np.ndarray)
|
||||
|
||||
|
|
Loading…
Reference in a new issue