Lint, fix olde array compression test

This commit is contained in:
sneakers-the-rat 2024-04-22 20:00:43 -07:00
parent 5b722bb6da
commit 3d1d029ab8
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
16 changed files with 109 additions and 33 deletions

View file

@@ -33,6 +33,8 @@ jobs:
   lint:
     runs-on: ubuntu-latest
+    continue-on-error: true
     steps:
       - uses: actions/checkout@v3
+      - uses: chartboost/ruff-action@v1
       - uses: psf/black@stable

View file

@@ -66,7 +66,7 @@ testpaths = [
 [tool.ruff]
 target-version = "py311"
-include = ["numpydantic/**/*.py", "pyproject.toml"]
+include = ["src/numpydantic/**/*.py", "pyproject.toml"]
 exclude = ["tests"]

 [tool.ruff.lint]

View file

@@ -1,6 +1,7 @@
 # ruff: noqa: E402
 # ruff: noqa: F401
 # ruff: noqa: I001
+# ruff: noqa: D104
 from numpydantic.monkeypatch import apply_patches

 apply_patches()

View file

@ -1,3 +1,8 @@
"""
Exceptions used within numpydantic
"""
class DtypeError(TypeError): class DtypeError(TypeError):
"""Exception raised for invalid dtypes""" """Exception raised for invalid dtypes"""

View file

@ -1,3 +1,7 @@
"""
Interfaces between nptyping types and array backends
"""
from numpydantic.interface.dask import DaskInterface from numpydantic.interface.dask import DaskInterface
from numpydantic.interface.hdf5 import H5Interface from numpydantic.interface.hdf5 import H5Interface
from numpydantic.interface.interface import Interface from numpydantic.interface.interface import Interface

View file

@ -1,5 +1,11 @@
"""
Interface for Dask arrays
"""
from typing import Any from typing import Any
import numpy as np import numpy as np
from numpydantic.interface.interface import Interface from numpydantic.interface.interface import Interface
try: try:

View file

@ -1,6 +1,9 @@
import pdb """
Interfaces for HDF5 Datasets
"""
from pathlib import Path from pathlib import Path
from typing import Any, NamedTuple, Tuple, Union, TypeAlias from typing import Any, NamedTuple, Tuple, TypeAlias, Union
import numpy as np import numpy as np
@@ -28,15 +31,15 @@ class H5Proxy:
     """
     Proxy class to mimic numpy-like array behavior with an HDF5 array

-    The attribute and item access methods only open the file for the duration of the method,
-    making it less perilous to share this object between threads and processes.
+    The attribute and item access methods only open the file for the duration of the
+    method, making it less perilous to share this object between threads and processes.

     This class attempts to be a passthrough class to a :class:`h5py.Dataset` object,
     including its attributes and item getters/setters.

     When using read-only methods, no locking is attempted (beyond the HDF5 defaults),
-    but when using the write methods (setting an array value), try and use the ``locking``
-    methods of :class:`h5py.File` .
+    but when using the write methods (setting an array value), try and use the
+    ``locking`` methods of :class:`h5py.File` .

     Args:
         file (pathlib.Path | str): Location of hdf5 file on filesystem
@@ -74,7 +77,7 @@ class H5Proxy:
             obj = h5f.get(self.path)
             obj[key] = value

-    def open(self, mode: str = "r"):
+    def open(self, mode: str = "r") -> "h5py.Dataset":
         """
         Return the opened :class:`h5py.Dataset` object
@@ -84,7 +87,7 @@ class H5Proxy:
         self._h5f = h5py.File(self.file, mode)
         return self._h5f.get(self.path)

-    def close(self):
+    def close(self) -> None:
         """
         Close the :class:`h5py.File` object left open when returning the dataset with
         :meth:`.open`
@@ -116,7 +119,10 @@ class H5Interface(Interface):

     @classmethod
     def check(cls, array: Union[H5ArrayPath, Tuple[Union[Path, str], str]]) -> bool:
-        """Check that the given array is a :class:`.H5ArrayPath` or something that resembles one."""
+        """
+        Check that the given array is a :class:`.H5ArrayPath` or something that
+        resembles one.
+        """
         if isinstance(array, H5ArrayPath):
             return True
@@ -152,7 +158,8 @@ class H5Interface(Interface):
             array = H5Proxy(file=array[0], path=array[1])
         else:
             raise ValueError(
-                "Need to specify a file and a path within an HDF5 file to use the HDF5 Interface"
+                "Need to specify a file and a path within an HDF5 file to use the HDF5 "
+                "Interface"
             )

         if not array.array_exists():
@@ -165,6 +172,14 @@ class H5Interface(Interface):

     @classmethod
     def to_json(cls, array: H5Proxy) -> dict:
+        """
+        Dump to a dictionary containing
+
+        * ``file``: :attr:`.file`
+        * ``path``: :attr:`.path`
+        * ``attrs``: Any HDF5 attributes on the dataset
+        * ``array``: The array as a list of lists
+        """
         try:
             dset = array.open()
             meta = {
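As a usage sketch (not part of this commit): assuming h5py is installed, that NDArray and Number validate as in the tests below, and that H5ArrayPath takes the file/path fields described in the H5Proxy docstring, the interface above could be exercised like this:

from typing import Any

import h5py
import numpy as np
from nptyping import Number
from pydantic import BaseModel

from numpydantic import NDArray
from numpydantic.interface.hdf5 import H5ArrayPath

# hypothetical file and dataset names, written here just for the example
with h5py.File("example.h5", "w") as h5f:
    h5f.create_dataset("/data/array", data=np.zeros((3, 3)))

class Model(BaseModel):
    array: NDArray[Any, Number]

# H5Interface.check() matches the H5ArrayPath, and validation returns an
# H5Proxy that opens the file only for the duration of each access
mod = Model(array=H5ArrayPath(file="example.h5", path="/data/array"))
print(mod.array[0, :])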

View file

@ -1,3 +1,7 @@
"""
Base Interface metaclass
"""
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from operator import attrgetter from operator import attrgetter
from typing import Any, Generic, Tuple, Type, TypeVar, Union from typing import Any, Generic, Tuple, Type, TypeVar, Union
@@ -67,13 +71,15 @@ class Interface(ABC, Generic[T]):
             return array

         if not check_shape(array.shape, self.shape):
             raise ShapeError(
-                f"Invalid shape! expected shape {self.shape.prepared_args}, got shape {array.shape}"
+                f"Invalid shape! expected shape {self.shape.prepared_args}, "
+                f"got shape {array.shape}"
             )
         return array

     def after_validation(self, array: NDArrayType) -> T:
         """
-        Optional step post-validation that coerces the intermediate array type into the return type
+        Optional step post-validation that coerces the intermediate array type into the
+        return type

         Default method is a no-op
         """
@@ -90,13 +96,15 @@ class Interface(ABC, Generic[T]):
     @abstractmethod
     def enabled(cls) -> bool:
         """
-        Check whether this array interface can be used (eg. its dependent packages are installed, etc.)
+        Check whether this array interface can be used (eg. its dependent packages are
+        installed, etc.)
         """

     @classmethod
     def to_json(cls, array: Type[T]) -> Union[list, dict]:
         """
-        Convert an array of :attr:`.return_type` to a JSON-compatible format using base python types
+        Convert an array of :attr:`.return_type` to a JSON-compatible format using
+        base python types
         """
         if not isinstance(array, np.ndarray):
             array = np.array(array)

View file

@ -1,3 +1,7 @@
"""
Interface to numpy arrays
"""
from typing import Any from typing import Any
from numpydantic.interface.interface import Interface from numpydantic.interface.interface import Interface
@@ -22,7 +26,10 @@ class NumpyInterface(Interface):

     @classmethod
     def check(cls, array: Any) -> bool:
-        """Check that this is in fact a numpy ndarray or something that can be coerced to one"""
+        """
+        Check that this is in fact a numpy ndarray or something that can be
+        coerced to one
+        """
         if isinstance(array, ndarray):
             return True
         else:
@@ -34,7 +41,8 @@ class NumpyInterface(Interface):
     def before_validation(self, array: Any) -> ndarray:
         """
-        Coerce to an ndarray. We have already checked if coercion is possible in :meth:`.check`
+        Coerce to an ndarray. We have already checked if coercion is possible
+        in :meth:`.check`
         """
         if not isinstance(array, ndarray):
             array = ndarray(array)

View file

@@ -0,0 +1,5 @@
+"""
+Interface to xarray
+
+(Not implemented)
+"""

View file

@@ -0,0 +1,5 @@
+"""
+Interface to zarr arrays
+
+(Not Implemented)
+"""

View file

@ -1,8 +1,12 @@
"""
Maps from one value system to another
"""
from datetime import datetime from datetime import datetime
from typing import Any from typing import Any
import numpy as np import numpy as np
from nptyping import Float, Int, String, Bool from nptyping import Bool, Float, Int, String
np_to_python = { np_to_python = {
Any: Any, Any: Any,
@@ -43,6 +47,7 @@ np_to_python = {
     },
     **{n: str for n in (np.character, np.str_, np.string_, np.unicode_)},
 }
+"""Map from python types to numpy"""

 flat_to_nptyping = {
@@ -75,5 +80,7 @@ flat_to_nptyping = {
     "AnyType": "Any",
     "object": "Object",
 }
+"""Map from NWB-style flat dtypes to nptyping types"""

 python_to_nptyping = {float: Float, str: String, int: Int, bool: Bool}
+"""Map from python types to nptyping types"""

View file

@ -1,3 +1,10 @@
"""
Functions to monkeypatch dependent packages - most notably nptyping
"""
# ruff: noqa: ANN001
def patch_npytyping_perf() -> None: def patch_npytyping_perf() -> None:
""" """
npytyping makes an expensive call to inspect.stack() npytyping makes an expensive call to inspect.stack()
@@ -14,6 +21,7 @@ def patch_npytyping_perf() -> None:
     from nptyping.pandas_ import dataframe

     # make a new __module__ methods for the affected classes
+
     def new_module_ndarray(cls) -> str:
         return cls._get_module(inspect.currentframe(), "nptyping.ndarray")
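Downstream code only needs the one entry point; as the package __init__ in the first hunk shows, the patches are applied with:

from numpydantic.monkeypatch import apply_patches

# must run before the affected nptyping classes are used
apply_patches()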

View file

@@ -5,7 +5,7 @@ Extension of nptyping NDArray for pydantic that allows for JSON-Schema serializa
 """

 from collections.abc import Callable
-from typing import Any, Tuple, Union
+from typing import TYPE_CHECKING, Any, Tuple, Union

 import nptyping.structure
 import numpy as np
@@ -21,6 +21,9 @@ from numpydantic.maps import np_to_python
 # from numpydantic.proxy import NDArrayProxy
 from numpydantic.types import DtypeType, NDArrayType, ShapeType

+if TYPE_CHECKING:
+    from pydantic import ValidationInfo
+
 COMPRESSION_THRESHOLD = 16 * 1024
 """
 Arrays larger than this size (in bytes) will be compressed and b64 encoded when
@@ -62,10 +65,11 @@ def list_of_lists_schema(shape: Shape, array_type_handler: dict) -> ListSchema:
 def _get_validate_interface(shape: ShapeType, dtype: DtypeType) -> Callable:
     """
-    Validate using a matching :class:`.Interface` class using its :meth:`.Interface.validate` method
+    Validate using a matching :class:`.Interface` class using its
+    :meth:`.Interface.validate` method
     """

-    def validate_interface(value: Any, info) -> NDArrayType:
+    def validate_interface(value: Any, info: "ValidationInfo") -> NDArrayType:
         interface_cls = Interface.match(value)
         interface = interface_cls(shape, dtype)
         value = interface.validate(value)
@@ -99,7 +103,8 @@ class NDArrayMeta(_NDArrayMeta, implementation="NDArray"):
 class NDArray(NPTypingType, metaclass=NDArrayMeta):
     """
-    Constrained array type allowing npytyping syntax for dtype and shape validation and serialization.
+    Constrained array type allowing npytyping syntax for dtype and shape validation
+    and serialization.

     Integrates with pydantic such that
     - JSON schema for list of list encoding
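Putting the pieces together, a sketch of the pydantic integration the docstring describes (the field name and shape here are illustrative):

from typing import Any

import numpy as np
from nptyping import Number
from pydantic import BaseModel

from numpydantic import NDArray

class Model(BaseModel):
    array: NDArray[Any, Number]

# the JSON schema advertises the list-of-lists encoding described above
print(Model.model_json_schema()["properties"]["array"])

# validation routes through _get_validate_interface() to a matching Interface
mod = Model(array=np.random.random((2, 2)))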

View file

@@ -4,6 +4,8 @@ Types for numpydantic
 Note that these are types as in python typing types, not classes.
 """

+# ruff: noqa: D102
+
 from typing import Any, Protocol, Tuple, runtime_checkable

 import numpy as np
@@ -15,6 +17,7 @@ DtypeType = np.dtype | str | type | Any | DType

 @runtime_checkable
 class NDArrayType(Protocol):
+    """A protocol for describing types that should be considered ndarrays"""

     @property
     def dtype(self) -> DtypeType: ...
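Since the protocol is runtime_checkable, isinstance() only checks that the declared attributes exist; a real ndarray, which exposes dtype and shape, should pass (a hedged illustration):

import numpy as np

from numpydantic.types import NDArrayType

# structural check only: verifies the attributes are present, not their types
print(isinstance(np.zeros((2, 2)), NDArrayType))  # expected: True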

View file

@@ -83,24 +83,18 @@ def test_ndarray_coercion():
 def test_ndarray_serialize():
     """
-    Large arrays should get compressed with blosc, otherwise just to list
+    Arrays should be dumped to a list when using json, but kept as ndarray otherwise
     """

     class Model(BaseModel):
-        large_array: NDArray[Any, Number]
-        small_array: NDArray[Any, Number]
+        array: NDArray[Any, Number]

-    mod = Model(
-        large_array=np.random.random((1024, 1024)), small_array=np.random.random((3, 3))
-    )
+    mod = Model(array=np.random.random((3, 3)))
     mod_str = mod.model_dump_json()
     mod_json = json.loads(mod_str)
-    for a in ("array", "shape", "dtype", "unpack_fns"):
-        assert a in mod_json["large_array"].keys()
-    assert isinstance(mod_json["large_array"]["array"], str)
-    assert isinstance(mod_json["small_array"], list)
+    assert isinstance(mod_json["array"], list)

-    # but when we just dump to a dict we don't compress
+    # but when we just dump to a dict we don't coerce
     mod_dict = mod.model_dump()
-    assert isinstance(mod_dict["large_array"], np.ndarray)
+    assert isinstance(mod_dict["array"], np.ndarray)