Lint, fix olde array compression test

sneakers-the-rat 2024-04-22 20:00:43 -07:00
parent 5b722bb6da
commit 3d1d029ab8
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
16 changed files with 109 additions and 33 deletions

View file

@@ -33,6 +33,8 @@ jobs:
   lint:
     runs-on: ubuntu-latest
     continue-on-error: true
     steps:
       - uses: actions/checkout@v3
+      - uses: chartboost/ruff-action@v1
+      - uses: psf/black@stable

View file

@@ -66,7 +66,7 @@ testpaths = [
 [tool.ruff]
 target-version = "py311"
-include = ["numpydantic/**/*.py", "pyproject.toml"]
+include = ["src/numpydantic/**/*.py", "pyproject.toml"]
 exclude = ["tests"]

 [tool.ruff.lint]

View file

@@ -1,6 +1,7 @@
 # ruff: noqa: E402
 # ruff: noqa: F401
 # ruff: noqa: I001
+# ruff: noqa: D104
 from numpydantic.monkeypatch import apply_patches

 apply_patches()

View file

@@ -1,3 +1,8 @@
+"""
+Exceptions used within numpydantic
+"""

 class DtypeError(TypeError):
     """Exception raised for invalid dtypes"""

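Worth noting: DtypeError subclasses TypeError, so handlers written for the builtin keep working. A minimal sketch (the message text is illustrative):

    from numpydantic.exceptions import DtypeError

    # DtypeError subclasses TypeError, so generic TypeError handlers
    # also catch dtype validation failures
    assert issubclass(DtypeError, TypeError)

    try:
        raise DtypeError("expected int64, got float32")  # illustrative message
    except TypeError as e:
        print(e)
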
View file

@@ -1,3 +1,7 @@
+"""
+Interfaces between nptyping types and array backends
+"""
 from numpydantic.interface.dask import DaskInterface
 from numpydantic.interface.hdf5 import H5Interface
 from numpydantic.interface.interface import Interface

View file

@@ -1,5 +1,11 @@
+"""
+Interface for Dask arrays
+"""
 from typing import Any

 import numpy as np

 from numpydantic.interface.interface import Interface

 try:

View file

@@ -1,6 +1,9 @@
-import pdb
+"""
+Interfaces for HDF5 Datasets
+"""
 from pathlib import Path
-from typing import Any, NamedTuple, Tuple, Union, TypeAlias
+from typing import Any, NamedTuple, Tuple, TypeAlias, Union

 import numpy as np
@@ -28,15 +31,15 @@ class H5Proxy:
     """
     Proxy class to mimic numpy-like array behavior with an HDF5 array

-    The attribute and item access methods only open the file for the duration of the method,
-    making it less perilous to share this object between threads and processes.
+    The attribute and item access methods only open the file for the duration of the
+    method, making it less perilous to share this object between threads and processes.

     This class attempts to be a passthrough class to a :class:`h5py.Dataset` object,
     including its attributes and item getters/setters.

     When using read-only methods, no locking is attempted (beyond the HDF5 defaults),
-    but when using the write methods (setting an array value), try and use the ``locking``
-    methods of :class:`h5py.File` .
+    but when using the write methods (setting an array value), try and use the
+    ``locking`` methods of :class:`h5py.File` .

     Args:
         file (pathlib.Path | str): Location of hdf5 file on filesystem
@@ -74,7 +77,7 @@ class H5Proxy:
             obj = h5f.get(self.path)
             obj[key] = value

-    def open(self, mode: str = "r"):
+    def open(self, mode: str = "r") -> "h5py.Dataset":
         """
         Return the opened :class:`h5py.Dataset` object
@@ -84,7 +87,7 @@ class H5Proxy:
         self._h5f = h5py.File(self.file, mode)
         return self._h5f.get(self.path)

-    def close(self):
+    def close(self) -> None:
         """
         Close the :class:`h5py.File` object left open when returning the dataset with
         :meth:`.open`
@@ -116,7 +119,10 @@ class H5Interface(Interface):
     @classmethod
     def check(cls, array: Union[H5ArrayPath, Tuple[Union[Path, str], str]]) -> bool:
-        """Check that the given array is a :class:`.H5ArrayPath` or something that resembles one."""
+        """
+        Check that the given array is a :class:`.H5ArrayPath` or something that
+        resembles one.
+        """
         if isinstance(array, H5ArrayPath):
             return True
@@ -152,7 +158,8 @@ class H5Interface(Interface):
             array = H5Proxy(file=array[0], path=array[1])
         else:
             raise ValueError(
-                "Need to specify a file and a path within an HDF5 file to use the HDF5 Interface"
+                "Need to specify a file and a path within an HDF5 file to use the HDF5 "
+                "Interface"
             )

         if not array.array_exists():
@@ -165,6 +172,14 @@ class H5Interface(Interface):
     @classmethod
     def to_json(cls, array: H5Proxy) -> dict:
+        """
+        Dump to a dictionary containing
+
+        * ``file``: :attr:`.file`
+        * ``path``: :attr:`.path`
+        * ``attrs``: Any HDF5 attributes on the dataset
+        * ``array``: The array as a list of lists
+        """
         try:
             dset = array.open()
             meta = {

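As a usage sketch of the proxy behavior documented above (file and dataset names here are hypothetical; only the H5Proxy(file=..., path=...) signature is taken from this diff):

    import h5py
    import numpy as np

    from numpydantic.interface.hdf5 import H5Proxy

    # create a small dataset to point at
    with h5py.File("data.h5", "w") as f:
        f.create_dataset("/group/array", data=np.ones((3, 3)))

    # the proxy opens the file only for the duration of each access,
    # so it can be shared between threads/processes less perilously
    proxy = H5Proxy(file="data.h5", path="/group/array")
    print(proxy[0, :])   # open read-only, slice, close
    proxy[0, 0] = 5.0    # open for writing, set, close
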
View file

@@ -1,3 +1,7 @@
+"""
+Base Interface metaclass
+"""
 from abc import ABC, abstractmethod
 from operator import attrgetter
 from typing import Any, Generic, Tuple, Type, TypeVar, Union
@@ -67,13 +71,15 @@ class Interface(ABC, Generic[T]):
             return array
         if not check_shape(array.shape, self.shape):
             raise ShapeError(
-                f"Invalid shape! expected shape {self.shape.prepared_args}, got shape {array.shape}"
+                f"Invalid shape! expected shape {self.shape.prepared_args}, "
+                f"got shape {array.shape}"
             )
         return array

     def after_validation(self, array: NDArrayType) -> T:
         """
-        Optional step post-validation that coerces the intermediate array type into the return type
+        Optional step post-validation that coerces the intermediate array type into the
+        return type

         Default method is a no-op
         """
@@ -90,13 +96,15 @@ class Interface(ABC, Generic[T]):
     @abstractmethod
     def enabled(cls) -> bool:
         """
-        Check whether this array interface can be used (eg. its dependent packages are installed, etc.)
+        Check whether this array interface can be used (eg. its dependent packages are
+        installed, etc.)
         """

     @classmethod
     def to_json(cls, array: Type[T]) -> Union[list, dict]:
         """
-        Convert an array of :attr:`.return_type` to a JSON-compatible format using base python types
+        Convert an array of :attr:`.return_type` to a JSON-compatible format using
+        base python types
         """
         if not isinstance(array, np.ndarray):
             array = np.array(array)

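To make the hook flow concrete, a sketch of a subclass (illustrative; the real abstract surface may include more members than this hunk shows): check() claims an input, before_validation() coerces it to the intermediate array type, the base class verifies shape and dtype, and after_validation() produces the return type.

    from typing import Any

    import numpy as np

    from numpydantic.interface.interface import Interface

    class ListInterface(Interface):
        """Illustrative interface that validates plain nested lists."""

        @classmethod
        def check(cls, array: Any) -> bool:
            # claim plain python lists for this interface
            return isinstance(array, list)

        @classmethod
        def enabled(cls) -> bool:
            # no optional dependency to check for
            return True

        def before_validation(self, array: Any) -> np.ndarray:
            # coerce to the intermediate type the shape check expects
            return np.array(array)

        # after_validation is inherited: a no-op, per the docstring above

A validator then does roughly Interface.match(value) followed by interface_cls(shape, dtype).validate(value), as the ndarray.py hunk further down shows.
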
View file

@@ -1,3 +1,7 @@
+"""
+Interface to numpy arrays
+"""
 from typing import Any

 from numpydantic.interface.interface import Interface
@@ -22,7 +26,10 @@ class NumpyInterface(Interface):
     @classmethod
     def check(cls, array: Any) -> bool:
-        """Check that this is in fact a numpy ndarray or something that can be coerced to one"""
+        """
+        Check that this is in fact a numpy ndarray or something that can be
+        coerced to one
+        """
         if isinstance(array, ndarray):
             return True
         else:
@@ -34,7 +41,8 @@ class NumpyInterface(Interface):
     def before_validation(self, array: Any) -> ndarray:
         """
-        Coerce to an ndarray. We have already checked if coercion is possible in :meth:`.check`
+        Coerce to an ndarray. We have already checked if coercion is possible
+        in :meth:`.check`
         """
         if not isinstance(array, ndarray):
             array = ndarray(array)

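One caveat, unchanged by this commit: ndarray(array) in before_validation calls the raw numpy.ndarray constructor, which interprets its first argument as a shape rather than as data; the conventional coercion is np.array()/np.asarray(). A quick sketch of the difference:

    import numpy as np

    data = [[1, 2], [3, 4]]

    coerced = np.asarray(data)   # usual coercion: values preserved
    print(coerced.shape)         # (2, 2)

    # np.ndarray((2, 2)) instead allocates an uninitialized 2x2 array,
    # and np.ndarray(data) raises, since nested lists are not a valid shape
    uninit = np.ndarray((2, 2))
    print(uninit.shape)          # (2, 2), but contents are garbage
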
View file

@@ -0,0 +1,5 @@
+"""
+Interface to xarray
+
+(Not implemented)
+"""

View file

@@ -0,0 +1,5 @@
+"""
+Interface to zarr arrays
+
+(Not Implemented)
+"""

View file

@@ -1,8 +1,12 @@
+"""
+Maps from one value system to another
+"""
 from datetime import datetime
 from typing import Any

 import numpy as np
-from nptyping import Float, Int, String, Bool
+from nptyping import Bool, Float, Int, String

 np_to_python = {
     Any: Any,
@@ -43,6 +47,7 @@ np_to_python = {
     },
     **{n: str for n in (np.character, np.str_, np.string_, np.unicode_)},
 }
+"""Map from python types to numpy"""

 flat_to_nptyping = {
@@ -75,5 +80,7 @@ flat_to_nptyping = {
     "AnyType": "Any",
     "object": "Object",
 }
+"""Map from NWB-style flat dtypes to nptyping types"""

 python_to_nptyping = {float: Float, str: String, int: Int, bool: Bool}
+"""Map from python types to nptyping types"""

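A small sketch of the maps in use (resolving the string names via getattr on nptyping is an assumption about how callers consume flat_to_nptyping):

    import nptyping

    from numpydantic.maps import flat_to_nptyping, python_to_nptyping

    # flat string dtype name -> nptyping type name -> nptyping type
    obj_type = getattr(nptyping, flat_to_nptyping["object"])  # nptyping.Object

    # python builtin -> nptyping type
    assert python_to_nptyping[float] is nptyping.Float
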
View file

@@ -1,3 +1,10 @@
+"""
+Functions to monkeypatch dependent packages - most notably nptyping
+"""
+
+# ruff: noqa: ANN001
 def patch_npytyping_perf() -> None:
     """
     npytyping makes an expensive call to inspect.stack()
@@ -14,6 +21,7 @@ def patch_npytyping_perf() -> None:
     from nptyping.pandas_ import dataframe

     # make a new __module__ methods for the affected classes
     def new_module_ndarray(cls) -> str:
         return cls._get_module(inspect.currentframe(), "nptyping.ndarray")

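For a sense of why this patch exists, a runnable micro-benchmark sketch (the exact lookup nptyping performs differs, but the cost profile is the point): inspect.stack() resolves every frame on the call stack, while inspect.currentframe() grabs a single frame pointer.

    import inspect
    import timeit

    def slow() -> str:
        # what nptyping did: materialize the entire resolved stack
        return inspect.stack()[0].frame.f_globals["__name__"]

    def fast() -> str:
        # what the patch does instead: grab only the current frame
        return inspect.currentframe().f_globals["__name__"]

    print(timeit.timeit(slow, number=100))  # orders of magnitude slower
    print(timeit.timeit(fast, number=100))
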
View file

@@ -5,7 +5,7 @@ Extension of nptyping NDArray for pydantic that allows for JSON-Schema serialization
 """
 from collections.abc import Callable
-from typing import Any, Tuple, Union
+from typing import TYPE_CHECKING, Any, Tuple, Union

 import nptyping.structure
 import numpy as np
@@ -21,6 +21,9 @@ from numpydantic.maps import np_to_python
 # from numpydantic.proxy import NDArrayProxy
 from numpydantic.types import DtypeType, NDArrayType, ShapeType

+if TYPE_CHECKING:
+    from pydantic import ValidationInfo
+
 COMPRESSION_THRESHOLD = 16 * 1024
 """
 Arrays larger than this size (in bytes) will be compressed and b64 encoded when
@@ -62,10 +65,11 @@ def list_of_lists_schema(shape: Shape, array_type_handler: dict) -> ListSchema:
 def _get_validate_interface(shape: ShapeType, dtype: DtypeType) -> Callable:
     """
-    Validate using a matching :class:`.Interface` class using its :meth:`.Interface.validate` method
+    Validate using a matching :class:`.Interface` class using its
+    :meth:`.Interface.validate` method
     """

-    def validate_interface(value: Any, info) -> NDArrayType:
+    def validate_interface(value: Any, info: "ValidationInfo") -> NDArrayType:
         interface_cls = Interface.match(value)
         interface = interface_cls(shape, dtype)
         value = interface.validate(value)
@@ -99,7 +103,8 @@ class NDArrayMeta(_NDArrayMeta, implementation="NDArray"):
 class NDArray(NPTypingType, metaclass=NDArrayMeta):
     """
-    Constrained array type allowing npytyping syntax for dtype and shape validation and serialization.
+    Constrained array type allowing npytyping syntax for dtype and shape validation
+    and serialization.

     Integrates with pydantic such that

     - JSON schema for list of list encoding

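The truncated COMPRESSION_THRESHOLD docstring refers to the blosc + base64 path that the test change below drops from serialization. A round-trip sketch consistent with that description (the exact packing numpydantic used is not shown in this hunk):

    import base64

    import blosc  # python-blosc
    import numpy as np

    COMPRESSION_THRESHOLD = 16 * 1024  # 16 KiB, as above

    arr = np.random.random((128, 128))  # 128 KiB of float64
    if arr.nbytes > COMPRESSION_THRESHOLD:
        # pack_array keeps dtype/shape alongside the compressed buffer
        payload = base64.b64encode(blosc.pack_array(arr)).decode("ascii")
        restored = blosc.unpack_array(base64.b64decode(payload))
        assert np.array_equal(arr, restored)
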
View file

@@ -4,6 +4,8 @@ Types for numpydantic
 Note that these are types as in python typing types, not classes.
 """
+
+# ruff: noqa: D102
 from typing import Any, Protocol, Tuple, runtime_checkable

 import numpy as np
@@ -15,6 +17,7 @@ DtypeType = np.dtype | str | type | Any | DType
 @runtime_checkable
 class NDArrayType(Protocol):
     """A protocol for describing types that should be considered ndarrays"""
+
     @property
     def dtype(self) -> DtypeType: ...

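Because NDArrayType is a runtime_checkable Protocol, isinstance checks test structure rather than inheritance: anything exposing the declared members (dtype here, plus whatever follows in the truncated definition) passes.

    import numpy as np

    from numpydantic.types import NDArrayType

    print(isinstance(np.zeros(3), NDArrayType))  # True: ndarray has .dtype
    print(isinstance([1, 2, 3], NDArrayType))    # False: lists do not
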
View file

@@ -83,24 +83,18 @@ def test_ndarray_coercion():
 def test_ndarray_serialize():
     """
-    Large arrays should get compressed with blosc, otherwise just to list
+    Arrays should be dumped to a list when using json, but kept as ndarray otherwise
     """

     class Model(BaseModel):
-        large_array: NDArray[Any, Number]
-        small_array: NDArray[Any, Number]
+        array: NDArray[Any, Number]

-    mod = Model(
-        large_array=np.random.random((1024, 1024)), small_array=np.random.random((3, 3))
-    )
+    mod = Model(array=np.random.random((3, 3)))
     mod_str = mod.model_dump_json()
     mod_json = json.loads(mod_str)
-    for a in ("array", "shape", "dtype", "unpack_fns"):
-        assert a in mod_json["large_array"].keys()
-    assert isinstance(mod_json["large_array"]["array"], str)
-    assert isinstance(mod_json["small_array"], list)
+    assert isinstance(mod_json["array"], list)

-    # but when we just dump to a dict we don't compress
+    # but when we just dump to a dict we don't coerce
     mod_dict = mod.model_dump()
-    assert isinstance(mod_dict["large_array"], np.ndarray)