From 3d1d029ab8342af077246115d324a2d400c586b1 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Mon, 22 Apr 2024 20:00:43 -0700 Subject: [PATCH] Lint, fix olde array compression test --- .github/workflows/tests.yml | 2 ++ pyproject.toml | 2 +- src/numpydantic/__init__.py | 1 + src/numpydantic/exceptions.py | 5 ++++ src/numpydantic/interface/__init__.py | 4 +++ src/numpydantic/interface/dask.py | 6 +++++ src/numpydantic/interface/hdf5.py | 35 ++++++++++++++++++-------- src/numpydantic/interface/interface.py | 16 +++++++++--- src/numpydantic/interface/numpy.py | 12 +++++++-- src/numpydantic/interface/xarray.py | 5 ++++ src/numpydantic/interface/zarr.py | 5 ++++ src/numpydantic/maps.py | 9 ++++++- src/numpydantic/monkeypatch.py | 8 ++++++ src/numpydantic/ndarray.py | 13 +++++++--- src/numpydantic/types.py | 3 +++ tests/test_ndarray.py | 16 ++++-------- 16 files changed, 109 insertions(+), 33 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 70752f5..a0073d0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -33,6 +33,8 @@ jobs: lint: runs-on: ubuntu-latest + continue-on-error: true steps: - uses: actions/checkout@v3 + - uses: chartboost/ruff-action@v1 - uses: psf/black@stable \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 3d6ce6d..03ab604 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ testpaths = [ [tool.ruff] target-version = "py311" -include = ["numpydantic/**/*.py", "pyproject.toml"] +include = ["src/numpydantic/**/*.py", "pyproject.toml"] exclude = ["tests"] [tool.ruff.lint] diff --git a/src/numpydantic/__init__.py b/src/numpydantic/__init__.py index fc4d17d..33bb17f 100644 --- a/src/numpydantic/__init__.py +++ b/src/numpydantic/__init__.py @@ -1,6 +1,7 @@ # ruff: noqa: E402 # ruff: noqa: F401 # ruff: noqa: I001 +# ruff: noqa: D104 from numpydantic.monkeypatch import apply_patches apply_patches() diff --git a/src/numpydantic/exceptions.py 
b/src/numpydantic/exceptions.py index ba195a0..20d4981 100644 --- a/src/numpydantic/exceptions.py +++ b/src/numpydantic/exceptions.py @@ -1,3 +1,8 @@ +""" +Exceptions used within numpydantic +""" + + class DtypeError(TypeError): """Exception raised for invalid dtypes""" diff --git a/src/numpydantic/interface/__init__.py b/src/numpydantic/interface/__init__.py index 2ad3682..3139756 100644 --- a/src/numpydantic/interface/__init__.py +++ b/src/numpydantic/interface/__init__.py @@ -1,3 +1,7 @@ +""" +Interfaces between nptyping types and array backends +""" + from numpydantic.interface.dask import DaskInterface from numpydantic.interface.hdf5 import H5Interface from numpydantic.interface.interface import Interface diff --git a/src/numpydantic/interface/dask.py b/src/numpydantic/interface/dask.py index 484cabe..ba16d5e 100644 --- a/src/numpydantic/interface/dask.py +++ b/src/numpydantic/interface/dask.py @@ -1,5 +1,11 @@ +""" +Interface for Dask arrays +""" + from typing import Any + import numpy as np + from numpydantic.interface.interface import Interface try: diff --git a/src/numpydantic/interface/hdf5.py b/src/numpydantic/interface/hdf5.py index 99ae36b..cd75a97 100644 --- a/src/numpydantic/interface/hdf5.py +++ b/src/numpydantic/interface/hdf5.py @@ -1,6 +1,9 @@ -import pdb +""" +Interfaces for HDF5 Datasets +""" + from pathlib import Path -from typing import Any, NamedTuple, Tuple, Union, TypeAlias +from typing import Any, NamedTuple, Tuple, TypeAlias, Union import numpy as np @@ -28,15 +31,15 @@ class H5Proxy: """ Proxy class to mimic numpy-like array behavior with an HDF5 array - The attribute and item access methods only open the file for the duration of the method, - making it less perilous to share this object between threads and processes. + The attribute and item access methods only open the file for the duration of the + method, making it less perilous to share this object between threads and processes. 
This class attempts to be a passthrough class to a :class:`h5py.Dataset` object, including its attributes and item getters/setters. When using read-only methods, no locking is attempted (beyond the HDF5 defaults), - but when using the write methods (setting an array value), try and use the ``locking`` - methods of :class:`h5py.File` . + but when using the write methods (setting an array value), try and use the + ``locking`` methods of :class:`h5py.File` . Args: file (pathlib.Path | str): Location of hdf5 file on filesystem @@ -74,7 +77,7 @@ class H5Proxy: obj = h5f.get(self.path) obj[key] = value - def open(self, mode: str = "r"): + def open(self, mode: str = "r") -> "h5py.Dataset": """ Return the opened :class:`h5py.Dataset` object @@ -84,7 +87,7 @@ class H5Proxy: self._h5f = h5py.File(self.file, mode) return self._h5f.get(self.path) - def close(self): + def close(self) -> None: """ Close the :class:`h5py.File` object left open when returning the dataset with :meth:`.open` @@ -116,7 +119,10 @@ class H5Interface(Interface): @classmethod def check(cls, array: Union[H5ArrayPath, Tuple[Union[Path, str], str]]) -> bool: - """Check that the given array is a :class:`.H5ArrayPath` or something that resembles one.""" + """ + Check that the given array is a :class:`.H5ArrayPath` or something that + resembles one. 
+ """ if isinstance(array, H5ArrayPath): return True @@ -152,7 +158,8 @@ class H5Interface(Interface): array = H5Proxy(file=array[0], path=array[1]) else: raise ValueError( - "Need to specify a file and a path within an HDF5 file to use the HDF5 Interface" + "Need to specify a file and a path within an HDF5 file to use the HDF5 " + "Interface" ) if not array.array_exists(): @@ -165,6 +172,14 @@ class H5Interface(Interface): @classmethod def to_json(cls, array: H5Proxy) -> dict: + """ + Dump to a dictionary containing + + * ``file``: :attr:`.file` + * ``path``: :attr:`.path` + * ``attrs``: Any HDF5 attributes on the dataset + * ``array``: The array as a list of lists + """ try: dset = array.open() meta = { diff --git a/src/numpydantic/interface/interface.py b/src/numpydantic/interface/interface.py index 03cf074..86646a8 100644 --- a/src/numpydantic/interface/interface.py +++ b/src/numpydantic/interface/interface.py @@ -1,3 +1,7 @@ +""" +Base Interface metaclass +""" + from abc import ABC, abstractmethod from operator import attrgetter from typing import Any, Generic, Tuple, Type, TypeVar, Union @@ -67,13 +71,15 @@ class Interface(ABC, Generic[T]): return array if not check_shape(array.shape, self.shape): raise ShapeError( - f"Invalid shape! expected shape {self.shape.prepared_args}, got shape {array.shape}" + f"Invalid shape! expected shape {self.shape.prepared_args}, " + f"got shape {array.shape}" ) return array def after_validation(self, array: NDArrayType) -> T: """ - Optional step post-validation that coerces the intermediate array type into the return type + Optional step post-validation that coerces the intermediate array type into the + return type Default method is a no-op """ @@ -90,13 +96,15 @@ class Interface(ABC, Generic[T]): @abstractmethod def enabled(cls) -> bool: """ - Check whether this array interface can be used (eg. its dependent packages are installed, etc.) + Check whether this array interface can be used (eg. 
its dependent packages are + installed, etc.) """ @classmethod def to_json(cls, array: Type[T]) -> Union[list, dict]: """ - Convert an array of :attr:`.return_type` to a JSON-compatible format using base python types + Convert an array of :attr:`.return_type` to a JSON-compatible format using + base python types """ if not isinstance(array, np.ndarray): array = np.array(array) diff --git a/src/numpydantic/interface/numpy.py b/src/numpydantic/interface/numpy.py index fae7baf..75efafd 100644 --- a/src/numpydantic/interface/numpy.py +++ b/src/numpydantic/interface/numpy.py @@ -1,3 +1,7 @@ +""" +Interface to numpy arrays +""" + from typing import Any from numpydantic.interface.interface import Interface @@ -22,7 +26,10 @@ class NumpyInterface(Interface): @classmethod def check(cls, array: Any) -> bool: - """Check that this is in fact a numpy ndarray or something that can be coerced to one""" + """ + Check that this is in fact a numpy ndarray or something that can be + coerced to one + """ if isinstance(array, ndarray): return True else: @@ -34,7 +41,8 @@ class NumpyInterface(Interface): def before_validation(self, array: Any) -> ndarray: """ - Coerce to an ndarray. We have already checked if coercion is possible in :meth:`.check` + Coerce to an ndarray. 
We have already checked if coercion is possible + in :meth:`.check` """ if not isinstance(array, ndarray): array = ndarray(array) diff --git a/src/numpydantic/interface/xarray.py b/src/numpydantic/interface/xarray.py index e69de29..0891cfb 100644 --- a/src/numpydantic/interface/xarray.py +++ b/src/numpydantic/interface/xarray.py @@ -0,0 +1,5 @@ +""" +Interface to xarray + +(Not implemented) +""" diff --git a/src/numpydantic/interface/zarr.py b/src/numpydantic/interface/zarr.py index e69de29..4880d4e 100644 --- a/src/numpydantic/interface/zarr.py +++ b/src/numpydantic/interface/zarr.py @@ -0,0 +1,5 @@ +""" +Interface to zarr arrays + +(Not Implemented) +""" diff --git a/src/numpydantic/maps.py b/src/numpydantic/maps.py index 713b2dc..37edd7f 100644 --- a/src/numpydantic/maps.py +++ b/src/numpydantic/maps.py @@ -1,8 +1,12 @@ +""" +Maps from one value system to another +""" + from datetime import datetime from typing import Any import numpy as np -from nptyping import Float, Int, String, Bool +from nptyping import Bool, Float, Int, String np_to_python = { Any: Any, @@ -43,6 +47,7 @@ np_to_python = { }, **{n: str for n in (np.character, np.str_, np.string_, np.unicode_)}, } +"""Map from numpy types to python types""" flat_to_nptyping = { @@ -75,5 +80,7 @@ flat_to_nptyping = { "AnyType": "Any", "object": "Object", } +"""Map from NWB-style flat dtypes to nptyping types""" python_to_nptyping = {float: Float, str: String, int: Int, bool: Bool} +"""Map from python types to nptyping types""" diff --git a/src/numpydantic/monkeypatch.py b/src/numpydantic/monkeypatch.py index 7940ba4..8935701 100644 --- a/src/numpydantic/monkeypatch.py +++ b/src/numpydantic/monkeypatch.py @@ -1,3 +1,10 @@ +""" +Functions to monkeypatch dependent packages - most notably nptyping +""" + +# ruff: noqa: ANN001 + + def patch_npytyping_perf() -> None: """ npytyping makes an expensive call to inspect.stack() @@ -14,6 +21,7 @@ def patch_npytyping_perf() -> None: from nptyping.pandas_ import dataframe # make 
a new __module__ methods for the affected classes + def new_module_ndarray(cls) -> str: return cls._get_module(inspect.currentframe(), "nptyping.ndarray") diff --git a/src/numpydantic/ndarray.py b/src/numpydantic/ndarray.py index 0615b3d..f6aabd1 100644 --- a/src/numpydantic/ndarray.py +++ b/src/numpydantic/ndarray.py @@ -5,7 +5,7 @@ Extension of nptyping NDArray for pydantic that allows for JSON-Schema serializa """ from collections.abc import Callable -from typing import Any, Tuple, Union +from typing import TYPE_CHECKING, Any, Tuple, Union import nptyping.structure import numpy as np @@ -21,6 +21,9 @@ from numpydantic.maps import np_to_python # from numpydantic.proxy import NDArrayProxy from numpydantic.types import DtypeType, NDArrayType, ShapeType +if TYPE_CHECKING: + from pydantic import ValidationInfo + COMPRESSION_THRESHOLD = 16 * 1024 """ Arrays larger than this size (in bytes) will be compressed and b64 encoded when @@ -62,10 +65,11 @@ def list_of_lists_schema(shape: Shape, array_type_handler: dict) -> ListSchema: def _get_validate_interface(shape: ShapeType, dtype: DtypeType) -> Callable: """ - Validate using a matching :class:`.Interface` class using its :meth:`.Interface.validate` method + Validate using a matching :class:`.Interface` class using its + :meth:`.Interface.validate` method """ - def validate_interface(value: Any, info) -> NDArrayType: + def validate_interface(value: Any, info: "ValidationInfo") -> NDArrayType: interface_cls = Interface.match(value) interface = interface_cls(shape, dtype) value = interface.validate(value) @@ -99,7 +103,8 @@ class NDArrayMeta(_NDArrayMeta, implementation="NDArray"): class NDArray(NPTypingType, metaclass=NDArrayMeta): """ - Constrained array type allowing npytyping syntax for dtype and shape validation and serialization. + Constrained array type allowing npytyping syntax for dtype and shape validation + and serialization. 
Integrates with pydantic such that - JSON schema for list of list encoding diff --git a/src/numpydantic/types.py b/src/numpydantic/types.py index f54968e..30a5e5b 100644 --- a/src/numpydantic/types.py +++ b/src/numpydantic/types.py @@ -4,6 +4,8 @@ Types for numpydantic Note that these are types as in python typing types, not classes. """ +# ruff: noqa: D102 + from typing import Any, Protocol, Tuple, runtime_checkable import numpy as np @@ -15,6 +17,7 @@ DtypeType = np.dtype | str | type | Any | DType @runtime_checkable class NDArrayType(Protocol): + """A protocol for describing types that should be considered ndarrays""" @property def dtype(self) -> DtypeType: ... diff --git a/tests/test_ndarray.py b/tests/test_ndarray.py index 61bbaa5..b8697ed 100644 --- a/tests/test_ndarray.py +++ b/tests/test_ndarray.py @@ -83,24 +83,18 @@ def test_ndarray_coercion(): def test_ndarray_serialize(): """ - Large arrays should get compressed with blosc, otherwise just to list + Arrays should be dumped to a list when using json, but kept as ndarray otherwise """ class Model(BaseModel): - large_array: NDArray[Any, Number] - small_array: NDArray[Any, Number] + array: NDArray[Any, Number] - mod = Model( - large_array=np.random.random((1024, 1024)), small_array=np.random.random((3, 3)) - ) + mod = Model(array=np.random.random((3, 3))) mod_str = mod.model_dump_json() mod_json = json.loads(mod_str) - for a in ("array", "shape", "dtype", "unpack_fns"): - assert a in mod_json["large_array"].keys() - assert isinstance(mod_json["large_array"]["array"], str) - assert isinstance(mod_json["small_array"], list) + assert isinstance(mod_json["array"], list) - # but when we just dump to a dict we don't compress + # but when we just dump to a dict we don't coerce mod_dict = mod.model_dump() assert isinstance(mod_dict["large_array"], np.ndarray)