mirror of
https://github.com/p2p-ld/numpydantic.git
synced 2025-01-09 21:44:27 +00:00
not quite working zarr implementation
This commit is contained in:
parent
a345cc6504
commit
d884055067
8 changed files with 297 additions and 7 deletions
|
@ -36,6 +36,7 @@ intersphinx_mapping = {
|
|||
"linkml-runtime": ("https://linkml.io/linkml/", None),
|
||||
"dask": ("https://docs.dask.org/en/stable/", None),
|
||||
"h5py": ("https://docs.h5py.org/en/stable/", None),
|
||||
"zarr": ("https://zarr.readthedocs.io/en/stable/", None),
|
||||
}
|
||||
|
||||
# -- Options for HTML output -------------------------------------------------
|
||||
|
|
69
pdm.lock
69
pdm.lock
|
@ -5,7 +5,7 @@
|
|||
groups = ["default", "arrays", "dask", "dev", "docs", "hdf5", "tests"]
|
||||
strategy = ["cross_platform", "inherit_metadata"]
|
||||
lock_version = "4.4.1"
|
||||
content_hash = "sha256:870d3111512c0bccf768ad2c06acb01e0bd9e3091f8544bca2bcf609eea02102"
|
||||
content_hash = "sha256:4e22ffd83cb1ae3916c6c41c77f74b84db5a77e572c796cc537023bd6c3e3128"
|
||||
|
||||
[[package]]
|
||||
name = "alabaster"
|
||||
|
@ -46,6 +46,15 @@ files = [
|
|||
{file = "anyio-4.3.0.tar.gz", hash = "sha256:f75253795a87df48568485fd18cdd2a3fa5c4f7c5be8e5e36637733fce06fed6"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "asciitree"
|
||||
version = "0.3.3"
|
||||
summary = "Draws ASCII trees."
|
||||
groups = ["default"]
|
||||
files = [
|
||||
{file = "asciitree-0.3.3.tar.gz", hash = "sha256:4aa4b9b649f85e3fcb343363d97564aa1fb62e249677f2e18a96765145cc0f6e"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autodoc-pydantic"
|
||||
version = "2.1.0"
|
||||
|
@ -395,6 +404,18 @@ files = [
|
|||
{file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fasteners"
|
||||
version = "0.19"
|
||||
requires_python = ">=3.6"
|
||||
summary = "A python package that provides useful locks"
|
||||
groups = ["default"]
|
||||
marker = "sys_platform != \"emscripten\""
|
||||
files = [
|
||||
{file = "fasteners-0.19-py3-none-any.whl", hash = "sha256:758819cb5d94cdedf4e836988b74de396ceacb8e2794d21f82d131fd9ee77237"},
|
||||
{file = "fasteners-0.19.tar.gz", hash = "sha256:b4f37c3ac52d8a445af3a66bce57b33b5e90b97c696b7b984f530cf8f0ded09c"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fsspec"
|
||||
version = "2024.3.1"
|
||||
|
@ -692,6 +713,35 @@ files = [
|
|||
{file = "nptyping-2.5.0.tar.gz", hash = "sha256:e3d35b53af967e6fb407c3016ff9abae954d3a0568f7cc13a461084224e8e20a"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "numcodecs"
|
||||
version = "0.12.1"
|
||||
requires_python = ">=3.8"
|
||||
summary = "A Python package providing buffer compression and transformation codecs for use in data storage and communication applications."
|
||||
groups = ["default"]
|
||||
dependencies = [
|
||||
"numpy>=1.7",
|
||||
]
|
||||
files = [
|
||||
{file = "numcodecs-0.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d37f628fe92b3699e65831d5733feca74d2e33b50ef29118ffd41c13c677210e"},
|
||||
{file = "numcodecs-0.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:941b7446b68cf79f089bcfe92edaa3b154533dcbcd82474f994b28f2eedb1c60"},
|
||||
{file = "numcodecs-0.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e79bf9d1d37199ac00a60ff3adb64757523291d19d03116832e600cac391c51"},
|
||||
{file = "numcodecs-0.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:82d7107f80f9307235cb7e74719292d101c7ea1e393fe628817f0d635b7384f5"},
|
||||
{file = "numcodecs-0.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:eeaf42768910f1c6eebf6c1bb00160728e62c9343df9e2e315dc9fe12e3f6071"},
|
||||
{file = "numcodecs-0.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:135b2d47563f7b9dc5ee6ce3d1b81b0f1397f69309e909f1a35bb0f7c553d45e"},
|
||||
{file = "numcodecs-0.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a191a8e347ecd016e5c357f2bf41fbcb026f6ffe78fff50c77ab12e96701d155"},
|
||||
{file = "numcodecs-0.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:21d8267bd4313f4d16f5b6287731d4c8ebdab236038f29ad1b0e93c9b2ca64ee"},
|
||||
{file = "numcodecs-0.12.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:2f84df6b8693206365a5b37c005bfa9d1be486122bde683a7b6446af4b75d862"},
|
||||
{file = "numcodecs-0.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:760627780a8b6afdb7f942f2a0ddaf4e31d3d7eea1d8498cf0fd3204a33c4618"},
|
||||
{file = "numcodecs-0.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c258bd1d3dfa75a9b708540d23b2da43d63607f9df76dfa0309a7597d1de3b73"},
|
||||
{file = "numcodecs-0.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:e04649ea504aff858dbe294631f098fbfd671baf58bfc04fc48d746554c05d67"},
|
||||
{file = "numcodecs-0.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2fbb12a6a1abe95926f25c65e283762d63a9bf9e43c0de2c6a1a798347dfcb40"},
|
||||
{file = "numcodecs-0.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f2207871868b2464dc11c513965fd99b958a9d7cde2629be7b2dc84fdaab013b"},
|
||||
{file = "numcodecs-0.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abff3554a6892a89aacf7b642a044e4535499edf07aeae2f2e6e8fc08c9ba07f"},
|
||||
{file = "numcodecs-0.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:ef964d4860d3e6b38df0633caf3e51dc850a6293fd8e93240473642681d95136"},
|
||||
{file = "numcodecs-0.12.1.tar.gz", hash = "sha256:05d91a433733e7eef268d7e80ec226a0232da244289614a8f3826901aec1098e"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "numpy"
|
||||
version = "1.26.4"
|
||||
|
@ -1459,6 +1509,23 @@ files = [
|
|||
{file = "websockets-12.0.tar.gz", hash = "sha256:81df9cbcbb6c260de1e007e58c011bfebe2dafc8435107b0537f393dd38c8b1b"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zarr"
|
||||
version = "2.17.2"
|
||||
requires_python = ">=3.9"
|
||||
summary = "An implementation of chunked, compressed, N-dimensional arrays for Python"
|
||||
groups = ["default"]
|
||||
dependencies = [
|
||||
"asciitree",
|
||||
"fasteners; sys_platform != \"emscripten\"",
|
||||
"numcodecs>=0.10.0",
|
||||
"numpy>=1.23",
|
||||
]
|
||||
files = [
|
||||
{file = "zarr-2.17.2-py3-none-any.whl", hash = "sha256:70d7cc07c24280c380ef80644151d136b7503b0d83c9f214e8000ddc0f57f69b"},
|
||||
{file = "zarr-2.17.2.tar.gz", hash = "sha256:2cbaa6cb4e342d45152d4a7a4b2013c337fcd3a8e7bc98253560180de60552ce"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zipp"
|
||||
version = "3.18.1"
|
||||
|
|
|
@ -22,8 +22,11 @@ dask = [
|
|||
hdf5 = [
|
||||
"h5py>=3.10.0"
|
||||
]
|
||||
zarr = [
|
||||
"zarr>=2.17.2",
|
||||
]
|
||||
arrays = [
|
||||
"numpydantic[dask,hdf5]"
|
||||
"numpydantic[dask,hdf5,zarr]"
|
||||
]
|
||||
tests = [
|
||||
"numpydantic[arrays]",
|
||||
|
|
|
@ -6,5 +6,12 @@ from numpydantic.interface.dask import DaskInterface
|
|||
from numpydantic.interface.hdf5 import H5Interface
|
||||
from numpydantic.interface.interface import Interface
|
||||
from numpydantic.interface.numpy import NumpyInterface
|
||||
from numpydantic.interface.zarr import ZarrInterface
|
||||
|
||||
__all__ = ["Interface", "DaskInterface", "H5Interface", "NumpyInterface"]
|
||||
__all__ = [
|
||||
"Interface",
|
||||
"DaskInterface",
|
||||
"H5Interface",
|
||||
"NumpyInterface",
|
||||
"ZarrInterface",
|
||||
]
|
||||
|
|
|
@ -1,5 +1,121 @@
|
|||
"""
|
||||
Interface to zarr arrays
|
||||
|
||||
(Work in progress)
|
||||
"""
|
||||
|
||||
import contextlib
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional, Union, Sequence
|
||||
|
||||
from numpydantic.interface.interface import Interface
|
||||
|
||||
# zarr is an optional dependency: when it is missing, every name this module
# takes from it is bound to ``None`` so the module still imports and
# ``ZarrInterface.enabled()`` can report the interface as unavailable.
try:
    from zarr.core import Array as ZarrArray
    from zarr.storage import StoreLike
    import zarr
except ImportError:
    ZarrArray = None
    StoreLike = None
    # ``zarr`` itself is referenced by ``ZarrArrayPath.open``; bind it so the
    # name always exists at module level.
    zarr = None
    # Kept for backward compatibility with the previous fallback bindings.
    storage = None
|
||||
|
||||
|
||||
@dataclass
class ZarrArrayPath:
    """
    Map to an array within a zarr store.

    See :func:`zarr.open`
    """

    file: Union[Path, str]
    """Location of Zarr store file or directory"""
    path: Optional[str] = None
    """Path to array within hierarchical zarr store"""

    def open(self, **kwargs) -> "ZarrArray":
        """
        Open the referenced location with :func:`zarr.open`.

        Args:
            **kwargs: forwarded unchanged to :func:`zarr.open` (e.g. ``mode``)

        Returns:
            Whatever :func:`zarr.open` returns for ``file`` and ``path``
        """
        # The return annotation is a string so it is not evaluated when the
        # class body is executed.
        return zarr.open(str(self.file), path=self.path, **kwargs)

    @classmethod
    def from_iterable(cls, spec: Sequence) -> "ZarrArrayPath":
        """
        Build an instance from a ``(file,)`` or ``(file, path)`` sequence.

        Args:
            spec: sequence of length 1 or 2

        Raises:
            ValueError: if ``spec`` has any other length
        """
        # Construct through ``cls`` so subclasses get instances of themselves.
        if len(spec) == 1:
            return cls(file=spec[0])
        if len(spec) == 2:
            return cls(file=spec[0], path=spec[1])
        raise ValueError("Only len 1-2 iterables can be used for a ZarrArrayPath")
|
||||
|
||||
|
||||
class ZarrInterface(Interface):
    """
    Interface to in-memory or on-disk zarr arrays
    """

    input_types = (Path, ZarrArray, ZarrArrayPath)
    return_type = ZarrArray

    @classmethod
    def enabled(cls) -> bool:
        """True if zarr is installed"""
        return ZarrArray is not None

    @staticmethod
    def _get_array(
        array: Union[ZarrArray, str, Path, ZarrArrayPath, Sequence]
    ) -> ZarrArray:
        """
        Resolve any supported input to an opened zarr array.

        An existing zarr array is returned unchanged. A ``str`` or
        :class:`~pathlib.Path` is wrapped in a :class:`.ZarrArrayPath`, a
        ``tuple`` or ``list`` goes through
        :meth:`.ZarrArrayPath.from_iterable`, and the resulting
        :class:`.ZarrArrayPath` is opened with ``mode="a"``.
        """
        if isinstance(array, ZarrArray):
            return array

        if isinstance(array, (str, Path)):
            array = ZarrArrayPath(file=array)
        elif isinstance(array, (tuple, list)):
            array = ZarrArrayPath.from_iterable(array)

        return array.open(mode="a")

    @classmethod
    def check(cls, array: Any) -> bool:
        """
        Check if array is in-memory zarr array,
        a path to a zarr array, or a :class:`.ZarrArrayPath`
        """
        # Without zarr, ``ZarrArray`` is ``None`` and the ``isinstance`` checks
        # below would raise ``TypeError``; nothing can match in that case.
        if not cls.enabled():
            return False

        if isinstance(array, ZarrArray):
            return True

        # See if can be coerced to ZarrArrayPath
        if isinstance(array, (Path, str)):
            array = ZarrArrayPath(file=array)

        if isinstance(array, (tuple, list)):
            # something that can be coerced to ZarrArrayPath
            with contextlib.suppress(ValueError):
                array = ZarrArrayPath.from_iterable(array)

        if isinstance(array, ZarrArrayPath):
            # Best-effort probe: any failure to open means "not a zarr array"
            with contextlib.suppress(Exception):
                arr = array.open(mode="r")
                if isinstance(arr, ZarrArray):
                    return True

        return False

    def before_validation(
        self, array: Union[ZarrArray, str, Path, ZarrArrayPath, Sequence]
    ) -> ZarrArray:
        """
        Ensure that the zarr array is opened
        """
        return self._get_array(array)

    @classmethod
    def to_json(
        cls, array: Union[ZarrArray, str, Path, ZarrArrayPath, Sequence]
    ) -> dict:
        """
        Dump just the metadata for an array from :meth:`zarr.core.Array.info_items`
        plus the :meth:`zarr.core.Array.hexdigest`
        """
        array = cls._get_array(array)
        info = array.info_items()
        info_dict = {i[0]: i[1] for i in info}
        info_dict["hexdigest"] = array.hexdigest()
        return info_dict
|
||||
|
|
|
@ -7,8 +7,10 @@ import numpy as np
|
|||
import pytest
|
||||
from nptyping import Number
|
||||
from pydantic import BaseModel, Field
|
||||
import zarr
|
||||
|
||||
from numpydantic.interface.hdf5 import H5ArrayPath
|
||||
from numpydantic.interface.zarr import ZarrArrayPath
|
||||
from numpydantic import NDArray, Shape
|
||||
from numpydantic.maps import python_to_nptyping
|
||||
|
||||
|
@ -105,3 +107,21 @@ def hdf5_array(
|
|||
return H5ArrayPath(Path(hdf5_file.filename), array_path)
|
||||
|
||||
return _hdf5_array
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
def zarr_nested_array(tmp_output_dir_func) -> ZarrArrayPath:
    """Zarr array stored at the nested path ``a/b/c`` inside an on-disk store"""
    file = tmp_output_dir_func / "nested.zarr"
    path = "a/b/c"
    root = zarr.open(str(file), mode="w")
    # Called only to create the array in the store; the fixture returns the
    # array's location rather than the array object.
    root.zeros(path, shape=(100, 100), chunks=(10, 10))
    return ZarrArrayPath(file=file, path=path)
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
def zarr_array(tmp_output_dir_func) -> Path:
    """Path to an on-disk zarr store holding a single zero-filled ``(100, 100)`` array"""
    store_path = tmp_output_dir_func / "array.zarr"
    zeros = zarr.open(
        str(store_path),
        mode="w",
        shape=(100, 100),
        chunks=(10, 10),
    )
    # Write every element so the data is actually written to the store
    zeros[:] = 0
    return store_path
|
||||
|
|
|
@ -2,9 +2,10 @@ import pytest
|
|||
|
||||
import numpy as np
|
||||
import dask.array as da
|
||||
import zarr
|
||||
|
||||
from numpydantic import interface
|
||||
from tests.fixtures import hdf5_array
|
||||
from tests.fixtures import hdf5_array, zarr_nested_array, zarr_array
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
|
@ -14,8 +15,19 @@ from tests.fixtures import hdf5_array
|
|||
(np.zeros((3, 4)), interface.NumpyInterface),
|
||||
(hdf5_array, interface.H5Interface),
|
||||
(da.random.random((10, 10)), interface.DaskInterface),
|
||||
(zarr.ones((10, 10)), interface.ZarrInterface),
|
||||
(zarr_nested_array, interface.ZarrInterface),
|
||||
(zarr_array, interface.ZarrInterface),
|
||||
],
|
||||
ids=[
|
||||
"numpy_list",
|
||||
"numpy",
|
||||
"H5ArrayPath",
|
||||
"dask",
|
||||
"zarr_memory",
|
||||
"zarr_nested",
|
||||
"zarr_array",
|
||||
],
|
||||
ids=["numpy_list", "numpy", "H5ArrayPath", "dask"],
|
||||
)
|
||||
def interface_type(request):
|
||||
return request.param
|
||||
|
|
64
tests/test_interface/test_zarr.py
Normal file
64
tests/test_interface/test_zarr.py
Normal file
|
@ -0,0 +1,64 @@
|
|||
import pytest
|
||||
import zarr
|
||||
|
||||
from pydantic import ValidationError
|
||||
|
||||
from numpydantic.interface import ZarrInterface
|
||||
|
||||
|
||||
@pytest.fixture()
def dir_array(tmp_output_dir_func) -> zarr.DirectoryStore:
    """Directory store located at ``array.zarr`` in the per-test output directory"""
    return zarr.DirectoryStore(tmp_output_dir_func / "array.zarr")
|
||||
|
||||
|
||||
@pytest.fixture()
def zip_array(tmp_output_dir_func, request) -> zarr.ZipStore:
    """
    Zip store at ``array.zip`` in the per-test output directory, opened for writing.

    The store is closed when the requesting test finishes.
    """
    store = zarr.ZipStore(tmp_output_dir_func / "array.zip", mode="w")
    # A zip store opened for writing must be closed explicitly; register the
    # close as a finalizer so it runs even if the test fails.
    request.addfinalizer(store.close)
    return store
|
||||
|
||||
|
||||
@pytest.fixture()
def nested_dir_array(tmp_output_dir_func) -> zarr.NestedDirectoryStore:
    """Nested directory store located at ``nested`` in the per-test output directory"""
    return zarr.NestedDirectoryStore(tmp_output_dir_func / "nested")
|
||||
|
||||
|
||||
STORES = (dir_array, zip_array)
"""stores for single arrays"""
|
||||
|
||||
|
||||
def test_zarr_enabled():
    """The zarr interface reports itself as enabled in the test environment"""
    is_enabled = ZarrInterface.enabled()
    assert is_enabled
|
||||
|
||||
|
||||
def test_zarr_check(interface_type):
    """
    Only zarr-like inputs should be claimed by the zarr interface
    """
    expects_zarr = interface_type[1] is ZarrInterface
    matched = ZarrInterface.check(interface_type[0])
    if expects_zarr:
        assert matched
    else:
        assert not matched
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "array,passes",
    [
        (zarr.zeros((5, 10)), True),
        (zarr.zeros((5, 10, 3)), True),
        (zarr.zeros((5, 10, 3, 4)), True),
        (zarr.zeros((5, 10, 4)), False),
        (zarr.zeros((5, 10, 3, 6)), False),
        (zarr.zeros((5, 10, 4, 6)), False),
    ],
)
def test_zarr_shape(model_rgb, array, passes):
    """
    Arrays flagged ``passes`` must validate against ``model_rgb``;
    the others must raise a ``ValidationError``
    """
    if not passes:
        with pytest.raises(ValidationError):
            model_rgb(array=array)
        return

    model_rgb(array=array)
|
Loading…
Reference in a new issue