not quite working zarr implementation

This commit is contained in:
sneakers-the-rat 2024-04-29 19:49:38 -07:00
parent a345cc6504
commit d884055067
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
8 changed files with 297 additions and 7 deletions

View file

@ -36,6 +36,7 @@ intersphinx_mapping = {
"linkml-runtime": ("https://linkml.io/linkml/", None), "linkml-runtime": ("https://linkml.io/linkml/", None),
"dask": ("https://docs.dask.org/en/stable/", None), "dask": ("https://docs.dask.org/en/stable/", None),
"h5py": ("https://docs.h5py.org/en/stable/", None), "h5py": ("https://docs.h5py.org/en/stable/", None),
"zarr": ("https://zarr.readthedocs.io/en/stable/", None),
} }
# -- Options for HTML output ------------------------------------------------- # -- Options for HTML output -------------------------------------------------

View file

@ -5,7 +5,7 @@
groups = ["default", "arrays", "dask", "dev", "docs", "hdf5", "tests"] groups = ["default", "arrays", "dask", "dev", "docs", "hdf5", "tests"]
strategy = ["cross_platform", "inherit_metadata"] strategy = ["cross_platform", "inherit_metadata"]
lock_version = "4.4.1" lock_version = "4.4.1"
content_hash = "sha256:870d3111512c0bccf768ad2c06acb01e0bd9e3091f8544bca2bcf609eea02102" content_hash = "sha256:4e22ffd83cb1ae3916c6c41c77f74b84db5a77e572c796cc537023bd6c3e3128"
[[package]] [[package]]
name = "alabaster" name = "alabaster"
@ -46,6 +46,15 @@ files = [
{file = "anyio-4.3.0.tar.gz", hash = "sha256:f75253795a87df48568485fd18cdd2a3fa5c4f7c5be8e5e36637733fce06fed6"}, {file = "anyio-4.3.0.tar.gz", hash = "sha256:f75253795a87df48568485fd18cdd2a3fa5c4f7c5be8e5e36637733fce06fed6"},
] ]
[[package]]
name = "asciitree"
version = "0.3.3"
summary = "Draws ASCII trees."
groups = ["default"]
files = [
{file = "asciitree-0.3.3.tar.gz", hash = "sha256:4aa4b9b649f85e3fcb343363d97564aa1fb62e249677f2e18a96765145cc0f6e"},
]
[[package]] [[package]]
name = "autodoc-pydantic" name = "autodoc-pydantic"
version = "2.1.0" version = "2.1.0"
@ -395,6 +404,18 @@ files = [
{file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"},
] ]
[[package]]
name = "fasteners"
version = "0.19"
requires_python = ">=3.6"
summary = "A python package that provides useful locks"
groups = ["default"]
marker = "sys_platform != \"emscripten\""
files = [
{file = "fasteners-0.19-py3-none-any.whl", hash = "sha256:758819cb5d94cdedf4e836988b74de396ceacb8e2794d21f82d131fd9ee77237"},
{file = "fasteners-0.19.tar.gz", hash = "sha256:b4f37c3ac52d8a445af3a66bce57b33b5e90b97c696b7b984f530cf8f0ded09c"},
]
[[package]] [[package]]
name = "fsspec" name = "fsspec"
version = "2024.3.1" version = "2024.3.1"
@ -692,6 +713,35 @@ files = [
{file = "nptyping-2.5.0.tar.gz", hash = "sha256:e3d35b53af967e6fb407c3016ff9abae954d3a0568f7cc13a461084224e8e20a"}, {file = "nptyping-2.5.0.tar.gz", hash = "sha256:e3d35b53af967e6fb407c3016ff9abae954d3a0568f7cc13a461084224e8e20a"},
] ]
[[package]]
name = "numcodecs"
version = "0.12.1"
requires_python = ">=3.8"
summary = "A Python package providing buffer compression and transformation codecs for use in data storage and communication applications."
groups = ["default"]
dependencies = [
"numpy>=1.7",
]
files = [
{file = "numcodecs-0.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d37f628fe92b3699e65831d5733feca74d2e33b50ef29118ffd41c13c677210e"},
{file = "numcodecs-0.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:941b7446b68cf79f089bcfe92edaa3b154533dcbcd82474f994b28f2eedb1c60"},
{file = "numcodecs-0.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e79bf9d1d37199ac00a60ff3adb64757523291d19d03116832e600cac391c51"},
{file = "numcodecs-0.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:82d7107f80f9307235cb7e74719292d101c7ea1e393fe628817f0d635b7384f5"},
{file = "numcodecs-0.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:eeaf42768910f1c6eebf6c1bb00160728e62c9343df9e2e315dc9fe12e3f6071"},
{file = "numcodecs-0.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:135b2d47563f7b9dc5ee6ce3d1b81b0f1397f69309e909f1a35bb0f7c553d45e"},
{file = "numcodecs-0.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a191a8e347ecd016e5c357f2bf41fbcb026f6ffe78fff50c77ab12e96701d155"},
{file = "numcodecs-0.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:21d8267bd4313f4d16f5b6287731d4c8ebdab236038f29ad1b0e93c9b2ca64ee"},
{file = "numcodecs-0.12.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:2f84df6b8693206365a5b37c005bfa9d1be486122bde683a7b6446af4b75d862"},
{file = "numcodecs-0.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:760627780a8b6afdb7f942f2a0ddaf4e31d3d7eea1d8498cf0fd3204a33c4618"},
{file = "numcodecs-0.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c258bd1d3dfa75a9b708540d23b2da43d63607f9df76dfa0309a7597d1de3b73"},
{file = "numcodecs-0.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:e04649ea504aff858dbe294631f098fbfd671baf58bfc04fc48d746554c05d67"},
{file = "numcodecs-0.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2fbb12a6a1abe95926f25c65e283762d63a9bf9e43c0de2c6a1a798347dfcb40"},
{file = "numcodecs-0.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f2207871868b2464dc11c513965fd99b958a9d7cde2629be7b2dc84fdaab013b"},
{file = "numcodecs-0.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abff3554a6892a89aacf7b642a044e4535499edf07aeae2f2e6e8fc08c9ba07f"},
{file = "numcodecs-0.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:ef964d4860d3e6b38df0633caf3e51dc850a6293fd8e93240473642681d95136"},
{file = "numcodecs-0.12.1.tar.gz", hash = "sha256:05d91a433733e7eef268d7e80ec226a0232da244289614a8f3826901aec1098e"},
]
[[package]] [[package]]
name = "numpy" name = "numpy"
version = "1.26.4" version = "1.26.4"
@ -1459,6 +1509,23 @@ files = [
{file = "websockets-12.0.tar.gz", hash = "sha256:81df9cbcbb6c260de1e007e58c011bfebe2dafc8435107b0537f393dd38c8b1b"}, {file = "websockets-12.0.tar.gz", hash = "sha256:81df9cbcbb6c260de1e007e58c011bfebe2dafc8435107b0537f393dd38c8b1b"},
] ]
[[package]]
name = "zarr"
version = "2.17.2"
requires_python = ">=3.9"
summary = "An implementation of chunked, compressed, N-dimensional arrays for Python"
groups = ["default"]
dependencies = [
"asciitree",
"fasteners; sys_platform != \"emscripten\"",
"numcodecs>=0.10.0",
"numpy>=1.23",
]
files = [
{file = "zarr-2.17.2-py3-none-any.whl", hash = "sha256:70d7cc07c24280c380ef80644151d136b7503b0d83c9f214e8000ddc0f57f69b"},
{file = "zarr-2.17.2.tar.gz", hash = "sha256:2cbaa6cb4e342d45152d4a7a4b2013c337fcd3a8e7bc98253560180de60552ce"},
]
[[package]] [[package]]
name = "zipp" name = "zipp"
version = "3.18.1" version = "3.18.1"

View file

@ -22,8 +22,11 @@ dask = [
hdf5 = [ hdf5 = [
"h5py>=3.10.0" "h5py>=3.10.0"
] ]
zarr = [
"zarr>=2.17.2",
]
arrays = [ arrays = [
"numpydantic[dask,hdf5]" "numpydantic[dask,hdf5,zarr]"
] ]
tests = [ tests = [
"numpydantic[arrays]", "numpydantic[arrays]",

View file

@ -6,5 +6,12 @@ from numpydantic.interface.dask import DaskInterface
from numpydantic.interface.hdf5 import H5Interface from numpydantic.interface.hdf5 import H5Interface
from numpydantic.interface.interface import Interface from numpydantic.interface.interface import Interface
from numpydantic.interface.numpy import NumpyInterface from numpydantic.interface.numpy import NumpyInterface
from numpydantic.interface.zarr import ZarrInterface
__all__ = ["Interface", "DaskInterface", "H5Interface", "NumpyInterface"] __all__ = [
"Interface",
"DaskInterface",
"H5Interface",
"NumpyInterface",
"ZarrInterface",
]

View file

@ -1,5 +1,121 @@
""" """
Interface to zarr arrays Interface to zarr arrays
(Not Implemented)
""" """
import contextlib
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Optional, Union, Sequence
from numpydantic.interface.interface import Interface
# zarr is an optional dependency. Every name bound by the ``try`` branch is
# also bound in the fallback, so the rest of the module can test for
# availability (``... is None``) instead of running into a NameError.
try:
    from zarr.core import Array as ZarrArray
    from zarr.storage import StoreLike
    import zarr
except ImportError:
    ZarrArray = None
    StoreLike = None
    zarr = None
    # NOTE(review): ``storage`` is never imported above; it is kept only so
    # the set of module-level names does not change. Confirm it can go.
    storage = None
@dataclass
class ZarrArrayPath:
    """
    Map to an array within a zarr store.

    See :func:`zarr.open`
    """

    file: Union[Path, str]
    """Location of Zarr store file or directory"""
    path: Optional[str] = None
    """Path to array within hierarchical zarr store"""

    def open(self, **kwargs) -> "ZarrArray":
        """
        Open the referenced location with :func:`zarr.open`.

        Args:
            **kwargs: forwarded unchanged to :func:`zarr.open`
                (e.g. ``mode="r"``)

        Returns:
            Whatever :func:`zarr.open` returns for ``file`` and ``path``
        """
        # zarr.open is given the store location as a plain string
        return zarr.open(str(self.file), path=self.path, **kwargs)

    @classmethod
    def from_iterable(cls, spec: Sequence) -> "ZarrArrayPath":
        """
        Build an instance from ``(file,)`` or ``(file, path)``.

        Pass a tuple or list: a bare string is itself a sequence and would
        be indexed character by character.

        Args:
            spec: sequence of length 1 (``file`` only) or 2 (``file``, ``path``)

        Returns:
            An instance of the class this was called on, so subclasses get
            instances of themselves.

        Raises:
            ValueError: if ``spec`` does not have 1 or 2 items
        """
        if len(spec) not in (1, 2):
            raise ValueError("Only len 1-2 iterables can be used for a ZarrArrayPath")
        # positional order matches the field order: file, then path
        return cls(*spec)
class ZarrInterface(Interface):
    """
    Interface to in-memory or on-disk zarr arrays

    Handles an already-open zarr array, a path to a zarr store (``str`` or
    :class:`pathlib.Path`), a :class:`.ZarrArrayPath`, or a 1-2 item
    tuple/list that can be turned into a :class:`.ZarrArrayPath`.
    """

    input_types = (Path, ZarrArray, ZarrArrayPath)
    return_type = ZarrArray

    @classmethod
    def enabled(cls) -> bool:
        """True if zarr is installed"""
        return ZarrArray is not None

    @staticmethod
    def _get_array(
        array: Union[ZarrArray, str, Path, ZarrArrayPath, Sequence]
    ) -> ZarrArray:
        """
        Resolve any supported input to an open zarr array.

        Open arrays are returned untouched; paths and tuples/lists are first
        wrapped in a :class:`.ZarrArrayPath` and then opened.
        """
        if isinstance(array, ZarrArray):
            return array

        if isinstance(array, (str, Path)):
            array = ZarrArrayPath(file=array)
        elif isinstance(array, (tuple, list)):
            array = ZarrArrayPath.from_iterable(array)

        # NOTE(review): mode "a" is read/write and, per the zarr docs, creates
        # the target when it does not exist - confirm that is intended here.
        return array.open(mode="a")

    @classmethod
    def check(cls, array: Any) -> bool:
        """
        Check if array is in-memory zarr array,
        a path to a zarr array, or a :class:`.ZarrArrayPath`

        Returns ``False`` (rather than raising) when zarr is not installed.
        """
        if not cls.enabled():
            # Without zarr, ZarrArray is None and isinstance() against None
            # raises TypeError; nothing can be a zarr array in that case.
            return False

        if isinstance(array, ZarrArray):
            return True

        # See if can be coerced to ZarrArrayPath
        if isinstance(array, (Path, str)):
            array = ZarrArrayPath(file=array)

        if isinstance(array, (tuple, list)):
            # something that can be coerced to ZarrArrayPath;
            # wrong-length sequences raise ValueError and are left as they are
            with contextlib.suppress(ValueError):
                array = ZarrArrayPath.from_iterable(array)

        if isinstance(array, ZarrArrayPath):
            # Best effort: any failure to open read-only is treated as
            # "not a zarr array" instead of being propagated.
            with contextlib.suppress(Exception):
                arr = array.open(mode="r")
                if isinstance(arr, ZarrArray):
                    return True

        return False

    def before_validation(
        self, array: Union[ZarrArray, str, Path, ZarrArrayPath, Sequence]
    ) -> ZarrArray:
        """
        Ensure that the zarr array is opened
        """
        return self._get_array(array)

    @classmethod
    def to_json(
        cls, array: Union[ZarrArray, str, Path, ZarrArrayPath, Sequence]
    ) -> dict:
        """
        Dump just the metadata for an array from :meth:`zarr.core.Array.info_items`
        plus the :meth:`zarr.core.Array.hexdigest`

        Returns:
            dict keyed by the first element of each info item, with an extra
            ``"hexdigest"`` entry
        """
        array = cls._get_array(array)
        info = array.info_items()
        info_dict = {i[0]: i[1] for i in info}
        info_dict["hexdigest"] = array.hexdigest()
        return info_dict

View file

@ -7,8 +7,10 @@ import numpy as np
import pytest import pytest
from nptyping import Number from nptyping import Number
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
import zarr
from numpydantic.interface.hdf5 import H5ArrayPath from numpydantic.interface.hdf5 import H5ArrayPath
from numpydantic.interface.zarr import ZarrArrayPath
from numpydantic import NDArray, Shape from numpydantic import NDArray, Shape
from numpydantic.maps import python_to_nptyping from numpydantic.maps import python_to_nptyping
@ -105,3 +107,21 @@ def hdf5_array(
return H5ArrayPath(Path(hdf5_file.filename), array_path) return H5ArrayPath(Path(hdf5_file.filename), array_path)
return _hdf5_array return _hdf5_array
@pytest.fixture(scope="function")
def zarr_nested_array(tmp_output_dir_func) -> ZarrArrayPath:
    """
    :class:`.ZarrArrayPath` pointing at an array created with ``zeros`` at
    ``a/b/c`` inside a ``nested.zarr`` store opened in write mode
    """
    store_file = tmp_output_dir_func / "nested.zarr"
    array_path = "a/b/c"
    # only the location is handed back; the created array object is not needed
    zarr.open(str(store_file), mode="w").zeros(
        array_path, shape=(100, 100), chunks=(10, 10)
    )
    return ZarrArrayPath(file=store_file, path=array_path)
@pytest.fixture(scope="function")
def zarr_array(tmp_output_dir_func) -> Path:
    """Path to an ``array.zarr`` store holding a (100, 100) array set to 0"""
    store_file = tmp_output_dir_func / "array.zarr"
    arr = zarr.open(str(store_file), mode="w", shape=(100, 100), chunks=(10, 10))
    # write every element so the array holds explicit zeros
    arr[:] = 0
    return store_file

View file

@ -2,9 +2,10 @@ import pytest
import numpy as np import numpy as np
import dask.array as da import dask.array as da
import zarr
from numpydantic import interface from numpydantic import interface
from tests.fixtures import hdf5_array from tests.fixtures import hdf5_array, zarr_nested_array, zarr_array
@pytest.fixture( @pytest.fixture(
@ -14,8 +15,19 @@ from tests.fixtures import hdf5_array
(np.zeros((3, 4)), interface.NumpyInterface), (np.zeros((3, 4)), interface.NumpyInterface),
(hdf5_array, interface.H5Interface), (hdf5_array, interface.H5Interface),
(da.random.random((10, 10)), interface.DaskInterface), (da.random.random((10, 10)), interface.DaskInterface),
(zarr.ones((10, 10)), interface.ZarrInterface),
(zarr_nested_array, interface.ZarrInterface),
(zarr_array, interface.ZarrInterface),
],
ids=[
"numpy_list",
"numpy",
"H5ArrayPath",
"dask",
"zarr_memory",
"zarr_nested",
"zarr_array",
], ],
ids=["numpy_list", "numpy", "H5ArrayPath", "dask"],
) )
def interface_type(request): def interface_type(request):
return request.param return request.param

View file

@ -0,0 +1,64 @@
from typing import Iterator

import pytest
import zarr
from pydantic import ValidationError

from numpydantic.interface import ZarrInterface
@pytest.fixture()
def dir_array(tmp_output_dir_func) -> zarr.DirectoryStore:
    """:class:`zarr.DirectoryStore` at ``array.zarr`` under ``tmp_output_dir_func``"""
    return zarr.DirectoryStore(tmp_output_dir_func / "array.zarr")
@pytest.fixture()
def zip_array(tmp_output_dir_func) -> Iterator[zarr.ZipStore]:
    """
    Writable :class:`zarr.ZipStore` at ``array.zip`` under ``tmp_output_dir_func``.

    The store is yielded instead of returned so that it is closed during
    fixture teardown rather than being left open after the test.
    """
    store = zarr.ZipStore(tmp_output_dir_func / "array.zip", mode="w")
    yield store
    # teardown: runs once the requesting test has finished
    store.close()
@pytest.fixture()
def nested_dir_array(tmp_output_dir_func) -> zarr.NestedDirectoryStore:
    """:class:`zarr.NestedDirectoryStore` at ``nested`` under ``tmp_output_dir_func``"""
    return zarr.NestedDirectoryStore(tmp_output_dir_func / "nested")
# NOTE(review): these entries are the fixture functions themselves, not store
# instances, and ``nested_dir_array`` is not included - presumably intended
# for parametrization later; confirm before relying on it.
STORES = (
    dir_array,
    zip_array,
)
"""stores for single arrays"""
def test_zarr_enabled():
    """The zarr interface reports itself as enabled (zarr is imported by this module)"""
    assert ZarrInterface.enabled()
def test_zarr_check(interface_type):
    """
    ``ZarrInterface.check`` should accept the inputs paired with the zarr
    interface and reject the inputs paired with any other interface
    """
    candidate = interface_type[0]
    accepted = ZarrInterface.check(candidate)
    if interface_type[1] is not ZarrInterface:
        assert not accepted
        return
    assert accepted
@pytest.mark.parametrize(
    "array,passes",
    [
        (zarr.zeros((5, 10)), True),
        (zarr.zeros((5, 10, 3)), True),
        (zarr.zeros((5, 10, 3, 4)), True),
        (zarr.zeros((5, 10, 4)), False),
        (zarr.zeros((5, 10, 3, 6)), False),
        (zarr.zeros((5, 10, 4, 6)), False),
    ],
)
def test_zarr_shape(model_rgb, array, passes):
    """
    Arrays flagged ``passes`` must validate against ``model_rgb``;
    the others must raise a ``ValidationError``
    """
    if not passes:
        with pytest.raises(ValidationError):
            model_rgb(array=array)
        return

    # expected-valid shapes: construction must simply succeed
    model_rgb(array=array)