diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 24c2ab8..d7f0b02 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -19,4 +19,5 @@ python: - method: pip path: . extra_requirements: - - docs \ No newline at end of file + - docs + - arrays \ No newline at end of file diff --git a/docs/api/dtype.md b/docs/api/dtype.md new file mode 100644 index 0000000..19bf54a --- /dev/null +++ b/docs/api/dtype.md @@ -0,0 +1 @@ +# DType \ No newline at end of file diff --git a/docs/api/interface/dask.md b/docs/api/interface/dask.md new file mode 100644 index 0000000..44525fb --- /dev/null +++ b/docs/api/interface/dask.md @@ -0,0 +1,6 @@ +# Dask + +```{eval-rst} +.. automodule:: numpydantic.interface.dask + :members: +``` \ No newline at end of file diff --git a/docs/api/interface/hdf5.md b/docs/api/interface/hdf5.md new file mode 100644 index 0000000..d16ef66 --- /dev/null +++ b/docs/api/interface/hdf5.md @@ -0,0 +1,6 @@ +# HDF5 + +```{eval-rst} +.. automodule:: numpydantic.interface.hdf5 + :members: +``` \ No newline at end of file diff --git a/docs/api/interface/index.md b/docs/api/interface/index.md new file mode 100644 index 0000000..7ade4fc --- /dev/null +++ b/docs/api/interface/index.md @@ -0,0 +1,13 @@ +# Interfaces + +```{eval-rst} +.. automodule:: numpydantic.interface.interface + :members: +``` + +```{toctree} +dask +hdf5 +numpy +zarr +``` \ No newline at end of file diff --git a/docs/api/interface/numpy.md b/docs/api/interface/numpy.md new file mode 100644 index 0000000..23399d9 --- /dev/null +++ b/docs/api/interface/numpy.md @@ -0,0 +1,6 @@ +# Numpy + +```{eval-rst} +.. automodule:: numpydantic.interface.numpy + :members: +``` \ No newline at end of file diff --git a/docs/api/interface/zarr.md b/docs/api/interface/zarr.md new file mode 100644 index 0000000..112b78f --- /dev/null +++ b/docs/api/interface/zarr.md @@ -0,0 +1,6 @@ +# Zarr + +```{eval-rst} +.. 
automodule:: numpydantic.interface.zarr + :members: +``` \ No newline at end of file diff --git a/docs/api/proxy.md b/docs/api/proxy.md deleted file mode 100644 index 13851a0..0000000 --- a/docs/api/proxy.md +++ /dev/null @@ -1,6 +0,0 @@ -# proxy - -```{eval-rst} -.. automodule:: numpydantic.proxy - :members: -``` \ No newline at end of file diff --git a/docs/api/types.md b/docs/api/types.md new file mode 100644 index 0000000..3b30843 --- /dev/null +++ b/docs/api/types.md @@ -0,0 +1 @@ +# Types \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index b4406a5..0ad2d1a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -19,6 +19,7 @@ extensions = [ "sphinx.ext.autodoc", "sphinxcontrib.autodoc_pydantic", "sphinx.ext.intersphinx", + "sphinx.ext.viewcode", "sphinx_design", "myst_parser", "sphinx.ext.todo", @@ -48,12 +49,7 @@ html_static_path = ["_static"] # autodoc autodoc_pydantic_model_show_json_error_strategy = "coerce" autodoc_pydantic_model_show_json = False -autodoc_mock_imports = [ - "dask", - "h5py", - "linkml", - "linkml-runtime", -] + autoclass_content = "both" autodoc_member_order = "bysource" add_module_names = False diff --git a/docs/index.md b/docs/index.md index 3d6bca0..f87a856 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,9 +1,166 @@ # numpydantic -Type and shape validation and serialization for numpy arrays in pydantic models +A python package for array types in pydantic. + +## Features: +- **Types** - Annotations (based on [nptyping](https://github.com/ramonhagenaars/nptyping)) + for specifying arrays in pydantic models +- **Validation** - Shape, dtype, and other array validations +- **Serialization** - JSON-Schema List-of-list schema generation +- **Interfaces** - Works with numpy, dask, HDF5, zarr, and a simple extension system to make it work with + whatever else you want! 
+ +Coming soon: +- **Metadata** - This package was built to be used with [linkml arrays](https://linkml.io/linkml/schemas/arrays.html), + so we will be extending it to include any metadata included in the type annotation object in the JSON schema representation. +- (see [todo](./todo.md)) + +## Usage + +Specify an array using [nptyping syntax](https://github.com/ramonhagenaars/nptyping/blob/master/USERDOCS.md) +and use it with your favorite array library :) + +```{todo} +We will be moving away from using nptyping in v2.0.0. + +It was written for an older era in python before the dramatic changes in the Python +type system and is no longer actively maintained. We will be reimplementing a syntax +that extends its array specification syntax to include things like ranges and extensible +dtypes with varying precision (and is much less finicky to deal with). +``` + +Use the {class}`~numpydantic.NDArray` class like you would any other python type, +combine it with {class}`typing.Union`, make it {class}`~typing.Optional`, etc. + +```python +from typing import Union +from pydantic import BaseModel +import numpy as np + +from numpydantic import NDArray, Shape + +class Image(BaseModel): + """ + Images: grayscale, RGB, RGBA, and videos too! + """ + array: Union[ + NDArray[Shape["* x, * y"], np.uint8], + NDArray[Shape["* x, * y, 3 rgb"], np.uint8], + NDArray[Shape["* x, * y, 4 rgba"], np.uint8], + NDArray[Shape["* t, * x, * y, 3 rgb"], np.uint8], + NDArray[Shape["* t, * x, * y, 4 rgba"], np.float64] + ] +``` + +And then use that as a transparent interface to your favorite array library! 
+ +### Numpy + +The Coca-Cola of array libraries + +```python +import numpy as np +# works +frame_gray = Image(array=np.ones((1280, 720), dtype=np.uint8)) +frame_rgb = Image(array=np.ones((1280, 720, 3), dtype=np.uint8)) +frame_rgba = Image(array=np.ones((1280, 720, 4), dtype=np.uint8)) +video_rgb = Image(array=np.ones((100, 1280, 720, 3), dtype=np.uint8)) + +# fails +wrong_n_dimensions = Image(array=np.ones((1280,), dtype=np.uint8)) +wrong_shape = Image(array=np.ones((1280,720,10), dtype=np.uint8)) +wrong_type = Image(array=np.ones((1280,720,3), dtype=np.float64)) + +# shapes and types are checked together, so.. +# this works +float_video = Image(array=np.ones((100, 1280, 720, 4), dtype=float)) +# this doesn't +wrong_shape_float_video = Image(array=np.ones((100, 1280, 720, 3), dtype=float)) +``` + +### Dask + +High performance chunked arrays! The backend for many new array libraries! + +Works exactly the same as numpy arrays + +```python +import dask.array as da + +# validate a huge video +video_array = da.zeros(shape=(1920,1080,1000000,3), dtype=np.uint8) + +# this works +dask_video = Image(array=video_array) +``` + +### HDF5 + +Array work increasingly can't fit in memory, but dealing with arrays on disk +can become a pain in concurrent applications. Numpydantic allows you to +specify the location of an array within an hdf5 file on disk and use it just like +any other array! + +For example, make an array on disk... + +```python +from pathlib import Path +import h5py +from numpydantic.interface.hdf5 import H5ArrayPath + +h5f_file = Path('my_file.h5') +array_path = "/nested/array" + +# make an HDF5 array +h5f = h5py.File(h5f_file, "w") +array = np.random.random((1920,1080,3)).astype(np.uint8) +h5f.create_dataset(array_path, data=array) +h5f.close() +``` + +Then use it in your model! 
numpydantic will only open the file as long as it's needed + +```python +>>> h5f_image = Image(array=H5ArrayPath(file=h5f_file, path=array_path)) +>>> h5f_image.array[0:5,0:5,0] +array([[0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0]], dtype=uint8) +>>> h5f_image.array[0:2,0:2,0] = 1 +>>> h5f_image.array[0:5,0:5,0] +array([[1, 1, 0, 0, 0], + [1, 1, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0]], dtype=uint8) +``` + +Numpydantic tries to be a smart but transparent proxy, exposing the methods and attributes +of the source type even when we aren't directly using them, like when dealing with on-disk HDF5 arrays. + +If you want, you can take full control and directly interact with the underlying {class}`h5py.Dataset` +object and leave the file open between calls: + +```python +>>> dataset = h5f_image.array.open() +>>> # do some stuff that requires the dataset to be held open +>>> h5f_image.array.close() +``` + +### Zarr + +Zarr works similarly! + +Use it with any of Zarr's backends: Nested, Zipfile, S3, it's all the same! + +```{todo} +Add the zarr examples! +``` + + -- **Provide types** - Annotations (based on [npytyping](https://github.com/ramonhagenaars/nptyping)) - for specifying numpy arrays in pydantic models, and ```{toctree} :maxdepth: 2 @@ -21,12 +178,13 @@ todo :caption: API :hidden: true +api/interface/index api/index +api/dtype api/ndarray -api/proxy -api/linkml/index api/maps api/monkeypatch +api/types ``` diff --git a/docs/linkml.md b/docs/linkml.md deleted file mode 100644 index df0f2b6..0000000 --- a/docs/linkml.md +++ /dev/null @@ -1,2 +0,0 @@ -# LinkML Generation -