mirror of
https://github.com/p2p-ld/numpydantic.git
synced 2025-01-09 21:44:27 +00:00
continuing on docs, adding interfaces diagram
This commit is contained in:
parent
5e3ad790d7
commit
0937fd7c0d
6 changed files with 142 additions and 13 deletions
|
@ -24,6 +24,7 @@ extensions = [
|
|||
"sphinx.ext.viewcode",
|
||||
"sphinx.ext.doctest",
|
||||
"sphinx_design",
|
||||
"sphinxcontrib.mermaid",
|
||||
"myst_parser",
|
||||
"sphinx.ext.todo",
|
||||
]
|
||||
|
|
|
@ -6,6 +6,10 @@ We want to bring the tidiness of modeling data with pydantic to the universe of
|
|||
software that uses arrays - particularly formats and packages that need to be very
|
||||
particular about what *kind* of arrays they are able to handle or match a specific schema.
|
||||
|
||||
To support a new generation of data formats and data analysis libraries that can
|
||||
model the *structure* of data independently from its *implementation,* we made
|
||||
numpydantic as a bridge between abstract schemas and programmatic use.
|
||||
|
||||
## Challenges
|
||||
|
||||
The Python type annotation system is weird and not like the rest of Python!
|
||||
|
@ -17,10 +21,26 @@ together to make some kind of type validation frankenstein.
|
|||
|
||||
The first problem is that type annotations are evaluated statically by python, mypy,
|
||||
etc. This means you can't use typical python syntax for declaring types - it has to
|
||||
be present at the time `__new__` is called, rather than `__init__`.
|
||||
be present at the time `__new__` is called, rather than `__init__`. So
|
||||
|
||||
- pydantic schema
|
||||
- validation
|
||||
- serialization
|
||||
- lazy loading
|
||||
- compression
|
||||
Different implementations of arrays behave differently! HDF5 files need to be carefully
|
||||
opened and closed to avoid corruption, video files don't typically allow normal array
|
||||
slicing operations, and only some array libraries support lazy loading of arrays on disk.
|
||||
|
||||
We can't anticipate all the possible array libraries that exist now or in the future,
|
||||
so it has to be possible to extend support to them without needing to go through
|
||||
a potentially lengthy contribution process.
|
||||
|
||||
## Strategy
|
||||
|
||||
Numpydantic uses {class}`~numpydantic.NDArray` as an abstract specification of
|
||||
an array that uses one of several [interface](interfaces.md) classes to validate
|
||||
and interact with an array. These interface classes will set the instance attribute
|
||||
either as the passed array itself, or a transparent proxy class (e.g.
|
||||
{class}`~numpydantic.interface.hdf5.H5Proxy`) in the case that the native array format
|
||||
doesn't support numpy-like array operations out of the box.
|
||||
|
||||
- type hinting
|
||||
- nptyping syntax
|
||||
- not trying to be an array library
|
||||
- dtyping, mapping & schematization
|
|
@ -8,7 +8,9 @@ A python package for specifying, validating, and serializing arrays with arbitra
|
|||
|
||||
but ...
|
||||
|
||||
3) if you try and specify an array in pydantic, this happens:
|
||||
3) Typical type annotations would only work for a single array library implementation
|
||||
4) They wouldn't allow you to specify array shapes and dtypes, and
|
||||
5) If you try and specify an array in pydantic, this happens:
|
||||
|
||||
```python
|
||||
>>> from pydantic import BaseModel
|
||||
|
@ -22,8 +24,39 @@ Set `arbitrary_types_allowed=True` in the model_config to ignore this error
|
|||
or implement `__get_pydantic_core_schema__` on your type to fully support it.
|
||||
```
|
||||
|
||||
And setting `arbitrary_types_allowed = True` still prohibits you from
|
||||
generating JSON Schema or serializing to JSON.
|
||||
**Solution:**
|
||||
|
||||
Numpydantic allows you to do this:
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
from numpydantic import NDArray, Shape
|
||||
|
||||
class MyModel(BaseModel):
|
||||
array: NDArray[Shape["3 x, 4 y, * z"], int]
|
||||
```
|
||||
|
||||
And use it with your favorite array library:
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
import dask.array as da
|
||||
import zarr
|
||||
|
||||
# numpy
|
||||
model = MyModel(array=np.zeros((3, 4, 5), dtype=int))
|
||||
# dask
|
||||
model = MyModel(array=da.zeros((3, 4, 5), dtype=int))
|
||||
# hdf5 datasets
|
||||
model = MyModel(array=('data.h5', '/nested/dataset'))
|
||||
# zarr arrays
|
||||
model = MyModel(array=zarr.zeros((3,4,5), dtype=int))
|
||||
model = MyModel(array='data.zarr')
|
||||
model = MyModel(array=('data.zarr', '/nested/dataset'))
|
||||
# video files
|
||||
model = MyModel(array="data.mp4")
|
||||
```
|
||||
|
||||
|
||||
## Features:
|
||||
- **Types** - Annotations (based on [nptyping](https://github.com/ramonhagenaars/nptyping))
|
||||
|
@ -31,7 +64,9 @@ generating JSON Schema, serialization to JSON
|
|||
- **Validation** - Shape, dtype, and other array validations
|
||||
- **Interfaces** - Works with {mod}`~.interface.numpy`, {mod}`~.interface.dask`, {mod}`~.interface.hdf5`,
|
||||
{mod}`~.interface.video`, and {mod}`~.interface.zarr`,
|
||||
and a simple extension system to make it work with whatever else you want!
|
||||
and a simple extension system to make it work with whatever else you want! Provides
|
||||
a uniform and transparent interface so you can both use common indexing operations
|
||||
and also access any special features of a given array library.
|
||||
- **Serialization** - Dump an array as a JSON-compatible array-of-arrays with enough metadata to be able to
|
||||
recreate the model in the native format
|
||||
- **Schema Generation** - Correct JSON Schema for arrays, complete with shape and dtype constraints, to
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
# Interfaces
|
||||
|
||||
|
||||
```{mermaid}
|
||||
flowchart LR
|
||||
classDef data fill:#2b8cee,color:#ffffff;
|
||||
classDef X fill:transparent,border:none,color:#ff0000;
|
||||
|
||||
input
|
||||
|
||||
subgraph Interface
|
||||
match
|
||||
end
|
||||
|
||||
subgraph Numpy
|
||||
numpy_check["check"]
|
||||
end
|
||||
|
||||
subgraph Dask
|
||||
direction TB
|
||||
|
||||
dask_check["check"]
|
||||
|
||||
subgraph Validation
|
||||
direction TB
|
||||
|
||||
before_validation --> validate_dtype
|
||||
validate_dtype --> validate_shape
|
||||
validate_shape --> after_validation
|
||||
end
|
||||
|
||||
dask_check --> Validation
|
||||
|
||||
end
|
||||
|
||||
subgraph Zarr
|
||||
zarr_check["check"]
|
||||
end
|
||||
|
||||
subgraph Model
|
||||
output
|
||||
end
|
||||
|
||||
zarr_x["X"]
|
||||
numpy_x["X"]
|
||||
|
||||
input --> match
|
||||
match --> numpy_check
|
||||
match --> zarr_check
|
||||
match --> Dask
|
||||
zarr_check --> zarr_x
|
||||
numpy_check --> numpy_x
|
||||
|
||||
Validation --> Model
|
||||
|
||||
class input data
|
||||
class output data
|
||||
class zarr_x X
|
||||
class numpy_x X
|
||||
```
|
15
pdm.lock
15
pdm.lock
|
@ -5,7 +5,7 @@
|
|||
groups = ["default", "arrays", "dask", "dev", "docs", "hdf5", "tests", "video"]
|
||||
strategy = ["cross_platform", "inherit_metadata"]
|
||||
lock_version = "4.4.1"
|
||||
content_hash = "sha256:893fe47e35966aa6ed1564645326f6f67d1c64b984b5ea6f6b45f58b4fd732c2"
|
||||
content_hash = "sha256:1f5280f8be86c071c2692fa04f9d885eba84895275404db27f04f317ac5e6f2b"
|
||||
|
||||
[[package]]
|
||||
name = "alabaster"
|
||||
|
@ -792,7 +792,7 @@ name = "opencv-python"
|
|||
version = "4.9.0.80"
|
||||
requires_python = ">=3.6"
|
||||
summary = "Wrapper package for OpenCV python bindings."
|
||||
groups = ["video"]
|
||||
groups = ["arrays", "dev", "tests", "video"]
|
||||
dependencies = [
|
||||
"numpy>=1.17.0; python_version >= \"3.7\"",
|
||||
"numpy>=1.17.3; python_version >= \"3.8\"",
|
||||
|
@ -1293,6 +1293,17 @@ files = [
|
|||
{file = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sphinxcontrib-mermaid"
|
||||
version = "0.9.2"
|
||||
requires_python = ">=3.7"
|
||||
summary = "Mermaid diagrams in yours Sphinx powered docs"
|
||||
groups = ["dev", "docs"]
|
||||
files = [
|
||||
{file = "sphinxcontrib-mermaid-0.9.2.tar.gz", hash = "sha256:252ef13dd23164b28f16d8b0205cf184b9d8e2b714a302274d9f59eb708e77af"},
|
||||
{file = "sphinxcontrib_mermaid-0.9.2-py3-none-any.whl", hash = "sha256:6795a72037ca55e65663d2a2c1a043d636dc3d30d418e56dd6087d1459d98a5d"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sphinxcontrib-qthelp"
|
||||
version = "1.0.7"
|
||||
|
|
|
@ -45,7 +45,9 @@ docs = [
|
|||
"furo>=2024.1.29",
|
||||
"myst-parser<3.0.0,>=2.0.0",
|
||||
"autodoc-pydantic<3.0.0,>=2.0.1",
|
||||
"sphinx-design<1.0.0,>=0.5.0"]
|
||||
"sphinx-design<1.0.0,>=0.5.0",
|
||||
"sphinxcontrib-mermaid>=0.9.2",
|
||||
]
|
||||
dev = [
|
||||
"numpydantic[tests,docs]",
|
||||
"sphinx-autobuild>=2021.3.14",
|
||||
|
|
Loading…
Reference in a new issue