mirror of
https://github.com/p2p-ld/numpydantic.git
synced 2025-01-10 05:54:26 +00:00
continuing on docs, adding interfaces diagram
This commit is contained in:
parent
5e3ad790d7
commit
0937fd7c0d
6 changed files with 142 additions and 13 deletions
|
@ -24,6 +24,7 @@ extensions = [
|
||||||
"sphinx.ext.viewcode",
|
"sphinx.ext.viewcode",
|
||||||
"sphinx.ext.doctest",
|
"sphinx.ext.doctest",
|
||||||
"sphinx_design",
|
"sphinx_design",
|
||||||
|
"sphinxcontrib.mermaid",
|
||||||
"myst_parser",
|
"myst_parser",
|
||||||
"sphinx.ext.todo",
|
"sphinx.ext.todo",
|
||||||
]
|
]
|
||||||
|
|
|
@ -6,6 +6,10 @@ We want to bring the tidiness of modeling data with pydantic to the universe of
|
||||||
software that uses arrays - particularly formats and packages that need to be very
|
software that uses arrays - particularly formats and packages that need to be very
|
||||||
particular about what *kind* of arrays they are able to handle or match a specific schema.
|
particular about what *kind* of arrays they are able to handle or match a specific schema.
|
||||||
|
|
||||||
|
To support a new generation of data formats and data analysis libraries that can
|
||||||
|
model the *structure* of data independently from its *implementation,* we made
|
||||||
|
numpydantic as a bridge between abstract schemas and programmatic use.
|
||||||
|
|
||||||
## Challenges
|
## Challenges
|
||||||
|
|
||||||
The Python type annotation system is weird and not like the rest of Python!
|
The Python type annotation system is weird and not like the rest of Python!
|
||||||
|
@ -17,10 +21,26 @@ together to make some kind of type validation frankenstein.
|
||||||
|
|
||||||
The first problem is that type annotations are evaluated statically by python, mypy,
|
The first problem is that type annotations are evaluated statically by python, mypy,
|
||||||
etc. This means you can't use typical python syntax for declaring types - it has to
|
etc. This means you can't use typical python syntax for declaring types - it has to
|
||||||
be present at the time `__new__` is called, rather than `__init__`.
|
be present at the time `__new__` is called, rather than `__init__`.
|
||||||
|
|
||||||
- pydantic schema
|
Different implementations of arrays behave differently! HDF5 files need to be carefully
|
||||||
- validation
|
opened and closed to avoid corruption, video files don't typically allow normal array
|
||||||
- serialization
|
slicing operations, and only some array libraries support lazy loading of arrays on disk.
|
||||||
- lazy loading
|
|
||||||
- compression
|
We can't anticipate all the possible array libraries that exist now or in the future,
|
||||||
|
so it has to be possible to extend support to them without needing to go through
|
||||||
|
a potentially lengthy contribution process.
|
||||||
|
|
||||||
|
## Strategy
|
||||||
|
|
||||||
|
Numpydantic uses {class}`~numpydantic.NDArray` as an abstract specification of
|
||||||
|
an array that uses one of several [interface](interfaces.md) classes to validate
|
||||||
|
and interact with an array. These interface classes will set the instance attribute
|
||||||
|
either as the passed array itself, or a transparent proxy class (e.g.
|
||||||
|
{class}`~numpydantic.interface.hdf5.H5Proxy`) in the case that the native array format
|
||||||
|
doesn't support numpy-like array operations out of the box.
|
||||||
|
|
||||||
|
- type hinting
|
||||||
|
- nptyping syntax
|
||||||
|
- not trying to be an array library
|
||||||
|
- dtyping, mapping & schematization
|
|
@ -8,7 +8,9 @@ A python package for specifying, validating, and serializing arrays with arbitra
|
||||||
|
|
||||||
but ...
|
but ...
|
||||||
|
|
||||||
3) if you try and specify an array in pydantic, this happens:
|
3) Typical type annotations would only work for a single array library implementation
|
||||||
|
4) They wouldn't allow you to specify array shapes and dtypes, and
|
||||||
|
5) If you try and specify an array in pydantic, this happens:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
>>> from pydantic import BaseModel
|
>>> from pydantic import BaseModel
|
||||||
|
@ -22,8 +24,39 @@ Set `arbitrary_types_allowed=True` in the model_config to ignore this error
|
||||||
or implement `__get_pydantic_core_schema__` on your type to fully support it.
|
or implement `__get_pydantic_core_schema__` on your type to fully support it.
|
||||||
```
|
```
|
||||||
|
|
||||||
And setting `arbitrary_types_allowed = True` still prohibits you from
|
**Solution:**
|
||||||
generating JSON Schema, serialization to JSON
|
|
||||||
|
Numpydantic allows you to do this:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from numpydantic import NDArray, Shape
|
||||||
|
|
||||||
|
class MyModel(BaseModel):
|
||||||
|
array: NDArray[Shape["3 x, 4 y, * z"], int]
|
||||||
|
```
|
||||||
|
|
||||||
|
And use it with your favorite array library:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import numpy as np
|
||||||
|
import dask.array as da
|
||||||
|
import zarr
|
||||||
|
|
||||||
|
# numpy
|
||||||
|
model = MyModel(array=np.zeros((3, 4, 5), dtype=int))
|
||||||
|
# dask
|
||||||
|
model = MyModel(array=da.zeros((3, 4, 5), dtype=int))
|
||||||
|
# hdf5 datasets
|
||||||
|
model = MyModel(array=('data.h5', '/nested/dataset'))
|
||||||
|
# zarr arrays
|
||||||
|
model = MyModel(array=zarr.zeros((3,4,5), dtype=int))
|
||||||
|
model = MyModel(array='data.zarr')
|
||||||
|
model = MyModel(array=('data.zarr', '/nested/dataset'))
|
||||||
|
# video files
|
||||||
|
model = MyModel(array="data.mp4")
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
## Features:
|
## Features:
|
||||||
- **Types** - Annotations (based on [nptyping](https://github.com/ramonhagenaars/nptyping))
|
- **Types** - Annotations (based on [nptyping](https://github.com/ramonhagenaars/nptyping))
|
||||||
|
@ -31,7 +64,9 @@ generating JSON Schema, serialization to JSON
|
||||||
- **Validation** - Shape, dtype, and other array validations
|
- **Validation** - Shape, dtype, and other array validations
|
||||||
- **Interfaces** - Works with {mod}`~.interface.numpy`, {mod}`~.interface.dask`, {mod}`~.interface.hdf5`,
|
- **Interfaces** - Works with {mod}`~.interface.numpy`, {mod}`~.interface.dask`, {mod}`~.interface.hdf5`,
|
||||||
{mod}`~.interface.video`, and {mod}`~.interface.zarr`,
|
{mod}`~.interface.video`, and {mod}`~.interface.zarr`,
|
||||||
and a simple extension system to make it work with whatever else you want!
|
and a simple extension system to make it work with whatever else you want! Provides
|
||||||
|
a uniform and transparent interface so you can both use common indexing operations
|
||||||
|
and also access any special features of a given array library.
|
||||||
- **Serialization** - Dump an array as a JSON-compatible array-of-arrays with enough metadata to be able to
|
- **Serialization** - Dump an array as a JSON-compatible array-of-arrays with enough metadata to be able to
|
||||||
recreate the model in the native format
|
recreate the model in the native format
|
||||||
- **Schema Generation** - Correct JSON Schema for arrays, complete with shape and dtype constraints, to
|
- **Schema Generation** - Correct JSON Schema for arrays, complete with shape and dtype constraints, to
|
||||||
|
|
|
@ -0,0 +1,60 @@
|
||||||
|
# Interfaces
|
||||||
|
|
||||||
|
|
||||||
|
```{mermaid}
|
||||||
|
flowchart LR
|
||||||
|
classDef data fill:#2b8cee,color:#ffffff;
|
||||||
|
classDef X fill:transparent,border:none,color:#ff0000;
|
||||||
|
|
||||||
|
input
|
||||||
|
|
||||||
|
subgraph Interface
|
||||||
|
match
|
||||||
|
end
|
||||||
|
|
||||||
|
subgraph Numpy
|
||||||
|
numpy_check["check"]
|
||||||
|
end
|
||||||
|
|
||||||
|
subgraph Dask
|
||||||
|
direction TB
|
||||||
|
|
||||||
|
dask_check["check"]
|
||||||
|
|
||||||
|
subgraph Validation
|
||||||
|
direction TB
|
||||||
|
|
||||||
|
before_validation --> validate_dtype
|
||||||
|
validate_dtype --> validate_shape
|
||||||
|
validate_shape --> after_validation
|
||||||
|
end
|
||||||
|
|
||||||
|
dask_check --> Validation
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
subgraph Zarr
|
||||||
|
zarr_check["check"]
|
||||||
|
end
|
||||||
|
|
||||||
|
subgraph Model
|
||||||
|
output
|
||||||
|
end
|
||||||
|
|
||||||
|
zarr_x["X"]
|
||||||
|
numpy_x["X"]
|
||||||
|
|
||||||
|
input --> match
|
||||||
|
match --> numpy_check
|
||||||
|
match --> zarr_check
|
||||||
|
match --> Dask
|
||||||
|
zarr_check --> zarr_x
|
||||||
|
numpy_check --> numpy_x
|
||||||
|
|
||||||
|
Validation --> Model
|
||||||
|
|
||||||
|
class input data
|
||||||
|
class output data
|
||||||
|
class zarr_x X
|
||||||
|
class numpy_x X
|
||||||
|
```
|
15
pdm.lock
15
pdm.lock
|
@ -5,7 +5,7 @@
|
||||||
groups = ["default", "arrays", "dask", "dev", "docs", "hdf5", "tests", "video"]
|
groups = ["default", "arrays", "dask", "dev", "docs", "hdf5", "tests", "video"]
|
||||||
strategy = ["cross_platform", "inherit_metadata"]
|
strategy = ["cross_platform", "inherit_metadata"]
|
||||||
lock_version = "4.4.1"
|
lock_version = "4.4.1"
|
||||||
content_hash = "sha256:893fe47e35966aa6ed1564645326f6f67d1c64b984b5ea6f6b45f58b4fd732c2"
|
content_hash = "sha256:1f5280f8be86c071c2692fa04f9d885eba84895275404db27f04f317ac5e6f2b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "alabaster"
|
name = "alabaster"
|
||||||
|
@ -792,7 +792,7 @@ name = "opencv-python"
|
||||||
version = "4.9.0.80"
|
version = "4.9.0.80"
|
||||||
requires_python = ">=3.6"
|
requires_python = ">=3.6"
|
||||||
summary = "Wrapper package for OpenCV python bindings."
|
summary = "Wrapper package for OpenCV python bindings."
|
||||||
groups = ["video"]
|
groups = ["arrays", "dev", "tests", "video"]
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"numpy>=1.17.0; python_version >= \"3.7\"",
|
"numpy>=1.17.0; python_version >= \"3.7\"",
|
||||||
"numpy>=1.17.3; python_version >= \"3.8\"",
|
"numpy>=1.17.3; python_version >= \"3.8\"",
|
||||||
|
@ -1293,6 +1293,17 @@ files = [
|
||||||
{file = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"},
|
{file = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "sphinxcontrib-mermaid"
|
||||||
|
version = "0.9.2"
|
||||||
|
requires_python = ">=3.7"
|
||||||
|
summary = "Mermaid diagrams in yours Sphinx powered docs"
|
||||||
|
groups = ["dev", "docs"]
|
||||||
|
files = [
|
||||||
|
{file = "sphinxcontrib-mermaid-0.9.2.tar.gz", hash = "sha256:252ef13dd23164b28f16d8b0205cf184b9d8e2b714a302274d9f59eb708e77af"},
|
||||||
|
{file = "sphinxcontrib_mermaid-0.9.2-py3-none-any.whl", hash = "sha256:6795a72037ca55e65663d2a2c1a043d636dc3d30d418e56dd6087d1459d98a5d"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sphinxcontrib-qthelp"
|
name = "sphinxcontrib-qthelp"
|
||||||
version = "1.0.7"
|
version = "1.0.7"
|
||||||
|
|
|
@ -45,7 +45,9 @@ docs = [
|
||||||
"furo>=2024.1.29",
|
"furo>=2024.1.29",
|
||||||
"myst-parser<3.0.0,>=2.0.0",
|
"myst-parser<3.0.0,>=2.0.0",
|
||||||
"autodoc-pydantic<3.0.0,>=2.0.1",
|
"autodoc-pydantic<3.0.0,>=2.0.1",
|
||||||
"sphinx-design<1.0.0,>=0.5.0"]
|
"sphinx-design<1.0.0,>=0.5.0",
|
||||||
|
"sphinxcontrib-mermaid>=0.9.2",
|
||||||
|
]
|
||||||
dev = [
|
dev = [
|
||||||
"numpydantic[tests,docs]",
|
"numpydantic[tests,docs]",
|
||||||
"sphinx-autobuild>=2021.3.14",
|
"sphinx-autobuild>=2021.3.14",
|
||||||
|
|
Loading…
Reference in a new issue