mirror of
https://github.com/p2p-ld/numpydantic.git
synced 2025-01-10 05:54:26 +00:00
restructuring docs, getting started on design but need 2 go home
This commit is contained in:
parent
927964d3ff
commit
5e3ad790d7
7 changed files with 37 additions and 144 deletions
|
@ -1 +1,7 @@
|
||||||
# DType
|
# dtype
|
||||||
|
|
||||||
|
```{eval-rst}
|
||||||
|
.. automodule:: numpydantic.dtype
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
```
|
7
docs/api/schema.md
Normal file
7
docs/api/schema.md
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
# schema
|
||||||
|
|
||||||
|
```{eval-rst}
|
||||||
|
.. automodule:: numpydantic.schema
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
```
|
|
@ -1 +1,7 @@
|
||||||
# Types
|
# types
|
||||||
|
|
||||||
|
```{eval-rst}
|
||||||
|
.. automodule:: numpydantic.types
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
```
|
|
@ -1,9 +1,18 @@
|
||||||
# Overview
|
# Design
|
||||||
|
|
||||||
|
## Why do this?
|
||||||
|
|
||||||
|
We want to bring the tidiness of modeling data with Pydantic to the universe of
|
||||||
|
software that uses arrays - particularly formats and packages that need to be very
|
||||||
|
particular about what *kind* of arrays they are able to handle, or that need to match a specific schema.
|
||||||
|
|
||||||
|
## Challenges
|
||||||
|
|
||||||
The Python type annotation system is weird and not like the rest of Python!
|
The Python type annotation system is weird and not like the rest of Python!
|
||||||
(at least until [PEP 0649](https://peps.python.org/pep-0649/) gets mainlined).
|
(at least until [PEP 0649](https://peps.python.org/pep-0649/) gets mainlined).
|
||||||
Similarly, Pydantic 2's core_schema system is wonderful but still relatively poorly
|
Similarly, Pydantic 2's core_schema system is wonderful but still has a few mysteries
|
||||||
documented for custom types! This package does the work of plugging them in
|
lurking under the documented surface.
|
||||||
|
This package does the work of plugging them in
|
||||||
together to make some kind of type validation frankenstein.
|
together to make some kind of type validation frankenstein.
|
||||||
|
|
||||||
The first problem is that type annotations are evaluated statically by python, mypy,
|
The first problem is that type annotations are evaluated statically by python, mypy,
|
|
@ -416,9 +416,8 @@ dumped = instance.model_dump_json(context={'zarr_dump_array': True})
|
||||||
:caption: Contents
|
:caption: Contents
|
||||||
:hidden: true
|
:hidden: true
|
||||||
|
|
||||||
overview
|
design
|
||||||
ndarray
|
interfaces
|
||||||
hooks
|
|
||||||
todo
|
todo
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -427,12 +426,13 @@ todo
|
||||||
:caption: API
|
:caption: API
|
||||||
:hidden: true
|
:hidden: true
|
||||||
|
|
||||||
api/interface/index
|
|
||||||
api/index
|
api/index
|
||||||
|
api/interface/index
|
||||||
api/dtype
|
api/dtype
|
||||||
api/ndarray
|
api/ndarray
|
||||||
api/maps
|
api/maps
|
||||||
api/monkeypatch
|
api/monkeypatch
|
||||||
|
api/schema
|
||||||
api/types
|
api/types
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
0
docs/interfaces.md
Normal file
0
docs/interfaces.md
Normal file
135
docs/ndarray.md
135
docs/ndarray.md
|
@ -1,135 +0,0 @@
|
||||||
# Constrained Arrays
|
|
||||||
|
|
||||||
## Implementation details
|
|
||||||
|
|
||||||
```{todo}
|
|
||||||
**Docs:**
|
|
||||||
|
|
||||||
Describe implementation details!
|
|
||||||
```
|
|
||||||
|
|
||||||
## Examples
|
|
||||||
|
|
||||||
### Declaration
|
|
||||||
|
|
||||||
Type with a single {class}`~numpydantic.NDArray` class, or use a {class}`~typing.Union`
|
|
||||||
to express more complex array constraints.
|
|
||||||
|
|
||||||
This package is effectively a Pydantic interface to [nptyping](https://github.com/ramonhagenaars/nptyping),
|
|
||||||
so any array syntax that is valid there is also valid here (see [TODO](todo) for caveats).
|
|
||||||
|
|
||||||
```python
|
|
||||||
from typing import Union
|
|
||||||
from pydantic import BaseModel
|
|
||||||
from src.numpydantic import NDArray, Shape, UInt8, Float, Int
|
|
||||||
|
|
||||||
|
|
||||||
class Image(BaseModel):
|
|
||||||
"""
|
|
||||||
Data values. Data can be in 1-D, 2-D, 3-D, or 4-D. The first dimension should always represent time. This can also be used to store binary data (e.g., image frames). This can also be a link to data stored in an external file.
|
|
||||||
"""
|
|
||||||
array: Union[
|
|
||||||
NDArray[Shape["* x, * y"], UInt8],
|
|
||||||
NDArray[Shape["* x, * y, 3 rgb"], UInt8],
|
|
||||||
NDArray[Shape["* x, * y, 4 rgba"], UInt8],
|
|
||||||
NDArray[Shape["* t, * x, * y, 3 rgb"], UInt8],
|
|
||||||
NDArray[Shape["* t, * x, * y, 4 rgba"], Float]
|
|
||||||
]
|
|
||||||
```
|
|
||||||
|
|
||||||
### Validation
|
|
||||||
|
|
||||||
```python
|
|
||||||
import numpy as np
|
|
||||||
# works
|
|
||||||
frame_gray = Image(array=np.ones((1280, 720), dtype=np.uint8))
|
|
||||||
frame_rgb = Image(array=np.ones((1280, 720, 3), dtype=np.uint8))
|
|
||||||
frame_rgba = Image(array=np.ones((1280, 720, 4), dtype=np.uint8))
|
|
||||||
video_rgb = Image(array=np.ones((100, 1280, 720, 3), dtype=np.uint8))
|
|
||||||
|
|
||||||
# fails
|
|
||||||
wrong_n_dimensions = Image(array=np.ones((1280,), dtype=np.uint8))
|
|
||||||
wrong_shape = Image(array=np.ones((1280,720,10), dtype=np.uint8))
|
|
||||||
wrong_type = Image(array=np.ones((1280,720,3), dtype=np.float64))
|
|
||||||
|
|
||||||
# shapes and types are checked together
|
|
||||||
float_video = Image(array=np.ones((100, 1280, 720, 4),dtype=float))
|
|
||||||
wrong_shape_float_video = Image(array=np.ones((100, 1280, 720, 3),dtype=float))
|
|
||||||
```
|
|
||||||
|
|
||||||
### JSON schema generation
|
|
||||||
|
|
||||||
```python
|
|
||||||
class MyArray(BaseModel):
|
|
||||||
array: NDArray[Shape["2 x, * y, 4 z"], Float]
|
|
||||||
```
|
|
||||||
|
|
||||||
```python
|
|
||||||
>>> print(json.dumps(MyArray.model_json_schema(), indent=2))
|
|
||||||
```
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"properties": {
|
|
||||||
"array": {
|
|
||||||
"items": {
|
|
||||||
"items": {
|
|
||||||
"items": {
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
"maxItems": 4,
|
|
||||||
"minItems": 4,
|
|
||||||
"type": "array"
|
|
||||||
},
|
|
||||||
"type": "array"
|
|
||||||
},
|
|
||||||
"maxItems": 2,
|
|
||||||
"minItems": 2,
|
|
||||||
"title": "Array",
|
|
||||||
"type": "array"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"required": [
|
|
||||||
"array"
|
|
||||||
],
|
|
||||||
"title": "MyArray",
|
|
||||||
"type": "object"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Serialization
|
|
||||||
|
|
||||||
```python
|
|
||||||
class SmolArray(BaseModel):
|
|
||||||
array: NDArray[Shape["2 x, 2 y"], Int]
|
|
||||||
|
|
||||||
class BigArray(BaseModel):
|
|
||||||
array: NDArray[Shape["1000 x, 1000 y"], Int]
|
|
||||||
```
|
|
||||||
|
|
||||||
Small arrays are serialized as lists of lists, and big arrays as a base64-encoded, blosc-compressed string.
|
|
||||||
|
|
||||||
```python
|
|
||||||
>>> smol = SmolArray(array=np.array([[1,2],[3,4]], dtype=int))
|
|
||||||
>>> big = BigArray(array=np.random.randint(0,255,(1000,1000),int))
|
|
||||||
|
|
||||||
>>> print(smol.model_dump_json())
|
|
||||||
{"array":[[1,2],[3,4]]}
|
|
||||||
>>> print(big.model_dump_json())
|
|
||||||
{
|
|
||||||
"array": "( long b64 encoded string )",
|
|
||||||
"shape": [1000, 1000],
|
|
||||||
"dtype": "int64",
|
|
||||||
"unpack_fns": ["base64.b64decode", "blosc2.unpack_array2"],
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## TODO
|
|
||||||
|
|
||||||
```{todo}
|
|
||||||
Implement structured arrays
|
|
||||||
```
|
|
||||||
|
|
||||||
```{todo}
|
|
||||||
Implement pandas dataframe validation?
|
|
||||||
```
|
|
Loading…
Reference in a new issue