mirror of
https://github.com/p2p-ld/numpydantic.git
synced 2024-11-12 17:54:29 +00:00
restructuring docs, getting started on design but need 2 go home
This commit is contained in:
parent
927964d3ff
commit
5e3ad790d7
7 changed files with 37 additions and 144 deletions
|
@ -1 +1,7 @@
|
|||
# DType
|
||||
# dtype
|
||||
|
||||
```{eval-rst}
|
||||
.. automodule:: numpydantic.dtype
|
||||
:members:
|
||||
:undoc-members:
|
||||
```
|
7
docs/api/schema.md
Normal file
7
docs/api/schema.md
Normal file
|
@ -0,0 +1,7 @@
|
|||
# schema
|
||||
|
||||
```{eval-rst}
|
||||
.. automodule:: numpydantic.schema
|
||||
:members:
|
||||
:undoc-members:
|
||||
```
|
|
@ -1 +1,7 @@
|
|||
# Types
|
||||
# types
|
||||
|
||||
```{eval-rst}
|
||||
.. automodule:: numpydantic.types
|
||||
:members:
|
||||
:undoc-members:
|
||||
```
|
|
@ -1,9 +1,18 @@
|
|||
# Overview
|
||||
# Design
|
||||
|
||||
## Why do this?
|
||||
|
||||
We want to bring the tidiness of modeling data with pydantic to the universe of
|
||||
software that uses arrays - particularly formats and packages that need to be very
|
||||
particular about what *kind* of arrays they are able to handle or match a specific schema.
|
||||
|
||||
## Challenges
|
||||
|
||||
The Python type annotation system is weird and not like the rest of Python!
|
||||
(at least until [PEP 0649](https://peps.python.org/pep-0649/) gets mainlined).
|
||||
Similarly, Pydantic 2's core_schema system is wonderful but still relatively poorly
|
||||
documented for custom types! This package does the work of plugging them in
|
||||
Similarly, Pydantic 2's core_schema system is wonderful but still has a few mysteries
|
||||
lurking under the documented surface.
|
||||
This package does the work of plugging them
|
||||
together to make some kind of type validation frankenstein.
|
||||
|
||||
The first problem is that type annotations are evaluated statically by python, mypy,
|
|
@ -416,9 +416,8 @@ dumped = instance.model_dump_json(context={'zarr_dump_array': True})
|
|||
:caption: Contents
|
||||
:hidden: true
|
||||
|
||||
overview
|
||||
ndarray
|
||||
hooks
|
||||
design
|
||||
interfaces
|
||||
todo
|
||||
```
|
||||
|
||||
|
@ -427,12 +426,13 @@ todo
|
|||
:caption: API
|
||||
:hidden: true
|
||||
|
||||
api/interface/index
|
||||
api/index
|
||||
api/interface/index
|
||||
api/dtype
|
||||
api/ndarray
|
||||
api/maps
|
||||
api/monkeypatch
|
||||
api/schema
|
||||
api/types
|
||||
|
||||
```
|
||||
|
|
0
docs/interfaces.md
Normal file
0
docs/interfaces.md
Normal file
135
docs/ndarray.md
135
docs/ndarray.md
|
@ -1,135 +0,0 @@
|
|||
# Constrained Arrays
|
||||
|
||||
## Implementation details
|
||||
|
||||
```{todo}
|
||||
**Docs:**
|
||||
|
||||
Describe implementation details!
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
### Declaration
|
||||
|
||||
Type with a single {class}`~numpydantic.NDArray` class, or use a {class}`~typing.Union`
|
||||
to express more complex array constraints.
|
||||
|
||||
This package is effectively a Pydantic interface to [nptyping](https://github.com/ramonhagenaars/nptyping),
|
||||
so any array syntax that is valid there is valid here. (See [TODO](todo) for caveats.)
|
||||
|
||||
```python
|
||||
from typing import Union
|
||||
from pydantic import BaseModel
|
||||
from numpydantic import NDArray, Shape, UInt8, Float, Int
|
||||
|
||||
|
||||
class Image(BaseModel):
|
||||
"""
|
||||
Data values. Data can be in 1-D, 2-D, 3-D, or 4-D. The first dimension should always represent time. This can also be used to store binary data (e.g., image frames). This can also be a link to data stored in an external file.
|
||||
"""
|
||||
array: Union[
|
||||
NDArray[Shape["* x, * y"], UInt8],
|
||||
NDArray[Shape["* x, * y, 3 rgb"], UInt8],
|
||||
NDArray[Shape["* x, * y, 4 rgba"], UInt8],
|
||||
NDArray[Shape["* t, * x, * y, 3 rgb"], UInt8],
|
||||
NDArray[Shape["* t, * x, * y, 4 rgba"], Float]
|
||||
]
|
||||
```
|
||||
|
||||
### Validation
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
# works
|
||||
frame_gray = Image(array=np.ones((1280, 720), dtype=np.uint8))
|
||||
frame_rgb = Image(array=np.ones((1280, 720, 3), dtype=np.uint8))
|
||||
frame_rgba = Image(array=np.ones((1280, 720, 4), dtype=np.uint8))
|
||||
video_rgb = Image(array=np.ones((100, 1280, 720, 3), dtype=np.uint8))
|
||||
|
||||
# fails
|
||||
wrong_n_dimensions = Image(array=np.ones((1280,), dtype=np.uint8))
|
||||
wrong_shape = Image(array=np.ones((1280,720,10), dtype=np.uint8))
|
||||
wrong_type = Image(array=np.ones((1280,720,3), dtype=np.float64))
|
||||
|
||||
# shapes and types are checked together
|
||||
float_video = Image(array=np.ones((100, 1280, 720, 4),dtype=float))
|
||||
wrong_shape_float_video = Image(array=np.ones((100, 1280, 720, 3),dtype=float))
|
||||
```
|
||||
|
||||
### JSON schema generation
|
||||
|
||||
```python
|
||||
class MyArray(BaseModel):
|
||||
array: NDArray[Shape["2 x, * y, 4 z"], Float]
|
||||
```
|
||||
|
||||
```python
|
||||
>>> print(json.dumps(MyArray.model_json_schema(), indent=2))
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"properties": {
|
||||
"array": {
|
||||
"items": {
|
||||
"items": {
|
||||
"items": {
|
||||
"type": "number"
|
||||
},
|
||||
"maxItems": 4,
|
||||
"minItems": 4,
|
||||
"type": "array"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"maxItems": 2,
|
||||
"minItems": 2,
|
||||
"title": "Array",
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"array"
|
||||
],
|
||||
"title": "MyArray",
|
||||
"type": "object"
|
||||
}
|
||||
```
|
||||
|
||||
### Serialization
|
||||
|
||||
```python
|
||||
class SmolArray(BaseModel):
|
||||
array: NDArray[Shape["2 x, 2 y"], Int]
|
||||
|
||||
class BigArray(BaseModel):
|
||||
array: NDArray[Shape["1000 x, 1000 y"], Int]
|
||||
```
|
||||
|
||||
Small arrays are serialized as lists of lists, and big arrays as a b64-encoded, blosc-compressed string.
|
||||
|
||||
```python
|
||||
>>> smol = SmolArray(array=np.array([[1,2],[3,4]], dtype=int))
|
||||
>>> big = BigArray(array=np.random.randint(0,255,(1000,1000),int))
|
||||
|
||||
>>> print(smol.model_dump_json())
|
||||
{"array":[[1,2],[3,4]]}
|
||||
>>> print(big.model_dump_json())
|
||||
{
|
||||
"array": "( long b64 encoded string )",
|
||||
"shape": [1000, 1000],
|
||||
"dtype": "int64",
|
||||
"unpack_fns": ["base64.b64decode", "blosc2.unpack_array2"]
|
||||
}
|
||||
```
|
||||
|
||||
## TODO
|
||||
|
||||
```{todo}
|
||||
Implement structured arrays
|
||||
```
|
||||
|
||||
```{todo}
|
||||
Implement pandas dataframe validation?
|
||||
```
|
Loading…
Reference in a new issue