mirror of
https://github.com/p2p-ld/numpydantic.git
synced 2024-11-10 00:34:29 +00:00
docs! add recursive any shaped arrays!
This commit is contained in:
parent
ce74a0482a
commit
a4d82f0879
9 changed files with 378 additions and 54 deletions
|
@ -9,5 +9,6 @@
|
||||||
dask
|
dask
|
||||||
hdf5
|
hdf5
|
||||||
numpy
|
numpy
|
||||||
|
video
|
||||||
zarr
|
zarr
|
||||||
```
|
```
|
6
docs/api/interface/video.md
Normal file
6
docs/api/interface/video.md
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
# Video
|
||||||
|
|
||||||
|
```{eval-rst}
|
||||||
|
.. automodule:: numpydantic.interface.video
|
||||||
|
:members:
|
||||||
|
```
|
|
@ -6,10 +6,12 @@
|
||||||
# -- Project information -----------------------------------------------------
|
# -- Project information -----------------------------------------------------
|
||||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
|
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
|
||||||
|
|
||||||
|
import importlib.metadata as metadata
|
||||||
|
|
||||||
project = "numpydantic"
|
project = "numpydantic"
|
||||||
copyright = "2024, Jonny Saunders"
|
copyright = "2024, Jonny Saunders"
|
||||||
author = "Jonny Saunders"
|
author = "Jonny Saunders"
|
||||||
release = "v0.0.0"
|
release = metadata.version("numpydantic")
|
||||||
|
|
||||||
# -- General configuration ---------------------------------------------------
|
# -- General configuration ---------------------------------------------------
|
||||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
||||||
|
|
284
docs/index.md
284
docs/index.md
|
@ -25,12 +25,12 @@ or implement `__get_pydantic_core_schema__` on your type to fully support it.
|
||||||
And setting `arbitrary_types_allowed = True` still prohibits you from
|
And setting `arbitrary_types_allowed = True` still prohibits you from
|
||||||
generating JSON Schema, serialization to JSON
|
generating JSON Schema, serialization to JSON
|
||||||
|
|
||||||
|
|
||||||
## Features:
|
## Features:
|
||||||
- **Types** - Annotations (based on [nptyping](https://github.com/ramonhagenaars/nptyping))
|
- **Types** - Annotations (based on [nptyping](https://github.com/ramonhagenaars/nptyping))
|
||||||
for specifying arrays in pydantic models
|
for specifying arrays in pydantic models
|
||||||
- **Validation** - Shape, dtype, and other array validations
|
- **Validation** - Shape, dtype, and other array validations
|
||||||
- **Interfaces** - Works with {mod}`~.interface.numpy`, {mod}`~.interface.dask`, {mod}`~.interface.hdf5`, {mod}`~.interface.zarr`,
|
- **Interfaces** - Works with {mod}`~.interface.numpy`, {mod}`~.interface.dask`, {mod}`~.interface.hdf5`,
|
||||||
|
{mod}`~.interface.video`, and {mod}`~.interface.zarr`,
|
||||||
and a simple extension system to make it work with whatever else you want!
|
and a simple extension system to make it work with whatever else you want!
|
||||||
- **Serialization** - Dump an array as a JSON-compatible array-of-arrays with enough metadata to be able to
|
- **Serialization** - Dump an array as a JSON-compatible array-of-arrays with enough metadata to be able to
|
||||||
recreate the model in the native format
|
recreate the model in the native format
|
||||||
|
@ -47,6 +47,26 @@ Coming soon:
|
||||||
minimum and maximum precision ranges, and so on as type maps provided by interface classes :)
|
minimum and maximum precision ranges, and so on as type maps provided by interface classes :)
|
||||||
- (see [todo](./todo.md))
|
- (see [todo](./todo.md))
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
numpydantic tries to keep dependencies minimal, so by default it only comes with
|
||||||
|
dependencies to use the numpy interface. Add the extra relevant to your favorite
|
||||||
|
array library to be able to use it!
|
||||||
|
|
||||||
|
```shell
|
||||||
|
pip install numpydantic
|
||||||
|
# dask
|
||||||
|
pip install 'numpydantic[dask]'
|
||||||
|
# hdf5
|
||||||
|
pip install 'numpydantic[hdf5]'
|
||||||
|
# video
|
||||||
|
pip install 'numpydantic[video]'
|
||||||
|
# zarr
|
||||||
|
pip install 'numpydantic[zarr]'
|
||||||
|
# all array formats
|
||||||
|
pip install 'numpydantic[arrays]'
|
||||||
|
```
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
Specify an array using [nptyping syntax](https://github.com/ramonhagenaars/nptyping/blob/master/USERDOCS.md)
|
Specify an array using [nptyping syntax](https://github.com/ramonhagenaars/nptyping/blob/master/USERDOCS.md)
|
||||||
|
@ -55,7 +75,10 @@ and use it with your favorite array library :)
|
||||||
Use the {class}`~numpydantic.NDArray` class like you would any other python type,
|
Use the {class}`~numpydantic.NDArray` class like you would any other python type,
|
||||||
combine it with {class}`typing.Union`, make it {class}`~typing.Optional`, etc.
|
combine it with {class}`typing.Union`, make it {class}`~typing.Optional`, etc.
|
||||||
|
|
||||||
For example, to support a
|
For example, to specify a very special type of image that can either be
|
||||||
|
- a 2D float array where the axes can be any size, or
|
||||||
|
- a 3D uint8 array where the third axis must be size 3
|
||||||
|
- a 1080p video
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from typing import Union
|
from typing import Union
|
||||||
|
@ -65,43 +88,36 @@ import numpy as np
|
||||||
from numpydantic import NDArray, Shape
|
from numpydantic import NDArray, Shape
|
||||||
|
|
||||||
class Image(BaseModel):
|
class Image(BaseModel):
|
||||||
"""
|
|
||||||
Images: grayscale, RGB, RGBA, and videos too!
|
|
||||||
"""
|
|
||||||
array: Union[
|
array: Union[
|
||||||
NDArray[Shape["* x, * y"], np.uint8],
|
NDArray[Shape["* x, * y"], float],
|
||||||
NDArray[Shape["* x, * y, 3 rgb"], np.uint8],
|
NDArray[Shape["* x, * y, 3 rgb"], np.uint8],
|
||||||
NDArray[Shape["* t, * x, * y, 4 rgba"], np.float64]
|
NDArray[Shape["* t, 1080 y, 1920 x, 3 rgb"], np.uint8]
|
||||||
]
|
]
|
||||||
```
|
```
|
||||||
|
|
||||||
And then use that as a transparent interface to your favorite array library!
|
And then use that as a transparent interface to your favorite array library!
|
||||||
|
|
||||||
### Numpy
|
### Interfaces
|
||||||
|
|
||||||
|
#### Numpy
|
||||||
|
|
||||||
The Coca-Cola of array libraries
|
The Coca-Cola of array libraries
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import numpy as np
|
import numpy as np
|
||||||
# works
|
# works
|
||||||
frame_gray = Image(array=np.ones((1280, 720), dtype=np.uint8))
|
frame_gray = Image(array=np.ones((1280, 720), dtype=float))
|
||||||
frame_rgb = Image(array=np.ones((1280, 720, 3), dtype=np.uint8))
|
frame_rgb = Image(array=np.ones((1280, 720, 3), dtype=np.uint8))
|
||||||
frame_rgba = Image(array=np.ones((1280, 720, 4), dtype=np.uint8))
|
|
||||||
video_rgb = Image(array=np.ones((100, 1280, 720, 3), dtype=np.uint8))
|
|
||||||
|
|
||||||
# fails
|
# fails
|
||||||
wrong_n_dimensions = Image(array=np.ones((1280,), dtype=np.uint8))
|
wrong_n_dimensions = Image(array=np.ones((1280,), dtype=float))
|
||||||
wrong_shape = Image(array=np.ones((1280,720,10), dtype=np.uint8))
|
wrong_shape = Image(array=np.ones((1280,720,10), dtype=np.uint8))
|
||||||
wrong_type = Image(array=np.ones((1280,720,3), dtype=np.float64))
|
|
||||||
|
|
||||||
# shapes and types are checked together, so..
|
# shapes and types are checked together, so this also fails
|
||||||
# this works
|
wrong_shape_dtype_combo = Image(array=np.ones((1280, 720, 3), dtype=float))
|
||||||
float_video = Image(array=np.ones((100, 1280, 720, 4), dtype=float))
|
|
||||||
# this doesn't
|
|
||||||
wrong_shape_float_video = Image(array=np.ones((100, 1280, 720, 3), dtype=float))
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Dask
|
#### Dask
|
||||||
|
|
||||||
High performance chunked arrays! The backend for many new array libraries!
|
High performance chunked arrays! The backend for many new array libraries!
|
||||||
|
|
||||||
|
@ -110,14 +126,12 @@ Works exactly the same as numpy arrays
|
||||||
```python
|
```python
|
||||||
import dask.array as da
|
import dask.array as da
|
||||||
|
|
||||||
# validate a huge video
|
# validate a humongous image without having to load it into memory
|
||||||
video_array = da.zeros(shape=(1920,1080,1000000,3), dtype=np.uint8)
|
video_array = da.zeros(shape=(1e10,1e20,3), dtype=np.uint8)
|
||||||
|
|
||||||
# this works
|
|
||||||
dask_video = Image(array=video_array)
|
dask_video = Image(array=video_array)
|
||||||
```
|
```
|
||||||
|
|
||||||
### HDF5
|
#### HDF5
|
||||||
|
|
||||||
Array work increasingly can't fit in memory, but dealing with arrays on disk
|
Array work increasingly can't fit in memory, but dealing with arrays on disk
|
||||||
can become a pain in concurrent applications. Numpydantic allows you to
|
can become a pain in concurrent applications. Numpydantic allows you to
|
||||||
|
@ -136,7 +150,7 @@ array_path = "/nested/array"
|
||||||
|
|
||||||
# make an HDF5 array
|
# make an HDF5 array
|
||||||
h5f = h5py.File(h5f_file, "w")
|
h5f = h5py.File(h5f_file, "w")
|
||||||
array = np.random.random((1920,1080,3)).astype(np.uint8)
|
array = np.random.randint(0, 255, (1920,1080,3), np.uint8)
|
||||||
h5f.create_dataset(array_path, data=array)
|
h5f.create_dataset(array_path, data=array)
|
||||||
h5f.close()
|
h5f.close()
|
||||||
```
|
```
|
||||||
|
@ -172,17 +186,229 @@ object and leave the file open between calls:
|
||||||
>>> h5f_image.array.close()
|
>>> h5f_image.array.close()
|
||||||
```
|
```
|
||||||
|
|
||||||
### Zarr
|
#### Video
|
||||||
|
|
||||||
|
Videos are just arrays with fancy encoding! Numpydantic can validate shape and dtype
|
||||||
|
as well as lazy load chunks of frames with arraylike syntax!
|
||||||
|
|
||||||
|
Say we have some video `data.mp4` ...
|
||||||
|
|
||||||
|
```python
|
||||||
|
video = Image(array='data.mp4')
|
||||||
|
# get a single frame
|
||||||
|
video.array[5]
|
||||||
|
# or a range of frames!
|
||||||
|
video.array[5:10]
|
||||||
|
# or whatever slicing you want to do!
|
||||||
|
video.array[5:50:5, 0:10, 50:70]
|
||||||
|
```
|
||||||
|
|
||||||
|
As elsewhere, a proxy class is a transparent pass-through interface to the underlying
|
||||||
|
opencv class, so we can get the rest of the video properties ...
|
||||||
|
|
||||||
|
```python
|
||||||
|
import cv2
|
||||||
|
|
||||||
|
# get the total frames from opencv
|
||||||
|
video.array.get(cv2.CAP_PROP_FRAME_COUNT)
|
||||||
|
# the proxy class also provides a convenience property
|
||||||
|
video.array.n_frames
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Zarr
|
||||||
|
|
||||||
Zarr works similarly!
|
Zarr works similarly!
|
||||||
|
|
||||||
Use it with any of Zarr's backends: Nested, Zipfile, S3, it's all the same!
|
Use it with any of Zarr's backends: Nested, Zipfile, S3, it's all the same!
|
||||||
|
|
||||||
```{todo}
|
Eg. create a nested zarr array on disk and use it...
|
||||||
Add the zarr examples!
|
|
||||||
|
```python
|
||||||
|
import zarr
|
||||||
|
from numpydantic.interface.zarr import ZarrArrayPath
|
||||||
|
|
||||||
|
array_file = 'data/array.zarr'
|
||||||
|
nested_path = 'data/sets/here'
|
||||||
|
|
||||||
|
root = zarr.open(array_file, mode='w')
|
||||||
|
nested_array = root.zeros(
|
||||||
|
nested_path,
|
||||||
|
shape=(1000, 1080, 1920, 3),
|
||||||
|
dtype=np.uint8
|
||||||
|
)
|
||||||
|
|
||||||
|
# validates just fine!
|
||||||
|
zarr_video = Image(array=ZarrArrayPath(array_file, nested_path))
|
||||||
|
# or just pass a tuple, the interface can discover it's a zarr array
|
||||||
|
zarr_video = Image(array=(array_file, nested_path))
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### JSON Schema
|
||||||
|
|
||||||
|
Numpydantic generates JSON Schema for all its array specifications, so for the above
|
||||||
|
model, we get a schema for each of the possible array types that properly handles
|
||||||
|
the shape and dtype constraints and includes the origin numpy type as a `dtype` annotation.
|
||||||
|
|
||||||
|
```python
|
||||||
|
Image.model_json_schema()
|
||||||
|
```
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"properties": {
|
||||||
|
"array": {
|
||||||
|
"anyOf": [
|
||||||
|
{
|
||||||
|
"items": {"items": {"type": "number"}, "type": "array"},
|
||||||
|
"type": "array"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dtype": "numpy.uint8",
|
||||||
|
"items": {
|
||||||
|
"items": {
|
||||||
|
"items": {
|
||||||
|
"maximum": 255,
|
||||||
|
"minimum": 0,
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"maxItems": 3,
|
||||||
|
"minItems": 3,
|
||||||
|
"type": "array"
|
||||||
|
},
|
||||||
|
"type": "array"
|
||||||
|
},
|
||||||
|
"type": "array"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dtype": "numpy.uint8",
|
||||||
|
"items": {
|
||||||
|
"items": {
|
||||||
|
"items": {
|
||||||
|
"items": {
|
||||||
|
"maximum": 255,
|
||||||
|
"minimum": 0,
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"maxItems": 3,
|
||||||
|
"minItems": 3,
|
||||||
|
"type": "array"
|
||||||
|
},
|
||||||
|
"maxItems": 1920,
|
||||||
|
"minItems": 1920,
|
||||||
|
"type": "array"
|
||||||
|
},
|
||||||
|
"maxItems": 1080,
|
||||||
|
"minItems": 1080,
|
||||||
|
"type": "array"
|
||||||
|
},
|
||||||
|
"type": "array"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Array"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["array"],
|
||||||
|
"title": "Image",
|
||||||
|
"type": "object"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
numpydantic can even handle shapes with unbounded numbers of dimensions by using
|
||||||
|
recursive JSON schema!!!
|
||||||
|
|
||||||
|
So the any-shaped array (using nptyping's ellipsis notation):
|
||||||
|
|
||||||
|
```python
|
||||||
|
class AnyShape(BaseModel):
|
||||||
|
array: NDArray[Shape["*, ..."], np.uint8]
|
||||||
|
```
|
||||||
|
|
||||||
|
is rendered to JSON-Schema like this:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"$defs": {
|
||||||
|
"any-shape-array-9b5d89838a990d79": {
|
||||||
|
"anyOf": [
|
||||||
|
{
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/$defs/any-shape-array-9b5d89838a990d79"
|
||||||
|
},
|
||||||
|
"type": "array"
|
||||||
|
},
|
||||||
|
{"maximum": 255, "minimum": 0, "type": "integer"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"properties": {
|
||||||
|
"array": {
|
||||||
|
"dtype": "numpy.uint8",
|
||||||
|
"items": {"$ref": "#/$defs/any-shape-array-9b5d89838a990d79"},
|
||||||
|
"title": "Array",
|
||||||
|
"type": "array"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["array"],
|
||||||
|
"title": "AnyShape",
|
||||||
|
"type": "object"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
where the key `"any-shape-array-9b5d89838a990d79"` uses a (blake2b) hash of the
|
||||||
|
inner dtype specification so that multiple any-shaped arrays in a single
|
||||||
|
model schema are deduplicated without conflicts.
|
||||||
|
|
||||||
|
### Dumping
|
||||||
|
|
||||||
|
One of the main reasons to use chunked array libraries like zarr is to avoid
|
||||||
|
needing to load the entire array into memory. When dumping data to JSON, numpydantic
|
||||||
|
tries to mirror this behavior, by default only dumping the metadata that is
|
||||||
|
necessary to identify the array.
|
||||||
|
|
||||||
|
For example, with zarr:
|
||||||
|
|
||||||
|
```python
|
||||||
|
array = zarr.array([[1,2,3],[4,5,6],[7,8,9]], dtype=float)
|
||||||
|
instance = Image(array=array)
|
||||||
|
dumped = instance.model_dump_json()
|
||||||
|
```
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"array":
|
||||||
|
{
|
||||||
|
"Chunk shape": "(3, 3)",
|
||||||
|
"Chunks initialized": "1/1",
|
||||||
|
"Compressor": "Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)",
|
||||||
|
"Data type": "float64",
|
||||||
|
"No. bytes": "72",
|
||||||
|
"No. bytes stored": "421",
|
||||||
|
"Order": "C",
|
||||||
|
"Read-only": "False",
|
||||||
|
"Shape": "(3, 3)",
|
||||||
|
"Storage ratio": "0.2",
|
||||||
|
"Store type": "zarr.storage.KVStore",
|
||||||
|
"Type": "zarr.core.Array",
|
||||||
|
"hexdigest": "c51604eace325fe42bbebf39146c0956bd2ed13c"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
To print the whole array, we use pydantic's serialization contexts:
|
||||||
|
|
||||||
|
```python
|
||||||
|
dumped = instance.model_dump_json(context={'zarr_dump_array': True})
|
||||||
|
```
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"array":
|
||||||
|
{
|
||||||
|
"same thing,": "except also...",
|
||||||
|
"array": [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
|
||||||
|
"hexdigest": "c51604eace325fe42bbebf39146c0956bd2ed13c"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
```{toctree}
|
```{toctree}
|
||||||
|
|
|
@ -22,12 +22,12 @@ dask = [
|
||||||
hdf5 = [
|
hdf5 = [
|
||||||
"h5py>=3.10.0"
|
"h5py>=3.10.0"
|
||||||
]
|
]
|
||||||
zarr = [
|
|
||||||
"zarr>=2.17.2",
|
|
||||||
]
|
|
||||||
video = [
|
video = [
|
||||||
"opencv-python>=4.9.0.80",
|
"opencv-python>=4.9.0.80",
|
||||||
]
|
]
|
||||||
|
zarr = [
|
||||||
|
"zarr>=2.17.2",
|
||||||
|
]
|
||||||
arrays = [
|
arrays = [
|
||||||
"numpydantic[dask,hdf5,zarr,video]"
|
"numpydantic[dask,hdf5,zarr,video]"
|
||||||
]
|
]
|
||||||
|
|
|
@ -125,12 +125,29 @@ class VideoProxy:
|
||||||
raise ValueError(f"Could not get frame {frame}")
|
raise ValueError(f"Could not get frame {frame}")
|
||||||
return frame
|
return frame
|
||||||
|
|
||||||
|
def _complete_slice(self, slice_: slice) -> slice:
|
||||||
|
"""Get a fully-built slice that can be passed to range"""
|
||||||
|
if slice_.step is None:
|
||||||
|
slice_ = slice(slice_.start, slice_.stop, 1)
|
||||||
|
if slice_.stop is None:
|
||||||
|
slice_ = slice(slice_.start, self.n_frames, slice_.step)
|
||||||
|
if slice_.start is None:
|
||||||
|
slice_ = slice(0, slice_.stop, slice_.step)
|
||||||
|
return slice_
|
||||||
|
|
||||||
def __getitem__(self, item: Union[int, slice, tuple]) -> np.ndarray:
|
def __getitem__(self, item: Union[int, slice, tuple]) -> np.ndarray:
|
||||||
if isinstance(item, int):
|
if isinstance(item, int):
|
||||||
# want a single frame
|
# want a single frame
|
||||||
return self._get_frame(item)
|
return self._get_frame(item)
|
||||||
|
elif isinstance(item, slice):
|
||||||
|
# slice of frames
|
||||||
|
item = self._complete_slice(item)
|
||||||
|
frames = []
|
||||||
|
for i in range(item.start, item.stop, item.step):
|
||||||
|
frames.append(self._get_frame(i))
|
||||||
|
return np.stack(frames)
|
||||||
else:
|
else:
|
||||||
# slices are passes as tuples
|
# slices are passed as tuples
|
||||||
# first arg needs to be handled specially
|
# first arg needs to be handled specially
|
||||||
if isinstance(item[0], int):
|
if isinstance(item[0], int):
|
||||||
# single frame
|
# single frame
|
||||||
|
@ -142,13 +159,7 @@ class VideoProxy:
|
||||||
elif isinstance(item[0], slice):
|
elif isinstance(item[0], slice):
|
||||||
frames = []
|
frames = []
|
||||||
# make a new slice since range cant take Nones, filling in missing vals
|
# make a new slice since range cant take Nones, filling in missing vals
|
||||||
fslice = item[0]
|
fslice = self._complete_slice(item[0])
|
||||||
if fslice.step is None:
|
|
||||||
fslice = slice(fslice.start, fslice.stop, 1)
|
|
||||||
if fslice.stop is None:
|
|
||||||
fslice = slice(fslice.start, self.n_frames, fslice.step)
|
|
||||||
if fslice.start is None:
|
|
||||||
fslice = slice(0, fslice.stop, fslice.step)
|
|
||||||
|
|
||||||
for i in range(fslice.start, fslice.stop, fslice.step):
|
for i in range(fslice.start, fslice.stop, fslice.step):
|
||||||
frames.append(self._get_frame(i))
|
frames.append(self._get_frame(i))
|
||||||
|
|
|
@ -3,7 +3,9 @@ Helper functions for use with :class:`~numpydantic.NDArray` - see the note in
|
||||||
:mod:`~numpydantic.ndarray` for why these are separated.
|
:mod:`~numpydantic.ndarray` for why these are separated.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Any, Callable, Union
|
import hashlib
|
||||||
|
import json
|
||||||
|
from typing import Any, Callable, Optional, Union
|
||||||
|
|
||||||
import nptyping.structure
|
import nptyping.structure
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
@ -124,6 +126,8 @@ def list_of_lists_schema(shape: Shape, array_type: CoreSchema) -> ListSchema:
|
||||||
# make the current level list schema, accounting for shape
|
# make the current level list schema, accounting for shape
|
||||||
if arg == "*":
|
if arg == "*":
|
||||||
list_schema = core_schema.list_schema(inner_schema, metadata=metadata)
|
list_schema = core_schema.list_schema(inner_schema, metadata=metadata)
|
||||||
|
elif arg == "...":
|
||||||
|
list_schema = _unbounded_shape(inner_schema, metadata=metadata)
|
||||||
else:
|
else:
|
||||||
arg = int(arg)
|
arg = int(arg)
|
||||||
list_schema = core_schema.list_schema(
|
list_schema = core_schema.list_schema(
|
||||||
|
@ -132,6 +136,50 @@ def list_of_lists_schema(shape: Shape, array_type: CoreSchema) -> ListSchema:
|
||||||
return list_schema
|
return list_schema
|
||||||
|
|
||||||
|
|
||||||
|
def _hash_schema(schema: CoreSchema) -> str:
|
||||||
|
"""
|
||||||
|
Make a hex-encoded 8-byte blake2b hash from a pydantic core schema.
|
||||||
|
Collisions are really not important or likely here, but we do want the same schema
|
||||||
|
to produce the same hash.
|
||||||
|
"""
|
||||||
|
schema_str = json.dumps(
|
||||||
|
schema, sort_keys=True, indent=None, separators=(",", ":")
|
||||||
|
).encode("utf-8")
|
||||||
|
hasher = hashlib.blake2b(digest_size=8)
|
||||||
|
hasher.update(schema_str)
|
||||||
|
return hasher.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def _unbounded_shape(
|
||||||
|
inner_type: CoreSchema, metadata: Optional[dict] = None
|
||||||
|
) -> core_schema.DefinitionsSchema:
|
||||||
|
"""
|
||||||
|
Make a recursive schema that refers to itself using a hashed version of the inner
|
||||||
|
type
|
||||||
|
"""
|
||||||
|
|
||||||
|
schema_hash = _hash_schema(inner_type)
|
||||||
|
array_ref = f"any-shape-array-{schema_hash}"
|
||||||
|
|
||||||
|
schema = core_schema.definitions_schema(
|
||||||
|
core_schema.list_schema(
|
||||||
|
core_schema.definition_reference_schema(array_ref), metadata=metadata
|
||||||
|
),
|
||||||
|
[
|
||||||
|
core_schema.union_schema(
|
||||||
|
[
|
||||||
|
core_schema.list_schema(
|
||||||
|
core_schema.definition_reference_schema(array_ref)
|
||||||
|
),
|
||||||
|
inner_type,
|
||||||
|
],
|
||||||
|
ref=array_ref,
|
||||||
|
)
|
||||||
|
],
|
||||||
|
)
|
||||||
|
return schema
|
||||||
|
|
||||||
|
|
||||||
def make_json_schema(
|
def make_json_schema(
|
||||||
shape: ShapeType, dtype: DtypeType, _handler: _handler_type
|
shape: ShapeType, dtype: DtypeType, _handler: _handler_type
|
||||||
) -> ListSchema:
|
) -> ListSchema:
|
||||||
|
@ -154,7 +202,8 @@ def make_json_schema(
|
||||||
|
|
||||||
# get the names of the shape constraints, if any
|
# get the names of the shape constraints, if any
|
||||||
if shape is Any:
|
if shape is Any:
|
||||||
list_schema = core_schema.list_schema(core_schema.any_schema())
|
list_schema = _unbounded_shape(dtype_schema)
|
||||||
|
# list_schema = core_schema.list_schema(core_schema.any_schema())
|
||||||
else:
|
else:
|
||||||
list_schema = list_of_lists_schema(shape, dtype_schema)
|
list_schema = list_of_lists_schema(shape, dtype_schema)
|
||||||
|
|
||||||
|
|
|
@ -122,6 +122,12 @@ def test_video_getitem(avi_video):
|
||||||
assert single_slice.shape == (10, 5, 3)
|
assert single_slice.shape == (10, 5, 3)
|
||||||
|
|
||||||
# also get a range of frames
|
# also get a range of frames
|
||||||
|
# range without further slices
|
||||||
|
range_slice = instance.array[3:5]
|
||||||
|
assert range_slice.shape == (2, 100, 50, 3)
|
||||||
|
assert range_slice[0, 3, 3, 0] == 3
|
||||||
|
assert range_slice[0, 4, 4, 0] == 0
|
||||||
|
|
||||||
# full range
|
# full range
|
||||||
range_slice = instance.array[3:5, 0:10, 0:5]
|
range_slice = instance.array[3:5, 0:10, 0:5]
|
||||||
assert range_slice.shape == (2, 10, 5, 3)
|
assert range_slice.shape == (2, 10, 5, 3)
|
||||||
|
|
|
@ -14,9 +14,6 @@ from numpydantic.exceptions import ShapeError, DtypeError
|
||||||
from numpydantic import dtype
|
from numpydantic import dtype
|
||||||
|
|
||||||
|
|
||||||
# from .fixtures import tmp_output_dir_func
|
|
||||||
|
|
||||||
|
|
||||||
def test_ndarray_type():
|
def test_ndarray_type():
|
||||||
class Model(BaseModel):
|
class Model(BaseModel):
|
||||||
array: NDArray[Shape["2 x, * y"], Number]
|
array: NDArray[Shape["2 x, * y"], Number]
|
||||||
|
@ -186,17 +183,43 @@ def test_json_schema_dtype_builtin(dtype, expected, array_model):
|
||||||
assert inner_type["type"] == expected
|
assert inner_type["type"] == expected
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skip("Not implemented yet")
|
def _recursive_array(schema):
|
||||||
def test_json_schema_wildcard():
|
assert "$defs" in schema
|
||||||
"""
|
# get the key used for the array
|
||||||
NDarray types should generate a JSON schema without shape constraints
|
array_key = list(schema["$defs"].keys())[0]
|
||||||
"""
|
|
||||||
pass
|
# the array property should be a ref to the recursive array
|
||||||
|
# get the innermost part of the field schema
|
||||||
|
field_schema = schema["properties"]["array"]
|
||||||
|
while "items" in field_schema:
|
||||||
|
field_schema = field_schema["items"]
|
||||||
|
assert field_schema["$ref"] == f"#/$defs/{array_key}"
|
||||||
|
|
||||||
|
# and the recursive array should indeed be recursive...
|
||||||
|
# specifically it should be an array whose items can be itself or
|
||||||
|
# of the type specified by the dtype
|
||||||
|
any_of = schema["$defs"][array_key]["anyOf"]
|
||||||
|
assert any_of[0]["items"]["$ref"] == f"#/$defs/{array_key}"
|
||||||
|
assert any_of[0]["type"] == "array"
|
||||||
|
# here we are just assuming that it's a uint8 array..
|
||||||
|
assert any_of[1]["type"] == "integer"
|
||||||
|
assert any_of[1]["maximum"] == 255
|
||||||
|
assert any_of[1]["minimum"] == 0
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skip("Not implemented yet")
|
|
||||||
def test_json_schema_ellipsis():
|
def test_json_schema_ellipsis():
|
||||||
"""
|
"""
|
||||||
NDArray types should create a recursive JSON schema for any-shaped arrays
|
NDArray types should create a recursive JSON schema for any-shaped arrays
|
||||||
"""
|
"""
|
||||||
pass
|
|
||||||
|
class AnyShape(BaseModel):
|
||||||
|
array: NDArray[Shape["*, ..."], np.uint8]
|
||||||
|
|
||||||
|
schema = AnyShape.model_json_schema()
|
||||||
|
_recursive_array(schema)
|
||||||
|
|
||||||
|
class ConstrainedAnyShape(BaseModel):
|
||||||
|
array: NDArray[Shape["3, 4, ..."], np.uint8]
|
||||||
|
|
||||||
|
schema = ConstrainedAnyShape.model_json_schema()
|
||||||
|
_recursive_array(schema)
|
||||||
|
|
Loading…
Reference in a new issue