From e2231cc9f0ed0f31886a511b7f5b8e4f2445db5e Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Mon, 20 May 2024 19:17:46 -0700 Subject: [PATCH] video interface! --- docs/conf.py | 1 + docs/index.md | 55 +++++-- docs/todo.md | 11 ++ pdm.lock | 41 +++++- pyproject.toml | 6 +- src/numpydantic/interface/__init__.py | 2 + src/numpydantic/interface/video.py | 205 ++++++++++++++++++++++++++ tests/test_interface/test_video.py | 186 +++++++++++++++++++++++ 8 files changed, 483 insertions(+), 24 deletions(-) create mode 100644 src/numpydantic/interface/video.py create mode 100644 tests/test_interface/test_video.py diff --git a/docs/conf.py b/docs/conf.py index 0ad2d1a..c31d0a7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -20,6 +20,7 @@ extensions = [ "sphinxcontrib.autodoc_pydantic", "sphinx.ext.intersphinx", "sphinx.ext.viewcode", + "sphinx.ext.doctest", "sphinx_design", "myst_parser", "sphinx.ext.todo", diff --git a/docs/index.md b/docs/index.md index f87a856..a79eee2 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,18 +1,50 @@ # numpydantic -A python package for array types in pydantic. +A python package for specifying, validating, and serializing arrays with arbitrary backends in pydantic. + +**Problem:** +1) Pydantic is great for modeling data. +2) Arrays are one of a few elemental types in computing, + +but ... + +3) if you try and specify an array in pydantic, this happens: + +```python +>>> from pydantic import BaseModel +>>> import numpy as np + +>>> class MyModel(BaseModel): +>>> array: np.ndarray +pydantic.errors.PydanticSchemaGenerationError: +Unable to generate pydantic-core schema for <class 'numpy.ndarray'>. +Set `arbitrary_types_allowed=True` in the model_config to ignore this error +or implement `__get_pydantic_core_schema__` on your type to fully support it.
+``` + +And setting `arbitrary_types_allowed = True` still prohibits you from +generating JSON Schema or serializing the array to JSON. + ## Features: - **Types** - Annotations (based on [npytyping](https://github.com/ramonhagenaars/nptyping)) for specifying arrays in pydantic models - **Validation** - Shape, dtype, and other array validations -- **Seralization** - JSON-Schema List-of-list schema generation -- **Interfaces** - Works with numpy, dask, HDF5, zarr, and a simple extension system to make it work with - whatever else you want! +- **Interfaces** - Works with {mod}`~.interface.numpy`, {mod}`~.interface.dask`, {mod}`~.interface.hdf5`, {mod}`~.interface.zarr`, + and a simple extension system to make it work with whatever else you want! +- **Serialization** - Dump an array as a JSON-compatible array-of-arrays with enough metadata to be able to + recreate the model in the native format +- **Schema Generation** - Correct JSON Schema for arrays, complete with shape and dtype constraints, to + make your models interoperable Coming soon: - **Metadata** - This package was built to be used with [linkml arrays](https://linkml.io/linkml/schemas/arrays.html), - so we will be extending it to include any metadata included in the type annotation object in the JSON schema representation. + so we will be extending it to include arbitrary metadata included in the type annotation object in the JSON schema representation. +- **Extensible Specification** - for v1, we are implementing the existing nptyping syntax, but + for v2 we will be updating that to an extensible specification syntax to allow interfaces to validate additional + constraints like chunk sizes, as well as make array specifications more introspectable and friendly to runtime usage.
+- **Advanced dtype handling** - handling dtypes that only exist in some array backends, allowing + minimum and maximum precision ranges, and so on as type maps provided by interface classes :) - (see [todo](./todo.md)) ## Usage @@ -20,18 +52,11 @@ Coming soon: Specify an array using [nptyping syntax](https://github.com/ramonhagenaars/nptyping/blob/master/USERDOCS.md) and use it with your favorite array library :) -```{todo} -We will be moving away from using nptyping in v2.0.0. - -It was written for an older era in python before the dramatic changes in the Python -type system and is no longer actively maintained. We will be reimplementing a syntax -that extends its array specification syntax to include things like ranges and extensible -dtypes with varying precision (and is much less finnicky to deal with). -``` - Use the {class}`~numpydantic.NDArray` class like you would any other python type, combine it with {class}`typing.Union`, make it {class}`~typing.Optional`, etc. +For example, to support an image field that accepts several array shapes and dtypes: + ```python from typing import Union from pydantic import BaseModel @@ -46,8 +71,6 @@ class Image(BaseModel): array: Union[ NDArray[Shape["* x, * y"], np.uint8], NDArray[Shape["* x, * y, 3 rgb"], np.uint8], - NDArray[Shape["* x, * y, 4 rgba"], np.uint8], - NDArray[Shape["* t, * x, * y, 3 rgb"], np.uint8], NDArray[Shape["* t, * x, * y, 4 rgba"], np.float64] ] ``` diff --git a/docs/todo.md b/docs/todo.md index a790241..86d9c28 100644 --- a/docs/todo.md +++ b/docs/todo.md @@ -1,5 +1,16 @@ # TODO +## Syntax + +```{todo} +We will be moving away from using nptyping in v2.0.0. + +It was written for an older era in python before the dramatic changes in the Python +type system and is no longer actively maintained. We will be reimplementing a syntax +that extends its array specification syntax to include things like ranges and extensible +dtypes with varying precision (and is much less finicky to deal with).
+``` + ## Validation ```{todo} diff --git a/pdm.lock b/pdm.lock index ce47f8a..1678d2b 100644 --- a/pdm.lock +++ b/pdm.lock @@ -2,10 +2,10 @@ # It is not intended for manual editing. [metadata] -groups = ["default", "arrays", "dask", "dev", "docs", "hdf5", "tests"] +groups = ["default", "arrays", "dask", "dev", "docs", "hdf5", "tests", "video"] strategy = ["cross_platform", "inherit_metadata"] lock_version = "4.4.1" -content_hash = "sha256:4e22ffd83cb1ae3916c6c41c77f74b84db5a77e572c796cc537023bd6c3e3128" +content_hash = "sha256:893fe47e35966aa6ed1564645326f6f67d1c64b984b5ea6f6b45f58b4fd732c2" [[package]] name = "alabaster" @@ -50,7 +50,7 @@ files = [ name = "asciitree" version = "0.3.3" summary = "Draws ASCII trees." -groups = ["default"] +groups = ["arrays", "dev", "tests"] files = [ {file = "asciitree-0.3.3.tar.gz", hash = "sha256:4aa4b9b649f85e3fcb343363d97564aa1fb62e249677f2e18a96765145cc0f6e"}, ] @@ -409,7 +409,7 @@ name = "fasteners" version = "0.19" requires_python = ">=3.6" summary = "A python package that provides useful locks" -groups = ["default"] +groups = ["arrays", "dev", "tests"] marker = "sys_platform != \"emscripten\"" files = [ {file = "fasteners-0.19-py3-none-any.whl", hash = "sha256:758819cb5d94cdedf4e836988b74de396ceacb8e2794d21f82d131fd9ee77237"}, @@ -718,7 +718,7 @@ name = "numcodecs" version = "0.12.1" requires_python = ">=3.8" summary = "A Python package providing buffer compression and transformation codecs for use in data storage and communication applications." 
-groups = ["default"] +groups = ["arrays", "dev", "tests"] dependencies = [ "numpy>=1.7", ] @@ -747,7 +747,7 @@ name = "numpy" version = "1.26.4" requires_python = ">=3.9" summary = "Fundamental package for array computing in Python" -groups = ["arrays", "default", "dev", "hdf5", "tests"] +groups = ["arrays", "default", "dev", "hdf5", "tests", "video"] files = [ {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, @@ -787,6 +787,33 @@ files = [ {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, ] +[[package]] +name = "opencv-python" +version = "4.9.0.80" +requires_python = ">=3.6" +summary = "Wrapper package for OpenCV python bindings." +groups = ["video"] +dependencies = [ + "numpy>=1.17.0; python_version >= \"3.7\"", + "numpy>=1.17.3; python_version >= \"3.8\"", + "numpy>=1.19.3; python_version >= \"3.6\" and platform_system == \"Linux\" and platform_machine == \"aarch64\"", + "numpy>=1.19.3; python_version >= \"3.9\"", + "numpy>=1.21.0; python_version <= \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\"", + "numpy>=1.21.2; python_version >= \"3.10\"", + "numpy>=1.21.4; python_version >= \"3.10\" and platform_system == \"Darwin\"", + "numpy>=1.23.5; python_version >= \"3.11\"", + "numpy>=1.26.0; python_version >= \"3.12\"", +] +files = [ + {file = "opencv-python-4.9.0.80.tar.gz", hash = "sha256:1a9f0e6267de3a1a1db0c54213d022c7c8b5b9ca4b580e80bdc58516c922c9e1"}, + {file = "opencv_python-4.9.0.80-cp37-abi3-macosx_10_16_x86_64.whl", hash = "sha256:7e5f7aa4486651a6ebfa8ed4b594b65bd2d2f41beeb4241a3e4b1b85acbbbadb"}, + {file = "opencv_python-4.9.0.80-cp37-abi3-macosx_11_0_arm64.whl", hash = 
"sha256:71dfb9555ccccdd77305fc3dcca5897fbf0cf28b297c51ee55e079c065d812a3"}, + {file = "opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b34a52e9da36dda8c151c6394aed602e4b17fa041df0b9f5b93ae10b0fcca2a"}, + {file = "opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4088cab82b66a3b37ffc452976b14a3c599269c247895ae9ceb4066d8188a57"}, + {file = "opencv_python-4.9.0.80-cp37-abi3-win32.whl", hash = "sha256:dcf000c36dd1651118a2462257e3a9e76db789a78432e1f303c7bac54f63ef6c"}, + {file = "opencv_python-4.9.0.80-cp37-abi3-win_amd64.whl", hash = "sha256:3f16f08e02b2a2da44259c7cc712e779eff1dd8b55fdb0323e8cab09548086c0"}, +] + [[package]] name = "packaging" version = "24.0" @@ -1514,7 +1541,7 @@ name = "zarr" version = "2.17.2" requires_python = ">=3.9" summary = "An implementation of chunked, compressed, N-dimensional arrays for Python" -groups = ["default"] +groups = ["arrays", "dev", "tests"] dependencies = [ "asciitree", "fasteners; sys_platform != \"emscripten\"", diff --git a/pyproject.toml b/pyproject.toml index 1be5f48..a0fefe6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,8 +25,11 @@ hdf5 = [ zarr = [ "zarr>=2.17.2", ] +video = [ + "opencv-python>=4.9.0.80", +] arrays = [ - "numpydantic[dask,hdf5,zarr]" + "numpydantic[dask,hdf5,zarr,video]" ] tests = [ "numpydantic[arrays]", @@ -50,6 +53,7 @@ dev = [ "ruff<1.0.0,>=0.2.0" ] + [tool.pdm] distribution = true diff --git a/src/numpydantic/interface/__init__.py b/src/numpydantic/interface/__init__.py index 2a98d8b..0a0c490 100644 --- a/src/numpydantic/interface/__init__.py +++ b/src/numpydantic/interface/__init__.py @@ -6,6 +6,7 @@ from numpydantic.interface.dask import DaskInterface from numpydantic.interface.hdf5 import H5Interface from numpydantic.interface.interface import Interface from numpydantic.interface.numpy import NumpyInterface +from numpydantic.interface.video import VideoInterface from 
numpydantic.interface.zarr import ZarrInterface __all__ = [ @@ -13,5 +14,6 @@ __all__ = [ "DaskInterface", "H5Interface", "NumpyInterface", + "VideoInterface", "ZarrInterface", ] diff --git a/src/numpydantic/interface/video.py b/src/numpydantic/interface/video.py new file mode 100644 index 0000000..f2092c7 --- /dev/null +++ b/src/numpydantic/interface/video.py @@ -0,0 +1,205 @@ +""" +Interface to support treating videos like arrays using OpenCV +""" + +import pdb +from pathlib import Path +from typing import Any, Optional, Tuple, Union + +import numpy as np + +from numpydantic.interface.interface import Interface + +try: + import cv2 + from cv2 import VideoCapture +except ImportError: + cv2 = None + VideoCapture = None + +VIDEO_EXTENSIONS = (".mp4", ".avi", ".mov", ".mkv") + + +class VideoProxy: + """ + Passthrough proxy class to interact with videos as arrays + """ + + def __init__( + self, path: Optional[Path] = None, video: Optional[VideoCapture] = None + ): + if path is None and video is None: + raise ValueError( + "Need to either supply a path or an opened VideoCapture object" + ) + + if path is not None: + path = Path(path) + self.path = path + + self._video = video # type: Optional[VideoCapture] + self._n_frames = None # type: Optional[int] + self._dtype = None # type: Optional[np.dtype] + self._shape = None # type: Optional[Tuple[int, ...]] + self._sample_frame = None # type: Optional[np.ndarray] + + @property + def video(self) -> VideoCapture: + """Opened video capture object""" + if self._video is None: + if self.path is None: + raise RuntimeError( + "Instantiated with a VideoCapture object that has been closed, " + "and it cant be reopened since source path cant be gotten " + "from VideoCapture objects" + ) + self._video = VideoCapture(str(self.path)) + return self._video + + def close(self) -> None: + """Close the opened VideoCapture object""" + if self._video is not None: + self._video.release() + self._video = None + + @property + def 
sample_frame(self) -> np.ndarray: + """A stored frame from the video to use when calculating shape and dtype""" + if self._sample_frame is None: + current_frame = int(self.video.get(cv2.CAP_PROP_POS_FRAMES)) + + self.video.set(cv2.CAP_PROP_POS_FRAMES, max(0, current_frame - 1)) + status, frame = self.video.read() + if not status: # pragma: no cover + raise RuntimeError("Could not read frame from video") + self.video.set(cv2.CAP_PROP_POS_FRAMES, current_frame) + self._sample_frame = frame + return self._sample_frame + + @property + def shape(self) -> Tuple[int, ...]: + """ + Shape of video like + ``(n_frames, height, width, channels)`` + + Note that this order flips the order of height and width from typical resolution + specifications: eg. 1080p video is typically 1920x1080, but here it would be + 1080x1920. This follows opencv's ordering, which matches expectations when + eg. an image is read and plotted with matplotlib: the first index is the position + in the 0th dimension - the height, or "y" axis - and the second is the width/x.
+ """ + if self._shape is None: + self._shape = (self.n_frames, *self.sample_frame.shape) + return self._shape + + @property + def dtype(self) -> np.dtype: + """Numpy dtype (from ``sample_frame``)""" + return self.sample_frame.dtype + + @property + def n_frames(self) -> int: + """ + Try to get number of frames using opencv metadata, and manually count if not + """ + if self._n_frames is None: + n_frames = self.video.get(cv2.CAP_PROP_FRAME_COUNT) + if n_frames == 0: + # have to count manually for some containers with bad metadata + current_frame = self.video.get(cv2.CAP_PROP_POS_FRAMES) + self.video.set(cv2.CAP_PROP_POS_FRAMES, 0) + n_frames = 0 + while True: + status, _ = self.video.read() + if not status: + break + n_frames += 1 + self.video.set(cv2.CAP_PROP_POS_FRAMES, current_frame) + self._n_frames = int(n_frames) + return self._n_frames + + def _get_frame(self, frame: int) -> np.ndarray: + self.video.set(cv2.CAP_PROP_POS_FRAMES, frame) + status, image = self.video.read() + if not status: # pragma: no cover + raise ValueError(f"Could not get frame {frame}") + return image + + def __getitem__(self, item: Union[int, slice, tuple]) -> np.ndarray: + if isinstance(item, int): + # want a single frame + return self._get_frame(item) + else: + # first index picks frames, the rest index pixels; wrap a bare slice + item = item if isinstance(item, tuple) else (item,) + if isinstance(item[0], int): + # single frame + frame = self._get_frame(item[0]) + return frame[tuple(item[1:])] + + elif isinstance(item[0], slice): + frames = [] + # make a new slice since range cant take Nones, filling in missing vals + fslice = item[0] + if fslice.step is None: + fslice = slice(fslice.start, fslice.stop, 1) + if fslice.stop is None: + fslice = slice(fslice.start, self.n_frames, fslice.step) + if fslice.start is None: + fslice = slice(0, fslice.stop, fslice.step) + + for i in range(fslice.start, fslice.stop, fslice.step): + frames.append(self._get_frame(i)) + frame = np.stack(frames) + return frame[(slice(None), *item[1:])] + else: # pragma: no cover + raise
ValueError(f"indices must be an int or a slice! got {item}") + + def __setitem__(self, key: Union[int, slice], value: Union[int, float, np.ndarray]): + raise NotImplementedError("Setting pixel values on videos is not supported!") + + def __getattr__(self, item: str): + return getattr(self.video, item) + + +class VideoInterface(Interface): + """ + OpenCV interface to treat videos as arrays. + """ + + input_types = (str, Path, VideoCapture) + return_type = VideoProxy + + @classmethod + def enabled(cls) -> bool: + """Check if opencv-python is available in the environment""" + return cv2 is not None + + @classmethod + def check(cls, array: Any) -> bool: + """ + Check if array is a string or Path with a supported video extension, + or an opened VideoCapture object + """ + if VideoCapture is not None and isinstance(array, VideoCapture): + return True + + if isinstance(array, str): + try: + array = Path(array) + except TypeError: + # fine, just not a video + return False + + if isinstance(array, Path) and array.suffix.lower() in VIDEO_EXTENSIONS: + return True + + return False + + def before_validation(self, array: Any) -> VideoProxy: + """Get a :class:`.VideoProxy` object for this video""" + if VideoCapture is not None and isinstance(array, VideoCapture): + proxy = VideoProxy(video=array) + else: + proxy = VideoProxy(path=array) + return proxy diff --git a/tests/test_interface/test_video.py b/tests/test_interface/test_video.py new file mode 100644 index 0000000..e6c96b9 --- /dev/null +++ b/tests/test_interface/test_video.py @@ -0,0 +1,186 @@ +""" +Needs to be refactored to DRY, but works for now +""" + +import pdb + +import numpy as np +import pytest + +from pathlib import Path +import cv2 + +from pydantic import BaseModel, ValidationError + +from numpydantic import NDArray, Shape +from numpydantic import dtype as dt +from numpydantic.interface.video import VideoProxy + + +@pytest.fixture(scope="function") +def avi_video(tmp_path): + video_path = tmp_path / "test.avi" + + def _make_video(shape=(100,
50), frames=10, is_color=True) -> Path: + writer = cv2.VideoWriter( + str(video_path), + cv2.VideoWriter_fourcc(*"RGBA"), # raw video for testing purposes + 30, + (shape[1], shape[0]), + is_color, + ) + if is_color: + shape = (*shape, 3) + + for i in range(frames): + # make fresh array every time bc opencv eats them + array = np.zeros(shape, dtype=np.uint8) + if not is_color: + array[i, i] = i + else: + array[i, i, :] = i + writer.write(array) + writer.release() + return video_path + + yield _make_video + + video_path.unlink(missing_ok=True) + + +def test_video_validation(avi_video): + """Color videos should validate for normal uint8 shape specs""" + + shape = (100, 50) + vid = avi_video(shape=shape, is_color=True) + shape_str = f"*, {shape[0]}, {shape[1]}, 3" + + class MyModel(BaseModel): + array: NDArray[Shape[shape_str], dt.UInt8] + + # should correctly validate :) + instance = MyModel(array=vid) + assert isinstance(instance.array, VideoProxy) + + +def test_video_from_videocapture(avi_video): + """Should be able to pass an opened videocapture object""" + shape = (100, 50) + vid = avi_video(shape=shape, is_color=True) + shape_str = f"*, {shape[0]}, {shape[1]}, 3" + + class MyModel(BaseModel): + array: NDArray[Shape[shape_str], dt.UInt8] + + # should still correctly validate! 
+ opened_vid = cv2.VideoCapture(str(vid)) + try: + instance = MyModel(array=opened_vid) + assert isinstance(instance.array, VideoProxy) + finally: + opened_vid.release() + + +def test_video_wrong_shape(avi_video): + shape = (100, 50) + + # generate video with purposely wrong shape + vid = avi_video(shape=(shape[0] + 10, shape[1] + 10), is_color=True) + + shape_str = f"*, {shape[0]}, {shape[1]}, 3" + + class MyModel(BaseModel): + array: NDArray[Shape[shape_str], dt.UInt8] + + # should fail validation: the video is larger than the declared shape + with pytest.raises(ValidationError): + instance = MyModel(array=vid) + + +def test_video_getitem(avi_video): + """ + Should be able to get individual frames and slices as if it were a normal array + """ + shape = (100, 50) + vid = avi_video(shape=shape, frames=10, is_color=True) + shape_str = f"*, {shape[0]}, {shape[1]}, 3" + + class MyModel(BaseModel): + array: NDArray[Shape[shape_str], dt.UInt8] + + instance = MyModel(array=vid) + fifth_frame = instance.array[5] + # frame 5 should have 5's at the (5, 5) position + assert (fifth_frame[5, 5, :] == [5, 5, 5]).all() + # and nothing at the (6, 6) position + assert (fifth_frame[6, 6, :] == [0, 0, 0]).all() + + # slicing should also work as if it were just a numpy array + single_slice = instance.array[3, 0:10, 0:5] + assert single_slice[3, 3, 0] == 3 + assert single_slice[4, 4, 0] == 0 + assert single_slice.shape == (10, 5, 3) + + # also get a range of frames + # full range + range_slice = instance.array[3:5, 0:10, 0:5] + assert range_slice.shape == (2, 10, 5, 3) + assert range_slice[0, 3, 3, 0] == 3 + assert range_slice[0, 4, 4, 0] == 0 + + # starting range + range_slice = instance.array[6:, 0:10, 0:10] + assert range_slice.shape == (4, 10, 10, 3) + assert range_slice[-1, 9, 9, 0] == 9 + assert range_slice[-2, 9, 9, 0] == 0 + + # ending range + range_slice = instance.array[:3, 0:5, 0:5] + assert range_slice.shape == (3, 5, 5, 3) + + # stepped range + range_slice = instance.array[0:5:2, 0:6, 0:6] + # second slice
should be the second frame (instead of the first) + assert range_slice.shape == (3, 6, 6, 3) + assert range_slice[1, 2, 2, 0] == 2 + assert range_slice[1, 3, 3, 0] == 0 + # and the third should be the fourth (instead of the second) + assert range_slice[2, 4, 4, 0] == 4 + assert range_slice[2, 5, 5, 0] == 0 + + with pytest.raises(NotImplementedError): + # shouldn't be allowed to set + instance.array[5] = 10 + + +def test_video_attrs(avi_video): + """Should be able to access opencv properties""" + shape = (100, 50) + vid = avi_video(shape=shape, is_color=True) + shape_str = f"*, {shape[0]}, {shape[1]}, 3" + + class MyModel(BaseModel): + array: NDArray[Shape[shape_str], dt.UInt8] + + instance = MyModel(array=vid) + + instance.array.set(cv2.CAP_PROP_POS_FRAMES, 5) + assert int(instance.array.get(cv2.CAP_PROP_POS_FRAMES)) == 5 + + +def test_video_close(avi_video): + """Should close and reopen video file if needed""" + shape = (100, 50) + vid = avi_video(shape=shape, is_color=True) + shape_str = f"*, {shape[0]}, {shape[1]}, 3" + + class MyModel(BaseModel): + array: NDArray[Shape[shape_str], dt.UInt8] + + instance = MyModel(array=vid) + assert isinstance(instance.array.video, cv2.VideoCapture) + # closes releases and removed reference + instance.array.close() + assert instance.array._video is None + # reopen + assert isinstance(instance.array.video, cv2.VideoCapture)