diff --git a/docs/api/index.md b/docs/api/index.md new file mode 100644 index 0000000..988dffd --- /dev/null +++ b/docs/api/index.md @@ -0,0 +1,9 @@ +# numpydantic + +Top-level API contents + +```{eval-rst} +.. automodule:: numpydantic + :members: + :imported-members: +``` \ No newline at end of file diff --git a/docs/api/linkml/index.md b/docs/api/linkml/index.md new file mode 100644 index 0000000..7034541 --- /dev/null +++ b/docs/api/linkml/index.md @@ -0,0 +1,10 @@ +# linkml + +```{toctree} +:caption: LinkML + +ndarraygen +pydanticgen +template +``` + diff --git a/docs/api/linkml/ndarraygen.md b/docs/api/linkml/ndarraygen.md new file mode 100644 index 0000000..8212088 --- /dev/null +++ b/docs/api/linkml/ndarraygen.md @@ -0,0 +1,6 @@ +# ndarraygen + +```{eval-rst} +.. automodule:: numpydantic.linkml.ndarraygen + :members: +``` \ No newline at end of file diff --git a/docs/api/linkml/pydanticgen.md b/docs/api/linkml/pydanticgen.md new file mode 100644 index 0000000..4f7e77e --- /dev/null +++ b/docs/api/linkml/pydanticgen.md @@ -0,0 +1,6 @@ +# pydanticgen + +```{eval-rst} +.. automodule:: numpydantic.linkml.pydanticgen + :members: +``` \ No newline at end of file diff --git a/docs/api/linkml/template.md b/docs/api/linkml/template.md new file mode 100644 index 0000000..d54db25 --- /dev/null +++ b/docs/api/linkml/template.md @@ -0,0 +1,6 @@ +# template + +```{eval-rst} +.. automodule:: numpydantic.linkml.template + :members: +``` \ No newline at end of file diff --git a/docs/api/maps.md b/docs/api/maps.md new file mode 100644 index 0000000..f0bd106 --- /dev/null +++ b/docs/api/maps.md @@ -0,0 +1,6 @@ +# maps + +```{eval-rst} +.. automodule:: numpydantic.maps + :members: +``` \ No newline at end of file diff --git a/docs/api/monkeypatch.md b/docs/api/monkeypatch.md new file mode 100644 index 0000000..d397869 --- /dev/null +++ b/docs/api/monkeypatch.md @@ -0,0 +1,6 @@ +# monkeypatch + +```{eval-rst} +.. 
automodule:: numpydantic.monkeypatch + :members: +``` \ No newline at end of file diff --git a/docs/api/ndarray.md b/docs/api/ndarray.md new file mode 100644 index 0000000..2cd0e2f --- /dev/null +++ b/docs/api/ndarray.md @@ -0,0 +1,6 @@ +# ndarray + +```{eval-rst} +.. automodule:: numpydantic.ndarray + :members: +``` \ No newline at end of file diff --git a/docs/api/proxy.md b/docs/api/proxy.md new file mode 100644 index 0000000..13851a0 --- /dev/null +++ b/docs/api/proxy.md @@ -0,0 +1,6 @@ +# proxy + +```{eval-rst} +.. automodule:: numpydantic.proxy + :members: +``` \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index f3c017f..2eb7a6e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -6,48 +6,53 @@ # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -project = 'numpydantic' -copyright = '2024, Jonny Saunders' -author = 'Jonny Saunders' -release = 'v0.0.0' +project = "numpydantic" +copyright = "2024, Jonny Saunders" +author = "Jonny Saunders" +release = "v0.0.0" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration extensions = [ - 'sphinx.ext.napoleon', - 'sphinx.ext.autodoc', - 'sphinxcontrib.autodoc_pydantic', - 'sphinx.ext.intersphinx', + "sphinx.ext.napoleon", + "sphinx.ext.autodoc", + "sphinxcontrib.autodoc_pydantic", + "sphinx.ext.intersphinx", "sphinx_design", - 'myst_parser', - 'sphinx.ext.todo' + "myst_parser", + "sphinx.ext.todo", ] -templates_path = ['_templates'] -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +templates_path = ["_templates"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] intersphinx_mapping = { - 'python': ('https://docs.python.org/3', None), - 'numpy': ('https://numpy.org/doc/stable/', None), - 'pydantic': ('https://docs.pydantic.dev/latest/', None), - 'linkml': 
('https://linkml.io/linkml/', None), - 'linkml_runtime': ('https://linkml.io/linkml/', None), - 'linkml-runtime': ('https://linkml.io/linkml/', None) + "python": ("https://docs.python.org/3", None), + "numpy": ("https://numpy.org/doc/stable/", None), + "pydantic": ("https://docs.pydantic.dev/latest/", None), + "linkml": ("https://linkml.io/linkml/", None), + "linkml_runtime": ("https://linkml.io/linkml/", None), + "linkml-runtime": ("https://linkml.io/linkml/", None), } # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output -html_theme = 'furo' -html_static_path = ['_static'] +html_theme = "furo" +html_static_path = ["_static"] # autodoc -autodoc_pydantic_model_show_json_error_strategy = 'coerce' +autodoc_pydantic_model_show_json_error_strategy = "coerce" autodoc_pydantic_model_show_json = False -autodoc_mock_imports = [] +autodoc_mock_imports = [ + "dask", + "h5py", + "linkml", + "linkml_runtime", # import name, not the distribution name +] autoclass_content = "both" -autodoc_member_order='bysource' +autodoc_member_order = "bysource" add_module_names = False # Napoleon settings @@ -68,4 +73,4 @@ napoleon_attr_annotations = True # todo todo_include_todos = True -todo_link_only = True \ No newline at end of file +todo_link_only = True diff --git a/docs/hooks.md b/docs/hooks.md index 7cd254e..42a3b81 100644 --- a/docs/hooks.md +++ b/docs/hooks.md @@ -1,5 +1,11 @@ # Hooks -## TODO +What hooks do we want to expose to downstream users so they can use this without needing +to override everything? 
-- nwb compatibility: allowable precision map in dtype check \ No newline at end of file +```{todo} +**NWB Compatibility** + +**Precision:** NWB allows for a sort of hierarchy of type specification - +a less precise type also allows the data to be specified in a more precise type +``` \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 1c4cec6..7b03f5a 100644 --- a/docs/index.md +++ b/docs/index.md @@ -14,30 +14,32 @@ It does two primary things: - **Generate models from LinkML** - extend the LinkML pydantic generator to create models that that use the [linkml-arrays](https://github.com/linkml/linkml-arrays) syntax -## Overview -The Python type annotation system is weird and not like the rest of Python! -(at least until [PEP 0649](https://peps.python.org/pep-0649/) gets mainlined). -Similarly, Pydantic 2's core_schema system is wonderful but still relatively poorly -documented for custom types! This package does the work of plugging them in -together to make some kind of type validation frankenstein. -The first problem is that type annotations are evaluated statically by python, mypy, -etc. This means you can't use typical python syntax for declaring types - it has to -be present at the time `__new__` is called, rather than `__init__`. 
-- pydantic schema -- validation -- serialization -- lazy loading -- compression + +```{toctree} +:maxdepth: 2 +:caption: Contents +:hidden: true +overview +ndarray +linkml +hooks +todo +``` ```{toctree} :maxdepth: 2 -:caption: Contents: -:hidden: +:caption: API +:hidden: true + +api/index +api/ndarray +api/proxy +api/linkml/index +api/maps +api/monkeypatch -hooks ``` diff --git a/docs/linkml.md b/docs/linkml.md new file mode 100644 index 0000000..df0f2b6 --- /dev/null +++ b/docs/linkml.md @@ -0,0 +1,2 @@ +# LinkML Generation + diff --git a/docs/ndarray.md b/docs/ndarray.md new file mode 100644 index 0000000..5559ada --- /dev/null +++ b/docs/ndarray.md @@ -0,0 +1,134 @@ +# Constrained Arrays + +## Implementation details + +```{todo} +**Docs:** + +Describe implementation details! +``` + +## Examples + +### Declaration + +Type with a single {class}`~numpydantic.NDArray` class, or use a {class}`~typing.Union` +to express more complex array constraints. + +This package is effectively a Pydantic interface to [nptyping](https://github.com/ramonhagenaars/nptyping), +so any array syntax valid there is valid here (see [TODO](todo) for caveats). + +```python +from typing import Union +from pydantic import BaseModel +from numpydantic import NDArray, Shape, UInt8, Float, Int + +class Image(BaseModel): + """ + Data values. Data can be in 1-D, 2-D, 3-D, or 4-D. The first dimension should always represent time. This can also be used to store binary data (e.g., image frames). This can also be a link to data stored in an external file. 
+ """ + array: Union[ + NDArray[Shape["* x, * y"], UInt8], + NDArray[Shape["* x, * y, 3 rgb"], UInt8], + NDArray[Shape["* x, * y, 4 rgba"], UInt8], + NDArray[Shape["* t, * x, * y, 3 rgb"], UInt8], + NDArray[Shape["* t, * x, * y, 4 rgba"], Float] + ] +``` + +### Validation: + +```python +import numpy as np +# works +frame_gray = Image(array=np.ones((1280, 720), dtype=np.uint8)) +frame_rgb = Image(array=np.ones((1280, 720, 3), dtype=np.uint8)) +frame_rgba = Image(array=np.ones((1280, 720, 4), dtype=np.uint8)) +video_rgb = Image(array=np.ones((100, 1280, 720, 3), dtype=np.uint8)) + +# fails +wrong_n_dimensions = Image(array=np.ones((1280,), dtype=np.uint8)) +wrong_shape = Image(array=np.ones((1280,720,10), dtype=np.uint8)) +wrong_type = Image(array=np.ones((1280,720,3), dtype=np.float64)) + +# shapes and types are checked together +float_video = Image(array=np.ones((100, 1280, 720, 4),dtype=float)) +wrong_shape_float_video = Image(array=np.ones((100, 1280, 720, 3),dtype=float)) +``` + +### JSON schema generation: + +```python +class MyArray(BaseModel): + array: NDArray[Shape["2 x, * y, 4 z"], Float] +``` + +```python +>>> print(json.dumps(MyArray.model_json_schema(), indent=2)) +``` + +```json +{ + "properties": { + "array": { + "items": { + "items": { + "items": { + "type": "number" + }, + "maxItems": 4, + "minItems": 4, + "type": "array" + }, + "type": "array" + }, + "maxItems": 2, + "minItems": 2, + "title": "Array", + "type": "array" + } + }, + "required": [ + "array" + ], + "title": "MyArray", + "type": "object" +} +``` + +### Serialization + +```python +class SmolArray(BaseModel): + array: NDArray[Shape["2 x, 2 y"], Int] + +class BigArray(BaseModel): + array: NDArray[Shape["1000 x, 1000 y"], Int] +``` + +Serialize small arrays as lists of lists, and big arrays as a b64-encoded blosc compressed string + +```python +>>> smol = SmolArray(array=np.array([[1,2],[3,4]], dtype=int)) +>>> big = BigArray(array=np.random.randint(0,255,(1000,1000),int)) + +>>> 
print(smol.model_dump_json()) +{"array":[[1,2],[3,4]]} +>>> print(big.model_dump_json()) +{ + "array": "( long b64 encoded string )", + "shape": [1000, 1000], + "dtype": "int64", + "unpack_fns": ["base64.b64decode", "blosc2.unpack_array2"] +} +``` + +## TODO + +```{todo} +Implement structured arrays +``` + +```{todo} +Implement pandas dataframe validation? +``` \ No newline at end of file diff --git a/docs/overview.md b/docs/overview.md new file mode 100644 index 0000000..d1185dd --- /dev/null +++ b/docs/overview.md @@ -0,0 +1,17 @@ +# Overview + +The Python type annotation system is weird and not like the rest of Python! +(at least until [PEP 0649](https://peps.python.org/pep-0649/) gets mainlined). +Similarly, Pydantic 2's core_schema system is wonderful but still relatively poorly +documented for custom types! This package does the work of plugging them in +together to make some kind of type validation frankenstein. + +The first problem is that type annotations are evaluated statically by python, mypy, +etc. This means you can't use typical python syntax for declaring types - it has to +be present at the time `__new__` is called, rather than `__init__`. + +- pydantic schema +- validation +- serialization +- lazy loading +- compression \ No newline at end of file diff --git a/docs/todo.md b/docs/todo.md new file mode 100644 index 0000000..9626352 --- /dev/null +++ b/docs/todo.md @@ -0,0 +1,5 @@ +# TODO + +```{todolist} + +``` \ No newline at end of file diff --git a/numpydantic/__init__.py b/numpydantic/__init__.py index 28f040a..d0cd680 100644 --- a/numpydantic/__init__.py +++ b/numpydantic/__init__.py @@ -1,12 +1,13 @@ # ruff: noqa: E402 # ruff: noqa: F401 +# ruff: noqa: I001 from numpydantic.monkeypatch import apply_patches apply_patches() +from numpydantic.ndarray import NDArray + # convenience imports for typing - finish this! 
from typing import Any from nptyping import Float, Int, Number, Shape, UInt8 - -from numpydantic.ndarray import NDArray diff --git a/numpydantic/linkml/__init__.py b/numpydantic/linkml/__init__.py index e69de29..750dcbc 100644 --- a/numpydantic/linkml/__init__.py +++ b/numpydantic/linkml/__init__.py @@ -0,0 +1,8 @@ +# ruff: noqa: E402 +# ruff: noqa: F401 +from numpydantic.linkml.ndarraygen import ( + ArrayFormat, + LinkMLDataArray, + LinkMLNDArray, + NWBLinkMLArraylike, +) diff --git a/tests/fixtures.py b/tests/fixtures.py index 3f15076..fc57120 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -2,6 +2,7 @@ import shutil from pathlib import Path import pytest +from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition @pytest.fixture(scope="session") @@ -38,3 +39,36 @@ def tmp_output_dir_mod(tmp_output_dir) -> Path: shutil.rmtree(str(subpath)) subpath.mkdir() return subpath + + +@pytest.fixture() +def nwb_linkml_array() -> tuple[ClassDefinition, str]: + classdef = ClassDefinition( + name="NWB_Linkml Array", + description="Main class's array", + is_a="Arraylike", + attributes=[ + SlotDefinition(name="x", range="numeric", required=True), + SlotDefinition(name="y", range="numeric", required=True), + SlotDefinition( + name="z", + range="numeric", + required=False, + maximum_cardinality=3, + minimum_cardinality=3, + ), + SlotDefinition( + name="a", + range="numeric", + required=False, + minimum_cardinality=4, + maximum_cardinality=4, + ), + ], + ) + generated = """Union[ + NDArray[Shape["* x, * y"], Number], + NDArray[Shape["* x, * y, 3 z"], Number], + NDArray[Shape["* x, * y, 3 z, 4 a"], Number] + ]""" + return classdef, generated diff --git a/tests/test_linkml/test_ndarraygen.py b/tests/test_linkml/test_ndarraygen.py new file mode 100644 index 0000000..ba1e554 --- /dev/null +++ b/tests/test_linkml/test_ndarraygen.py @@ -0,0 +1,14 @@ +import pytest + +from numpydantic.linkml import ArrayFormat, NWBLinkMLArraylike + +from ..fixtures import 
nwb_linkml_array + + +def test_nwb_linkml_array(nwb_linkml_array): + classdef, generated = nwb_linkml_array + + assert ArrayFormat.is_array(classdef) + assert NWBLinkMLArraylike.check(classdef) + assert ArrayFormat.get(classdef) is NWBLinkMLArraylike + assert generated == NWBLinkMLArraylike.make(classdef)