mirror of
https://github.com/p2p-ld/numpydantic.git
synced 2025-01-09 21:44:27 +00:00
docs and docs and docs and docs
This commit is contained in:
parent
02855852b7
commit
d3ad8dac5c
17 changed files with 1525 additions and 54 deletions
|
@ -25,7 +25,7 @@ extensions = [
|
|||
"sphinx.ext.doctest",
|
||||
"sphinx_design",
|
||||
"sphinxcontrib.mermaid",
|
||||
"myst_parser",
|
||||
"myst_nb",
|
||||
"sphinx.ext.todo",
|
||||
]
|
||||
|
||||
|
@ -77,3 +77,8 @@ napoleon_attr_annotations = True
|
|||
# todo
|
||||
todo_include_todos = True
|
||||
todo_link_only = True
|
||||
|
||||
# myst
|
||||
# myst-nb
|
||||
nb_render_markdown_format = "myst"
|
||||
nb_execution_show_tb = True
|
||||
|
|
BIN
docs/data/test.avi
Normal file
BIN
docs/data/test.avi
Normal file
Binary file not shown.
BIN
docs/data/test.h5
Normal file
BIN
docs/data/test.h5
Normal file
Binary file not shown.
22
docs/data/test.zarr/.zarray
Normal file
22
docs/data/test.zarr/.zarray
Normal file
|
@ -0,0 +1,22 @@
|
|||
{
|
||||
"chunks": [
|
||||
2,
|
||||
2
|
||||
],
|
||||
"compressor": {
|
||||
"blocksize": 0,
|
||||
"clevel": 5,
|
||||
"cname": "lz4",
|
||||
"id": "blosc",
|
||||
"shuffle": 1
|
||||
},
|
||||
"dtype": "<i8",
|
||||
"fill_value": 0,
|
||||
"filters": null,
|
||||
"order": "C",
|
||||
"shape": [
|
||||
2,
|
||||
2
|
||||
],
|
||||
"zarr_format": 2
|
||||
}
|
BIN
docs/data/test.zarr/0.0
Normal file
BIN
docs/data/test.zarr/0.0
Normal file
Binary file not shown.
84
docs/development.md
Normal file
84
docs/development.md
Normal file
|
@ -0,0 +1,84 @@
|
|||
# Development
|
||||
|
||||
## Versioning
|
||||
|
||||
This package uses a colloquial form of [semantic versioning 2](https://semver.org/).
|
||||
|
||||
Specifically:
|
||||
|
||||
- Major version `2.*.*` is reserved for the transition from nptyping to using
|
||||
`TypeVarTuple`, `Generic`, and `Protocol`. Until `2.*.*`...
|
||||
- breaking changes will be indicated with an advance in `MINOR`
|
||||
version, taking the place of `MAJOR` in semver
|
||||
- backwards-compatible bugfixes **and** additions in functionality
|
||||
will be indicated by a `PATCH` release, taking the place of `MINOR` and
|
||||
`PATCH` in semver.
|
||||
- After `2.*.*`, semver as usual will resume
|
||||
|
||||
You are encouraged to set an upper bound on your version dependencies until
|
||||
we pass `2.*.*`, as the major function of numpydantic is stable,
|
||||
but there is still a decent amount of jostling things around to be expected.
|
||||
|
||||
|
||||
### API Stability
|
||||
|
||||
- All breaking changes to the **public API** will be signaled by a major
|
||||
version's worth of deprecation warnings
|
||||
- All breaking changes to the **development API** will be signaled by a
|
||||
minor version's worth of deprecation warnings.
|
||||
- Changes to the remainder of the package, whether marked as private with a
|
||||
leading underscore or not, including the import structure of the package,
|
||||
are not considered part of the API and should not be relied on as stable
|
||||
until explicitly marked otherwise.
|
||||
|
||||
#### Public API
|
||||
|
||||
**Only the {class}`.NDArray` and {class}`.Shape` classes should be considered
|
||||
part of the stable public API.**
|
||||
|
||||
All associated functionality for validation should also be considered
|
||||
a stable part of the `NDArray` and `Shape` classes - functionality
|
||||
will only be added here, and the departure from the string-form of the
|
||||
shape specifications (and its removal) will take place in `v3.*.*`
|
||||
|
||||
End-users of numpydantic should pin an upper bound for the `MAJOR` version
|
||||
until after `v2.*.*`, after which time it is up to your discretion -
|
||||
no breaking changes are planned, but they would be signaled by a major version change.
|
||||
|
||||
#### Development API
|
||||
|
||||
**Only the {class}`.Interface` class and its subclasses,
|
||||
along with the Public API,
|
||||
should be considered part of the stable development API.**
|
||||
|
||||
The `Interface` class is the primary point of external development expected
|
||||
for numpydantic. It is still somewhat in flux, but it is prioritized for stability
|
||||
and deprecation warnings above the rest of the package.
|
||||
|
||||
Dependent packages that define their own `Interface`s should pin an upper
|
||||
bound for the `PATCH` version until `2.*.*`, and afterwards likely pin a `MINOR` version.
|
||||
Tests are designed such that it should be easy to test major features against
|
||||
each interface, and that work is also ongoing. Once the test suite reaches
|
||||
maturity, it should be possible for any downstream interfaces to simply use those to
|
||||
ensure they are compatible with the latest version.
|
||||
|
||||
## Release Schedule
|
||||
|
||||
There is no release schedule. Versions are released according to need and available labor.
|
||||
|
||||
## Contributing
|
||||
|
||||
### Dev environment
|
||||
|
||||
```{todo}
|
||||
Document dev environment
|
||||
|
||||
Really it's very simple, you just clone a fork and install
|
||||
the `dev` environment like `pip install '.[dev]'`
|
||||
```
|
||||
|
||||
### Pull Requests
|
||||
|
||||
```{todo}
|
||||
Document pull requests if we ever receive one
|
||||
```
|
|
@ -1,11 +0,0 @@
|
|||
# Hooks
|
||||
|
||||
What hooks do we want to expose to downstream users so they can use this without needing
|
||||
to override everything?
|
||||
|
||||
```{todo}
|
||||
**NWB Compatibility**
|
||||
|
||||
**Precision:** NWB allows for a sort of hierarchy of type specification -
|
||||
a less precise type also allows the data to be specified in a more precise type
|
||||
```
|
|
@ -86,8 +86,8 @@ isinstance(np.zeros((1,2,3), dtype=float), array_type)
|
|||
and a simple extension system to make it work with whatever else you want! Provides
|
||||
a uniform and transparent interface so you can both use common indexing operations
|
||||
and also access any special features of a given array library.
|
||||
- **Serialization** - Dump an array as a JSON-compatible array-of-arrays with enough metadata to be able to
|
||||
recreate the model in the native format
|
||||
- [**Serialization**](./serialization.md) - Dump an array as a JSON-compatible array-of-arrays with enough metadata to be able to
|
||||
recreate the model in the native format. Full roundtripping is supported :)
|
||||
- **Schema Generation** - Correct JSON Schema for arrays, complete with shape and dtype constraints, to
|
||||
make your models interoperable
|
||||
|
||||
|
@ -496,6 +496,14 @@ api/types
|
|||
|
||||
```
|
||||
|
||||
```{toctree}
|
||||
:maxdepth: 2
|
||||
:caption: Meta
|
||||
:hidden: true
|
||||
|
||||
development
|
||||
```
|
||||
|
||||
## See Also
|
||||
|
||||
- [`jaxtyping`](https://docs.kidger.site/jaxtyping/)
|
||||
|
|
|
@ -1,2 +1,313 @@
|
|||
---
|
||||
file_format: mystnb
|
||||
mystnb:
|
||||
output_stderr: remove
|
||||
render_text_lexer: python
|
||||
render_markdown_format: myst
|
||||
myst:
|
||||
enable_extensions: ["colon_fence"]
|
||||
---
|
||||
|
||||
# Serialization
|
||||
|
||||
## Python
|
||||
|
||||
In most cases, dumping to python should work as expected.
|
||||
|
||||
When a given array framework doesn't provide a tidy means of interacting
|
||||
with it from python, we substitute a proxy class like {class}`.hdf5.H5Proxy`,
|
||||
but aside from that numpydantic {class}`.NDArray` annotations
|
||||
should be passthrough when using {func}`~pydantic.BaseModel.model_dump` .
|
||||
|
||||
## JSON
|
||||
|
||||
JSON is the ~ ♥ fun one ♥ ~
|
||||
|
||||
There isn't necessarily a single optimal way to represent all possible
|
||||
arrays in JSON. The standard way that n-dimensional arrays are rendered
|
||||
in json is as a list-of-lists (or array of arrays, in JSON parlance),
|
||||
but that's almost never what is desirable, especially for large arrays.
|
||||
|
||||
### Normal Style[^normalstyle]
|
||||
|
||||
Lists-of-lists are the standard, however, so it is the default behavior
|
||||
for all interfaces, and all interfaces must support it.
|
||||
|
||||
```{code-cell}
|
||||
---
|
||||
tags: [hide-cell]
|
||||
---
|
||||
|
||||
from pathlib import Path
|
||||
from pydantic import BaseModel
|
||||
from numpydantic import NDArray, Shape
|
||||
from numpydantic.interface.dask import DaskJsonDict
|
||||
from numpydantic.interface.numpy import NumpyJsonDict
|
||||
import numpy as np
|
||||
import dask.array as da
|
||||
import zarr
|
||||
import json
|
||||
from rich import print
|
||||
from rich.console import Console
|
||||
|
||||
def print_json(string:str):
|
||||
data = json.loads(string)
|
||||
console = Console(width=74)
|
||||
console.print(data)
|
||||
```
|
||||
|
||||
For our humble model:
|
||||
|
||||
```{code-cell}
|
||||
class MyModel(BaseModel):
|
||||
array: NDArray
|
||||
```
|
||||
|
||||
We should get the same thing for each interface:
|
||||
|
||||
```{code-cell}
|
||||
model = MyModel(array=[[1,2],[3,4]])
|
||||
print(model.model_dump_json())
|
||||
```
|
||||
|
||||
```{code-cell}
|
||||
model = MyModel(array=da.array([[1,2],[3,4]], dtype=int))
|
||||
print(model.model_dump_json())
|
||||
```
|
||||
|
||||
```{code-cell}
|
||||
model = MyModel(array=zarr.array([[1,2],[3,4]], dtype=int))
|
||||
print(model.model_dump_json())
|
||||
```
|
||||
|
||||
```{code-cell}
|
||||
model = MyModel(array="docs/data/test.avi")
|
||||
print(model.model_dump_json())
|
||||
```
|
||||
|
||||
(ok maybe not that last one, since the video reader still incorrectly
|
||||
reads grayscale videos as BGR values for now, but you get the idea)
|
||||
|
||||
Since by default arrays are dumped into unadorned JSON arrays,
|
||||
when they are re-validated, they will always be handled by the
|
||||
{class}`.NumpyInterface`
|
||||
|
||||
```{code-cell}
|
||||
dask_array = da.array([[1,2],[3,4]], dtype=int)
|
||||
model = MyModel(array=dask_array)
|
||||
type(model.array)
|
||||
```
|
||||
|
||||
```{code-cell}
|
||||
model_json = model.model_dump_json()
|
||||
deserialized_model = MyModel.model_validate_json(model_json)
|
||||
type(deserialized_model.array)
|
||||
```
|
||||
|
||||
All information about `dtype` will be lost, and numbers will be either parsed
|
||||
as `int` ({class}`numpy.int64`) or `float` ({class}`numpy.float64`)
|
||||
|
||||
## Roundtripping
|
||||
|
||||
To make arrays round-trippable, use the `round_trip` argument
|
||||
to {func}`~pydantic.BaseModel.model_dump_json`
|
||||
|
||||
|
||||
```{code-cell}
|
||||
print_json(model.model_dump_json(round_trip=True))
|
||||
```
|
||||
|
||||
Each interface should[^notenforced] implement a dataclass that describes a
|
||||
json-able roundtrip form (see {class}`.interface.JsonDict`).
|
||||
|
||||
That dataclass then has a {meth}`JsonDict.is_valid` method that checks
|
||||
whether an incoming dict matches its schema
|
||||
|
||||
```{code-cell}
|
||||
roundtrip_json = json.loads(model.model_dump_json(round_trip=True))['array']
|
||||
DaskJsonDict.is_valid(roundtrip_json)
|
||||
```
|
||||
|
||||
```{code-cell}
|
||||
NumpyJsonDict.is_valid(roundtrip_json)
|
||||
```
|
||||
|
||||
#### Controlling paths
|
||||
|
||||
When possible, the full content of the array is omitted in favor
|
||||
of the path to the file that provided it.
|
||||
|
||||
```{code-cell}
|
||||
model = MyModel(array="docs/data/test.avi")
|
||||
print_json(model.model_dump_json(round_trip=True))
|
||||
```
|
||||
|
||||
```{code-cell}
|
||||
model = MyModel(array=("docs/data/test.h5", "/data"))
|
||||
print_json(model.model_dump_json(round_trip=True))
|
||||
```
|
||||
|
||||
You may notice the relative, rather than absolute paths.
|
||||
|
||||
|
||||
We expect that when people are dumping data to json in this roundtripped
|
||||
form that they are either working locally
|
||||
(e.g. transmitting an array specification across a socket in multiprocessing
|
||||
or in a computing cluster),
|
||||
or exporting to some directory structure of data,
|
||||
where they are making an index file that refers to datasets in a directory
|
||||
as part of a data standard or vernacular format.
|
||||
|
||||
By default, numpydantic uses the current working directory as the root to find
|
||||
paths relative to, but this can be controlled by the [`relative_to`](#relative_to)
|
||||
context parameter:
|
||||
|
||||
For example if you're working on data in many subdirectories,
|
||||
you might want to serialize relative to each of them:
|
||||
|
||||
```{code-cell}
|
||||
print_json(
|
||||
model.model_dump_json(
|
||||
round_trip=True,
|
||||
context={"relative_to": Path('./docs/data')}
|
||||
))
|
||||
```
|
||||
|
||||
Or in the other direction:
|
||||
|
||||
```{code-cell}
|
||||
print_json(
|
||||
model.model_dump_json(
|
||||
round_trip=True,
|
||||
context={"relative_to": Path('../')}
|
||||
))
|
||||
```
|
||||
|
||||
Or you might be working in some completely different place,
|
||||
numpydantic will try and find the way from here to there as long as it exists,
|
||||
even if it means traversing to the root of the readthedocs filesystem
|
||||
|
||||
```{code-cell}
|
||||
print_json(
|
||||
model.model_dump_json(
|
||||
round_trip=True,
|
||||
context={"relative_to": Path('/a/long/distance/directory')}
|
||||
))
|
||||
```
|
||||
|
||||
You can force absolute paths with the `absolute_paths` context parameter
|
||||
|
||||
```{code-cell}
|
||||
print_json(
|
||||
model.model_dump_json(
|
||||
round_trip=True,
|
||||
context={"absolute_paths": True}
|
||||
))
|
||||
```
|
||||
|
||||
#### Durable Interface Metadata
|
||||
|
||||
Numpydantic tries to be [stable](./development.md#api-stability),
|
||||
but we're not perfect. To preserve the full information about the
|
||||
interface that's needed to load the data referred to by the value,
|
||||
use the `mark_interface` context parameter:
|
||||
|
||||
```{code-cell}
|
||||
print_json(
|
||||
model.model_dump_json(
|
||||
round_trip=True,
|
||||
context={"mark_interface": True}
|
||||
))
|
||||
```
|
||||
|
||||
```{todo}
|
||||
We will also add a separate `mark_version` parameter for marking
|
||||
the specific version of the relevant interface package, like `zarr`, or `numpy`,
|
||||
patience.
|
||||
```
|
||||
|
||||
## Context parameters
|
||||
|
||||
A reference listing of all the things that can be passed to
|
||||
{func}`~pydantic.BaseModel.model_dump_json`
|
||||
|
||||
|
||||
### `mark_interface`
|
||||
|
||||
Nest an additional layer of metadata for unambiguous serialization that
|
||||
can be absolutely resolved across numpydantic versions
|
||||
(for now for downstream metadata purposes only,
|
||||
automatically resolving to a numpydantic version is not yet possible.)
|
||||
|
||||
Supported interfaces:
|
||||
|
||||
- (all)
|
||||
|
||||
```{code-cell}
|
||||
model = MyModel(array=[[1,2],[3,4]])
|
||||
data = model.model_dump_json(
|
||||
round_trip=True,
|
||||
context={"mark_interface": True}
|
||||
)
|
||||
print_json(data)
|
||||
```
|
||||
|
||||
### `absolute_paths`
|
||||
|
||||
Make all paths (that exist) absolute.
|
||||
|
||||
Supported interfaces:
|
||||
|
||||
- (all)
|
||||
|
||||
```{code-cell}
|
||||
model = MyModel(array=("docs/data/test.h5", "/data"))
|
||||
data = model.model_dump_json(
|
||||
round_trip=True,
|
||||
context={"absolute_paths": True}
|
||||
)
|
||||
print_json(data)
|
||||
```
|
||||
|
||||
### `relative_to`
|
||||
|
||||
Make all paths (that exist) relative to the given path
|
||||
|
||||
Supported interfaces:
|
||||
|
||||
- (all)
|
||||
|
||||
```{code-cell}
|
||||
model = MyModel(array=("docs/data/test.h5", "/data"))
|
||||
data = model.model_dump_json(
|
||||
round_trip=True,
|
||||
context={"relative_to": Path('../')}
|
||||
)
|
||||
print_json(data)
|
||||
```
|
||||
|
||||
### `dump_array`
|
||||
|
||||
Dump the raw array contents when serializing to json inside an `array` field
|
||||
|
||||
Supported interfaces:
|
||||
- {class}`.ZarrInterface`
|
||||
|
||||
```{code-cell}
|
||||
model = MyModel(array=("docs/data/test.zarr",))
|
||||
data = model.model_dump_json(
|
||||
round_trip=True,
|
||||
context={"dump_array": True}
|
||||
)
|
||||
print_json(data)
|
||||
```
|
||||
|
||||
|
||||
|
||||
[^normalstyle]: o ya we're posting JSON [normal style](https://normal.style)
|
||||
[^notenforced]: This is only *functionally* enforced at the moment, where
|
||||
a roundtrip test confirms that dtype and type are preserved,
|
||||
but there is no formal test for each interface having its own serialization class
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -73,12 +73,14 @@ tests = [
|
|||
"coveralls<4.0.0,>=3.3.1",
|
||||
]
|
||||
docs = [
|
||||
"numpydantic[arrays]",
|
||||
"sphinx<8.0.0,>=7.2.6",
|
||||
"furo>=2024.1.29",
|
||||
"myst-parser<3.0.0,>=2.0.0",
|
||||
"autodoc-pydantic<3.0.0,>=2.0.1",
|
||||
"sphinx-design<1.0.0,>=0.5.0",
|
||||
"sphinxcontrib-mermaid>=0.9.2",
|
||||
"myst-nb>=1.1.1",
|
||||
]
|
||||
dev = [
|
||||
"numpydantic[tests,docs]",
|
||||
|
|
|
@ -63,10 +63,15 @@ class JsonDict:
|
|||
return TypeAdapter(cls)
|
||||
|
||||
@classmethod
|
||||
def is_valid(cls, val: dict) -> bool:
|
||||
def is_valid(cls, val: dict, raise_on_error: bool = False) -> bool:
|
||||
"""
|
||||
Check whether a given dictionary matches this JsonDict specification
|
||||
|
||||
Args:
|
||||
val (dict): The dictionary to check for validity
|
||||
raise_on_error (bool): If ``True``, raise the validation error
|
||||
rather than returning a bool. (default: ``False``)
|
||||
|
||||
Returns:
|
||||
bool - true if valid, false if not
|
||||
"""
|
||||
|
@ -74,7 +79,9 @@ class JsonDict:
|
|||
try:
|
||||
_ = adapter.validate_python(val)
|
||||
return True
|
||||
except ValidationError:
|
||||
except ValidationError as e:
|
||||
if raise_on_error:
|
||||
raise e
|
||||
return False
|
||||
|
||||
|
||||
|
|
|
@ -159,6 +159,9 @@ class VideoProxy:
|
|||
return self[:]
|
||||
|
||||
def __getitem__(self, item: Union[int, slice, tuple]) -> np.ndarray:
|
||||
if not self.path.exists():
|
||||
raise FileNotFoundError(f"Video file {self.path} does not exist!")
|
||||
|
||||
if isinstance(item, int):
|
||||
# want a single frame
|
||||
return self._get_frame(item)
|
||||
|
|
|
@ -178,12 +178,12 @@ class ZarrInterface(Interface):
|
|||
:meth:`zarr.core.Array.info_items`
|
||||
plus the :meth:`zarr.core.Array.hexdigest` as a :class:`.ZarrJsonDict`
|
||||
|
||||
If either the ``zarr_dump_array`` value in the context dictionary is ``True``
|
||||
If either the ``dump_array`` value in the context dictionary is ``True``
|
||||
or the zarr array is an in-memory array, dump the array as well
|
||||
(since without a persistent array it would be impossible to roundtrip and
|
||||
dumping to JSON would be meaningless)
|
||||
|
||||
Passing ``'zarr_dump_array': True`` to the serialization ``context``
|
||||
Passing ``'dump_array': True`` to the serialization ``context``
|
||||
looks like this::
|
||||
|
||||
model.model_dump_json(context={'dump_array': True})
|
||||
|
@ -193,7 +193,7 @@ class ZarrInterface(Interface):
|
|||
if info.round_trip:
|
||||
dump_array = False
|
||||
if info is not None and info.context is not None:
|
||||
dump_array = info.context.get("zarr_dump_array", False)
|
||||
dump_array = info.context.get("dump_array", False)
|
||||
is_file = False
|
||||
|
||||
as_json = {"type": cls.name}
|
||||
|
|
|
@ -24,10 +24,10 @@ from numpydantic.exceptions import InterfaceError
|
|||
from numpydantic.interface import Interface
|
||||
from numpydantic.maps import python_to_nptyping
|
||||
from numpydantic.schema import (
|
||||
_jsonize_array,
|
||||
get_validate_interface,
|
||||
make_json_schema,
|
||||
)
|
||||
from numpydantic.serialization import jsonize_array
|
||||
from numpydantic.types import DtypeType, NDArrayType, ShapeType
|
||||
from numpydantic.vendor.nptyping.error import InvalidArgumentsError
|
||||
from numpydantic.vendor.nptyping.ndarray import NDArrayMeta as _NDArrayMeta
|
||||
|
@ -181,7 +181,7 @@ class NDArray(NPTypingType, metaclass=NDArrayMeta):
|
|||
return core_schema.with_info_plain_validator_function(
|
||||
get_validate_interface(shape, dtype),
|
||||
serialization=core_schema.plain_serializer_function_ser_schema(
|
||||
_jsonize_array, when_used="json", info_arg=True
|
||||
jsonize_array, when_used="json", info_arg=True
|
||||
),
|
||||
metadata=json_schema,
|
||||
)
|
||||
|
|
|
@ -5,15 +5,15 @@ Helper functions for use with :class:`~numpydantic.NDArray` - see the note in
|
|||
|
||||
import hashlib
|
||||
import json
|
||||
from typing import TYPE_CHECKING, Any, Callable, Optional, Union
|
||||
from typing import TYPE_CHECKING, Any, Callable, Optional
|
||||
|
||||
import numpy as np
|
||||
from pydantic import BaseModel, SerializationInfo
|
||||
from pydantic import BaseModel
|
||||
from pydantic_core import CoreSchema, core_schema
|
||||
from pydantic_core.core_schema import ListSchema, ValidationInfo
|
||||
|
||||
from numpydantic import dtype as dt
|
||||
from numpydantic.interface import Interface, JsonDict
|
||||
from numpydantic.interface import Interface
|
||||
from numpydantic.maps import np_to_python
|
||||
from numpydantic.types import DtypeType, NDArrayType, ShapeType
|
||||
from numpydantic.vendor.nptyping.structure import StructureMeta
|
||||
|
@ -278,16 +278,3 @@ def get_validate_interface(shape: ShapeType, dtype: DtypeType) -> Callable:
|
|||
return value
|
||||
|
||||
return validate_interface
|
||||
|
||||
|
||||
def _jsonize_array(value: Any, info: SerializationInfo) -> Union[list, dict]:
|
||||
"""Use an interface class to render an array as JSON"""
|
||||
interface_cls = Interface.match_output(value)
|
||||
array = interface_cls.to_json(value, info)
|
||||
if isinstance(array, JsonDict):
|
||||
array = array.to_dict()
|
||||
|
||||
if info.context and info.context.get("mark_interface", False):
|
||||
array = interface_cls.mark_json(array)
|
||||
|
||||
return array
|
||||
|
|
94
src/numpydantic/serialization.py
Normal file
94
src/numpydantic/serialization.py
Normal file
|
@ -0,0 +1,94 @@
|
|||
from pathlib import Path
|
||||
from typing import Any, Callable, TypeVar, Union
|
||||
|
||||
from pydantic_core.core_schema import SerializationInfo
|
||||
|
||||
from numpydantic.interface import Interface, JsonDict
|
||||
|
||||
T = TypeVar("T")
|
||||
U = TypeVar("U")
|
||||
|
||||
|
||||
def jsonize_array(value: Any, info: SerializationInfo) -> Union[list, dict]:
    """
    Use an interface class to render an array as JSON

    The interface is selected with :meth:`.Interface.match_output` and asked
    to serialize the value; a returned :class:`.JsonDict` is converted to a
    plain dict.

    The following keys of the serialization ``context`` are honored:

    - ``mark_interface``: pass the result through the interface's
      ``mark_json`` method
    - ``absolute_paths``: make paths that exist absolute
    - ``relative_to``: make paths that exist relative to this directory
      (default: the current working directory)

    Paths are relativized by default, including when no context is passed at
    all. A bare list is treated as raw array data and is never path-rewritten,
    so string elements that happen to name an existing path are left intact.

    Args:
        value (Any): The array to serialize
        info (SerializationInfo): Serialization info provided by pydantic,
            whose ``context`` (if any) carries the options above

    Returns:
        list or dict - the JSON-able form of the array
    """
    interface_cls = Interface.match_output(value)
    array = interface_cls.to_json(value, info)
    if isinstance(array, JsonDict):
        array = array.to_dict()

    # a missing context behaves like an empty one, so the defaults still apply
    context = info.context or {}

    if context.get("mark_interface", False):
        array = interface_cls.mark_json(array)

    if isinstance(array, list):
        # plain list-of-lists: array contents, not metadata that holds paths
        return array

    if context.get("absolute_paths", False):
        array = _absolutize_paths(array)
    else:
        array = _relativize_paths(array, context.get("relative_to", "."))

    return array
|
||||
|
||||
|
||||
def _relativize_paths(value: dict, relative_to: Union[str, Path] = ".") -> dict:
    """
    Make paths relative to either the current directory or the provided
    ``relative_to`` directory, if provided in the context

    Every leaf of ``value`` is visited. Leaves that can be interpreted as a
    path that exists on the filesystem are replaced by a string path relative
    to ``relative_to``; everything else is returned unchanged.

    Args:
        value (dict): The JSON-able object to walk
        relative_to (str, Path): Directory that paths are made relative to
            (default: ``"."``, the current working directory)

    Returns:
        dict - the walked object with existing paths relativized
    """
    root = Path(relative_to).resolve()

    def _r_path(v: Any) -> Any:
        # ``Path("")`` is ``Path(".")``, which always exists - an empty
        # string is data, not a reference to the current directory
        if isinstance(v, str) and not v:
            return v
        try:
            path = Path(v)
            if not path.exists():
                return v
            # no relative path exists between different anchors (eg. drives)
            if path.resolve().anchor != root.anchor:
                return v
            return str(relative_path(path, root))
        except (TypeError, ValueError, OSError, RuntimeError):
            # TypeError: not path-like (numbers, None, ...)
            # ValueError/OSError: strings the filesystem refuses to handle
            # RuntimeError: symlink loops or runaway recursion while resolving
            return v

    return _walk_and_apply(value, _r_path)
|
||||
|
||||
|
||||
def _absolutize_paths(value: dict) -> dict:
    """
    Make all paths that exist absolute

    Every leaf of ``value`` is visited. Leaves that can be interpreted as a
    path that exists on the filesystem are replaced by their resolved,
    absolute string form; everything else is returned unchanged.

    Args:
        value (dict): The JSON-able object to walk

    Returns:
        dict - the walked object with existing paths made absolute
    """

    def _a_path(v: Any) -> Any:
        # ``Path("")`` is ``Path(".")``, which always exists - an empty
        # string is data, not a reference to the current directory
        if isinstance(v, str) and not v:
            return v
        try:
            path = Path(v)
            if not path.exists():
                return v
            return str(path.resolve())
        except (TypeError, ValueError, OSError, RuntimeError):
            # TypeError: not path-like (numbers, None, ...)
            # ValueError/OSError: strings the filesystem refuses to handle
            # RuntimeError: symlink loops while resolving
            return v

    return _walk_and_apply(value, _a_path)
|
||||
|
||||
|
||||
def _walk_and_apply(value: T, f: Callable[[U], U]) -> T:
|
||||
"""
|
||||
Walk an object, applying a function
|
||||
"""
|
||||
if isinstance(value, dict):
|
||||
for k, v in value.items():
|
||||
if isinstance(v, dict):
|
||||
_walk_and_apply(v, f)
|
||||
elif isinstance(v, list):
|
||||
value[k] = [_walk_and_apply(sub_v, f) for sub_v in v]
|
||||
else:
|
||||
value[k] = f(v)
|
||||
elif isinstance(value, list):
|
||||
value = [_walk_and_apply(v, f) for v in value]
|
||||
else:
|
||||
value = f(value)
|
||||
return value
|
||||
|
||||
|
||||
def relative_path(target: Path, origin: Path) -> Path:
    """
    Return the path of target relative to origin, even if they're
    not in the same subpath

    Both paths are resolved first. Origin is then walked upwards one parent at
    a time, prepending a ``..`` for each step, until target lies beneath it.

    Args:
        target (Path): The path to express relatively
        origin (Path): The directory the result is relative to

    Returns:
        Path - relative path leading from ``origin`` to ``target``

    Raises:
        ValueError: If the paths share no common ancestor (eg. they are
            on different drives), so no relative path exists

    References:
        - https://stackoverflow.com/a/71874881
    """
    target = Path(target).resolve()
    origin = Path(origin).resolve()

    levels_up = 0
    while True:
        try:
            remainder = target.relative_to(origin)
        except ValueError:  # target does not start with origin
            parent = origin.parent
            if parent == origin:
                # reached the root without a match: climbing further would
                # loop forever, since the parent of a root is itself
                raise ValueError(
                    f"{target} and {origin} do not share a common root"
                ) from None
            origin = parent
            levels_up += 1
        else:
            return Path(*([".."] * levels_up)) / remainder
|
Loading…
Reference in a new issue