numpydantic/tests/test_serialization.py

"""
Test serialization-specific functionality that doesn't need to be
applied across every interface (use test_interface/test_interfaces for that
"""

import json
from pathlib import Path
from typing import Callable

import h5py
import numpy as np
import pytest

from numpydantic.serialization import _relativize_paths, _walk_and_apply, relative_path

pytestmark = pytest.mark.serialization


@pytest.fixture(scope="module")
def hdf5_at_path() -> Callable[[Path], None]:
    """
    Factory fixture that writes a small HDF5 file at a requested path.

    The yielded callable creates (or overwrites) the file at ``path`` with two
    2x2 integer datasets, ``/data`` and ``subpath/to/dataset``.

    Every path passed to the factory is removed when the module-scoped fixture
    is torn down. If the factory is never called, teardown does nothing.
    """
    # every path handed to the factory, so teardown can clean up all of them
    # rather than only the most recent one
    created: list[Path] = []

    def _hdf5_at_path(path: Path) -> None:
        # record before writing so a partially-written file is still removed
        created.append(Path(path))
        # context manager guarantees the handle is closed even if a write fails
        with h5py.File(path, "w") as h5f:
            h5f.create_dataset("/data", data=np.array([[1, 2], [3, 4]]))
            h5f.create_dataset("subpath/to/dataset", data=np.array([[1, 2], [4, 5]]))

    yield _hdf5_at_path

    for path in created:
        path.unlink(missing_ok=True)


def test_relative_path(hdf5_at_path, tmp_output_dir, model_blank):
    """
    Without any serialization context, the dumped file path
    should be relative to the cwd
    """
    h5_path = tmp_output_dir / "relative.h5"
    hdf5_at_path(h5_path)

    instance = model_blank(array=(h5_path, "/data"))
    dumped = json.loads(instance.model_dump_json(round_trip=True))
    dumped_file = Path(dumped["array"]["file"])

    # the serialized path must be relative...
    assert not dumped_file.is_absolute()
    # ...and joining it onto the cwd must lead back to the file we wrote
    assert (Path.cwd() / dumped_file).resolve() == h5_path.resolve()


def test_relative_to_path(hdf5_at_path, tmp_output_dir, model_blank):
    """
    A ``relative_to`` entry in the serialization context should be used
    as the base for relative paths instead of the cwd
    """
    h5_path = tmp_output_dir / "relative.h5"
    base = Path(__file__) / "fake_dir" / "sub_fake_dir"
    hdf5_at_path(h5_path)

    instance = model_blank(array=(h5_path, "/data"))
    context = {"relative_to": str(base)}
    dumped = json.loads(instance.model_dump_json(round_trip=True, context=context))
    array = dumped["array"]
    dumped_file = Path(array["file"])

    # still a relative path...
    assert not dumped_file.is_absolute()
    # ...that matches the expected string form and resolves to the real file
    assert dumped_file == Path("../../../__tmp__/relative.h5")
    assert (base / dumped_file).resolve() == h5_path.resolve()

    # the dataset path `/data` looks like a path, but must be left alone
    assert array["path"] == "/data"


def test_relative_to_root_dir():
    """
    Absolute paths to directories directly beneath the root directory
    (eg `/data`) should be left untouched by the relativize function,
    even if they exist
    """
    # second-to-last parent of this file. ``parents`` is materialized as a
    # list first for python 3.9 compat, which can't use negative indices
    ancestors = list(Path(__file__).resolve().parents)
    root_child = ancestors[-2]
    root_child_str = str(root_child)

    result = _relativize_paths({"some_field": root_child_str}, relative_to=".")

    # relativizing the path directly *does* change it...
    cwd = Path(".").resolve()
    assert str(relative_path(root_child, cwd)) != root_child_str
    # ...but the walked data keeps the original value
    assert result["some_field"] == root_child_str


def test_absolute_path(hdf5_at_path, tmp_output_dir, model_blank):
    """
    With ``absolute_paths`` set in the serialization context,
    the dumped file path should be absolute
    """
    dataset = "subpath/to/dataset"
    h5_path = tmp_output_dir / "relative.h5"
    hdf5_at_path(h5_path)

    instance = model_blank(array=(h5_path, dataset))
    dumped = json.loads(
        instance.model_dump_json(round_trip=True, context={"absolute_paths": True})
    )
    array = dumped["array"]
    dumped_file = Path(array["file"])

    # absolute, and pointing at the file we wrote
    assert dumped_file.is_absolute()
    assert dumped_file == h5_path.resolve()

    # the dataset subpath is pathlike too, but must not be absolutized
    assert array["path"] == dataset


def test_walk_and_apply():
    """
    ``_walk_and_apply`` should apply a function to every value in a
    nested structure of dicts and lists
    """

    def _double(v, skip: bool = False):
        return v * 2

    def _check_doubled(v, skip: bool = False):
        assert v == 2
        return v

    # every leaf starts as 1, so every leaf should be 2 after doubling
    nested = {
        "a": 1,
        "b": 1,
        "c": [
            {"a": 1, "b": {"a": 1, "b": 1}, "c": [1, 1, 1]},
            {"a": 1, "b": [1, 1, 1]},
        ],
    }

    doubled = _walk_and_apply(nested, _double)
    # a second walk checks each value the walker visits
    _walk_and_apply(doubled, _check_doubled)

    # spot-check the result by hand as well
    assert doubled["a"] == 2

    first = doubled["c"][0]
    assert first["a"] == 2
    assert first["b"]["a"] == 2
    assert all(item == 2 for item in first["c"])

    second = doubled["c"][1]
    assert second["a"] == 2
    assert all(item == 2 for item in second["b"])
tests for paths 2024-09-24 00:30:06 +00:00			`"""`
			`Test serialization-specific functionality that doesn't need to be`
			`applied across every interface (use test_interface/test_interfaces for that`
			`"""`

linting for tests 2024-10-04 02:57:54 +00:00			`import json`
tests for paths 2024-09-24 00:30:06 +00:00			`from pathlib import Path`
			`from typing import Callable`
linting for tests 2024-10-04 02:57:54 +00:00
			`import h5py`
tests for paths 2024-09-24 00:30:06 +00:00			`import numpy as np`
linting for tests 2024-10-04 02:57:54 +00:00			`import pytest`
tests for paths 2024-09-24 00:30:06 +00:00
linting for tests 2024-10-04 02:57:54 +00:00			`from numpydantic.serialization import _relativize_paths, _walk_and_apply, relative_path`
test walk and apply 2024-09-24 07:15:56 +00:00
test shape ellipsis at last 2024-09-24 00:35:24 +00:00			`pytestmark = pytest.mark.serialization`

tests for paths 2024-09-24 00:30:06 +00:00
			`@pytest.fixture(scope="module")`
			`def hdf5_at_path() -> Callable[[Path], None]:`
			`_path = ""`

			`def _hdf5_at_path(path: Path) -> None:`
			`nonlocal _path`
			`_path = path`
			`h5f = h5py.File(path, "w")`
			`_ = h5f.create_dataset("/data", data=np.array([[1, 2], [3, 4]]))`
			`_ = h5f.create_dataset("subpath/to/dataset", data=np.array([[1, 2], [4, 5]]))`
			`h5f.close()`

			`yield _hdf5_at_path`

			`Path(_path).unlink(missing_ok=True)`


			`def test_relative_path(hdf5_at_path, tmp_output_dir, model_blank):`
			`"""`
			`By default, we should make all paths relative to the cwd`
			`"""`
			`out_path = tmp_output_dir / "relative.h5"`
			`hdf5_at_path(out_path)`
			`model = model_blank(array=(out_path, "/data"))`
			`rt = model.model_dump_json(round_trip=True)`
			`file = json.loads(rt)["array"]["file"]`

			`# should not be absolute`
			`assert not Path(file).is_absolute()`
			`# should be relative to cwd`
			`out_file = (Path.cwd() / file).resolve()`
			`assert out_file == out_path.resolve()`


			`def test_relative_to_path(hdf5_at_path, tmp_output_dir, model_blank):`
			`"""`
			When explicitly passed a path to be ``relative_to`` ,
			`relative to that instead of cwd`
			`"""`
			`out_path = tmp_output_dir / "relative.h5"`
			`relative_to_path = Path(__file__) / "fake_dir" / "sub_fake_dir"`
fix skipping relativize absolute paths that are first-level root directories 2024-09-25 23:54:42 +00:00			`expected_path = Path("../../../__tmp__/relative.h5")`
tests for paths 2024-09-24 00:30:06 +00:00
			`hdf5_at_path(out_path)`
			`model = model_blank(array=(out_path, "/data"))`
			`rt = model.model_dump_json(`
			`round_trip=True, context={"relative_to": str(relative_to_path)}`
			`)`
			`data = json.loads(rt)["array"]`
			`file = data["file"]`

			`# should not be absolute`
			`assert not Path(file).is_absolute()`
			`# should be expected path and reach the file`
fix skipping relativize absolute paths that are first-level root directories 2024-09-25 23:54:42 +00:00			`assert Path(file) == expected_path`
tests for paths 2024-09-24 00:30:06 +00:00			`assert (relative_to_path / file).resolve() == out_path.resolve()`

			# we shouldn't have touched `/data` even though it is pathlike
			`assert data["path"] == "/data"`


fix skipping relativize absolute paths that are first-level root directories 2024-09-25 23:54:42 +00:00			`def test_relative_to_root_dir():`
			`"""`
			`The relativize function should ignore paths that are directories`
			beneath the root directory (eg `/data`) even if they exist

			`"""`
python 3.9 compat 2024-09-26 00:11:02 +00:00			`# python 3.9 compat, which can't use negative indices`
			`test_path = [p for p in Path(__file__).resolve().parents][-2]`
fix skipping relativize absolute paths that are first-level root directories 2024-09-25 23:54:42 +00:00
			`test_data = {"some_field": str(test_path)}`

			`walked = _relativize_paths(test_data, relative_to=".")`
			`assert str(relative_path(test_path, Path(".").resolve())) != str(test_path)`
			`assert walked["some_field"] == str(test_path)`


fix duplicated test name 2024-09-25 03:20:09 +00:00			`def test_absolute_path(hdf5_at_path, tmp_output_dir, model_blank):`
tests for paths 2024-09-24 00:30:06 +00:00			`"""`
			`When told, we make paths absolute`
			`"""`
			`out_path = tmp_output_dir / "relative.h5"`
			`expected_dataset = "subpath/to/dataset"`

			`hdf5_at_path(out_path)`
			`model = model_blank(array=(out_path, expected_dataset))`
			`rt = model.model_dump_json(round_trip=True, context={"absolute_paths": True})`
			`data = json.loads(rt)["array"]`
			`file = data["file"]`

			`# should be absolute and equal to out_path`
			`assert Path(file).is_absolute()`
			`assert Path(file) == out_path.resolve()`

			`# shouldn't have absolutized subpath even if it's pathlike`
			`assert data["path"] == expected_dataset`
test walk and apply 2024-09-24 07:15:56 +00:00

			`def test_walk_and_apply():`
			`"""`
format and also add pdm scripts 2024-10-04 03:01:56 +00:00			`Walk and apply should recursively apply a function to everything in a`
linting for tests 2024-10-04 02:57:54 +00:00			`nesty structure`
test walk and apply 2024-09-24 07:15:56 +00:00			`"""`
			`test = {`
			`"a": 1,`
			`"b": 1,`
			`"c": [`
			`{"a": 1, "b": {"a": 1, "b": 1}, "c": [1, 1, 1]},`
			`{"a": 1, "b": [1, 1, 1]},`
			`],`
			`}`

			`def _mult_2(v, skip: bool = False):`
			`return v * 2`

			`def _assert_2(v, skip: bool = False):`
			`assert v == 2`
			`return v`

			`walked = _walk_and_apply(test, _mult_2)`
			`_walk_and_apply(walked, _assert_2)`

			`assert walked["a"] == 2`
			`assert walked["c"][0]["a"] == 2`
			`assert walked["c"][0]["b"]["a"] == 2`
			`assert all([w == 2 for w in walked["c"][0]["c"]])`
			`assert walked["c"][1]["a"] == 2`
			`assert all([w == 2 for w in walked["c"][1]["b"]])`