adjusting array adapters to linkml arrays

2025-01-10 06:04:28 +00:00 · 2024-07-03 00:41:16 -07:00 · 2024-07-03 00:41:16 -07:00 · 087064be48
commit 087064be48
parent 0606221ab0
8 changed files with 302 additions and 15 deletions
--- a/nwb_linkml/src/nwb_linkml/adapters/init.py
+++ b/nwb_linkml/src/nwb_linkml/adapters/init.py
@ -3,6 +3,7 @@ Adapter classes for translating from NWB schema language to LinkML
 """

 from nwb_linkml.adapters.adapter import Adapter, BuildResult
+from nwb_linkml.adapters.array import ArrayAdapter
 from nwb_linkml.adapters.classes import ClassAdapter
 from nwb_linkml.adapters.dataset import DatasetAdapter
 from nwb_linkml.adapters.group import GroupAdapter
--- a/nwb_linkml/src/nwb_linkml/adapters/array.py
+++ b/nwb_linkml/src/nwb_linkml/adapters/array.py
@ -0,0 +1,109 @@
+"""
+Generator for array ranges from nwb dims/ranges
+"""
+
+from itertools import zip_longest
+from typing import Dict, List, Literal, Optional, Union, NamedTuple, TypeAlias
+from linkml_runtime.linkml_model.meta import (
+    ClassDefinition,
+    SlotDefinition,
+    ArrayExpression,
+    DimensionExpression,
+)
+import warnings
+
+from nwb_linkml.types.nwb import DIMS_LIST, DIMS_TYPE, SHAPE_LIST, SHAPE_TYPE
+
+
+class Dimension(NamedTuple):
+    """A single dimension/shape pair"""
+
+    # name of the dimension; ``None`` when no name is given
+    dims: Optional[str] = None
+    # size of the dimension; ``None`` when no size is given
+    shape: Optional[int] = None
+
+
+class Shape(tuple[Dimension, ...]):
+    """
+    A collection of :class:`.Dimension` tuples representing one of the nested layers in
+    a dims/shape spec.
+
+    Behaves exactly like a plain tuple of any length; the subclass only exists so that
+    a finished layer can be told apart from a bare tuple or list with ``isinstance``.
+    """
+
+
+class ArrayAdapter:
+    """
+    Adapter that generates a :class:`.ArrayExpression` (or set of them)
+    from a NWB dims/shape declaration
+
+    Args:
+        dims: dimension names - a flat list, or a list of lists when
+            several array specifications are declared
+        shape: dimension sizes, laid out like ``dims``
+    """
+
+    def __init__(self, dims: DIMS_TYPE, shape: SHAPE_TYPE):
+        self.dims = dims
+        self.shape = shape
+
+    def pivot_dims(
+        self, dims: Optional[DIMS_TYPE] = None, shape: Optional[SHAPE_TYPE] = None
+    ) -> List[Shape]:
+        """
+        Pivot from a list of dims and a list of shape to a list of (dim, shape) tuples
+
+        Args:
+            dims: dimension names, defaults to ``self.dims``
+            shape: dimension sizes, defaults to ``self.shape``
+
+        Returns:
+            A list with one :class:`.Shape` per array specification. A single-layered
+            spec (including an empty one) yields a one-element list.
+
+        Warns:
+            UserWarning: if the top-level lengths of ``dims`` and ``shape`` differ.
+        """
+        if dims is None:
+            dims = self.dims
+        if shape is None:
+            shape = self.shape
+
+        if len(dims) != len(shape):
+            warnings.warn(
+                f"dims ({len(dims)}) and shape ({len(shape)}) are not the same length!!! "
+                "Your schema is formatted badly"
+            )
+
+        def _iter_dims(dims: DIMS_TYPE, shape: SHAPE_TYPE) -> Union[List[Shape], Shape]:
+            """Pair names with sizes layer by layer, recursing into nested shape lists"""
+            shapes = []
+            # zip stops at the shorter input, so unmatched trailing entries are dropped
+            for inner_dim, inner_shape in zip(dims, shape):
+                if isinstance(inner_shape, list):
+                    # list of lists
+
+                    # some badly formatted schema will have shape be a LoL but only provide a single
+                    # set of names at the top level. Best we can do is repeat it and pray
+                    # that it is the same size as the longest dims
+                    if not isinstance(inner_dim, list):
+                        inner_dim = dims
+
+                    shapes.append(_iter_dims(inner_dim, inner_shape))
+                else:
+                    # single-layer list
+                    shapes.append(Dimension(inner_dim, inner_shape))
+            # a layer made only of Dimension pairs is complete: freeze it into a Shape
+            if all(isinstance(x, Dimension) for x in shapes):
+                shapes = Shape(shapes)
+            return shapes
+
+        shapes = _iter_dims(dims, shape)
+
+        # single-layered spec (a bare Shape, possibly empty): wrap it so that
+        # callers always receive a list
+        if isinstance(shapes, Shape) or not all(isinstance(x, Shape) for x in shapes):
+            shapes = [shapes]
+
+        return shapes
+
+    def make_expression(self, shape: Shape) -> ArrayExpression:
+        """
+        Create the corresponding array specification from a shape
+
+        Each :class:`.Dimension` becomes a :class:`.DimensionExpression` whose ``alias``
+        is the dimension name and whose ``exact_cardinality`` is the dimension size.
+        """
+        dims = [DimensionExpression(alias=dim.dims, exact_cardinality=dim.shape) for dim in shape]
+        return ArrayExpression(dimensions=dims)
+
+    def make(self) -> List[ArrayExpression]:
+        """Create one array specification per shape from self.dims and self.shape"""
+        return [self.make_expression(shape) for shape in self.pivot_dims()]
+
+    def make_slot(
+        self,
+    ) -> Union[
+        Dict[Literal['array'], ArrayExpression],
+        Dict[Literal['any_of'], List[Dict[Literal['array'], ArrayExpression]]],
+    ]:
+        """
+        Make the array expressions in a dict form that can be **kwarg'd into a SlotDefinition,
+        taking into account needing to use ``any_of`` for multiple array range specifications.
+
+        Returns:
+            ``{'array': expression}`` when there is exactly one expression, otherwise
+            ``{'any_of': [{'array': expression}, ...]}`` with one entry per expression.
+        """
+        expressions = self.make()
+        if len(expressions) == 1:
+            return {'array': expressions[0]}
+        return {'any_of': [{'array': expression} for expression in expressions]}
--- a/nwb_linkml/src/nwb_linkml/adapters/dataset.py
+++ b/nwb_linkml/src/nwb_linkml/adapters/dataset.py
@ -5,8 +5,14 @@ Adapter for NWB datasets to linkml Classes
 from abc import abstractmethod
 from typing import Optional

-from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
+from linkml_runtime.linkml_model.meta import (
+    ClassDefinition,
+    SlotDefinition,
+    ArrayExpression,
+    DimensionExpression,
+)

+from nwb_linkml.adapters.array import ArrayAdapter
 from nwb_linkml.adapters.adapter import BuildResult
 from nwb_linkml.adapters.classes import ClassAdapter
 from nwb_linkml.maps import QUANTITY_MAP, Map
@ -233,19 +239,20 @@ class MapArraylike(DatasetMap):
        """
        Map to an array class and the adjoining slot
        """
-        array_class = make_arraylike(cls, name)
+        array_adapter = ArrayAdapter(cls.dims, cls.shape)
+        expressions = array_adapter.make_slot()
        name = camel_to_snake(cls.name)
        res = BuildResult(
            slots=[
                SlotDefinition(
                    name=name,
                    multivalued=False,
-                    range=array_class.name,
+                    range=ClassAdapter.handle_dtype(cls.dtype),
                    description=cls.doc,
                    required=cls.quantity not in ("*", "?"),
+                    **expressions
                )
-            ],
-            classes=[array_class],
+            ]
        )
        return res

@ -287,12 +294,11 @@ class MapArrayLikeAttributes(DatasetMap):
        """
        Map to an arraylike class
        """
-        array_class = make_arraylike(cls, name)
+        array_adapter = ArrayAdapter(cls.dims, cls.shape)
+        expressions = array_adapter.make_slot()
        # make a slot for the arraylike class
-        array_slot = SlotDefinition(name="array", range=array_class.name)
-
-        res.classes.append(array_class)
-        res.classes[0].attributes.update({"array": array_slot})
+        array_slot = SlotDefinition(name="array", range=ClassAdapter.handle_dtype(cls.dtype), **expressions)
+        res.classes[0].attributes.update({'array':array_slot})
        return res


@ -405,7 +411,7 @@ class DatasetAdapter(ClassAdapter):
        return res


-def make_arraylike(cls: Dataset, name: Optional[str] = None) -> ClassDefinition:
+def make_array_range(cls: Dataset, name: Optional[str] = None) -> ClassDefinition:
    """
    Create a containing arraylike class

--- a/nwb_linkml/src/nwb_linkml/adapters/schema.py
+++ b/nwb_linkml/src/nwb_linkml/adapters/schema.py
@ -2,7 +2,7 @@
 I don't know if NWB necessarily has a term for a single nwb schema file, so we're going
 to call them "schema" objects
 """
-
+import pdb
 from pathlib import Path
 from typing import List, Optional, Type

@ -74,9 +74,15 @@ class SchemaAdapter(Adapter):
        """
        res = BuildResult()
        for dset in self.datasets:
-            res += DatasetAdapter(cls=dset).build()
+            new_res = DatasetAdapter(cls=dset).build()
+            if len(new_res.slots)>0:
+                pdb.set_trace()
+            res += new_res
        for group in self.groups:
-            res += GroupAdapter(cls=group).build()
+            new_res = GroupAdapter(cls=group).build()
+            if len(new_res.slots)>0:
+                pdb.set_trace()
+            res += new_res

        if (
            len(res.slots) > 0
--- a/nwb_linkml/src/nwb_linkml/monkeypatch.py
+++ b/nwb_linkml/src/nwb_linkml/monkeypatch.py
@ -115,9 +115,84 @@ def patch_schemaview() -> None:

    SchemaView.imports_closure = imports_closure

+def patch_array_expression() -> None:
+    """
+    Give ``AnonymousSlotExpression`` an optional ``array`` field, so that a
+    SlotDefinition can combine `any_of` with `array`.
+
+    The patched class subclasses the original and is assigned back over
+    ``meta.AnonymousSlotExpression``.
+
+    see: https://github.com/linkml/linkml-model/issues/199
+    """
+    from typing import Optional
+    from dataclasses import field, make_dataclass
+    from linkml_runtime.linkml_model import meta
+
+    # the one extra field: an optional array expression that defaults to None
+    array_field = ('array', Optional[meta.ArrayExpression], field(default=None))
+    patched = make_dataclass(
+        'AnonymousSlotExpression',
+        fields=[array_field],
+        bases=(meta.AnonymousSlotExpression,),
+    )
+    meta.AnonymousSlotExpression = patched
+
+def patch_pretty_print() -> None:
+    """
+    Fix the godforsaken linkml dataclass reprs
+
+    Every dataclass class found in ``linkml_runtime.linkml_model.meta`` is replaced by a
+    subclass whose ``__repr__`` and ``__str__`` use a compact, consistently indented
+    format.
+
+    See: https://github.com/linkml/linkml-runtime/pull/314
+    """
+    import re
+    from pprint import pformat
+    from typing import Any, Iterable, Tuple
+    import textwrap
+    from dataclasses import is_dataclass, make_dataclass, field
+    from linkml_runtime.linkml_model import meta
+    from linkml_runtime.utils.formatutils import items
+
+    def _pformat(fields: Iterable[Tuple[str, Any]], cls_name: str, indent: str = '  ') -> str:
+        """
+        pretty format the fields of the items of a ``YAMLRoot`` object without the wonky indentation of pformat.
+        see ``YAMLRoot.__repr__``.
+        formatting is similar to black - items at similar levels of nesting have similar levels of indentation,
+        rather than getting placed at essentially random levels of indentation depending on what came before them.
+
+        Args:
+            fields: (key, value) pairs to format; empty lists, empty dicts and ``None`` are skipped
+            cls_name: name printed in front of the formatted fields
+            indent: string used for one level of indentation
+        """
+        res = []
+        total_len = 0
+        for key, val in fields:
+            if val == [] or val == {} or val is None:
+                continue
+            # pformat handles everything else that isn't a YAMLRoot object, but it sure does look ugly
+            # use it to split lines and as the thing of last resort, but otherwise indent = 0, we'll do that
+            val_str = pformat(val, indent=0, compact=True, sort_dicts=False)
+            # now we indent everything except the first line by indenting and then using regex to remove just the first indent
+            val_str = re.sub(rf'\A{re.escape(indent)}', '', textwrap.indent(val_str, indent))
+            # now recombine with the key in a format that can be re-eval'd into an object if indent is just whitespace
+            val_str = f"'{key}': " + val_str
+
+            # count the total length of this string so we know if we need to linebreak or not later
+            total_len += len(val_str)
+            res.append(val_str)
+
+        if total_len > 80:
+            inside = ',\n'.join(res)
+            # we indent twice - once for the inner contents of every inner object, and one to
+            # offset from the root element. that keeps us from needing to be recursive except for the
+            # single pformat call
+            inside = textwrap.indent(inside, indent)
+            return cls_name + '({\n' + inside + '\n})'
+        else:
+            return cls_name + '({' + ', '.join(res) + '})'
+
+    def __repr__(self) -> str:
+        return _pformat(items(self), self.__class__.__name__)
+
+    for cls_name in dir(meta):
+        cls = getattr(meta, cls_name)
+        # is_dataclass is also true for dataclass *instances*, which have no
+        # ``__name__`` and cannot be used as a base class, so require a class
+        if isinstance(cls, type) and is_dataclass(cls):
+            # subclass with repr=False so the generated dataclass repr does not
+            # replace the one assigned below
+            new_dataclass = make_dataclass(cls.__name__,fields=[('__dummy__', Any,  field(default=None))], bases=(cls,), repr=False)
+            new_dataclass.__repr__ = __repr__
+            new_dataclass.__str__ = __repr__
+            setattr(meta, cls.__name__, new_dataclass)
+
+
+

 def apply_patches() -> None:
     """Apply all monkeypatches"""
     patch_npytyping_perf()
     patch_nptyping_warnings()
     patch_schemaview()
+    # adds an optional ``array`` field to meta.AnonymousSlotExpression
+    patch_array_expression()
+    # replaces every dataclass in ``meta`` with a pretty-printing subclass; it runs
+    # after the array patch, so the patched AnonymousSlotExpression is wrapped too
+    patch_pretty_print()
--- a/nwb_linkml/src/nwb_linkml/types/nwb.py
+++ b/nwb_linkml/src/nwb_linkml/types/nwb.py
@ -0,0 +1,17 @@
+"""
+Type annotations for NWB schema language types
+"""
+
+from typing import List, Union, TypeAlias
+
+DIMS_LIST: TypeAlias = List[Union[str, None]]
+"""A single-dimension dims specification: each entry is a name or ``None``"""
+
+DIMS_TYPE: TypeAlias = Union[DIMS_LIST, List[DIMS_LIST]]
+"""``dims`` in the nwb schema language: one dims list, or a list of them"""
+
+SHAPE_LIST: TypeAlias = List[Union[int, None]]
+"""A single-dimension shape specification: each entry is an integer or ``None``"""
+
+SHAPE_TYPE: TypeAlias = Union[SHAPE_LIST, List[SHAPE_LIST]]
+"""``shape`` in the nwb schema language: one shape list, or a list of them"""
--- a/nwb_linkml/tests/test_adapters/test_adapter_array.py
+++ b/nwb_linkml/tests/test_adapters/test_adapter_array.py
@ -0,0 +1,73 @@
+import pdb
+
+import pytest
+
+from typing import Tuple
+from nwb_linkml.types.nwb import DIMS_TYPE, SHAPE_TYPE
+from nwb_linkml.adapters.array import ArrayAdapter, Dimension, Shape
+
+# pytest.param([['dim1'], ['dim1', 'dim2'], ['dim1', 'dim3']], [[1], [1, 2], [1, 2]], [],
+#              id='multi shape inconsistent dims'),
+# pytest.param([['dim1'], ['dim1', 'dim2'], ['dim1', 'dim2']], [[1], [1, 2], [1, 3]], [],
+#              id='multi shape inconsistent shape'),
+# pytest.param([['dim1'], ['dim1', 'dim2'], ['dim1', 'dim3']], [[1], [1, 2], [1, 3]], [],
+#              id='multi shape inconsistent both'),
+
+
+@pytest.mark.parametrize(
+    "dims,shape,expected",
+    [
+        # flat dims and flat shape: a single Shape wrapped in a list
+        pytest.param(
+            ["dim1", "dim2", "dim3"],
+            [1, 2, 3],
+            [
+                Shape(
+                    [
+                        Dimension(dims="dim1", shape=1),
+                        Dimension(dims="dim2", shape=2),
+                        Dimension(dims="dim3", shape=3),
+                    ]
+                )
+            ],
+            id="single shape",
+        ),
+        # nested dims and nested shape: one Shape per inner list
+        pytest.param(
+            [["dim1"], ["dim1", "dim2"], ["dim1", "dim2", "dim3"]],
+            [[1], [1, 2], [1, 2, 3]],
+            [
+                Shape(
+                    [Dimension(dims="dim1", shape=1)],
+                ),
+                Shape((Dimension(dims="dim1", shape=1), Dimension(dims="dim2", shape=2))),
+                Shape(
+                    (
+                        Dimension(dims="dim1", shape=1),
+                        Dimension(dims="dim2", shape=2),
+                        Dimension(dims="dim3", shape=3),
+                    )
+                ),
+            ],
+            id="multi shape",
+        ),
+        # flat dims with nested shape: the flat names are reused for every inner shape
+        pytest.param(
+            ["dim1", "dim2", "dim3"],
+            [[1], [1, 2], [1, 2, 3]],
+            [
+                Shape([Dimension(dims="dim1", shape=1)]),
+                Shape((Dimension(dims="dim1", shape=1), Dimension(dims="dim2", shape=2))),
+                Shape(
+                    (
+                        Dimension(dims="dim1", shape=1),
+                        Dimension(dims="dim2", shape=2),
+                        Dimension(dims="dim3", shape=3),
+                    )
+                ),
+            ],
+            id="malformed abbreviated dims spec",
+        ),
+    ],
+)
+def test_pivot_dims(dims: DIMS_TYPE, shape: SHAPE_TYPE, expected):
+    """``ArrayAdapter.pivot_dims`` pairs each dim name with its size, one Shape per layer"""
+    adapter = ArrayAdapter(dims, shape)
+    pivoted = adapter.pivot_dims()
+    assert pivoted == expected
--- a/nwb_linkml/tests/test_adapters/test_adapter_classes.py
+++ b/nwb_linkml/tests/test_adapters/test_adapter_classes.py
@ -36,7 +36,7 @@ def test_build_base(nwb_schema):
    assert len(base.classes) == 1
    img = base.classes[0]
    assert len(img.attributes) == 4
-    assert img.attributes["newslot"] is slot
+    assert img.attributes["newslot"] == slot


 def test_get_attr_name():