From 087064be48296a7025dc6dd5a026a11c331e8b8a Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Wed, 3 Jul 2024 00:41:16 -0700 Subject: [PATCH] adjusting array adapters to linkml arrays --- .../src/nwb_linkml/adapters/__init__.py | 1 + nwb_linkml/src/nwb_linkml/adapters/array.py | 109 ++++++++++++++++++ nwb_linkml/src/nwb_linkml/adapters/dataset.py | 28 +++-- nwb_linkml/src/nwb_linkml/adapters/schema.py | 12 +- nwb_linkml/src/nwb_linkml/monkeypatch.py | 75 ++++++++++++ nwb_linkml/src/nwb_linkml/types/nwb.py | 17 +++ .../tests/test_adapters/test_adapter_array.py | 73 ++++++++++++ .../test_adapters/test_adapter_classes.py | 2 +- 8 files changed, 302 insertions(+), 15 deletions(-) create mode 100644 nwb_linkml/src/nwb_linkml/adapters/array.py create mode 100644 nwb_linkml/src/nwb_linkml/types/nwb.py create mode 100644 nwb_linkml/tests/test_adapters/test_adapter_array.py diff --git a/nwb_linkml/src/nwb_linkml/adapters/__init__.py b/nwb_linkml/src/nwb_linkml/adapters/__init__.py index db1da1f..df7ac3c 100644 --- a/nwb_linkml/src/nwb_linkml/adapters/__init__.py +++ b/nwb_linkml/src/nwb_linkml/adapters/__init__.py @@ -3,6 +3,7 @@ Adapter classes for translating from NWB schema language to LinkML """ from nwb_linkml.adapters.adapter import Adapter, BuildResult +from nwb_linkml.adapters.array import ArrayAdapter from nwb_linkml.adapters.classes import ClassAdapter from nwb_linkml.adapters.dataset import DatasetAdapter from nwb_linkml.adapters.group import GroupAdapter diff --git a/nwb_linkml/src/nwb_linkml/adapters/array.py b/nwb_linkml/src/nwb_linkml/adapters/array.py new file mode 100644 index 0000000..ecd34d7 --- /dev/null +++ b/nwb_linkml/src/nwb_linkml/adapters/array.py @@ -0,0 +1,109 @@ +""" +Generator for array ranges from nwb dims/ranges +""" + +from itertools import zip_longest +from typing import Dict, List, Literal, Optional, Union, NamedTuple, TypeAlias +from linkml_runtime.linkml_model.meta import ( + ClassDefinition, + SlotDefinition, + ArrayExpression, + DimensionExpression, +) +import warnings + +from nwb_linkml.types.nwb import DIMS_LIST, DIMS_TYPE, SHAPE_LIST, SHAPE_TYPE + + +class Dimension(NamedTuple): + """A single dimension/shape pair""" + + dims: Optional[str] = None + shape: [Optional[int]] = None + + +class Shape(tuple[Dimension]): + """ + A collection of :class:`.Dimension` tuples representing one of the nested layers in + a dims/shape spec + """ + + +class ArrayAdapter: + """ + Adapter that generates a :class:`.ArrayExpression` (or set of them) + from a NWB dims/shape declaration + """ + + def __init__(self, dims: DIMS_TYPE, shape: SHAPE_TYPE): + self.dims = dims + self.shape = shape + + def pivot_dims( + self, dims: Optional[DIMS_TYPE] = None, shape: Optional[SHAPE_TYPE] = None + ) -> List[Shape]: + """ + Pivot from a list of dims and a list of shape to a list of (dim, shape) tuples + """ + if dims is None: + dims = self.dims + if shape is None: + shape = self.shape + + if len(dims) != len(shape): + warnings.warn( + f"dims ({len(dims)} and shape ({len(shape)}) are not the same length!!! " + "Your schema is formatted badly" + ) + + def _iter_dims(dims: DIMS_TYPE, shape: SHAPE_TYPE) -> List[Shape] | Shape: + shapes = [] + for inner_dim, inner_shape in zip(dims, shape): + if isinstance(inner_shape, list): + # list of lists + + # some badly formatted schema will have shape be a LoL but only provide a single + # set of names at the top level. Best we can do is repeat it and pray + # that it is the same size as the longest dims + if not isinstance(inner_dim, list): + inner_dim = dims + + shapes.append(_iter_dims(inner_dim, inner_shape)) + else: + # single-layer list + shapes.append(Dimension(inner_dim, inner_shape)) + if all([isinstance(x, Dimension) for x in shapes]): + shapes = Shape(shapes) + return shapes + + shapes = _iter_dims(dims, shape) + + if not all([isinstance(x, Shape) for x in shapes]): + # single-layered spec, wrap it + shapes = [shapes] + + return shapes + + def make_expression(self, shape: Shape) -> ArrayExpression: + """ + Create the corresponding array specification from a shape + """ + dims = [DimensionExpression(alias=dim.dims, exact_cardinality=dim.shape) for dim in shape] + return ArrayExpression(dimensions=dims) + + def make(self) -> List[ArrayExpression]: + """Create an array specification from self.dims and self.shape""" + shapes = self.pivot_dims() + expressions = [self.make_expression(shape) for shape in shapes] + return expressions + + def make_slot(self) -> Union[Dict[Literal['array'], ArrayExpression], Dict[Literal['any_of'], Dict[Literal['array'],List[ArrayExpression]]]]: + """ + Make the array expressions in a dict form that can be **kwarg'd into a SlotDefinition, + taking into account needing to use ``any_of`` for multiple array range specifications. + """ + expressions = self.make() + if len(expressions) == 1: + return {'array': expressions[0]} + else: + return {'any_of': [{'array': expression} for expression in expressions]} \ No newline at end of file diff --git a/nwb_linkml/src/nwb_linkml/adapters/dataset.py b/nwb_linkml/src/nwb_linkml/adapters/dataset.py index 2f77c1f..8ddee47 100644 --- a/nwb_linkml/src/nwb_linkml/adapters/dataset.py +++ b/nwb_linkml/src/nwb_linkml/adapters/dataset.py @@ -5,8 +5,14 @@ Adapter for NWB datasets to linkml Classes from abc import abstractmethod from typing import Optional -from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition +from linkml_runtime.linkml_model.meta import ( + ClassDefinition, + SlotDefinition, + ArrayExpression, + DimensionExpression, +) +from nwb_linkml.adapters.array import ArrayAdapter from nwb_linkml.adapters.adapter import BuildResult from nwb_linkml.adapters.classes import ClassAdapter from nwb_linkml.maps import QUANTITY_MAP, Map @@ -233,19 +239,20 @@ class MapArraylike(DatasetMap): """ Map to an array class and the adjoining slot """ - array_class = make_arraylike(cls, name) + array_adapter = ArrayAdapter(cls.dims, cls.shape) + expressions = array_adapter.make_slot() name = camel_to_snake(cls.name) res = BuildResult( slots=[ SlotDefinition( name=name, multivalued=False, - range=array_class.name, + range=ClassAdapter.handle_dtype(cls.dtype), description=cls.doc, required=cls.quantity not in ("*", "?"), + **expressions ) - ], - classes=[array_class], + ] ) return res @@ -287,12 +294,11 @@ class MapArrayLikeAttributes(DatasetMap): """ Map to an arraylike class """ - array_class = make_arraylike(cls, name) + array_adapter = ArrayAdapter(cls.dims, cls.shape) + expressions = array_adapter.make_slot() # make a slot for the arraylike class - array_slot = SlotDefinition(name="array", range=array_class.name) - - res.classes.append(array_class) - res.classes[0].attributes.update({"array": array_slot}) + array_slot = SlotDefinition(name="array", range=ClassAdapter.handle_dtype(cls.dtype), **expressions) + res.classes[0].attributes.update({'array':array_slot}) return res @@ -405,7 +411,7 @@ class DatasetAdapter(ClassAdapter): return res -def make_arraylike(cls: Dataset, name: Optional[str] = None) -> ClassDefinition: +def make_array_range(cls: Dataset, name: Optional[str] = None) -> ClassDefinition: """ Create a containing arraylike class diff --git a/nwb_linkml/src/nwb_linkml/adapters/schema.py b/nwb_linkml/src/nwb_linkml/adapters/schema.py index 3138ec0..86ceb9e 100644 --- a/nwb_linkml/src/nwb_linkml/adapters/schema.py +++ b/nwb_linkml/src/nwb_linkml/adapters/schema.py @@ -2,7 +2,7 @@ I don't know if NWB necessarily has a term for a single nwb schema file, so we're going to call them "schema" objects """ - +import pdb from pathlib import Path from typing import List, Optional, Type @@ -74,9 +74,15 @@ class SchemaAdapter(Adapter): """ res = BuildResult() for dset in self.datasets: - res += DatasetAdapter(cls=dset).build() + new_res = DatasetAdapter(cls=dset).build() + if len(new_res.slots)>0: + pdb.set_trace() + res += new_res for group in self.groups: - res += GroupAdapter(cls=group).build() + new_res = GroupAdapter(cls=group).build() + if len(new_res.slots)>0: + pdb.set_trace() + res += new_res if ( len(res.slots) > 0 diff --git a/nwb_linkml/src/nwb_linkml/monkeypatch.py b/nwb_linkml/src/nwb_linkml/monkeypatch.py index 1ab1eda..bf11b28 100644 --- a/nwb_linkml/src/nwb_linkml/monkeypatch.py +++ b/nwb_linkml/src/nwb_linkml/monkeypatch.py @@ -115,9 +115,84 @@ def patch_schemaview() -> None: SchemaView.imports_closure = imports_closure +def patch_array_expression() -> None: + """ + Allow SlotDefinitions to use `any_of` with `array` + + see: https://github.com/linkml/linkml-model/issues/199 + """ + from dataclasses import make_dataclass, field + from linkml_runtime.linkml_model import meta + from typing import Optional + new_dataclass = make_dataclass('AnonymousSlotExpression', fields=[('array', Optional[meta.ArrayExpression], field(default=None))], bases=(meta.AnonymousSlotExpression,)) + meta.AnonymousSlotExpression = new_dataclass + +def patch_pretty_print() -> None: + """ + Fix the godforsaken linkml dataclass reprs + + See: https://github.com/linkml/linkml-runtime/pull/314 + """ + import re + from pprint import pformat + from typing import Any + import textwrap + from dataclasses import is_dataclass, make_dataclass, field + from linkml_runtime.linkml_model import meta + from linkml_runtime.utils.formatutils import items + + def _pformat(fields: dict, cls_name: str, indent: str = ' ') -> str: + """ + pretty format the fields of the items of a ``YAMLRoot`` object without the wonky indentation of pformat. + see ``YAMLRoot.__repr__``. + formatting is similar to black - items at similar levels of nesting have similar levels of indentation, + rather than getting placed at essentially random levels of indentation depending on what came before them. + """ + res = [] + total_len = 0 + for key, val in fields: + if val == [] or val == {} or val is None: + continue + # pformat handles everything else that isn't a YAMLRoot object, but it sure does look ugly + # use it to split lines and as the thing of last resort, but otherwise indent = 0, we'll do that + val_str = pformat(val, indent=0, compact=True, sort_dicts=False) + # now we indent everything except the first line by indenting and then using regex to remove just the first indent + val_str = re.sub(rf'\A{re.escape(indent)}', '', textwrap.indent(val_str, indent)) + # now recombine with the key in a format that can be re-eval'd into an object if indent is just whitespace + val_str = f"'{key}': " + val_str + + # count the total length of this string so we know if we need to linebreak or not later + total_len += len(val_str) + res.append(val_str) + + if total_len > 80: + inside = ',\n'.join(res) + # we indent twice - once for the inner contents of every inner object, and one to + # offset from the root element. that keeps us from needing to be recursive except for the + # single pformat call + inside = textwrap.indent(inside, indent) + return cls_name + '({\n' + inside + '\n})' + else: + return cls_name + '({' + ', '.join(res) + '})' + + def __repr__(self): + return _pformat(items(self), self.__class__.__name__) + + for cls_name in dir(meta): + cls = getattr(meta, cls_name) + if is_dataclass(cls): + new_dataclass = make_dataclass(cls.__name__,fields=[('__dummy__', Any, field(default=None))], bases=(cls,), repr=False) + new_dataclass.__repr__ = __repr__ + new_dataclass.__str__ = __repr__ + setattr(meta, cls.__name__, new_dataclass) + + + def apply_patches() -> None: """Apply all monkeypatches""" patch_npytyping_perf() patch_nptyping_warnings() patch_schemaview() + patch_array_expression() + patch_pretty_print() diff --git a/nwb_linkml/src/nwb_linkml/types/nwb.py b/nwb_linkml/src/nwb_linkml/types/nwb.py new file mode 100644 index 0000000..91a8133 --- /dev/null +++ b/nwb_linkml/src/nwb_linkml/types/nwb.py @@ -0,0 +1,17 @@ +""" +Type annotations for NWB schema language types +""" + +from typing import List, Union, TypeAlias + +DIMS_LIST: TypeAlias = List[Union[str, None]] +"""A single-dimension dims specification""" + +DIMS_TYPE: TypeAlias = Union[DIMS_LIST, List[DIMS_LIST]] +"""``dims`` in the nwb schema language""" + +SHAPE_LIST: TypeAlias = List[Union[str, None]] +"""A single-dimension shape specification""" + +SHAPE_TYPE: TypeAlias = Union[SHAPE_LIST, List[SHAPE_LIST]] +"""``shape`` in the nwb schema language""" diff --git a/nwb_linkml/tests/test_adapters/test_adapter_array.py b/nwb_linkml/tests/test_adapters/test_adapter_array.py new file mode 100644 index 0000000..26bdfc9 --- /dev/null +++ b/nwb_linkml/tests/test_adapters/test_adapter_array.py @@ -0,0 +1,73 @@ +import pdb + +import pytest + +from typing import Tuple +from nwb_linkml.types.nwb import DIMS_TYPE, SHAPE_TYPE +from nwb_linkml.adapters.array import ArrayAdapter, Dimension, Shape + +# pytest.param([['dim1'], ['dim1', 'dim2'], ['dim1', 'dim3']], [[1], [1, 2], [1, 2]], [], +# id='multi shape inconsistent dims'), +# pytest.param([['dim1'], ['dim1', 'dim2'], ['dim1', 'dim2']], [[1], [1, 2], [1, 3]], [], +# id='multi shape inconsistent shape'), +# pytest.param([['dim1'], ['dim1', 'dim2'], ['dim1', 'dim3']], [[1], [1, 2], [1, 3]], [], +# id='multi shape inconsistent both'), + + +@pytest.mark.parametrize( + "dims,shape,expected", + [ + pytest.param( + ["dim1", "dim2", "dim3"], + [1, 2, 3], + [ + Shape( + [ + Dimension(dims="dim1", shape=1), + Dimension(dims="dim2", shape=2), + Dimension(dims="dim3", shape=3), + ] + ) + ], + id="single shape", + ), + pytest.param( + [["dim1"], ["dim1", "dim2"], ["dim1", "dim2", "dim3"]], + [[1], [1, 2], [1, 2, 3]], + [ + Shape( + [Dimension(dims="dim1", shape=1)], + ), + Shape((Dimension(dims="dim1", shape=1), Dimension(dims="dim2", shape=2))), + Shape( + ( + Dimension(dims="dim1", shape=1), + Dimension(dims="dim2", shape=2), + Dimension(dims="dim3", shape=3), + ) + ), + ], + id="multi shape", + ), + pytest.param( + ["dim1", "dim2", "dim3"], + [[1], [1, 2], [1, 2, 3]], + [ + Shape([Dimension(dims="dim1", shape=1)]), + Shape((Dimension(dims="dim1", shape=1), Dimension(dims="dim2", shape=2))), + Shape( + ( + Dimension(dims="dim1", shape=1), + Dimension(dims="dim2", shape=2), + Dimension(dims="dim3", shape=3), + ) + ), + ], + id="malformed abbreviated dims spec", + ), + ], +) +def test_pivot_dims(dims: DIMS_TYPE, shape: SHAPE_TYPE, expected): + adapter = ArrayAdapter(dims, shape) + pivoted = adapter.pivot_dims() + assert pivoted == expected diff --git a/nwb_linkml/tests/test_adapters/test_adapter_classes.py b/nwb_linkml/tests/test_adapters/test_adapter_classes.py index 3cc173e..6ae9361 100644 --- a/nwb_linkml/tests/test_adapters/test_adapter_classes.py +++ b/nwb_linkml/tests/test_adapters/test_adapter_classes.py @@ -36,7 +36,7 @@ def test_build_base(nwb_schema): assert len(base.classes) == 1 img = base.classes[0] assert len(img.attributes) == 4 - assert img.attributes["newslot"] is slot + assert img.attributes["newslot"] == slot def test_get_attr_name():