adjusting array adapters to linkml arrays

This commit is contained in:
sneakers-the-rat 2024-07-03 00:41:16 -07:00
parent 0606221ab0
commit 087064be48
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
8 changed files with 302 additions and 15 deletions

View file

@ -3,6 +3,7 @@ Adapter classes for translating from NWB schema language to LinkML
""" """
from nwb_linkml.adapters.adapter import Adapter, BuildResult from nwb_linkml.adapters.adapter import Adapter, BuildResult
from nwb_linkml.adapters.array import ArrayAdapter
from nwb_linkml.adapters.classes import ClassAdapter from nwb_linkml.adapters.classes import ClassAdapter
from nwb_linkml.adapters.dataset import DatasetAdapter from nwb_linkml.adapters.dataset import DatasetAdapter
from nwb_linkml.adapters.group import GroupAdapter from nwb_linkml.adapters.group import GroupAdapter

View file

@ -0,0 +1,109 @@
"""
Generator for array ranges from nwb dims/ranges
"""
from itertools import zip_longest
from typing import Dict, List, Literal, Optional, Union, NamedTuple, TypeAlias
from linkml_runtime.linkml_model.meta import (
ClassDefinition,
SlotDefinition,
ArrayExpression,
DimensionExpression,
)
import warnings
from nwb_linkml.types.nwb import DIMS_LIST, DIMS_TYPE, SHAPE_LIST, SHAPE_TYPE
class Dimension(NamedTuple):
    """
    A single dimension/shape pair.

    ``dims`` holds the dimension's name and ``shape`` its size; both default
    to ``None``.
    """

    dims: Optional[str] = None
    # was annotated as the list literal ``[Optional[int]]``, which is not a type
    shape: Optional[int] = None
class Shape(tuple[Dimension, ...]):
    """
    A collection of :class:`.Dimension` tuples representing one of the nested layers in
    a dims/shape spec.

    Parameterized as a variable-length tuple (``tuple[Dimension, ...]``) because a layer
    may contain any number of dimensions; ``tuple[Dimension]`` would describe a tuple
    of exactly one element.
    """
class ArrayAdapter:
    """
    Adapter that generates a :class:`.ArrayExpression` (or set of them)
    from a NWB dims/shape declaration
    """

    def __init__(self, dims: DIMS_TYPE, shape: SHAPE_TYPE):
        """
        Args:
            dims: the ``dims`` declaration - a flat list of names, or a list of such lists
            shape: the ``shape`` declaration - a flat list of sizes, or a list of such lists
        """
        self.dims = dims
        self.shape = shape

    def pivot_dims(
        self, dims: Optional[DIMS_TYPE] = None, shape: Optional[SHAPE_TYPE] = None
    ) -> List[Shape]:
        """
        Pivot from a list of dims and a list of shape to a list of (dim, shape) tuples

        Args:
            dims: dims to pivot, defaults to ``self.dims``
            shape: shape to pivot, defaults to ``self.shape``

        Returns:
            One :class:`.Shape` per nested layer of the spec. A single-layered
            (flat) spec is wrapped so that the result is still a list.

        Warns:
            UserWarning: if ``dims`` and ``shape`` have different lengths
        """
        if dims is None:
            dims = self.dims
        if shape is None:
            shape = self.shape

        if len(dims) != len(shape):
            warnings.warn(
                f"dims ({len(dims)}) and shape ({len(shape)}) are not the same length!!! "
                "Your schema is formatted badly"
            )

        def _iter_dims(dims: DIMS_TYPE, shape: SHAPE_TYPE) -> Union[List[Shape], Shape]:
            """Pair up one layer of dims/shape, recursing into nested shape lists"""
            shapes = []
            # zip stops at the shorter of the two, so unmatched trailing items are dropped
            for inner_dim, inner_shape in zip(dims, shape):
                if isinstance(inner_shape, list):
                    # list of lists
                    # some badly formatted schema will have shape be a LoL but only provide a
                    # single set of names at the top level. Best we can do is repeat it and
                    # pray that it is the same size as the longest dims
                    if not isinstance(inner_dim, list):
                        inner_dim = dims
                    shapes.append(_iter_dims(inner_dim, inner_shape))
                else:
                    # single-layer list
                    shapes.append(Dimension(inner_dim, inner_shape))

            # a layer made only of Dimensions is a leaf: freeze it into a Shape
            if all(isinstance(x, Dimension) for x in shapes):
                shapes = Shape(shapes)
            return shapes

        shapes = _iter_dims(dims, shape)
        if not all(isinstance(x, Shape) for x in shapes):
            # single-layered spec, wrap it
            shapes = [shapes]

        return shapes

    def make_expression(self, shape: Shape) -> ArrayExpression:
        """
        Create the corresponding array specification from a shape

        Each :class:`.Dimension` becomes a :class:`.DimensionExpression` whose ``alias``
        is the dimension name and whose ``exact_cardinality`` is its size.
        """
        dims = [DimensionExpression(alias=dim.dims, exact_cardinality=dim.shape) for dim in shape]
        return ArrayExpression(dimensions=dims)

    def make(self) -> List[ArrayExpression]:
        """Create an array specification from self.dims and self.shape"""
        shapes = self.pivot_dims()
        expressions = [self.make_expression(shape) for shape in shapes]
        return expressions

    def make_slot(
        self,
    ) -> Union[
        Dict[Literal["array"], ArrayExpression],
        Dict[Literal["any_of"], List[Dict[Literal["array"], ArrayExpression]]],
    ]:
        """
        Make the array expressions in a dict form that can be **kwarg'd into a SlotDefinition,
        taking into account needing to use ``any_of`` for multiple array range specifications.

        Returns:
            ``{'array': expression}`` when there is exactly one expression, otherwise
            ``{'any_of': [{'array': expression}, ...]}`` with one entry per expression.
        """
        expressions = self.make()
        if len(expressions) == 1:
            return {'array': expressions[0]}
        else:
            return {'any_of': [{'array': expression} for expression in expressions]}

View file

@ -5,8 +5,14 @@ Adapter for NWB datasets to linkml Classes
from abc import abstractmethod from abc import abstractmethod
from typing import Optional from typing import Optional
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition from linkml_runtime.linkml_model.meta import (
ClassDefinition,
SlotDefinition,
ArrayExpression,
DimensionExpression,
)
from nwb_linkml.adapters.array import ArrayAdapter
from nwb_linkml.adapters.adapter import BuildResult from nwb_linkml.adapters.adapter import BuildResult
from nwb_linkml.adapters.classes import ClassAdapter from nwb_linkml.adapters.classes import ClassAdapter
from nwb_linkml.maps import QUANTITY_MAP, Map from nwb_linkml.maps import QUANTITY_MAP, Map
@ -233,19 +239,20 @@ class MapArraylike(DatasetMap):
""" """
Map to an array class and the adjoining slot Map to an array class and the adjoining slot
""" """
array_class = make_arraylike(cls, name) array_adapter = ArrayAdapter(cls.dims, cls.shape)
expressions = array_adapter.make_slot()
name = camel_to_snake(cls.name) name = camel_to_snake(cls.name)
res = BuildResult( res = BuildResult(
slots=[ slots=[
SlotDefinition( SlotDefinition(
name=name, name=name,
multivalued=False, multivalued=False,
range=array_class.name, range=ClassAdapter.handle_dtype(cls.dtype),
description=cls.doc, description=cls.doc,
required=cls.quantity not in ("*", "?"), required=cls.quantity not in ("*", "?"),
**expressions
) )
], ]
classes=[array_class],
) )
return res return res
@ -287,12 +294,11 @@ class MapArrayLikeAttributes(DatasetMap):
""" """
Map to an arraylike class Map to an arraylike class
""" """
array_class = make_arraylike(cls, name) array_adapter = ArrayAdapter(cls.dims, cls.shape)
expressions = array_adapter.make_slot()
# make a slot for the arraylike class # make a slot for the arraylike class
array_slot = SlotDefinition(name="array", range=array_class.name) array_slot = SlotDefinition(name="array", range=ClassAdapter.handle_dtype(cls.dtype), **expressions)
res.classes[0].attributes.update({'array':array_slot})
res.classes.append(array_class)
res.classes[0].attributes.update({"array": array_slot})
return res return res
@ -405,7 +411,7 @@ class DatasetAdapter(ClassAdapter):
return res return res
def make_arraylike(cls: Dataset, name: Optional[str] = None) -> ClassDefinition: def make_array_range(cls: Dataset, name: Optional[str] = None) -> ClassDefinition:
""" """
Create a containing arraylike class Create a containing arraylike class

View file

@ -2,7 +2,7 @@
I don't know if NWB necessarily has a term for a single nwb schema file, so we're going I don't know if NWB necessarily has a term for a single nwb schema file, so we're going
to call them "schema" objects to call them "schema" objects
""" """
import pdb
from pathlib import Path from pathlib import Path
from typing import List, Optional, Type from typing import List, Optional, Type
@ -74,9 +74,15 @@ class SchemaAdapter(Adapter):
""" """
res = BuildResult() res = BuildResult()
for dset in self.datasets: for dset in self.datasets:
res += DatasetAdapter(cls=dset).build() new_res = DatasetAdapter(cls=dset).build()
if len(new_res.slots)>0:
pdb.set_trace()
res += new_res
for group in self.groups: for group in self.groups:
res += GroupAdapter(cls=group).build() new_res = GroupAdapter(cls=group).build()
if len(new_res.slots)>0:
pdb.set_trace()
res += new_res
if ( if (
len(res.slots) > 0 len(res.slots) > 0

View file

@ -115,9 +115,84 @@ def patch_schemaview() -> None:
SchemaView.imports_closure = imports_closure SchemaView.imports_closure = imports_closure
def patch_array_expression() -> None:
    """
    Allow SlotDefinitions to use `any_of` with `array`

    Swaps ``meta.AnonymousSlotExpression`` for a same-named dataclass that subclasses
    the existing one and adds an optional ``array`` field defaulting to ``None``.

    see: https://github.com/linkml/linkml-model/issues/199
    """
    from dataclasses import field, make_dataclass
    from typing import Optional

    from linkml_runtime.linkml_model import meta

    # (name, type, field) triple describing the one extra attribute
    array_field = ('array', Optional[meta.ArrayExpression], field(default=None))
    patched = make_dataclass(
        'AnonymousSlotExpression',
        fields=[array_field],
        bases=(meta.AnonymousSlotExpression,),
    )
    meta.AnonymousSlotExpression = patched
def patch_pretty_print() -> None:
    """
    Fix the godforsaken linkml dataclass reprs

    Every dataclass found in ``linkml_runtime.linkml_model.meta`` is replaced by a
    same-named subclass whose ``__repr__`` and ``__str__`` use the compact,
    consistently-indented formatter defined below.

    See: https://github.com/linkml/linkml-runtime/pull/314
    """
    import re
    import textwrap
    from dataclasses import field, is_dataclass, make_dataclass
    from pprint import pformat
    from typing import Any, Iterable, Tuple

    from linkml_runtime.linkml_model import meta
    from linkml_runtime.utils.formatutils import items

    def _pformat(fields: Iterable[Tuple[str, Any]], cls_name: str, indent: str = ' ') -> str:
        """
        pretty format the fields of the items of a ``YAMLRoot`` object without the wonky
        indentation of pformat. see ``YAMLRoot.__repr__``.

        formatting is similar to black - items at similar levels of nesting have similar
        levels of indentation, rather than getting placed at essentially random levels of
        indentation depending on what came before them.

        Args:
            fields: ``(key, value)`` pairs to format; empty lists, empty dicts and
                ``None`` values are left out
            cls_name: name printed in front of the formatted fields
            indent: string used for one level of indentation

        Returns:
            ``cls_name({...})`` on one line when the formatted fields total 80 characters
            or fewer, otherwise one field per line.
        """
        res = []
        total_len = 0
        for key, val in fields:
            if val == [] or val == {} or val is None:
                continue
            # pformat handles everything else that isn't a YAMLRoot object, but it sure does
            # look ugly. use it to split lines and as the thing of last resort, but
            # otherwise indent = 0, we'll do that
            val_str = pformat(val, indent=0, compact=True, sort_dicts=False)
            # now we indent everything except the first line by indenting and then using
            # regex to remove just the first indent
            val_str = re.sub(rf'\A{re.escape(indent)}', '', textwrap.indent(val_str, indent))
            # now recombine with the key in a format that can be re-eval'd into an object
            # if indent is just whitespace
            val_str = f"'{key}': " + val_str

            # count the total length of this string so we know if we need to linebreak
            # or not later
            total_len += len(val_str)
            res.append(val_str)

        if total_len > 80:
            inside = ',\n'.join(res)
            # we indent twice - once for the inner contents of every inner object, and one to
            # offset from the root element. that keeps us from needing to be recursive
            # except for the single pformat call
            inside = textwrap.indent(inside, indent)
            return cls_name + '({\n' + inside + '\n})'
        else:
            return cls_name + '({' + ', '.join(res) + '})'

    def __repr__(self) -> str:
        return _pformat(items(self), self.__class__.__name__)

    for cls_name in dir(meta):
        cls = getattr(meta, cls_name)
        # is_dataclass is also True for dataclass *instances*, which cannot be used as a
        # base class - only patch actual classes
        if isinstance(cls, type) and is_dataclass(cls):
            new_dataclass = make_dataclass(
                cls.__name__,
                fields=[('__dummy__', Any, field(default=None))],
                bases=(cls,),
                repr=False,
            )
            new_dataclass.__repr__ = __repr__
            new_dataclass.__str__ = __repr__
            setattr(meta, cls.__name__, new_dataclass)
def apply_patches() -> None: def apply_patches() -> None:
"""Apply all monkeypatches""" """Apply all monkeypatches"""
patch_npytyping_perf() patch_npytyping_perf()
patch_nptyping_warnings() patch_nptyping_warnings()
patch_schemaview() patch_schemaview()
patch_array_expression()
patch_pretty_print()

View file

@ -0,0 +1,17 @@
"""
Type annotations for NWB schema language types
"""
from typing import List, Union, TypeAlias
DIMS_LIST: TypeAlias = List[Union[str, None]]
"""A single-dimension dims specification"""
DIMS_TYPE: TypeAlias = Union[DIMS_LIST, List[DIMS_LIST]]
"""``dims`` in the nwb schema language"""
SHAPE_LIST: TypeAlias = List[Union[str, None]]
"""A single-dimension shape specification"""
SHAPE_TYPE: TypeAlias = Union[SHAPE_LIST, List[SHAPE_LIST]]
"""``shape`` in the nwb schema language"""

View file

@ -0,0 +1,73 @@
import pdb
import pytest
from typing import Tuple
from nwb_linkml.types.nwb import DIMS_TYPE, SHAPE_TYPE
from nwb_linkml.adapters.array import ArrayAdapter, Dimension, Shape
# pytest.param([['dim1'], ['dim1', 'dim2'], ['dim1', 'dim3']], [[1], [1, 2], [1, 2]], [],
# id='multi shape inconsistent dims'),
# pytest.param([['dim1'], ['dim1', 'dim2'], ['dim1', 'dim2']], [[1], [1, 2], [1, 3]], [],
# id='multi shape inconsistent shape'),
# pytest.param([['dim1'], ['dim1', 'dim2'], ['dim1', 'dim3']], [[1], [1, 2], [1, 3]], [],
# id='multi shape inconsistent both'),
# the three dimensions every case below is assembled from
_DIM1 = Dimension(dims="dim1", shape=1)
_DIM2 = Dimension(dims="dim2", shape=2)
_DIM3 = Dimension(dims="dim3", shape=3)


def _nested_shapes() -> list:
    """Expected pivot of a three-layer spec that grows by one dimension per layer"""
    return [
        Shape((_DIM1,)),
        Shape((_DIM1, _DIM2)),
        Shape((_DIM1, _DIM2, _DIM3)),
    ]


@pytest.mark.parametrize(
    "dims,shape,expected",
    [
        pytest.param(
            ["dim1", "dim2", "dim3"],
            [1, 2, 3],
            [Shape((_DIM1, _DIM2, _DIM3))],
            id="single shape",
        ),
        pytest.param(
            [["dim1"], ["dim1", "dim2"], ["dim1", "dim2", "dim3"]],
            [[1], [1, 2], [1, 2, 3]],
            _nested_shapes(),
            id="multi shape",
        ),
        pytest.param(
            ["dim1", "dim2", "dim3"],
            [[1], [1, 2], [1, 2, 3]],
            _nested_shapes(),
            id="malformed abbreviated dims spec",
        ),
    ],
)
def test_pivot_dims(dims: DIMS_TYPE, shape: SHAPE_TYPE, expected):
    """Pivoting dims/shape yields one Shape per layer, pairing each name with its size"""
    result = ArrayAdapter(dims, shape).pivot_dims()
    assert result == expected

View file

@ -36,7 +36,7 @@ def test_build_base(nwb_schema):
assert len(base.classes) == 1 assert len(base.classes) == 1
img = base.classes[0] img = base.classes[0]
assert len(img.attributes) == 4 assert len(img.attributes) == 4
assert img.attributes["newslot"] is slot assert img.attributes["newslot"] == slot
def test_get_attr_name(): def test_get_attr_name():