adjusting array adapters to linkml arrays

This commit is contained in:
sneakers-the-rat 2024-07-03 00:41:16 -07:00
parent 0606221ab0
commit 087064be48
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
8 changed files with 302 additions and 15 deletions

View file

@ -3,6 +3,7 @@ Adapter classes for translating from NWB schema language to LinkML
"""
from nwb_linkml.adapters.adapter import Adapter, BuildResult
from nwb_linkml.adapters.array import ArrayAdapter
from nwb_linkml.adapters.classes import ClassAdapter
from nwb_linkml.adapters.dataset import DatasetAdapter
from nwb_linkml.adapters.group import GroupAdapter

View file

@ -0,0 +1,109 @@
"""
Generator for array ranges from nwb dims/ranges
"""
from itertools import zip_longest
from typing import Dict, List, Literal, Optional, Union, NamedTuple, TypeAlias
from linkml_runtime.linkml_model.meta import (
ClassDefinition,
SlotDefinition,
ArrayExpression,
DimensionExpression,
)
import warnings
from nwb_linkml.types.nwb import DIMS_LIST, DIMS_TYPE, SHAPE_LIST, SHAPE_TYPE
class Dimension(NamedTuple):
    """
    A single dimension/shape pair

    One axis of an array specification: its name paired with its size.
    Both fields default to ``None``.
    """

    # Name of the dimension; ArrayAdapter.make_expression passes it as the
    # ``alias`` of the generated DimensionExpression.
    dims: Optional[str] = None
    # Size of the dimension; ArrayAdapter.make_expression passes it as
    # ``exact_cardinality``. NOTE(review): None presumably means "any size" - confirm.
    shape: Optional[int] = None
class Shape(tuple[Dimension, ...]):
    """
    A collection of :class:`.Dimension` tuples representing one of the nested layers in
    a dims/shape spec

    A plain ``tuple`` subclass with no extra behavior: it exists so that a
    complete (dims, shape) layer can be told apart from a bare list or tuple
    with ``isinstance``. It holds any number of dimensions, hence the
    variable-length ``tuple[Dimension, ...]`` base.
    """
class ArrayAdapter:
    """
    Adapter that generates a :class:`.ArrayExpression` (or set of them)
    from a NWB dims/shape declaration

    Args:
        dims: The ``dims`` declaration - either a flat list of dimension names,
            or a list of such lists when several array layouts are allowed.
        shape: The ``shape`` declaration, structured like ``dims``.
    """

    def __init__(self, dims: DIMS_TYPE, shape: SHAPE_TYPE):
        self.dims = dims
        self.shape = shape

    def pivot_dims(
        self, dims: Optional[DIMS_TYPE] = None, shape: Optional[SHAPE_TYPE] = None
    ) -> List[Shape]:
        """
        Pivot from a list of dims and a list of shape to a list of (dim, shape) tuples

        Args:
            dims: Dimension names; defaults to ``self.dims`` when ``None``.
            shape: Dimension sizes; defaults to ``self.shape`` when ``None``.

        Returns:
            One :class:`.Shape` per allowed array layout. A single-layered
            (flat) spec is wrapped so it is returned as a one-element list.

        Warns:
            UserWarning: if ``dims`` and ``shape`` differ in length at the top
                level. Pairing still proceeds and is truncated to the shorter
                of the two by ``zip``.
        """
        if dims is None:
            dims = self.dims
        if shape is None:
            shape = self.shape

        if len(dims) != len(shape):
            warnings.warn(
                f"dims ({len(dims)}) and shape ({len(shape)}) are not the same length!!! "
                "Your schema is formatted badly"
            )

        def _iter_dims(dims: DIMS_TYPE, shape: SHAPE_TYPE) -> Union[List[Shape], Shape]:
            """Recursively pair up dims and shape, one nesting level at a time"""
            shapes = []
            for inner_dim, inner_shape in zip(dims, shape):
                if isinstance(inner_shape, list):
                    # list of lists
                    # some badly formatted schema will have shape be a LoL but only provide
                    # a single set of names at the top level. Best we can do is repeat it
                    # and pray that it is the same size as the longest dims
                    if not isinstance(inner_dim, list):
                        inner_dim = dims
                    shapes.append(_iter_dims(inner_dim, inner_shape))
                else:
                    # single-layer list
                    shapes.append(Dimension(inner_dim, inner_shape))

            # a level made only of Dimensions is one complete layout
            if all(isinstance(x, Dimension) for x in shapes):
                shapes = Shape(shapes)
            return shapes

        shapes = _iter_dims(dims, shape)
        if not all(isinstance(x, Shape) for x in shapes):
            # single-layered spec, wrap it
            shapes = [shapes]

        return shapes

    def make_expression(self, shape: Shape) -> ArrayExpression:
        """
        Create the corresponding array specification from a shape

        Each :class:`.Dimension` becomes a :class:`.DimensionExpression` whose
        ``alias`` is the dimension name and whose ``exact_cardinality`` is the
        dimension size.
        """
        dims = [DimensionExpression(alias=dim.dims, exact_cardinality=dim.shape) for dim in shape]
        return ArrayExpression(dimensions=dims)

    def make(self) -> List[ArrayExpression]:
        """Create an array specification from self.dims and self.shape"""
        return [self.make_expression(shape) for shape in self.pivot_dims()]

    def make_slot(
        self,
    ) -> Union[
        Dict[Literal["array"], ArrayExpression],
        Dict[Literal["any_of"], List[Dict[Literal["array"], ArrayExpression]]],
    ]:
        """
        Make the array expressions in a dict form that can be **kwarg'd into a SlotDefinition,
        taking into account needing to use ``any_of`` for multiple array range specifications.

        Returns:
            ``{'array': expression}`` when there is exactly one expression,
            otherwise ``{'any_of': [{'array': expression}, ...]}`` with one
            entry per expression.
        """
        expressions = self.make()
        if len(expressions) == 1:
            return {"array": expressions[0]}
        return {"any_of": [{"array": expression} for expression in expressions]}

View file

@ -5,8 +5,14 @@ Adapter for NWB datasets to linkml Classes
from abc import abstractmethod
from typing import Optional
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
from linkml_runtime.linkml_model.meta import (
ClassDefinition,
SlotDefinition,
ArrayExpression,
DimensionExpression,
)
from nwb_linkml.adapters.array import ArrayAdapter
from nwb_linkml.adapters.adapter import BuildResult
from nwb_linkml.adapters.classes import ClassAdapter
from nwb_linkml.maps import QUANTITY_MAP, Map
@ -233,19 +239,20 @@ class MapArraylike(DatasetMap):
"""
Map to an array class and the adjoining slot
"""
array_class = make_arraylike(cls, name)
array_adapter = ArrayAdapter(cls.dims, cls.shape)
expressions = array_adapter.make_slot()
name = camel_to_snake(cls.name)
res = BuildResult(
slots=[
SlotDefinition(
name=name,
multivalued=False,
range=array_class.name,
range=ClassAdapter.handle_dtype(cls.dtype),
description=cls.doc,
required=cls.quantity not in ("*", "?"),
**expressions
)
],
classes=[array_class],
]
)
return res
@ -287,12 +294,11 @@ class MapArrayLikeAttributes(DatasetMap):
"""
Map to an arraylike class
"""
array_class = make_arraylike(cls, name)
array_adapter = ArrayAdapter(cls.dims, cls.shape)
expressions = array_adapter.make_slot()
# make a slot for the arraylike class
array_slot = SlotDefinition(name="array", range=array_class.name)
res.classes.append(array_class)
res.classes[0].attributes.update({"array": array_slot})
array_slot = SlotDefinition(name="array", range=ClassAdapter.handle_dtype(cls.dtype), **expressions)
res.classes[0].attributes.update({'array':array_slot})
return res
@ -405,7 +411,7 @@ class DatasetAdapter(ClassAdapter):
return res
def make_arraylike(cls: Dataset, name: Optional[str] = None) -> ClassDefinition:
def make_array_range(cls: Dataset, name: Optional[str] = None) -> ClassDefinition:
"""
Create a containing arraylike class

View file

@ -2,7 +2,7 @@
I don't know if NWB necessarily has a term for a single nwb schema file, so we're going
to call them "schema" objects
"""
import pdb
from pathlib import Path
from typing import List, Optional, Type
@ -74,9 +74,15 @@ class SchemaAdapter(Adapter):
"""
res = BuildResult()
for dset in self.datasets:
res += DatasetAdapter(cls=dset).build()
new_res = DatasetAdapter(cls=dset).build()
if len(new_res.slots)>0:
pdb.set_trace()
res += new_res
for group in self.groups:
res += GroupAdapter(cls=group).build()
new_res = GroupAdapter(cls=group).build()
if len(new_res.slots)>0:
pdb.set_trace()
res += new_res
if (
len(res.slots) > 0

View file

@ -115,9 +115,84 @@ def patch_schemaview() -> None:
SchemaView.imports_closure = imports_closure
def patch_array_expression() -> None:
    """
    Allow SlotDefinitions to use `any_of` with `array`

    Builds a dataclass named ``AnonymousSlotExpression`` that subclasses the
    existing ``meta.AnonymousSlotExpression`` and adds an optional ``array``
    field (default ``None``), then rebinds the name in ``meta`` to it.

    see: https://github.com/linkml/linkml-model/issues/199
    """
    from dataclasses import field, make_dataclass
    from typing import Optional

    from linkml_runtime.linkml_model import meta

    # the single extra field added on top of the upstream class
    array_field = ("array", Optional[meta.ArrayExpression], field(default=None))
    patched = make_dataclass(
        "AnonymousSlotExpression",
        fields=[array_field],
        bases=(meta.AnonymousSlotExpression,),
    )
    meta.AnonymousSlotExpression = patched
def patch_pretty_print() -> None:
    """
    Fix the godforsaken linkml dataclass reprs

    Every dataclass found in ``linkml_runtime.linkml_model.meta`` is replaced
    by a subclass (generated with ``repr=False``) whose ``__repr__`` and
    ``__str__`` render the object's items with consistent indentation.

    See: https://github.com/linkml/linkml-runtime/pull/314
    """
    import re
    import textwrap
    from dataclasses import field, is_dataclass, make_dataclass
    from pprint import pformat
    from typing import Any

    from linkml_runtime.linkml_model import meta
    from linkml_runtime.utils.formatutils import items

    def _pformat(fields: dict, cls_name: str, indent: str = ' ') -> str:
        """
        Pretty format the items of a ``YAMLRoot`` object without the wonky indentation
        of pformat. see ``YAMLRoot.__repr__``.

        Formatting is similar to black - items at similar levels of nesting get similar
        levels of indentation, rather than landing at essentially random levels of
        indentation depending on what came before them.

        NOTE(review): ``fields`` is unpacked as (key, value) pairs, so despite the
        annotation it appears to be an iterable of pairs rather than a dict - confirm.
        """
        entries = []
        total_len = 0
        for key, val in fields:
            # empty and unset values are left out of the repr entirely
            if val == [] or val == {} or val is None:
                continue

            # pformat handles everything that isn't a YAMLRoot object, but its own
            # indentation is ugly: use it only to split lines (indent=0) and indent here
            formatted = pformat(val, indent=0, compact=True, sort_dicts=False)
            # indent every line, then strip the indent back off of the first line only
            indented = textwrap.indent(formatted, indent)
            formatted = re.sub(rf'\A{re.escape(indent)}', '', indented)
            # recombine with the key in a form that can be re-eval'd into an object
            # if indent is just whitespace
            entry = f"'{key}': {formatted}"
            # track the combined length to decide on linebreaks afterwards
            total_len += len(entry)
            entries.append(entry)

        if total_len <= 80:
            return cls_name + '({' + ', '.join(entries) + '})'

        # we indent twice - once for the inner contents of every inner object, and once to
        # offset from the root element. that keeps us from needing to be recursive except
        # for the single pformat call
        inside = textwrap.indent(',\n'.join(entries), indent)
        return cls_name + '({\n' + inside + '\n})'

    def __repr__(self):
        return _pformat(items(self), self.__class__.__name__)

    for cls_name in dir(meta):
        cls = getattr(meta, cls_name)
        if not is_dataclass(cls):
            continue

        # subclass with a throwaway field so the generated dataclass has no repr of its own
        patched = make_dataclass(
            cls.__name__,
            fields=[('__dummy__', Any, field(default=None))],
            bases=(cls,),
            repr=False,
        )
        patched.__repr__ = __repr__
        patched.__str__ = __repr__
        setattr(meta, cls.__name__, patched)
def apply_patches() -> None:
    """Apply all monkeypatches, in the order listed below"""
    patches = (
        patch_npytyping_perf,
        patch_nptyping_warnings,
        patch_schemaview,
        patch_array_expression,
        patch_pretty_print,
    )
    for patch in patches:
        patch()

View file

@ -0,0 +1,17 @@
"""
Type annotations for NWB schema language types
"""
from typing import List, Union, TypeAlias
DIMS_LIST: TypeAlias = List[Union[str, None]]
"""A single-dimension dims specification"""
DIMS_TYPE: TypeAlias = Union[DIMS_LIST, List[DIMS_LIST]]
"""``dims`` in the nwb schema language"""
SHAPE_LIST: TypeAlias = List[Union[str, None]]
"""A single-dimension shape specification"""
SHAPE_TYPE: TypeAlias = Union[SHAPE_LIST, List[SHAPE_LIST]]
"""``shape`` in the nwb schema language"""

View file

@ -0,0 +1,73 @@
import pdb
import pytest
from typing import Tuple
from nwb_linkml.types.nwb import DIMS_TYPE, SHAPE_TYPE
from nwb_linkml.adapters.array import ArrayAdapter, Dimension, Shape
# pytest.param([['dim1'], ['dim1', 'dim2'], ['dim1', 'dim3']], [[1], [1, 2], [1, 2]], [],
# id='multi shape inconsistent dims'),
# pytest.param([['dim1'], ['dim1', 'dim2'], ['dim1', 'dim2']], [[1], [1, 2], [1, 3]], [],
# id='multi shape inconsistent shape'),
# pytest.param([['dim1'], ['dim1', 'dim2'], ['dim1', 'dim3']], [[1], [1, 2], [1, 3]], [],
# id='multi shape inconsistent both'),
def _expected_shape(n_dims: int) -> Shape:
    """
    Expected :class:`.Shape` for a layout with ``n_dims`` axes, named
    ``dim1..dimN`` and sized ``1..N``
    """
    return Shape(Dimension(dims=f"dim{i}", shape=i) for i in range(1, n_dims + 1))


@pytest.mark.parametrize(
    "dims,shape,expected",
    [
        pytest.param(
            ["dim1", "dim2", "dim3"],
            [1, 2, 3],
            [_expected_shape(3)],
            id="single shape",
        ),
        pytest.param(
            [["dim1"], ["dim1", "dim2"], ["dim1", "dim2", "dim3"]],
            [[1], [1, 2], [1, 2, 3]],
            [_expected_shape(1), _expected_shape(2), _expected_shape(3)],
            id="multi shape",
        ),
        pytest.param(
            # names given once at the top level while shape is nested
            ["dim1", "dim2", "dim3"],
            [[1], [1, 2], [1, 2, 3]],
            [_expected_shape(1), _expected_shape(2), _expected_shape(3)],
            id="malformed abbreviated dims spec",
        ),
    ],
)
def test_pivot_dims(dims: DIMS_TYPE, shape: SHAPE_TYPE, expected):
    """``pivot_dims`` pairs every dim name with its size, one Shape per layout"""
    assert ArrayAdapter(dims, shape).pivot_dims() == expected

View file

@ -36,7 +36,7 @@ def test_build_base(nwb_schema):
assert len(base.classes) == 1
img = base.classes[0]
assert len(img.attributes) == 4
assert img.attributes["newslot"] is slot
assert img.attributes["newslot"] == slot
def test_get_attr_name():