mirror of
https://github.com/p2p-ld/nwb-linkml.git
synced 2025-01-10 06:04:28 +00:00
adjusting array adapters to linkml arrays
This commit is contained in:
parent
0606221ab0
commit
087064be48
8 changed files with 302 additions and 15 deletions
|
@ -3,6 +3,7 @@ Adapter classes for translating from NWB schema language to LinkML
|
|||
"""
|
||||
|
||||
from nwb_linkml.adapters.adapter import Adapter, BuildResult
|
||||
from nwb_linkml.adapters.array import ArrayAdapter
|
||||
from nwb_linkml.adapters.classes import ClassAdapter
|
||||
from nwb_linkml.adapters.dataset import DatasetAdapter
|
||||
from nwb_linkml.adapters.group import GroupAdapter
|
||||
|
|
109
nwb_linkml/src/nwb_linkml/adapters/array.py
Normal file
109
nwb_linkml/src/nwb_linkml/adapters/array.py
Normal file
|
@ -0,0 +1,109 @@
|
|||
"""
|
||||
Generator for array ranges from nwb dims/ranges
|
||||
"""
|
||||
|
||||
from itertools import zip_longest
|
||||
from typing import Dict, List, Literal, Optional, Union, NamedTuple, TypeAlias
|
||||
from linkml_runtime.linkml_model.meta import (
|
||||
ClassDefinition,
|
||||
SlotDefinition,
|
||||
ArrayExpression,
|
||||
DimensionExpression,
|
||||
)
|
||||
import warnings
|
||||
|
||||
from nwb_linkml.types.nwb import DIMS_LIST, DIMS_TYPE, SHAPE_LIST, SHAPE_TYPE
|
||||
|
||||
|
||||
class Dimension(NamedTuple):
    """
    A single dimension/shape pair

    Both fields default to ``None``, so a dimension can be left unnamed and/or unsized.
    """

    # name of the dimension, used as the ``alias`` of the generated DimensionExpression
    dims: Optional[str] = None
    # size of the dimension, used as the ``exact_cardinality`` of the DimensionExpression
    shape: Optional[int] = None
|
||||
|
||||
|
||||
class Shape(tuple[Dimension, ...]):
    """
    A collection of :class:`.Dimension` tuples representing one of the nested layers in
    a dims/shape spec

    Variable-length: a layer may contain any number of dimensions.
    """
|
||||
|
||||
|
||||
class ArrayAdapter:
    """
    Adapter that generates a :class:`.ArrayExpression` (or set of them)
    from a NWB dims/shape declaration
    """

    def __init__(self, dims: DIMS_TYPE, shape: SHAPE_TYPE):
        self.dims = dims
        self.shape = shape

    def pivot_dims(
        self, dims: Optional[DIMS_TYPE] = None, shape: Optional[SHAPE_TYPE] = None
    ) -> List[Shape]:
        """
        Pivot from a list of dims and a list of shape to a list of (dim, shape) tuples

        Args:
            dims: dims spec to pivot, defaults to ``self.dims``
            shape: shape spec to pivot, defaults to ``self.shape``

        Returns:
            One :class:`.Shape` per layer of the spec. A non-empty single-layered
            spec is wrapped in a one-item list.

        Warns:
            If ``dims`` and ``shape`` differ in length. They are still paired with
            ``zip``, so the surplus items of the longer one are dropped.
        """
        if dims is None:
            dims = self.dims
        if shape is None:
            shape = self.shape

        if len(dims) != len(shape):
            warnings.warn(
                f"dims ({len(dims)}) and shape ({len(shape)}) are not the same length!!! "
                "Your schema is formatted badly"
            )

        def _iter_dims(dims: DIMS_TYPE, shape: SHAPE_TYPE) -> Union[List[Shape], Shape]:
            shapes = []
            for inner_dim, inner_shape in zip(dims, shape):
                if isinstance(inner_shape, list):
                    # list of lists

                    # some badly formatted schema will have shape be a LoL but only provide a
                    # single set of names at the top level. Best we can do is repeat it and pray
                    # that it is the same size as the longest dims
                    if not isinstance(inner_dim, list):
                        inner_dim = dims

                    shapes.append(_iter_dims(inner_dim, inner_shape))
                else:
                    # single-layer list
                    shapes.append(Dimension(inner_dim, inner_shape))

            # a layer made only of Dimensions is a leaf: freeze it into a Shape
            if all(isinstance(x, Dimension) for x in shapes):
                shapes = Shape(shapes)
            return shapes

        shapes = _iter_dims(dims, shape)

        if not all(isinstance(x, Shape) for x in shapes):
            # single-layered spec, wrap it
            shapes = [shapes]

        return shapes

    def make_expression(self, shape: Shape) -> ArrayExpression:
        """
        Create the corresponding array specification from a shape

        Each :class:`.Dimension` becomes a :class:`.DimensionExpression` whose ``alias``
        is the dimension name and whose ``exact_cardinality`` is its size.
        """
        dims = [DimensionExpression(alias=dim.dims, exact_cardinality=dim.shape) for dim in shape]
        return ArrayExpression(dimensions=dims)

    def make(self) -> List[ArrayExpression]:
        """Create an array specification from self.dims and self.shape"""
        return [self.make_expression(shape) for shape in self.pivot_dims()]

    def make_slot(
        self,
    ) -> Union[
        Dict[Literal["array"], ArrayExpression],
        Dict[Literal["any_of"], List[Dict[Literal["array"], ArrayExpression]]],
    ]:
        """
        Make the array expressions in a dict form that can be **kwarg'd into a SlotDefinition,
        taking into account needing to use ``any_of`` for multiple array range specifications.

        Returns:
            ``{'array': expression}`` when there is exactly one expression, otherwise
            ``{'any_of': [{'array': expression}, ...]}``
        """
        expressions = self.make()
        if len(expressions) == 1:
            return {'array': expressions[0]}
        return {'any_of': [{'array': expression} for expression in expressions]}
|
|
@ -5,8 +5,14 @@ Adapter for NWB datasets to linkml Classes
|
|||
from abc import abstractmethod
|
||||
from typing import Optional
|
||||
|
||||
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
|
||||
from linkml_runtime.linkml_model.meta import (
|
||||
ClassDefinition,
|
||||
SlotDefinition,
|
||||
ArrayExpression,
|
||||
DimensionExpression,
|
||||
)
|
||||
|
||||
from nwb_linkml.adapters.array import ArrayAdapter
|
||||
from nwb_linkml.adapters.adapter import BuildResult
|
||||
from nwb_linkml.adapters.classes import ClassAdapter
|
||||
from nwb_linkml.maps import QUANTITY_MAP, Map
|
||||
|
@ -233,19 +239,20 @@ class MapArraylike(DatasetMap):
|
|||
"""
|
||||
Map to an array class and the adjoining slot
|
||||
"""
|
||||
array_class = make_arraylike(cls, name)
|
||||
array_adapter = ArrayAdapter(cls.dims, cls.shape)
|
||||
expressions = array_adapter.make_slot()
|
||||
name = camel_to_snake(cls.name)
|
||||
res = BuildResult(
|
||||
slots=[
|
||||
SlotDefinition(
|
||||
name=name,
|
||||
multivalued=False,
|
||||
range=array_class.name,
|
||||
range=ClassAdapter.handle_dtype(cls.dtype),
|
||||
description=cls.doc,
|
||||
required=cls.quantity not in ("*", "?"),
|
||||
**expressions
|
||||
)
|
||||
],
|
||||
classes=[array_class],
|
||||
]
|
||||
)
|
||||
return res
|
||||
|
||||
|
@ -287,12 +294,11 @@ class MapArrayLikeAttributes(DatasetMap):
|
|||
"""
|
||||
Map to an arraylike class
|
||||
"""
|
||||
array_class = make_arraylike(cls, name)
|
||||
array_adapter = ArrayAdapter(cls.dims, cls.shape)
|
||||
expressions = array_adapter.make_slot()
|
||||
# make a slot for the arraylike class
|
||||
array_slot = SlotDefinition(name="array", range=array_class.name)
|
||||
|
||||
res.classes.append(array_class)
|
||||
res.classes[0].attributes.update({"array": array_slot})
|
||||
array_slot = SlotDefinition(name="array", range=ClassAdapter.handle_dtype(cls.dtype), **expressions)
|
||||
res.classes[0].attributes.update({'array':array_slot})
|
||||
return res
|
||||
|
||||
|
||||
|
@ -405,7 +411,7 @@ class DatasetAdapter(ClassAdapter):
|
|||
return res
|
||||
|
||||
|
||||
def make_arraylike(cls: Dataset, name: Optional[str] = None) -> ClassDefinition:
|
||||
def make_array_range(cls: Dataset, name: Optional[str] = None) -> ClassDefinition:
|
||||
"""
|
||||
Create a containing arraylike class
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
I don't know if NWB necessarily has a term for a single nwb schema file, so we're going
|
||||
to call them "schema" objects
|
||||
"""
|
||||
|
||||
import pdb
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Type
|
||||
|
||||
|
@ -74,9 +74,15 @@ class SchemaAdapter(Adapter):
|
|||
"""
|
||||
res = BuildResult()
|
||||
for dset in self.datasets:
|
||||
res += DatasetAdapter(cls=dset).build()
|
||||
new_res = DatasetAdapter(cls=dset).build()
|
||||
if len(new_res.slots)>0:
|
||||
pdb.set_trace()
|
||||
res += new_res
|
||||
for group in self.groups:
|
||||
res += GroupAdapter(cls=group).build()
|
||||
new_res = GroupAdapter(cls=group).build()
|
||||
if len(new_res.slots)>0:
|
||||
pdb.set_trace()
|
||||
res += new_res
|
||||
|
||||
if (
|
||||
len(res.slots) > 0
|
||||
|
|
|
@ -115,9 +115,84 @@ def patch_schemaview() -> None:
|
|||
|
||||
SchemaView.imports_closure = imports_closure
|
||||
|
||||
def patch_array_expression() -> None:
    """
    Allow SlotDefinitions to use `any_of` with `array`

    Replaces ``meta.AnonymousSlotExpression`` with a dataclass subclass that has an
    additional optional ``array`` field. If the class already has an ``array`` field
    nothing is done, so repeated calls do not stack subclasses on top of each other.

    see: https://github.com/linkml/linkml-model/issues/199
    """
    from dataclasses import field, fields, make_dataclass
    from typing import Optional

    from linkml_runtime.linkml_model import meta

    base = meta.AnonymousSlotExpression
    if any(f.name == 'array' for f in fields(base)):
        # already patched (or already supported by the installed model)
        return

    meta.AnonymousSlotExpression = make_dataclass(
        'AnonymousSlotExpression',
        fields=[('array', Optional[meta.ArrayExpression], field(default=None))],
        bases=(base,),
    )
|
||||
|
||||
def patch_pretty_print() -> None:
    """
    Fix the godforsaken linkml dataclass reprs

    Every dataclass class found in ``linkml_runtime.linkml_model.meta`` is replaced by a
    subclass whose ``__repr__`` and ``__str__`` use the formatter defined below.

    See: https://github.com/linkml/linkml-runtime/pull/314
    """
    import re
    import textwrap
    from dataclasses import field, is_dataclass, make_dataclass
    from pprint import pformat
    from typing import Any, Iterable, Tuple

    from linkml_runtime.linkml_model import meta
    from linkml_runtime.utils.formatutils import items

    def _pformat(fields: Iterable[Tuple[str, Any]], cls_name: str, indent: str = ' ') -> str:
        """
        pretty format the fields of the items of a ``YAMLRoot`` object without the wonky
        indentation of pformat.
        see ``YAMLRoot.__repr__``.
        formatting is similar to black - items at similar levels of nesting have similar
        levels of indentation, rather than getting placed at essentially random levels of
        indentation depending on what came before them.

        ``fields`` is iterated as ``(key, value)`` pairs; empty lists, empty dicts and
        ``None`` values are left out of the output.
        """
        res = []
        total_len = 0
        for key, val in fields:
            if val == [] or val == {} or val is None:
                continue
            # pformat handles everything else that isn't a YAMLRoot object, but it sure does
            # look ugly. use it to split lines and as the thing of last resort, but otherwise
            # indent = 0, we'll do that
            val_str = pformat(val, indent=0, compact=True, sort_dicts=False)
            # now we indent everything except the first line by indenting and then using
            # regex to remove just the first indent
            val_str = re.sub(rf'\A{re.escape(indent)}', '', textwrap.indent(val_str, indent))
            # now recombine with the key in a format that can be re-eval'd into an object
            # if indent is just whitespace
            val_str = f"'{key}': " + val_str

            # count the total length of this string so we know if we need to linebreak
            # or not later
            total_len += len(val_str)
            res.append(val_str)

        if total_len > 80:
            inside = ',\n'.join(res)
            # we indent twice - once for the inner contents of every inner object, and one to
            # offset from the root element. that keeps us from needing to be recursive except
            # for the single pformat call
            inside = textwrap.indent(inside, indent)
            return cls_name + '({\n' + inside + '\n})'
        else:
            return cls_name + '({' + ', '.join(res) + '})'

    def __repr__(self):
        return _pformat(items(self), self.__class__.__name__)

    for cls_name in dir(meta):
        cls = getattr(meta, cls_name)
        # is_dataclass() is also true for dataclass *instances*, which cannot be used
        # as a base class, so only patch actual classes
        if isinstance(cls, type) and is_dataclass(cls):
            new_dataclass = make_dataclass(
                cls.__name__,
                fields=[('__dummy__', Any, field(default=None))],
                bases=(cls,),
                repr=False,
            )
            new_dataclass.__repr__ = __repr__
            new_dataclass.__str__ = __repr__
            setattr(meta, cls.__name__, new_dataclass)
|
||||
|
||||
|
||||
|
||||
|
||||
def apply_patches() -> None:
    """Run each of this module's monkeypatches, one after another"""
    patches = (
        patch_npytyping_perf,
        patch_nptyping_warnings,
        patch_schemaview,
        patch_array_expression,
        patch_pretty_print,
    )
    for patch in patches:
        patch()
|
||||
|
|
17
nwb_linkml/src/nwb_linkml/types/nwb.py
Normal file
17
nwb_linkml/src/nwb_linkml/types/nwb.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
"""
|
||||
Type annotations for NWB schema language types
|
||||
"""
|
||||
|
||||
from typing import List, Union, TypeAlias
|
||||
|
||||
DIMS_LIST: TypeAlias = List[Union[str, None]]
|
||||
"""A single-dimension dims specification"""
|
||||
|
||||
DIMS_TYPE: TypeAlias = Union[DIMS_LIST, List[DIMS_LIST]]
|
||||
"""``dims`` in the nwb schema language"""
|
||||
|
||||
SHAPE_LIST: TypeAlias = List[Union[str, None]]
|
||||
"""A single-dimension shape specification"""
|
||||
|
||||
SHAPE_TYPE: TypeAlias = Union[SHAPE_LIST, List[SHAPE_LIST]]
|
||||
"""``shape`` in the nwb schema language"""
|
73
nwb_linkml/tests/test_adapters/test_adapter_array.py
Normal file
73
nwb_linkml/tests/test_adapters/test_adapter_array.py
Normal file
|
@ -0,0 +1,73 @@
|
|||
import pdb
|
||||
|
||||
import pytest
|
||||
|
||||
from typing import Tuple
|
||||
from nwb_linkml.types.nwb import DIMS_TYPE, SHAPE_TYPE
|
||||
from nwb_linkml.adapters.array import ArrayAdapter, Dimension, Shape
|
||||
|
||||
# pytest.param([['dim1'], ['dim1', 'dim2'], ['dim1', 'dim3']], [[1], [1, 2], [1, 2]], [],
|
||||
# id='multi shape inconsistent dims'),
|
||||
# pytest.param([['dim1'], ['dim1', 'dim2'], ['dim1', 'dim2']], [[1], [1, 2], [1, 3]], [],
|
||||
# id='multi shape inconsistent shape'),
|
||||
# pytest.param([['dim1'], ['dim1', 'dim2'], ['dim1', 'dim3']], [[1], [1, 2], [1, 3]], [],
|
||||
# id='multi shape inconsistent both'),
|
||||
|
||||
|
||||
def _layer(n: int) -> Shape:
    """Expected Shape for a layer spanning ``dim1``..``dim<n>`` with sizes ``1``..``n``"""
    return Shape(Dimension(dims=f"dim{i}", shape=i) for i in range(1, n + 1))


@pytest.mark.parametrize(
    "dims,shape,expected",
    [
        pytest.param(
            ["dim1", "dim2", "dim3"],
            [1, 2, 3],
            [_layer(3)],
            id="single shape",
        ),
        pytest.param(
            [["dim1"], ["dim1", "dim2"], ["dim1", "dim2", "dim3"]],
            [[1], [1, 2], [1, 2, 3]],
            [_layer(1), _layer(2), _layer(3)],
            id="multi shape",
        ),
        pytest.param(
            ["dim1", "dim2", "dim3"],
            [[1], [1, 2], [1, 2, 3]],
            [_layer(1), _layer(2), _layer(3)],
            id="malformed abbreviated dims spec",
        ),
    ],
)
def test_pivot_dims(dims: DIMS_TYPE, shape: SHAPE_TYPE, expected):
    """Pivoting dims/shape specs yields the expected list of Shapes"""
    pivoted = ArrayAdapter(dims, shape).pivot_dims()
    assert pivoted == expected
|
|
@ -36,7 +36,7 @@ def test_build_base(nwb_schema):
|
|||
assert len(base.classes) == 1
|
||||
img = base.classes[0]
|
||||
assert len(img.attributes) == 4
|
||||
assert img.attributes["newslot"] is slot
|
||||
assert img.attributes["newslot"] == slot
|
||||
|
||||
|
||||
def test_get_attr_name():
|
||||
|
|
Loading…
Reference in a new issue