mirror of
https://github.com/p2p-ld/nwb-linkml.git
synced 2025-01-10 14:14:27 +00:00
adjusting array adapters to linkml arrays
This commit is contained in:
parent
0606221ab0
commit
087064be48
8 changed files with 302 additions and 15 deletions
|
@ -3,6 +3,7 @@ Adapter classes for translating from NWB schema language to LinkML
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from nwb_linkml.adapters.adapter import Adapter, BuildResult
|
from nwb_linkml.adapters.adapter import Adapter, BuildResult
|
||||||
|
from nwb_linkml.adapters.array import ArrayAdapter
|
||||||
from nwb_linkml.adapters.classes import ClassAdapter
|
from nwb_linkml.adapters.classes import ClassAdapter
|
||||||
from nwb_linkml.adapters.dataset import DatasetAdapter
|
from nwb_linkml.adapters.dataset import DatasetAdapter
|
||||||
from nwb_linkml.adapters.group import GroupAdapter
|
from nwb_linkml.adapters.group import GroupAdapter
|
||||||
|
|
109
nwb_linkml/src/nwb_linkml/adapters/array.py
Normal file
109
nwb_linkml/src/nwb_linkml/adapters/array.py
Normal file
|
@ -0,0 +1,109 @@
|
||||||
|
"""
|
||||||
|
Generator for array ranges from nwb dims/ranges
|
||||||
|
"""
|
||||||
|
|
||||||
|
from itertools import zip_longest
|
||||||
|
from typing import Dict, List, Literal, Optional, Union, NamedTuple, TypeAlias
|
||||||
|
from linkml_runtime.linkml_model.meta import (
|
||||||
|
ClassDefinition,
|
||||||
|
SlotDefinition,
|
||||||
|
ArrayExpression,
|
||||||
|
DimensionExpression,
|
||||||
|
)
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
from nwb_linkml.types.nwb import DIMS_LIST, DIMS_TYPE, SHAPE_LIST, SHAPE_TYPE
|
||||||
|
|
||||||
|
|
||||||
|
class Dimension(NamedTuple):
    """
    A single dimension/shape pair.

    Both fields default to ``None`` so that a partially specified
    dimension can still be represented.
    """

    # name of the dimension (used as the ``alias`` of the generated DimensionExpression)
    dims: Optional[str] = None
    # size of the dimension (used as ``exact_cardinality``); ``None`` when unspecified
    shape: Optional[int] = None
|
||||||
|
|
||||||
|
|
||||||
|
class Shape(tuple[Dimension]):
    """
    One nested layer of a dims/shape spec, held as a tuple of
    :class:`.Dimension` pairs.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class ArrayAdapter:
    """
    Adapter that generates a :class:`.ArrayExpression` (or set of them)
    from a NWB dims/shape declaration
    """

    def __init__(self, dims: DIMS_TYPE, shape: SHAPE_TYPE):
        """
        Args:
            dims: the ``dims`` declaration - a flat list of names, or a list of such lists
            shape: the ``shape`` declaration - a flat list of sizes, or a list of such lists
        """
        self.dims = dims
        self.shape = shape

    def pivot_dims(
        self, dims: Optional[DIMS_TYPE] = None, shape: Optional[SHAPE_TYPE] = None
    ) -> List[Shape]:
        """
        Pivot from a list of dims and a list of shape to a list of (dim, shape) tuples

        Args:
            dims: dims to pivot, ``self.dims`` is used when ``None``
            shape: shape to pivot, ``self.shape`` is used when ``None``

        Returns:
            One :class:`.Shape` per alternative layout. A single-layered spec
            is wrapped so that the result can always be iterated as a
            collection of :class:`.Shape` objects.
        """
        if dims is None:
            dims = self.dims
        if shape is None:
            shape = self.shape

        if len(dims) != len(shape):
            warnings.warn(
                f"dims ({len(dims)}) and shape ({len(shape)}) are not the same length!!! "
                "Your schema is formatted badly"
            )

        def _iter_dims(dims: DIMS_TYPE, shape: SHAPE_TYPE) -> Union[List[Shape], Shape]:
            """Pair dims with shape, recursing into nested (list-of-lists) shapes"""
            shapes = []
            # zip (rather than zip_longest) is deliberate: when a flat dims list is
            # reused for a shorter inner shape below, the surplus names are dropped
            for inner_dim, inner_shape in zip(dims, shape):
                if isinstance(inner_shape, list):
                    # list of lists

                    # some badly formatted schema will have shape be a LoL but only provide
                    # a single set of names at the top level. Best we can do is repeat it
                    # and pray that it is the same size as the longest dims
                    if not isinstance(inner_dim, list):
                        inner_dim = dims

                    shapes.append(_iter_dims(inner_dim, inner_shape))
                else:
                    # single-layer list
                    shapes.append(Dimension(inner_dim, inner_shape))

            # a layer made only of Dimension pairs is a complete Shape
            if all(isinstance(x, Dimension) for x in shapes):
                shapes = Shape(shapes)
            return shapes

        shapes = _iter_dims(dims, shape)

        if not all(isinstance(x, Shape) for x in shapes):
            # single-layered spec, wrap it
            shapes = [shapes]

        return shapes

    def make_expression(self, shape: Shape) -> ArrayExpression:
        """
        Create the corresponding array specification from a shape

        Each :class:`.Dimension` becomes a :class:`.DimensionExpression` whose
        ``alias`` is the dimension name and whose ``exact_cardinality`` is its size.
        """
        dims = [DimensionExpression(alias=dim.dims, exact_cardinality=dim.shape) for dim in shape]
        return ArrayExpression(dimensions=dims)

    def make(self) -> List[ArrayExpression]:
        """Create an array specification from self.dims and self.shape"""
        return [self.make_expression(shape) for shape in self.pivot_dims()]

    def make_slot(
        self,
    ) -> Union[
        Dict[Literal["array"], ArrayExpression],
        Dict[Literal["any_of"], List[Dict[Literal["array"], ArrayExpression]]],
    ]:
        """
        Make the array expressions in a dict form that can be **kwarg'd into a SlotDefinition,
        taking into account needing to use ``any_of`` for multiple array range specifications.

        Returns:
            ``{'array': expression}`` when there is exactly one expression,
            otherwise ``{'any_of': [{'array': expression}, ...]}``
        """
        expressions = self.make()
        if len(expressions) == 1:
            return {"array": expressions[0]}
        return {"any_of": [{"array": expression} for expression in expressions]}
|
|
@ -5,8 +5,14 @@ Adapter for NWB datasets to linkml Classes
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
|
from linkml_runtime.linkml_model.meta import (
|
||||||
|
ClassDefinition,
|
||||||
|
SlotDefinition,
|
||||||
|
ArrayExpression,
|
||||||
|
DimensionExpression,
|
||||||
|
)
|
||||||
|
|
||||||
|
from nwb_linkml.adapters.array import ArrayAdapter
|
||||||
from nwb_linkml.adapters.adapter import BuildResult
|
from nwb_linkml.adapters.adapter import BuildResult
|
||||||
from nwb_linkml.adapters.classes import ClassAdapter
|
from nwb_linkml.adapters.classes import ClassAdapter
|
||||||
from nwb_linkml.maps import QUANTITY_MAP, Map
|
from nwb_linkml.maps import QUANTITY_MAP, Map
|
||||||
|
@ -233,19 +239,20 @@ class MapArraylike(DatasetMap):
|
||||||
"""
|
"""
|
||||||
Map to an array class and the adjoining slot
|
Map to an array class and the adjoining slot
|
||||||
"""
|
"""
|
||||||
array_class = make_arraylike(cls, name)
|
array_adapter = ArrayAdapter(cls.dims, cls.shape)
|
||||||
|
expressions = array_adapter.make_slot()
|
||||||
name = camel_to_snake(cls.name)
|
name = camel_to_snake(cls.name)
|
||||||
res = BuildResult(
|
res = BuildResult(
|
||||||
slots=[
|
slots=[
|
||||||
SlotDefinition(
|
SlotDefinition(
|
||||||
name=name,
|
name=name,
|
||||||
multivalued=False,
|
multivalued=False,
|
||||||
range=array_class.name,
|
range=ClassAdapter.handle_dtype(cls.dtype),
|
||||||
description=cls.doc,
|
description=cls.doc,
|
||||||
required=cls.quantity not in ("*", "?"),
|
required=cls.quantity not in ("*", "?"),
|
||||||
|
**expressions
|
||||||
)
|
)
|
||||||
],
|
]
|
||||||
classes=[array_class],
|
|
||||||
)
|
)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
@ -287,12 +294,11 @@ class MapArrayLikeAttributes(DatasetMap):
|
||||||
"""
|
"""
|
||||||
Map to an arraylike class
|
Map to an arraylike class
|
||||||
"""
|
"""
|
||||||
array_class = make_arraylike(cls, name)
|
array_adapter = ArrayAdapter(cls.dims, cls.shape)
|
||||||
|
expressions = array_adapter.make_slot()
|
||||||
# make a slot for the arraylike class
|
# make a slot for the arraylike class
|
||||||
array_slot = SlotDefinition(name="array", range=array_class.name)
|
array_slot = SlotDefinition(name="array", range=ClassAdapter.handle_dtype(cls.dtype), **expressions)
|
||||||
|
res.classes[0].attributes.update({'array':array_slot})
|
||||||
res.classes.append(array_class)
|
|
||||||
res.classes[0].attributes.update({"array": array_slot})
|
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
@ -405,7 +411,7 @@ class DatasetAdapter(ClassAdapter):
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
def make_arraylike(cls: Dataset, name: Optional[str] = None) -> ClassDefinition:
|
def make_array_range(cls: Dataset, name: Optional[str] = None) -> ClassDefinition:
|
||||||
"""
|
"""
|
||||||
Create a containing arraylike class
|
Create a containing arraylike class
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
I don't know if NWB necessarily has a term for a single nwb schema file, so we're going
|
I don't know if NWB necessarily has a term for a single nwb schema file, so we're going
|
||||||
to call them "schema" objects
|
to call them "schema" objects
|
||||||
"""
|
"""
|
||||||
|
import pdb
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Optional, Type
|
from typing import List, Optional, Type
|
||||||
|
|
||||||
|
@ -74,9 +74,15 @@ class SchemaAdapter(Adapter):
|
||||||
"""
|
"""
|
||||||
res = BuildResult()
|
res = BuildResult()
|
||||||
for dset in self.datasets:
|
for dset in self.datasets:
|
||||||
res += DatasetAdapter(cls=dset).build()
|
new_res = DatasetAdapter(cls=dset).build()
|
||||||
|
if len(new_res.slots)>0:
|
||||||
|
pdb.set_trace()
|
||||||
|
res += new_res
|
||||||
for group in self.groups:
|
for group in self.groups:
|
||||||
res += GroupAdapter(cls=group).build()
|
new_res = GroupAdapter(cls=group).build()
|
||||||
|
if len(new_res.slots)>0:
|
||||||
|
pdb.set_trace()
|
||||||
|
res += new_res
|
||||||
|
|
||||||
if (
|
if (
|
||||||
len(res.slots) > 0
|
len(res.slots) > 0
|
||||||
|
|
|
@ -115,9 +115,84 @@ def patch_schemaview() -> None:
|
||||||
|
|
||||||
SchemaView.imports_closure = imports_closure
|
SchemaView.imports_closure = imports_closure
|
||||||
|
|
||||||
|
def patch_array_expression() -> None:
    """
    Replace ``meta.AnonymousSlotExpression`` with a subclass that carries an
    optional ``array`` field, so SlotDefinitions can combine `any_of` with `array`

    see: https://github.com/linkml/linkml-model/issues/199
    """
    from dataclasses import field, make_dataclass
    from typing import Optional

    from linkml_runtime.linkml_model import meta

    # the one extra field: an optional ArrayExpression that defaults to None
    array_field = ('array', Optional[meta.ArrayExpression], field(default=None))
    patched = make_dataclass(
        'AnonymousSlotExpression',
        fields=[array_field],
        bases=(meta.AnonymousSlotExpression,),
    )
    meta.AnonymousSlotExpression = patched
|
||||||
|
|
||||||
|
def patch_pretty_print() -> None:
    """
    Swap each dataclass found in ``linkml_runtime.linkml_model.meta`` for a subclass
    whose ``__repr__`` and ``__str__`` render fields with consistent indentation.

    See: https://github.com/linkml/linkml-runtime/pull/314
    """
    import re
    import textwrap
    from dataclasses import field, is_dataclass, make_dataclass
    from pprint import pformat
    from typing import Any

    from linkml_runtime.linkml_model import meta
    from linkml_runtime.utils.formatutils import items

    def _pformat(fields: dict, cls_name: str, indent: str = ' ') -> str:
        """
        Render ``fields`` (iterated as key/value pairs) as ``cls_name({...})``.

        Empty lists, empty dicts and ``None`` values are left out. The result is kept
        on one line unless the rendered entries add up to more than 80 characters, in
        which case each entry goes on its own indented line.
        see ``YAMLRoot.__repr__``.
        """
        first_indent = re.compile(rf'\A{re.escape(indent)}')
        rendered = []
        for key, val in fields:
            if not (val == [] or val == {} or val is None):
                # pformat is only used to split long values across lines (indent=0);
                # indentation is applied here instead
                body = pformat(val, indent=0, compact=True, sort_dicts=False)
                # indent every line, then strip the indent from the first line only
                body = first_indent.sub('', textwrap.indent(body, indent))
                # key: value form, so the output resembles a dict literal
                rendered.append(f"'{key}': " + body)

        # combined length of the entries decides between one-line and multi-line form
        if sum(len(entry) for entry in rendered) <= 80:
            return cls_name + '({' + ', '.join(rendered) + '})'

        # indenting the joined block offsets nested content from the root element,
        # which avoids recursing beyond the single pformat call above
        block = textwrap.indent(',\n'.join(rendered), indent)
        return cls_name + '({\n' + block + '\n})'

    def __repr__(self):
        return _pformat(items(self), self.__class__.__name__)

    for attr_name in dir(meta):
        candidate = getattr(meta, attr_name)
        if not is_dataclass(candidate):
            continue
        # subclass with repr=False so the dataclass machinery does not generate a repr
        patched = make_dataclass(
            candidate.__name__,
            fields=[('__dummy__', Any, field(default=None))],
            bases=(candidate,),
            repr=False,
        )
        patched.__repr__ = __repr__
        patched.__str__ = __repr__
        setattr(meta, candidate.__name__, patched)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def apply_patches() -> None:
|
def apply_patches() -> None:
|
||||||
"""Apply all monkeypatches"""
|
"""Apply all monkeypatches"""
|
||||||
patch_npytyping_perf()
|
patch_npytyping_perf()
|
||||||
patch_nptyping_warnings()
|
patch_nptyping_warnings()
|
||||||
patch_schemaview()
|
patch_schemaview()
|
||||||
|
patch_array_expression()
|
||||||
|
patch_pretty_print()
|
||||||
|
|
17
nwb_linkml/src/nwb_linkml/types/nwb.py
Normal file
17
nwb_linkml/src/nwb_linkml/types/nwb.py
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
"""
|
||||||
|
Type annotations for NWB schema language types
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import List, Union, TypeAlias
|
||||||
|
|
||||||
|
DIMS_LIST: TypeAlias = List[Union[str, None]]
|
||||||
|
"""A single-dimension dims specification"""
|
||||||
|
|
||||||
|
DIMS_TYPE: TypeAlias = Union[DIMS_LIST, List[DIMS_LIST]]
|
||||||
|
"""``dims`` in the nwb schema language"""
|
||||||
|
|
||||||
|
SHAPE_LIST: TypeAlias = List[Union[str, None]]
|
||||||
|
"""A single-dimension shape specification"""
|
||||||
|
|
||||||
|
SHAPE_TYPE: TypeAlias = Union[SHAPE_LIST, List[SHAPE_LIST]]
|
||||||
|
"""``shape`` in the nwb schema language"""
|
73
nwb_linkml/tests/test_adapters/test_adapter_array.py
Normal file
73
nwb_linkml/tests/test_adapters/test_adapter_array.py
Normal file
|
@ -0,0 +1,73 @@
|
||||||
|
import pdb
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from typing import Tuple
|
||||||
|
from nwb_linkml.types.nwb import DIMS_TYPE, SHAPE_TYPE
|
||||||
|
from nwb_linkml.adapters.array import ArrayAdapter, Dimension, Shape
|
||||||
|
|
||||||
|
# pytest.param([['dim1'], ['dim1', 'dim2'], ['dim1', 'dim3']], [[1], [1, 2], [1, 2]], [],
|
||||||
|
# id='multi shape inconsistent dims'),
|
||||||
|
# pytest.param([['dim1'], ['dim1', 'dim2'], ['dim1', 'dim2']], [[1], [1, 2], [1, 3]], [],
|
||||||
|
# id='multi shape inconsistent shape'),
|
||||||
|
# pytest.param([['dim1'], ['dim1', 'dim2'], ['dim1', 'dim3']], [[1], [1, 2], [1, 3]], [],
|
||||||
|
# id='multi shape inconsistent both'),
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "dims,shape,expected",
    [
        pytest.param(
            ["dim1", "dim2", "dim3"],
            [1, 2, 3],
            [
                Shape(
                    (
                        Dimension("dim1", 1),
                        Dimension("dim2", 2),
                        Dimension("dim3", 3),
                    )
                ),
            ],
            id="single shape",
        ),
        pytest.param(
            [["dim1"], ["dim1", "dim2"], ["dim1", "dim2", "dim3"]],
            [[1], [1, 2], [1, 2, 3]],
            [
                Shape((Dimension("dim1", 1),)),
                Shape((Dimension("dim1", 1), Dimension("dim2", 2))),
                Shape(
                    (
                        Dimension("dim1", 1),
                        Dimension("dim2", 2),
                        Dimension("dim3", 3),
                    )
                ),
            ],
            id="multi shape",
        ),
        pytest.param(
            ["dim1", "dim2", "dim3"],
            [[1], [1, 2], [1, 2, 3]],
            [
                Shape((Dimension("dim1", 1),)),
                Shape((Dimension("dim1", 1), Dimension("dim2", 2))),
                Shape(
                    (
                        Dimension("dim1", 1),
                        Dimension("dim2", 2),
                        Dimension("dim3", 3),
                    )
                ),
            ],
            id="malformed abbreviated dims spec",
        ),
    ],
)
def test_pivot_dims(dims: DIMS_TYPE, shape: SHAPE_TYPE, expected):
    """Pivoting dims/shape declarations yields the expected list of Shape tuples"""
    pivoted = ArrayAdapter(dims, shape).pivot_dims()
    assert pivoted == expected
|
|
@ -36,7 +36,7 @@ def test_build_base(nwb_schema):
|
||||||
assert len(base.classes) == 1
|
assert len(base.classes) == 1
|
||||||
img = base.classes[0]
|
img = base.classes[0]
|
||||||
assert len(img.attributes) == 4
|
assert len(img.attributes) == 4
|
||||||
assert img.attributes["newslot"] is slot
|
assert img.attributes["newslot"] == slot
|
||||||
|
|
||||||
|
|
||||||
def test_get_attr_name():
|
def test_get_attr_name():
|
||||||
|
|
Loading…
Reference in a new issue