mirror of
https://github.com/p2p-ld/nwb-linkml.git
synced 2025-01-10 06:04:28 +00:00
Need to go home, in the middle of refactoring group and dataset as well as implementing the name property correctly
This commit is contained in:
parent
fd9aef9531
commit
3568037a1e
5 changed files with 432 additions and 257 deletions
|
@ -2,6 +2,8 @@
|
||||||
Adapters to linkML classes
|
Adapters to linkML classes
|
||||||
"""
|
"""
|
||||||
import pdb
|
import pdb
|
||||||
|
import re
|
||||||
|
from abc import abstractmethod
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
from nwb_schema_language import Dataset, Group, ReferenceDtype, CompoundDtype, DTypeType
|
from nwb_schema_language import Dataset, Group, ReferenceDtype, CompoundDtype, DTypeType
|
||||||
from nwb_linkml.adapters.adapter import Adapter, BuildResult
|
from nwb_linkml.adapters.adapter import Adapter, BuildResult
|
||||||
|
@ -9,17 +11,95 @@ from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
|
||||||
from nwb_linkml.maps import QUANTITY_MAP
|
from nwb_linkml.maps import QUANTITY_MAP
|
||||||
from nwb_linkml.lang_elements import Arraylike
|
from nwb_linkml.lang_elements import Arraylike
|
||||||
|
|
||||||
|
CAMEL_TO_SNAKE = re.compile(r'((?<=[a-z0-9])[A-Z]|(?!^)[A-Z](?=[a-z]))')
"""
Pattern matching each uppercase letter that starts a new word in a camel case name:
either an uppercase letter that follows a lowercase letter or digit, or an
uppercase letter (not at the start of the string) that is followed by a lowercase letter.

courtesy of: https://stackoverflow.com/a/12867228
"""


def camel_to_snake(name:str) -> str:
    """
    Convert camel case to snake case

    An underscore is inserted before every letter matched by :data:`CAMEL_TO_SNAKE`
    and the result is lowercased, so runs of capitals are kept together
    (``NWBFile`` -> ``nwb_file``).

    courtesy of: https://stackoverflow.com/a/12867228

    Args:
        name (str): The camel case name to convert

    Returns:
        str: The snake case version of ``name``
    """
    return CAMEL_TO_SNAKE.sub(r'_\1', name).lower()
|
||||||
|
|
||||||
class ClassAdapter(Adapter):
|
class ClassAdapter(Adapter):
|
||||||
"""
|
"""
|
||||||
Adapter to class-like things in linkml, including datasets and groups
|
Abstract adapter to class-like things in linkml, holds methods common to
|
||||||
|
both DatasetAdapter and GroupAdapter
|
||||||
"""
|
"""
|
||||||
cls: Dataset | Group
|
cls: Dataset | Group
|
||||||
parent: Optional['ClassAdapter'] = None
|
parent: Optional['ClassAdapter'] = None
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def build(self) -> BuildResult:
|
||||||
|
"""
|
||||||
|
Make this abstract so it can't be instantiated directly.
|
||||||
|
|
||||||
|
Subclasses call :meth:`.build_base` to get the basics true of both groups and datasets
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def build_base(self, extra_attrs: Optional[List[SlotDefinition]]=None) -> BuildResult:
|
||||||
|
"""
|
||||||
|
Build the basic class and attributes before adding any specific
|
||||||
|
modifications for groups or datasets.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Build this class
|
||||||
|
#name = self._get_full_name()
|
||||||
|
if self.parent is not None:
|
||||||
|
name = self._get_full_name()
|
||||||
|
else:
|
||||||
|
name = self._get_attr_name()
|
||||||
|
|
||||||
|
# Get vanilla top-level attributes
|
||||||
|
attrs = self.build_attrs(self.cls)
|
||||||
|
name_slot = self.build_name_slot()
|
||||||
|
attrs.append(name_slot)
|
||||||
|
if extra_attrs is not None:
|
||||||
|
if isinstance(extra_attrs, SlotDefinition):
|
||||||
|
extra_attrs = [extra_attrs]
|
||||||
|
attrs.extend(extra_attrs)
|
||||||
|
|
||||||
|
cls = ClassDefinition(
|
||||||
|
name = name,
|
||||||
|
is_a = self.cls.neurodata_type_inc,
|
||||||
|
description=self.cls.doc,
|
||||||
|
attributes=attrs,
|
||||||
|
)
|
||||||
|
|
||||||
|
slots = []
|
||||||
|
if self.parent is not None:
|
||||||
|
slots.append(self.build_self_slot())
|
||||||
|
|
||||||
|
res = BuildResult(
|
||||||
|
classes = [cls],
|
||||||
|
slots = slots
|
||||||
|
)
|
||||||
|
|
||||||
|
return res
|
||||||
|
|
||||||
|
def build_attrs(self, cls: Dataset | Group) -> List[SlotDefinition]:
|
||||||
|
attrs = [
|
||||||
|
SlotDefinition(
|
||||||
|
name=attr.name,
|
||||||
|
description=attr.doc,
|
||||||
|
range=self.handle_dtype(attr.dtype),
|
||||||
|
) for attr in cls.attributes
|
||||||
|
]
|
||||||
|
|
||||||
|
return attrs
|
||||||
|
|
||||||
def _get_full_name(self) -> str:
|
def _get_full_name(self) -> str:
|
||||||
"""The full name of the object in the generated linkml
|
"""The full name of the object in the generated linkml
|
||||||
|
|
||||||
Distinct from 'name' which is the thing that's often used in """
|
Distinct from 'name' which is the thing that's used to define position in
|
||||||
|
a hierarchical data setting
|
||||||
|
"""
|
||||||
if self.cls.neurodata_type_def:
|
if self.cls.neurodata_type_def:
|
||||||
name = self.cls.neurodata_type_def
|
name = self.cls.neurodata_type_def
|
||||||
elif self.cls.name is not None:
|
elif self.cls.name is not None:
|
||||||
|
@ -39,22 +119,21 @@ class ClassAdapter(Adapter):
|
||||||
|
|
||||||
return name
|
return name
|
||||||
|
|
||||||
def _get_name(self) -> str:
|
def _get_attr_name(self) -> str:
|
||||||
"""
|
"""
|
||||||
Get the "regular" name, which is used as the name of the attr
|
Get the name to use as the attribute name,
|
||||||
|
again distinct from the actual name of the instantiated object
|
||||||
Returns:
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
# return self._get_full_name()
|
# return self._get_full_name()
|
||||||
name = None
|
name = None
|
||||||
if self.cls.neurodata_type_def:
|
if self.cls.neurodata_type_def:
|
||||||
|
#name = camel_to_snake(self.cls.neurodata_type_def)
|
||||||
name = self.cls.neurodata_type_def
|
name = self.cls.neurodata_type_def
|
||||||
elif self.cls.name is not None:
|
elif self.cls.name is not None:
|
||||||
# we do have a unique name
|
# we do have a unique name
|
||||||
name = self.cls.name
|
name = self.cls.name
|
||||||
elif self.cls.neurodata_type_inc:
|
elif self.cls.neurodata_type_inc:
|
||||||
# group members can be anonymous? this violates the schema but is common
|
#name = camel_to_snake(self.cls.neurodata_type_inc)
|
||||||
name = self.cls.neurodata_type_inc
|
name = self.cls.neurodata_type_inc
|
||||||
|
|
||||||
if name is None:
|
if name is None:
|
||||||
|
@ -62,125 +141,6 @@ class ClassAdapter(Adapter):
|
||||||
|
|
||||||
return name
|
return name
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def handle_arraylike(self, dataset: Dataset, name:Optional[str]=None) -> Optional[ClassDefinition | SlotDefinition]:
|
|
||||||
"""
|
|
||||||
Handling the
|
|
||||||
|
|
||||||
- dims
|
|
||||||
- shape
|
|
||||||
- dtype
|
|
||||||
|
|
||||||
fields as they are used in datasets. We'll use the :class:`.Arraylike` class to imitate them.
|
|
||||||
|
|
||||||
Specifically:
|
|
||||||
|
|
||||||
- Each slot within a subclass indicates a possible dimension.
|
|
||||||
- Only dimensions that are present in all the dimension specifiers in the
|
|
||||||
original schema are required.
|
|
||||||
- Shape requirements are indicated using max/min cardinalities on the slot.
|
|
||||||
- The arraylike object should be stored in the `array` slot on the containing class
|
|
||||||
(since there are already properties named `data`)
|
|
||||||
|
|
||||||
If any of `dims`, `shape`, or `dtype` are undefined, return `None`
|
|
||||||
|
|
||||||
Args:
|
|
||||||
dataset (:class:`nwb_schema_language.Dataset`): The dataset defining the arraylike
|
|
||||||
name (str): If present, override the name of the class before appending _Array
|
|
||||||
(we don't use _get_full_name here because we want to eventually decouple these functions from this adapter
|
|
||||||
class, which is sort of a development crutch. Ideally all these methods would just work on base nwb schema language types)
|
|
||||||
"""
|
|
||||||
if not any((dataset.dims, dataset.shape)):
|
|
||||||
# none of the required properties are defined, that's fine.
|
|
||||||
return
|
|
||||||
elif not all((dataset.dims, dataset.shape)):
|
|
||||||
# need to have both if one is present!
|
|
||||||
raise ValueError(f"A dataset needs both dims and shape to define an arraylike object")
|
|
||||||
|
|
||||||
# Special cases
|
|
||||||
if dataset.neurodata_type_inc == 'VectorData':
|
|
||||||
# Handle this in `handle_vectorlike` instead
|
|
||||||
return None
|
|
||||||
|
|
||||||
# The schema language doesn't have a way of specifying a dataset/group is "abstract"
|
|
||||||
# and yet hdmf-common says you don't need a dtype if the dataset is "abstract"
|
|
||||||
# so....
|
|
||||||
dtype = self.handle_dtype(dataset.dtype)
|
|
||||||
|
|
||||||
# dims and shape are lists of lists. First we couple them
|
|
||||||
# (so each dim has its corresponding shape)..
|
|
||||||
# and then we take unique
|
|
||||||
# (dicts are ordered by default in recent pythons,
|
|
||||||
# while set() doesn't preserve order)
|
|
||||||
dims_shape = []
|
|
||||||
for inner_dim, inner_shape in zip(dataset.dims, dataset.shape):
|
|
||||||
if isinstance(inner_dim, list):
|
|
||||||
# list of lists
|
|
||||||
dims_shape.extend([(dim, shape) for dim, shape in zip(inner_dim, inner_shape)])
|
|
||||||
else:
|
|
||||||
# single-layer list
|
|
||||||
dims_shape.append((inner_dim, inner_shape))
|
|
||||||
|
|
||||||
dims_shape = tuple(dict.fromkeys(dims_shape).keys())
|
|
||||||
|
|
||||||
# if we only have one possible dimension, it's equivalent to a list, so we just return the slot
|
|
||||||
if len(dims_shape) == 1 and self.parent:
|
|
||||||
quantity = QUANTITY_MAP[dataset.quantity]
|
|
||||||
slot = SlotDefinition(
|
|
||||||
name=dataset.name,
|
|
||||||
range = dtype,
|
|
||||||
description=dataset.doc,
|
|
||||||
required=quantity['required'],
|
|
||||||
multivalued=True
|
|
||||||
)
|
|
||||||
return slot
|
|
||||||
|
|
||||||
# now make slots for each of them
|
|
||||||
slots = []
|
|
||||||
for dims, shape in dims_shape:
|
|
||||||
# if a dim is present in all possible combinations of dims, make it required
|
|
||||||
if all([dims in inner_dim for inner_dim in dataset.dims]):
|
|
||||||
required = True
|
|
||||||
else:
|
|
||||||
required = False
|
|
||||||
|
|
||||||
# use cardinality to do shape
|
|
||||||
if shape == 'null':
|
|
||||||
cardinality = None
|
|
||||||
else:
|
|
||||||
cardinality = shape
|
|
||||||
|
|
||||||
slots.append(SlotDefinition(
|
|
||||||
name=dims,
|
|
||||||
required=required,
|
|
||||||
maximum_cardinality=cardinality,
|
|
||||||
minimum_cardinality=cardinality,
|
|
||||||
range=dtype
|
|
||||||
))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# and then the class is just a subclass of `Arraylike` (which is imported by default from `nwb.language.yaml`)
|
|
||||||
if name:
|
|
||||||
pass
|
|
||||||
elif dataset.neurodata_type_def:
|
|
||||||
name = dataset.neurodata_type_def
|
|
||||||
elif dataset.name:
|
|
||||||
name = dataset.name
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Dataset has no name or type definition, what do call it?")
|
|
||||||
|
|
||||||
name = '__'.join([name, 'Array'])
|
|
||||||
|
|
||||||
array_class = ClassDefinition(
|
|
||||||
name=name,
|
|
||||||
is_a="Arraylike",
|
|
||||||
attributes=slots
|
|
||||||
)
|
|
||||||
return array_class
|
|
||||||
|
|
||||||
|
|
||||||
def handle_dtype(self, dtype: DTypeType | None) -> str:
|
def handle_dtype(self, dtype: DTypeType | None) -> str:
|
||||||
if isinstance(dtype, ReferenceDtype):
|
if isinstance(dtype, ReferenceDtype):
|
||||||
return dtype.target_type
|
return dtype.target_type
|
||||||
|
@ -201,128 +161,49 @@ class ClassAdapter(Adapter):
|
||||||
# flat dtype
|
# flat dtype
|
||||||
return dtype
|
return dtype
|
||||||
|
|
||||||
def build_attrs(self, cls: Dataset | Group) -> List[SlotDefinition]:
|
def build_name_slot(self) -> SlotDefinition:
|
||||||
attrs = [
|
|
||||||
SlotDefinition(
|
|
||||||
name=attr.name,
|
|
||||||
description=attr.doc,
|
|
||||||
range=self.handle_dtype(attr.dtype),
|
|
||||||
) for attr in cls.attributes
|
|
||||||
]
|
|
||||||
|
|
||||||
return attrs
|
|
||||||
|
|
||||||
def build_subclasses(self, cls: Dataset | Group) -> BuildResult:
|
|
||||||
"""
|
"""
|
||||||
Build nested groups and datasets
|
If a class has a name, then that name should be a slot with a
|
||||||
|
fixed value.
|
||||||
|
|
||||||
|
If a class does not have a name, then name should be a required attribute
|
||||||
|
|
||||||
|
References:
|
||||||
|
https://github.com/NeurodataWithoutBorders/nwb-schema/issues/552#issuecomment-1700319001
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
Create ClassDefinitions for each, but then also create SlotDefinitions that
|
|
||||||
will be used as attributes linking the main class to the subclasses
|
|
||||||
"""
|
"""
|
||||||
# build and flatten nested classes
|
if self.cls.name:
|
||||||
nested_classes = [ClassAdapter(cls=dset, parent=self) for dset in cls.datasets]
|
name_slot = SlotDefinition(
|
||||||
nested_classes.extend([ClassAdapter(cls=grp, parent=self) for grp in cls.groups])
|
name='name',
|
||||||
nested_res = BuildResult()
|
required=True,
|
||||||
for subclass in nested_classes:
|
ifabsent=self.cls.name,
|
||||||
# handle the special case where `VectorData` is subclasssed without any dims or attributes
|
equals_string=self.cls.name,
|
||||||
# which just gets instantiated as a 1-d array in HDF5
|
range='string'
|
||||||
if subclass.cls.neurodata_type_inc == 'VectorData' and \
|
)
|
||||||
not subclass.cls.dims and \
|
|
||||||
not subclass.cls.shape and \
|
|
||||||
not subclass.cls.attributes \
|
|
||||||
and subclass.cls.name:
|
|
||||||
this_slot = SlotDefinition(
|
|
||||||
name=subclass.cls.name,
|
|
||||||
description=subclass.cls.doc,
|
|
||||||
range=self.handle_dtype(subclass.cls.dtype),
|
|
||||||
multivalued=True
|
|
||||||
)
|
|
||||||
nested_res.slots.append(this_slot)
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Simplify datasets that are just a single value
|
|
||||||
elif isinstance(subclass.cls, Dataset) and \
|
|
||||||
not subclass.cls.neurodata_type_inc and \
|
|
||||||
not subclass.cls.attributes and \
|
|
||||||
not subclass.cls.dims and \
|
|
||||||
not subclass.cls.shape and \
|
|
||||||
subclass.cls.name:
|
|
||||||
this_slot = SlotDefinition(
|
|
||||||
name=subclass.cls.name,
|
|
||||||
description=subclass.cls.doc,
|
|
||||||
range=self.handle_dtype(subclass.cls.dtype),
|
|
||||||
**QUANTITY_MAP[subclass.cls.quantity]
|
|
||||||
)
|
|
||||||
nested_res.slots.append(this_slot)
|
|
||||||
continue
|
|
||||||
|
|
||||||
else:
|
|
||||||
this_slot = SlotDefinition(
|
|
||||||
name=subclass._get_name(),
|
|
||||||
description=subclass.cls.doc,
|
|
||||||
range=subclass._get_full_name(),
|
|
||||||
**QUANTITY_MAP[subclass.cls.quantity]
|
|
||||||
)
|
|
||||||
nested_res.slots.append(this_slot)
|
|
||||||
|
|
||||||
if subclass.cls.name is None and subclass.cls.neurodata_type_def is None:
|
|
||||||
# anonymous group that's just an inc, we only need the slot since the class is defined elsewhere
|
|
||||||
continue
|
|
||||||
|
|
||||||
this_build = subclass.build()
|
|
||||||
nested_res += this_build
|
|
||||||
return nested_res
|
|
||||||
|
|
||||||
|
|
||||||
def build(self) -> BuildResult:
|
|
||||||
|
|
||||||
# Build this class
|
|
||||||
if self.parent is not None:
|
|
||||||
name = self._get_full_name()
|
|
||||||
else:
|
else:
|
||||||
name = self._get_name()
|
name_slot = SlotDefinition(
|
||||||
|
name='name',
|
||||||
|
required=True,
|
||||||
|
range='string'
|
||||||
|
)
|
||||||
|
return name_slot
|
||||||
|
|
||||||
# Get vanilla top-level attributes
|
def build_self_slot(self) -> SlotDefinition:
|
||||||
attrs = self.build_attrs(self.cls)
|
"""
|
||||||
|
If we are a child class, we make a slot so our parent can refer to us
|
||||||
# unnest and build subclasses in datasets and groups
|
"""
|
||||||
if isinstance(self.cls, Group):
|
return SlotDefinition(
|
||||||
# only groups have sub-datasets and sub-groups
|
name=self._get_attr_name(),
|
||||||
# split out the recursion step rather than making purely recursive because
|
|
||||||
# top-level datasets and groups are handled differently - they have names,
|
|
||||||
# and so we need to split out which things we unnest and which things
|
|
||||||
# can just be slots because they are already defined without knowing about
|
|
||||||
# the global state of the schema build.
|
|
||||||
nested_res = self.build_subclasses(self.cls)
|
|
||||||
attrs.extend(nested_res.slots)
|
|
||||||
else:
|
|
||||||
# must be a dataset
|
|
||||||
nested_res = BuildResult()
|
|
||||||
arraylike = self.handle_arraylike(self.cls, self._get_full_name())
|
|
||||||
if arraylike:
|
|
||||||
# if the arraylike thing can only have one dimension, it's equivalent to a list, so
|
|
||||||
# we just add a multivalued slot
|
|
||||||
if isinstance(arraylike, SlotDefinition):
|
|
||||||
attrs.append(arraylike)
|
|
||||||
else:
|
|
||||||
# make a slot for the arraylike class
|
|
||||||
attrs.append(
|
|
||||||
SlotDefinition(
|
|
||||||
name='array',
|
|
||||||
range=arraylike.name
|
|
||||||
)
|
|
||||||
)
|
|
||||||
nested_res.classes.append(arraylike)
|
|
||||||
|
|
||||||
|
|
||||||
cls = ClassDefinition(
|
|
||||||
name = name,
|
|
||||||
is_a = self.cls.neurodata_type_inc,
|
|
||||||
description=self.cls.doc,
|
description=self.cls.doc,
|
||||||
attributes=attrs,
|
range=self._get_full_name(),
|
||||||
)
|
**QUANTITY_MAP[self.cls.quantity]
|
||||||
res = BuildResult(
|
|
||||||
classes = [cls, *nested_res.classes]
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return res
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
198
nwb_linkml/adapters/dataset.py
Normal file
198
nwb_linkml/adapters/dataset.py
Normal file
|
@ -0,0 +1,198 @@
|
||||||
|
"""
|
||||||
|
Adapter for NWB datasets to linkml Classes
|
||||||
|
"""
|
||||||
|
from typing import Optional, List
|
||||||
|
|
||||||
|
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
|
||||||
|
from pydantic import PrivateAttr
|
||||||
|
|
||||||
|
from nwb_schema_language import Dataset, ReferenceDtype, CompoundDtype, DTypeType
|
||||||
|
from nwb_linkml.adapters.classes import ClassAdapter
|
||||||
|
from nwb_linkml.adapters.adapter import BuildResult
|
||||||
|
from nwb_linkml.maps import QUANTITY_MAP
|
||||||
|
|
||||||
|
class DatasetAdapter(ClassAdapter):
    """
    Adapter for a single NWB dataset.

    :meth:`.build` starts from the generic class made by ``build_base`` and then
    passes the result through each ``handle_*`` method in turn. A handler either
    returns the result untouched, adds to it, or replaces it with a bare slot.
    """
    cls: Dataset

    _handlers: List[str] = PrivateAttr(default_factory=list)
    """Keep track of which handlers have been called"""

    def build(self) -> BuildResult:
        """
        Build the linkml representation of this dataset.

        Returns:
            :class:`.BuildResult`: the result of ``build_base`` after every handler has been applied
        """
        res = self.build_base()

        res = self.handle_arraylike(res, self.cls, self._get_full_name())
        res = self.handle_1d_vector(res)
        res = self.handle_scalar(res)

        return res

    def handle_scalar(self, res:BuildResult) -> BuildResult:
        """
        Simplify datasets that are just a single value.

        A named dataset with no ``neurodata_type_inc``, attributes, dims or shape
        doesn't need its own class, so the incoming result is thrown out and replaced
        with one that only holds a slot for the value.

        Args:
            res (:class:`.BuildResult`): The build result so far

        Returns:
            :class:`.BuildResult`: a slot-only result if this dataset is a scalar, otherwise ``res`` unchanged
        """
        # an unset ``neurodata_type_inc`` can never be 'VectorData', so a single
        # falsiness check covers both conditions
        if not self.cls.neurodata_type_inc and \
                not self.cls.attributes and \
                not self.cls.dims and \
                not self.cls.shape and \
                self.cls.name:
            self._handlers.append('scalar')
            # throw out the class that would have been made for us
            # we just need a slot
            this_slot = SlotDefinition(
                name=self.cls.name,
                description=self.cls.doc,
                range=self.handle_dtype(self.cls.dtype),
                **QUANTITY_MAP[self.cls.quantity]
            )
            res = BuildResult(slots=[this_slot])

        return res

    def handle_1d_vector(self, res: BuildResult) -> BuildResult:
        """
        Handle the special case where `VectorData` is subclassed without any dims or attributes,
        which just gets instantiated as a 1-d array in HDF5.

        Args:
            res (:class:`.BuildResult`): The build result so far

        Returns:
            :class:`.BuildResult`: a result holding a single multivalued slot if this
            dataset is a bare, named `VectorData`, otherwise ``res`` unchanged
        """
        if self.cls.neurodata_type_inc == 'VectorData' and \
                not self.cls.dims and \
                not self.cls.shape and \
                not self.cls.attributes and \
                self.cls.name:
            self._handlers.append('1d_vector')
            this_slot = SlotDefinition(
                name=self.cls.name,
                description=self.cls.doc,
                range=self.handle_dtype(self.cls.dtype),
                multivalued=True
            )
            # No need to make a class for us, so we replace the existing build results
            res = BuildResult(slots=[this_slot])

        return res

    def handle_arraylike(self, res: BuildResult, dataset: Dataset, name: Optional[str] = None) -> BuildResult:
        """
        Handling the

        - dims
        - shape
        - dtype

        fields as they are used in datasets. We'll use the :class:`.Arraylike` class to imitate them.

        Specifically:

        - Each slot within a subclass indicates a possible dimension.
        - Only dimensions that are present in all the dimension specifiers in the
          original schema are required.
        - Shape requirements are indicated using max/min cardinalities on the slot.
        - The arraylike object should be stored in the `array` slot on the containing class
          (since there are already properties named `data`)

        If neither `dims` nor `shape` is defined, or the dataset includes `VectorData`,
        ``res`` is returned unchanged.

        Args:
            res (:class:`.BuildResult`): The build result so far. Its first class is the
                one that receives the new attribute
            dataset (:class:`nwb_schema_language.Dataset`): The dataset defining the arraylike
            name (str): If present, override the name of the class before appending ``__Array``
                (we don't use _get_full_name here because we want to eventually decouple these functions from this adapter
                class, which is sort of a development crutch. Ideally all these methods would just work on base nwb schema language types)

        Returns:
            :class:`.BuildResult`: ``res``, with either a multivalued slot (single possible
            dimension on a child dataset) or an ``array`` slot plus its ``Arraylike``
            subclass added

        Raises:
            ValueError: if only one of `dims` and `shape` is defined, or if the dataset
                has neither a name nor a type definition to name the array class after
        """
        if not any((dataset.dims, dataset.shape)):
            # none of the required properties are defined, that's fine.
            return res
        elif not all((dataset.dims, dataset.shape)):
            # need to have both if one is present!
            raise ValueError("A dataset needs both dims and shape to define an arraylike object")

        # Special cases
        if dataset.neurodata_type_inc == 'VectorData':
            # VectorData is not given an Arraylike class here.
            # NOTE(review): this comment used to point at `handle_vectorlike`, which is not
            # defined on this class - confirm where VectorData with dims/shape is handled
            return res

        # The schema language doesn't have a way of specifying a dataset/group is "abstract"
        # and yet hdmf-common says you don't need a dtype if the dataset is "abstract"
        # so....
        dtype = self.handle_dtype(dataset.dtype)

        # dims and shape are lists of lists. First we couple them
        # (so each dim has its corresponding shape)..
        # and then we take unique
        # (dicts are ordered by default in recent pythons,
        # while set() doesn't preserve order)
        dims_shape = []
        for inner_dim, inner_shape in zip(dataset.dims, dataset.shape):
            if isinstance(inner_dim, list):
                # list of lists
                dims_shape.extend(zip(inner_dim, inner_shape))
            else:
                # single-layer list
                dims_shape.append((inner_dim, inner_shape))

        dims_shape = tuple(dict.fromkeys(dims_shape))

        # if we only have one possible dimension, it's equivalent to a list, so we just return the slot
        if len(dims_shape) == 1 and self.parent:
            quantity = QUANTITY_MAP[dataset.quantity]
            slot = SlotDefinition(
                name=dataset.name,
                range=dtype,
                description=dataset.doc,
                required=quantity['required'],
                multivalued=True
            )
            res.classes[0].attributes.update({dataset.name: slot})
            self._handlers.append('arraylike-1d')
            return res

        # now make slots for each of them
        slots = []
        for dims, shape in dims_shape:
            # if a dim is present in all possible combinations of dims, make it required
            # NOTE(review): when `dataset.dims` is a single-layer list of strings,
            # `dims in inner_dim` is a substring test against each other dim name
            # rather than list membership - confirm that is intended
            required = all(dims in inner_dim for inner_dim in dataset.dims)

            # use cardinality to do shape
            cardinality = None if shape == 'null' else shape

            slots.append(SlotDefinition(
                name=dims,
                required=required,
                maximum_cardinality=cardinality,
                minimum_cardinality=cardinality,
                range=dtype
            ))

        # and then the class is just a subclass of `Arraylike` (which is imported by default from `nwb.language.yaml`)
        if name:
            pass
        elif dataset.neurodata_type_def:
            name = dataset.neurodata_type_def
        elif dataset.name:
            name = dataset.name
        else:
            raise ValueError("Dataset has no name or type definition, what do call it?")

        name = '__'.join([name, 'Array'])

        array_class = ClassDefinition(
            name=name,
            is_a="Arraylike",
            attributes=slots
        )
        # make a slot for the arraylike class
        array_slot = SlotDefinition(
            name='array',
            range=array_class.name
        )

        res.classes.append(array_class)
        # the slot lives on the containing class as an attribute
        # rather than in the result's top-level slots
        res.classes[0].attributes.update({'array': array_slot})
        self._handlers.append('arraylike')

        return res
|
89
nwb_linkml/adapters/group.py
Normal file
89
nwb_linkml/adapters/group.py
Normal file
|
@ -0,0 +1,89 @@
|
||||||
|
"""
|
||||||
|
Adapter for NWB groups to linkml Classes
|
||||||
|
"""
|
||||||
|
import pdb
|
||||||
|
from typing import List
|
||||||
|
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
|
||||||
|
|
||||||
|
from nwb_schema_language import Dataset, Group, ReferenceDtype, CompoundDtype, DTypeType
|
||||||
|
from nwb_linkml.adapters.classes import ClassAdapter
|
||||||
|
from nwb_linkml.adapters.dataset import DatasetAdapter
|
||||||
|
from nwb_linkml.adapters.adapter import BuildResult
|
||||||
|
from nwb_linkml.maps import QUANTITY_MAP
|
||||||
|
|
||||||
|
class GroupAdapter(ClassAdapter):
    """
    Adapter for a single NWB group.

    A group is built by first building its nested datasets and groups, and then
    building the group's own class with the slots of its children as attributes.
    """
    cls: Group

    def build(self) -> BuildResult:
        """
        Build the linkml class for this group along with the classes of everything nested in it.

        Returns:
            :class:`.BuildResult`: this group's own build result, followed by the classes
            built for its nested datasets and groups
        """
        nested_res = self.build_subclasses()
        # we don't propagate slots up to the next level since they are meant for this
        # level (ie. a way to refer to our children)
        res = self.build_base(extra_attrs=nested_res.slots)
        # we do propagate classes tho
        res.classes.extend(nested_res.classes)

        return res

    def handle_children(self, children: List[Group]) -> BuildResult:
        """
        Make a special LinkML `children` slot that can
        have any number of the objects that are of `neurodata_type_inc` class

        Args:
            children (List[:class:`.Group`]): Child groups

        Returns:
            :class:`.BuildResult`: a result holding only the multivalued `children` slot
        """
        child_slot = SlotDefinition(
            name='children',
            multivalued=True,
            any_of=[{'range': child.neurodata_type_inc} for child in children]
        )
        return BuildResult(slots=[child_slot])

    def build_subclasses(self) -> BuildResult:
        """
        Build nested groups and datasets

        Create ClassDefinitions for each, but then also create SlotDefinitions that
        will be used as attributes linking the main class to the subclasses

        Returns:
            :class:`.BuildResult`: the combined results of the nested datasets followed by the nested groups
        """
        # Datasets are simple, they are terminal classes, and all logic
        # for creating slots vs. classes is handled by the adapter class
        dataset_res = BuildResult()
        for dset in self.cls.datasets:
            dset_adapter = DatasetAdapter(cls=dset, parent=self)
            dataset_res += dset_adapter.build()

        # Groups are a bit more complicated because they can also behave like
        # range declarations:
        # eg. a group can have multiple groups with `neurodata_type_inc`, no name, and quantity of *,
        # the group can then contain any number of groups of those included types as direct children
        #
        # TODO: those anonymous `*` groups could be collected and passed to `handle_children`
        # instead of being built like every other group, but
        # actually i'm not sure we have to special case this, we could handle it in
        # i/o instead. For now every nested group is built the same way.
        group_res = BuildResult()
        for group in self.cls.groups:
            group_adapter = GroupAdapter(cls=group, parent=self)
            group_res += group_adapter.build()

        res = dataset_res + group_res

        return res
|
|
@ -8,7 +8,8 @@ from pathlib import Path
|
||||||
from pydantic import Field
|
from pydantic import Field
|
||||||
|
|
||||||
from nwb_linkml.adapters.adapter import Adapter, BuildResult
|
from nwb_linkml.adapters.adapter import Adapter, BuildResult
|
||||||
from nwb_linkml.adapters.classes import ClassAdapter
|
from nwb_linkml.adapters.dataset import DatasetAdapter
|
||||||
|
from nwb_linkml.adapters.group import GroupAdapter
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from nwb_linkml.adapters.namespaces import NamespacesAdapter
|
from nwb_linkml.adapters.namespaces import NamespacesAdapter
|
||||||
|
|
||||||
|
@ -68,17 +69,17 @@ class SchemaAdapter(Adapter):
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
classes = [ClassAdapter(cls=dset) for dset in self.datasets]
|
res = BuildResult()
|
||||||
classes.extend(ClassAdapter(cls=group) for group in self.groups)
|
for dset in self.datasets:
|
||||||
built_results = None
|
res += DatasetAdapter(cls=dset).build()
|
||||||
for cls in classes:
|
for group in self.groups:
|
||||||
if built_results is None:
|
res += GroupAdapter(cls=group).build()
|
||||||
built_results = cls.build()
|
|
||||||
else:
|
if len(res.slots) > 0:
|
||||||
built_results += cls.build()
|
raise RuntimeError('Generated schema in this translation can only have classes, all slots should be attributes within a class')
|
||||||
|
|
||||||
if self.split:
|
if self.split:
|
||||||
sch_split = self.split_subclasses(built_results)
|
sch_split = self.split_subclasses(res)
|
||||||
return sch_split
|
return sch_split
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
@ -86,9 +87,9 @@ class SchemaAdapter(Adapter):
|
||||||
name = self.name,
|
name = self.name,
|
||||||
id = self.name,
|
id = self.name,
|
||||||
imports = [i.name for i in self.imports],
|
imports = [i.name for i in self.imports],
|
||||||
classes=built_results.classes,
|
classes=res.classes,
|
||||||
slots=built_results.slots,
|
slots=res.slots,
|
||||||
types=built_results.types
|
types=res.types
|
||||||
)
|
)
|
||||||
# every schema needs the language elements
|
# every schema needs the language elements
|
||||||
sch.imports.append('nwb.language')
|
sch.imports.append('nwb.language')
|
||||||
|
|
6
nwb_schema_language/docs/CHANGELOG.md
Normal file
6
nwb_schema_language/docs/CHANGELOG.md
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
# 0.1.1
|
||||||
|
|
||||||
|
Revised models to make `name` an optional slot regardless of the presence/absence
|
||||||
|
of `neurodata_type_def`. The naming of individual classes within the schema will be
|
||||||
|
handled by `nwb_linkml` - see:
|
||||||
|
https://github.com/NeurodataWithoutBorders/nwb-schema/issues/552
|
Loading…
Reference in a new issue