"""
Adapters to linkML classes
"""
from typing import List, Optional

from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition

from nwb_schema_language import Dataset, Group, ReferenceDtype, CompoundDtype, DTypeType
from nwb_linkml.adapters.adapter import Adapter, BuildResult
from nwb_linkml.maps import QUANTITY_MAP
from nwb_linkml.lang_elements import Arraylike


class ClassAdapter(Adapter):
    """
    Adapter to class-like things in linkml, including datasets and groups
    """
    cls: Dataset | Group
    parent: Optional['ClassAdapter'] = None
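
    # Rough usage sketch (assumed from how build_subclasses constructs adapters
    # below, not a documented API):
    #
    #   adapter = ClassAdapter(cls=some_dataset_or_group)
    #   result = adapter.build()   # -> BuildResult of linkml classes and slots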

    def _get_full_name(self) -> str:
        """
        The full name of the object in the generated linkml.

        Distinct from 'name', which is the name used for the attr/slot
        within the parent class.
        """
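        # e.g. (hypothetically) a dataset named 'data' nested inside a class with
        # full name 'TimeSeries' would get the full name 'TimeSeries__data'.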
        if self.cls.neurodata_type_def:
            name = self.cls.neurodata_type_def
        elif self.cls.name is not None:
            # not necessarily a unique name, so we combine parent names
            name_parts = []
            if self.parent is not None:
                name_parts.append(self.parent._get_full_name())

            name_parts.append(self.cls.name)
            name = '__'.join(name_parts)
        elif self.cls.neurodata_type_inc is not None:
            # again, this is against the schema, but is common
            name = self.cls.neurodata_type_inc
        else:
            raise ValueError('Not sure what our name is!')

        return name

    def _get_name(self) -> str:
        """
        Get the "regular" name, which is used as the name of the attr.

        Returns:
            str
        """
        # return self._get_full_name()
        name = None
        if self.cls.neurodata_type_def:
            name = self.cls.neurodata_type_def
        elif self.cls.name is not None:
            # we do have a unique name
            name = self.cls.name
        elif self.cls.neurodata_type_inc:
            # group members can be anonymous? this violates the schema but is common
            name = self.cls.neurodata_type_inc

        if name is None:
            raise ValueError(f'Class has no name!: {self.cls}')

        return name

    def handle_arraylike(self, dataset: Dataset, name: Optional[str] = None) -> Optional[ClassDefinition | SlotDefinition]:
        """
        Handling the

        - dims
        - shape
        - dtype

        fields as they are used in datasets. We'll use the :class:`.Arraylike` class to imitate them.

        Specifically:

        - Each slot within a subclass indicates a possible dimension.
        - Only dimensions that are present in all the dimension specifiers in the
          original schema are required.
        - Shape requirements are indicated using max/min cardinalities on the slot.
        - The arraylike object should be stored in the `array` slot on the containing class
          (since there are already properties named `data`)

        If any of `dims`, `shape`, or `dtype` are undefined, return `None`.

        Args:
            dataset (:class:`nwb_schema_language.Dataset`): The dataset defining the arraylike
            name (str): If present, override the name of the class before appending _Array.
                (We don't use _get_full_name here because we want to eventually decouple
                these functions from this adapter class, which is sort of a development
                crutch. Ideally all these methods would just work on base nwb schema
                language types.)
        """
        if not any((dataset.dims, dataset.shape)):
            # none of the required properties are defined, that's fine.
            return None
        elif not all((dataset.dims, dataset.shape)):
            # need to have both if one is present!
            raise ValueError("A dataset needs both dims and shape to define an arraylike object")
        # Special cases
        if dataset.neurodata_type_inc == 'VectorData':
            # Handle this in `handle_vectorlike` instead
            return None

        # The schema language doesn't have a way of specifying a dataset/group is "abstract",
        # and yet hdmf-common says you don't need a dtype if the dataset is "abstract",
        # so....
        dtype = self.handle_dtype(dataset.dtype)
        # dims and shape are lists of lists. First we couple them
        # (so each dim has its corresponding shape),
        # and then we take the unique pairs
        # (dicts are ordered by default in recent pythons,
        # while set() doesn't preserve order)
        dims_shape = []
        for inner_dim, inner_shape in zip(dataset.dims, dataset.shape):
            if isinstance(inner_dim, list):
                # list of lists
                dims_shape.extend([(dim, shape) for dim, shape in zip(inner_dim, inner_shape)])
            else:
                # single-layer list
                dims_shape.append((inner_dim, inner_shape))

        dims_shape = tuple(dict.fromkeys(dims_shape).keys())
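        # e.g. dims [["a"], ["a", "b"]] and shape [['null'], ['null', 3]]
        # couple to [("a", 'null'), ("a", 'null'), ("b", 3)] and deduplicate
        # to (("a", 'null'), ("b", 3)), preserving order.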
        # if we only have one possible dimension, it's equivalent to a list, so we just return the slot
        if len(dims_shape) == 1 and self.parent:
            quantity = QUANTITY_MAP[dataset.quantity]
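            # (QUANTITY_MAP is assumed, from its usage here and in build_subclasses,
            # to map an nwb quantity specifier to slot kwargs including at least
            # a 'required' key.)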
            slot = SlotDefinition(
                name=dataset.name,
                range=dtype,
                description=dataset.doc,
                required=quantity['required'],
                multivalued=True
            )
            return slot
        # now make slots for each of them
        slots = []
        for dims, shape in dims_shape:
            # if a dim is present in all possible combinations of dims, make it required
            if all([dims in inner_dim for inner_dim in dataset.dims]):
                required = True
            else:
                required = False

            # use cardinality to do shape
            if shape == 'null':
                cardinality = None
            else:
                cardinality = shape

            slots.append(SlotDefinition(
                name=dims,
                required=required,
                maximum_cardinality=cardinality,
                minimum_cardinality=cardinality,
                range=dtype
            ))
        # and then the class is just a subclass of `Arraylike`
        # (which is imported by default from `nwb.language.yaml`)
        if name:
            pass
        elif dataset.neurodata_type_def:
            name = dataset.neurodata_type_def
        elif dataset.name:
            name = dataset.name
        else:
            raise ValueError("Dataset has no name or type definition, so what do we call it?")

        name = '__'.join([name, 'Array'])
        array_class = ClassDefinition(
            name=name,
            is_a="Arraylike",
            attributes=slots
        )
        return array_class

    def handle_dtype(self, dtype: DTypeType | None) -> str:
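        # Sketch of the mapping, inferred from the branches below: a ReferenceDtype
        # resolves to its target type's name, a flat dtype string passes through
        # unchanged, and anything un-typed or compound collapses to 'AnyType' for now.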
        if isinstance(dtype, ReferenceDtype):
            return dtype.target_type
        elif dtype is None or dtype == []:
            # Some ill-defined datasets are "abstract" despite that not being in the schema language
            return 'AnyType'
        elif isinstance(dtype, list) and isinstance(dtype[0], CompoundDtype):
            # there is precisely one class that uses compound dtypes:
            # TimeSeriesReferenceVectorData.
            # Compound dtypes are able to define a ragged table according to the schema,
            # but are used in this single case equivalently to attributes,
            # so we'll... uh... treat them as slots.
            # TODO
            return 'AnyType'
            # raise NotImplementedError('got distracted, need to implement')
        else:
            # flat dtype
            return dtype

    def build_attrs(self, cls: Dataset | Group) -> List[SlotDefinition]:
        attrs = [
            SlotDefinition(
                name=attr.name,
                description=attr.doc,
                range=self.handle_dtype(attr.dtype),
            ) for attr in cls.attributes
        ]

        return attrs

    def build_subclasses(self, cls: Dataset | Group) -> BuildResult:
        """
        Build nested groups and datasets.

        Create ClassDefinitions for each, but then also create SlotDefinitions that
        will be used as attributes linking the main class to the subclasses.
        """
        # build and flatten nested classes
        nested_classes = [ClassAdapter(cls=dset, parent=self) for dset in cls.datasets]
        nested_classes.extend([ClassAdapter(cls=grp, parent=self) for grp in cls.groups])
        nested_res = BuildResult()
        for subclass in nested_classes:
            # handle the special case where `VectorData` is subclassed without any dims
            # or attributes, which just gets instantiated as a 1-d array in HDF5
            if subclass.cls.neurodata_type_inc == 'VectorData' and \
                    not subclass.cls.dims and \
                    not subclass.cls.shape and \
                    not subclass.cls.attributes and \
                    subclass.cls.name:
                this_slot = SlotDefinition(
                    name=subclass.cls.name,
                    description=subclass.cls.doc,
                    range=self.handle_dtype(subclass.cls.dtype),
                    multivalued=True
                )
                nested_res.slots.append(this_slot)
                continue
            # Simplify datasets that are just a single value
            elif isinstance(subclass.cls, Dataset) and \
                    not subclass.cls.neurodata_type_inc and \
                    not subclass.cls.attributes and \
                    not subclass.cls.dims and \
                    not subclass.cls.shape and \
                    subclass.cls.name:
                this_slot = SlotDefinition(
                    name=subclass.cls.name,
                    description=subclass.cls.doc,
                    range=self.handle_dtype(subclass.cls.dtype),
                    **QUANTITY_MAP[subclass.cls.quantity]
                )
                nested_res.slots.append(this_slot)
                continue
            else:
                this_slot = SlotDefinition(
                    name=subclass._get_name(),
                    description=subclass.cls.doc,
                    range=subclass._get_full_name(),
                    **QUANTITY_MAP[subclass.cls.quantity]
                )
                nested_res.slots.append(this_slot)

            if subclass.cls.name is None and subclass.cls.neurodata_type_def is None:
                # anonymous group that's just an inc; we only need the slot,
                # since the class is defined elsewhere
                continue

            this_build = subclass.build()
            nested_res += this_build
        return nested_res

    def build(self) -> BuildResult:
        # Build this class
        if self.parent is not None:
            name = self._get_full_name()
        else:
            name = self._get_name()

        # Get vanilla top-level attributes
        attrs = self.build_attrs(self.cls)

        # unnest and build subclasses in datasets and groups
        if isinstance(self.cls, Group):
            # only groups have sub-datasets and sub-groups.
            # The recursion step is split out rather than made purely recursive because
            # top-level datasets and groups are handled differently - they have names,
            # and so we need to split out which things we unnest and which things
            # can just be slots because they are already defined without knowing about
            # the global state of the schema build.
            nested_res = self.build_subclasses(self.cls)
            attrs.extend(nested_res.slots)
        else:
            # must be a dataset
            nested_res = BuildResult()
            arraylike = self.handle_arraylike(self.cls, self._get_full_name())
            if arraylike:
                # if the arraylike thing can only have one dimension, it's equivalent
                # to a list, so we just add a multivalued slot
                if isinstance(arraylike, SlotDefinition):
                    attrs.append(arraylike)
                else:
                    # make a slot for the arraylike class
                    attrs.append(
                        SlotDefinition(
                            name='array',
                            range=arraylike.name
                        )
                    )
                    nested_res.classes.append(arraylike)
        cls = ClassDefinition(
            name=name,
            is_a=self.cls.neurodata_type_inc,
            description=self.cls.doc,
            attributes=attrs,
        )
        res = BuildResult(
            classes=[cls, *nested_res.classes]
        )

        return res