mirror of
https://github.com/p2p-ld/nwb-linkml.git
synced 2025-01-10 14:14:27 +00:00
321 lines
12 KiB
Python
321 lines
12 KiB
Python
"""
|
|
Adapter for NWB datasets to linkml Classes
|
|
"""
|
|
import pdb
|
|
from typing import Optional, List
|
|
import warnings
|
|
|
|
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
|
|
from pydantic import PrivateAttr
|
|
|
|
from nwb_schema_language import Dataset, ReferenceDtype, CompoundDtype, DTypeType
|
|
from nwb_linkml.adapters.classes import ClassAdapter, camel_to_snake
|
|
from nwb_linkml.adapters.adapter import BuildResult
|
|
from nwb_linkml.maps import QUANTITY_MAP
|
|
|
|
class DatasetAdapter(ClassAdapter):
    """
    Adapter that converts an NWB schema language ``Dataset`` into linkml
    classes and/or slots.

    :meth:`build` starts from the result of ``build_base`` (defined outside
    this class) and passes it through a series of special-case handlers.
    Each handler either returns the result unchanged, amends it in place,
    or replaces it entirely with a slot-only :class:`BuildResult`.
    """
    cls: Dataset

    _handlers: List[str] = PrivateAttr(default_factory=list)
    """Keep track of which handlers have been called"""

    def build(self) -> BuildResult:
        """
        Build the linkml representation of the wrapped dataset.

        Returns:
            BuildResult: the base build result after every handler has had
            the chance to amend or replace it.

        Raises:
            RuntimeError: if more than one handler recorded itself in
                ``_handlers`` during this call.
        """
        # Start each build with a clean slate. Without this, calling
        # ``build`` twice on the same adapter accumulates handler names
        # and trips the "only one handler" check below spuriously.
        self._handlers = []

        res = self.build_base()

        # Order matters: each handler receives the previous handler's result
        # and may replace it wholesale.
        res = self.drop_dynamic_table(res)
        res = self.handle_arraylike(res, self.cls, self._get_full_name())
        res = self.handle_1d_vector(res)
        res = self.handle_listlike(res)
        res = self.handle_scalar(res)

        if len(self._handlers) > 1:
            raise RuntimeError(f"Only one handler should have been triggered, instead triggered {self._handlers}")

        return res

    def handle_scalar(self, res: BuildResult) -> BuildResult:
        """
        Handle named datasets that hold a single value.

        Applies only when the dataset has a name and has no
        ``neurodata_type_inc``, ``dims`` or ``shape``:

        - without attributes, the build result is replaced by a single slot
          named after the dataset (handler ``'scalar'``);
        - with attributes, a required ``value`` slot is added to the first
          class of ``res`` (handler ``'scalar_class'``).

        Args:
            res (BuildResult): the build result so far.

        Returns:
            BuildResult: the replaced, amended, or untouched result.
        """
        cls = self.cls

        # Preconditions shared by both scalar cases. A falsy
        # ``neurodata_type_inc`` also excludes 'VectorData'.
        if cls.neurodata_type_inc or cls.dims or cls.shape or not cls.name:
            return res

        if not cls.attributes:
            # Simplify datasets that are just a single value:
            # throw out the class that would have been made for us,
            # we just need a slot
            self._handlers.append('scalar')
            this_slot = SlotDefinition(
                name=cls.name,
                description=cls.doc,
                range=self.handle_dtype(cls.dtype),
                **QUANTITY_MAP[cls.quantity]
            )
            return BuildResult(slots=[this_slot])

        # The scalar-valued class has attributes, so append a
        # 'value' slot that holds the (scalar) value of the dataset
        self._handlers.append('scalar_class')

        # quantity (including requirement) is handled by the
        # parent slot - the value is required if the value class is
        # supplied.
        # ie.
        # Optional[ScalarClass] = None
        # class ScalarClass:
        #     value: dtype
        value_slot = SlotDefinition(
            name='value',
            range=self.handle_dtype(cls.dtype),
            required=True
        )
        res.classes[0].attributes['value'] = value_slot

        return res

    def handle_1d_vector(self, res: BuildResult) -> BuildResult:
        """
        Handle the special case where `VectorData` is subclassed without any
        dims, shape or attributes, which just gets instantiated as a 1-d
        array in HDF5.

        When it applies, the build result is replaced by a single
        multivalued slot named after the dataset (handler ``'1d_vector'``).

        Args:
            res (BuildResult): the build result so far.

        Returns:
            BuildResult: the replacement result, or ``res`` unchanged.
        """
        cls = self.cls
        is_bare_vector = (
            cls.neurodata_type_inc == 'VectorData'
            and not cls.dims
            and not cls.shape
            and not cls.attributes
            and cls.name
        )
        if not is_bare_vector:
            return res

        self._handlers.append('1d_vector')
        this_slot = SlotDefinition(
            name=cls.name,
            description=cls.doc,
            range=self.handle_dtype(cls.dtype),
            multivalued=True
        )
        # No need to make a class for us, so we replace the existing build results
        return BuildResult(slots=[this_slot])

    def handle_listlike(self, res: BuildResult) -> BuildResult:
        """
        Handle cases where the dataset is just a list of a specific type.

        Applies to named datasets whose ``dims`` has exactly one entry that
        is either a plain (non-list) value or a one-element list. The build
        result is then replaced by a single multivalued slot. This handler
        does not record itself in ``_handlers``.

        Examples:

            datasets:
            - name: file_create_date
              dtype: isodatetime
              dims:
              - num_modifications
              shape:
              - null

        Args:
            res (BuildResult): the build result so far.

        Returns:
            BuildResult: the replacement result, or ``res`` unchanged.
        """
        # Treat missing dims as empty so a dataset with ``dims=None`` is
        # skipped instead of raising a TypeError on iteration.
        dims = self.cls.dims or []
        if not self.cls.name or len(dims) != 1:
            return res

        only_dim = dims[0]
        # single-layer list: the sole entry is not itself a list
        # nested list: the sole entry is a list with exactly one item
        if isinstance(only_dim, list) and len(only_dim) != 1:
            return res

        return BuildResult(
            slots=[
                SlotDefinition(
                    name=self.cls.name,
                    multivalued=True,
                    range=self.handle_dtype(self.cls.dtype),
                    description=self.cls.doc,
                    required=self.cls.quantity not in ('*', '?')
                )
            ]
        )

    def handle_arraylike(self, res: BuildResult, dataset: Dataset, name: Optional[str] = None) -> BuildResult:
        """
        Handling the

        - dims
        - shape
        - dtype

        fields as they are used in datasets. We'll use the :class:`.Arraylike` class to imitate them.

        Specifically:

        - Each slot within a subclass indicates a possible dimension.
        - Only dimensions that are present in all the dimension specifiers in the
          original schema are required.
        - Shape requirements are indicated using max/min cardinalities on the slot.
        - The arraylike object should be stored in the `array` slot on the containing class
          (since there are already properties named `data`)

        If `dims` and `shape` are not both defined, or the dataset includes
        `VectorData`, ``res`` is returned unchanged (with a warning when only
        one of `dims`/`shape` is present).

        Args:
            res (BuildResult): the build result so far; amended in place.
            dataset (:class:`nwb_schema_language.Dataset`): The dataset defining the arraylike
            name (str): If present, override the name of the class before appending ``__Array``
                (we don't use _get_full_name here because we want to eventually decouple these functions from this adapter
                class, which is sort of a development crutch. Ideally all these methods would just work on base nwb schema language types)

        Returns:
            BuildResult: ``res``, either untouched, with a multivalued slot
            added to its first class (handler ``'arraylike-1d'``), or with a
            new ``<name>__Array`` class appended and an ``array`` slot added
            to its first class (handler ``'arraylike'``).

        Raises:
            ValueError: if no ``name`` is given and the dataset has neither a
                ``neurodata_type_def`` nor a ``name``.
        """
        if not any((dataset.dims, dataset.shape)):
            # none of the required properties are defined, that's fine.
            return res
        if not all((dataset.dims, dataset.shape)):
            # need to have both if one is present!
            warnings.warn(
                "A dataset needs both dims and shape to define an arraylike object. "
                "This is allowed for compatibility with some badly formatted NWB files, "
                "but should in general be avoided. Treating like we dont have an array"
            )
            return res

        # Special cases
        if dataset.neurodata_type_inc == 'VectorData':
            # Handled by `handle_1d_vector` instead
            return res

        # The schema language doesn't have a way of specifying a dataset/group is "abstract"
        # and yet hdmf-common says you don't need a dtype if the dataset is "abstract"
        # so....
        dtype = self.handle_dtype(dataset.dtype)

        # dims and shape are lists of lists. First we couple them
        # (so each dim has its corresponding shape)..
        # and then we take unique
        # (dicts are ordered by default in recent pythons,
        # while set() doesn't preserve order)
        dims_shape = []
        for inner_dim, inner_shape in zip(dataset.dims, dataset.shape):
            if isinstance(inner_dim, list):
                # list of lists
                dims_shape.extend(zip(inner_dim, inner_shape))
            elif isinstance(inner_shape, list):
                # Some badly formatted schema will have the shape be a LoL but the dims won't be...
                dims_shape.extend((inner_dim, shape) for shape in inner_shape)
            else:
                # single-layer list
                dims_shape.append((inner_dim, inner_shape))

        dims_shape = tuple(dict.fromkeys(dims_shape))

        # if we only have one possible dimension, it's equivalent to a list, so we just return the slot
        if len(dims_shape) == 1 and self.parent:
            quantity = QUANTITY_MAP[dataset.quantity]
            slot = SlotDefinition(
                name=dataset.name,
                range=dtype,
                description=dataset.doc,
                required=quantity['required'],
                multivalued=True
            )
            res.classes[0].attributes[dataset.name] = slot
            self._handlers.append('arraylike-1d')
            return res

        # Loop-invariant: when dims is a single flat list of dimension names
        # (no nested lists), every dimension is required.
        dims_are_flat = not any(isinstance(inner_dim, list) for inner_dim in dataset.dims)

        # now make slots for each of them
        slots = []
        for dim, shape in dims_shape:
            # a dim is required if there is just a single list of possible
            # dimensions, or if it is present in all possible combinations of dims
            required = dims_are_flat or all(dim in inner_dim for inner_dim in dataset.dims)

            # use cardinality to do shape
            cardinality = None if shape == 'null' else shape

            slots.append(SlotDefinition(
                name=dim,
                required=required,
                maximum_cardinality=cardinality,
                minimum_cardinality=cardinality,
                range=dtype
            ))

        # and then the class is just a subclass of `Arraylike` (which is imported by default from `nwb.language.yaml`)
        if not name:
            # fall back to the type definition, then to the dataset's own name
            name = dataset.neurodata_type_def or dataset.name
            if not name:
                raise ValueError("Dataset has no name or type definition, what do call it?")

        name = '__'.join([name, 'Array'])

        array_class = ClassDefinition(
            name=name,
            is_a="Arraylike",
            attributes=slots
        )
        # make a slot for the arraylike class
        array_slot = SlotDefinition(
            name='array',
            range=array_class.name
        )

        res.classes.append(array_class)
        res.classes[0].attributes['array'] = array_slot
        self._handlers.append('arraylike')

        return res

    def drop_dynamic_table(self, res: BuildResult) -> BuildResult:
        """
        DynamicTables in hdmf are so special-cased that we have to just special-case them ourselves.

        Typically they include a '*' quantitied, unnamed VectorData object to contain arbitrary columns,
        this would normally get converted to its own container class, but since they're unnamed they conflict with
        names in the containing scope.

        We just convert them into multivalued slots and don't use them

        Two cases are handled, both only for datasets with no ``name`` and
        no ``neurodata_type_def``:

        - ``neurodata_type_inc`` is ``VectorIndex`` or ``VectorData`` and the
          quantity is ``'*'`` (handler ``'dynamic_table'``): replaced by an
          optional, multivalued slot;
        - any other ``neurodata_type_inc`` with quantity ``'*'`` or ``'+'``
          (handler ``'generic_container'``): replaced by a slot whose
          quantity settings come from ``QUANTITY_MAP``.

        Args:
            res (BuildResult): the build result so far.

        Returns:
            BuildResult: the replacement result, or ``res`` unchanged.
        """
        cls = self.cls

        # Both special cases only apply to anonymous, type-including datasets
        if cls.name is not None or cls.neurodata_type_def is not None:
            return res

        if cls.neurodata_type_inc in ('VectorIndex', 'VectorData') and cls.quantity == '*':
            self._handlers.append('dynamic_table')
            this_slot = SlotDefinition(
                name=camel_to_snake(cls.neurodata_type_inc),
                description=cls.doc,
                range=cls.neurodata_type_inc,
                required=False,
                multivalued=True
            )
            # No need to make a class for us, so we replace the existing build results
            return BuildResult(slots=[this_slot])

        if cls.neurodata_type_inc and cls.quantity in ('*', '+'):
            self._handlers.append('generic_container')
            this_slot = SlotDefinition(
                name=camel_to_snake(cls.neurodata_type_inc),
                description=cls.doc,
                range=cls.neurodata_type_inc,
                **QUANTITY_MAP[cls.quantity]
            )
            # No need to make a class for us, so we replace the existing build results
            return BuildResult(slots=[this_slot])

        return res
|
|
|