mirror of
https://github.com/p2p-ld/nwb-linkml.git
synced 2025-01-10 14:14:27 +00:00
235 lines
7.4 KiB
Python
235 lines
7.4 KiB
Python
"""
Adapters to linkML classes
"""
|
|
import pdb
|
|
import re
|
|
from abc import abstractmethod
|
|
from typing import List, Optional
|
|
from nwb_schema_language import Dataset, Group, ReferenceDtype, CompoundDtype, DTypeType
|
|
from nwb_linkml.adapters.adapter import Adapter, BuildResult
|
|
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
|
|
from nwb_linkml.maps import QUANTITY_MAP
|
|
from nwb_linkml.lang_elements import Arraylike
|
|
|
|
CAMEL_TO_SNAKE = re.compile(r'((?<=[a-z0-9])[A-Z]|(?!^)[A-Z](?=[a-z]))')
"""
Pattern matching every uppercase letter that begins a new word in a
camel-case name: one preceded by a lowercase letter or digit, or one that is
not at the start of the string and is followed by a lowercase letter.

courtesy of: https://stackoverflow.com/a/12867228
"""


def camel_to_snake(name: str) -> str:
    """
    Convert camel case to snake case

    An underscore is inserted before every letter matched by
    :data:`CAMEL_TO_SNAKE` and the result is lowercased, so runs of capitals
    stay together (``HTTPResponse`` -> ``http_response``).

    courtesy of: https://stackoverflow.com/a/12867228

    Args:
        name: the camel-case (or already snake-case) string to convert

    Returns:
        The lowercased, underscore-separated name
    """
    return CAMEL_TO_SNAKE.sub(r'_\1', name).lower()
|
|
|
|
class ClassAdapter(Adapter):
    """
    Abstract adapter to class-like things in linkml, holds methods common to
    both DatasetAdapter and GroupAdapter
    """
    # The schema-language object (dataset or group) being adapted
    cls: Dataset | Group
    # Adapter of the enclosing class, or None when this is a top-level class
    parent: Optional['ClassAdapter'] = None

    @abstractmethod
    def build(self) -> BuildResult:
        """
        Make this abstract so it can't be instantiated directly.

        Subclasses call :meth:`.build_base` to get the basics true of both groups and datasets
        """

    def build_base(self, extra_attrs: Optional[List[SlotDefinition]] = None) -> BuildResult:
        """
        Build the basic class and attributes before adding any specific
        modifications for groups or datasets.

        Args:
            extra_attrs: additional slots appended after the name slot and the
                attributes declared on :attr:`.cls`. A single bare
                :class:`SlotDefinition` is also accepted and wrapped in a list.

        Returns:
            A :class:`BuildResult` holding the one generated class and, when
            this adapter has a parent, the slot the parent uses to refer to it.
        """
        has_parent = self.parent is not None

        # Build this class
        kwargs = {}
        if has_parent:
            kwargs['name'] = self._get_full_name()
        else:
            # classes without a parent are flagged as tree roots
            kwargs['name'] = self._get_attr_name()
            kwargs['tree_root'] = True

        # Attributes: the name slot first, then the vanilla top-level attributes
        attributes = [self.build_name_slot()]
        attributes.extend(self.build_attrs(self.cls))

        if extra_attrs is not None:
            if isinstance(extra_attrs, SlotDefinition):
                extra_attrs = [extra_attrs]
            attributes.extend(extra_attrs)

        kwargs['attributes'] = attributes
        kwargs['description'] = self.cls.doc
        kwargs['is_a'] = self.cls.neurodata_type_inc

        class_def = ClassDefinition(**kwargs)

        # Only nested classes need a slot for their parent to refer to them
        slots = [self.build_self_slot()] if has_parent else []

        return BuildResult(
            classes=[class_def],
            slots=slots
        )

    def build_attrs(self, cls: Dataset | Group) -> List[SlotDefinition]:
        """
        Make one slot per attribute declared on ``cls``.

        Args:
            cls: the dataset or group whose ``attributes`` are converted

        Returns:
            One :class:`SlotDefinition` per attribute, in declaration order;
            an empty list when ``cls.attributes`` is ``None`` or empty.
        """
        # `or []` tolerates a class declared with no attributes at all
        return [
            SlotDefinition(
                name=attr.name,
                description=attr.doc,
                range=self.handle_dtype(attr.dtype),
            ) for attr in (cls.attributes or [])
        ]

    def _get_full_name(self) -> str:
        """The full name of the object in the generated linkml

        Distinct from 'name' which is the thing that's used to define position in
        a hierarchical data setting

        Raises:
            ValueError: if the class has none of ``neurodata_type_def``,
                ``name`` or ``neurodata_type_inc``
        """
        if self.cls.neurodata_type_def:
            return self.cls.neurodata_type_def

        if self.cls.name is not None:
            # not necessarily a unique name, so we combine parent names
            name_parts = []
            if self.parent is not None:
                name_parts.append(self.parent._get_full_name())
            name_parts.append(self.cls.name)
            return '__'.join(name_parts)

        if self.cls.neurodata_type_inc is not None:
            # again, this is against the schema, but is common
            return self.cls.neurodata_type_inc

        raise ValueError('Not sure what our name is!')

    def _pick_name(self, snake_case_types: bool) -> str:
        """
        Shared lookup behind :meth:`._get_attr_name` and :meth:`._get_slot_name`.

        Precedence is ``neurodata_type_def``, then ``name``, then
        ``neurodata_type_inc``. An explicit ``name`` is always returned
        verbatim; type names are snake-cased when ``snake_case_types`` is set.

        Raises:
            ValueError: if none of the three is available
        """
        if self.cls.neurodata_type_def:
            type_name = self.cls.neurodata_type_def
        elif self.cls.name is not None:
            # we do have a unique name
            return self.cls.name
        elif self.cls.neurodata_type_inc:
            type_name = self.cls.neurodata_type_inc
        else:
            raise ValueError(f'Class has no name!: {self.cls}')

        return camel_to_snake(type_name) if snake_case_types else type_name

    def _get_attr_name(self) -> str:
        """
        Get the name to use as the attribute name,
        again distinct from the actual name of the instantiated object

        Raises:
            ValueError: if the class has no usable name
        """
        return self._pick_name(snake_case_types=False)

    def _get_slot_name(self) -> str:
        """
        Get the name to use as the name when this is a subclass used as a slot,
        used to dodge name overlaps by snake-casing!
        again distinct from the actual name of the instantiated object

        Raises:
            ValueError: if the class has no usable name
        """
        return self._pick_name(snake_case_types=True)

    def handle_dtype(self, dtype: DTypeType | None) -> str:
        """
        Translate a schema-language dtype into the value used as a slot range.

        Args:
            dtype: a reference dtype, a list of compound dtypes, a flat dtype,
                or ``None`` / an empty list when no dtype was given

        Returns:
            The reference's ``target_type``, ``'AnyType'`` for missing or
            compound dtypes, otherwise the flat dtype unchanged.
        """
        if isinstance(dtype, ReferenceDtype):
            return dtype.target_type

        if dtype is None or dtype == []:
            # Some ill-defined datasets are "abstract" despite that not being in the schema language
            return 'AnyType'

        if isinstance(dtype, list) and isinstance(dtype[0], CompoundDtype):
            # there is precisely one class that uses compound dtypes:
            # TimeSeriesReferenceVectorData
            # compoundDtypes are able to define a ragged table according to the schema
            # but are used in this single case equivalently to attributes.
            # so we'll... uh... treat them as slots.
            # TODO: compound dtypes are not implemented yet, fall back to AnyType
            return 'AnyType'

        # flat dtype
        return dtype

    def build_name_slot(self) -> SlotDefinition:
        """
        If a class has a name, then that name should be a slot with a
        fixed value.

        If a class does not have a name, then name should be a required attribute

        References:
            https://github.com/NeurodataWithoutBorders/nwb-schema/issues/552#issuecomment-1700319001

        Returns:
            A required, string-ranged slot called ``name``; when the class has
            a name the slot also carries it as ``ifabsent`` and ``equals_string``.
        """
        if self.cls.name:
            return SlotDefinition(
                name='name',
                required=True,
                ifabsent=f'string({self.cls.name})',
                equals_string=self.cls.name,
                range='string'
            )

        return SlotDefinition(
            name='name',
            required=True,
            range='string'
        )

    def build_self_slot(self) -> SlotDefinition:
        """
        If we are a child class, we make a slot so our parent can refer to us

        The slot is named by :meth:`._get_slot_name`, ranges over the class
        named by :meth:`._get_full_name`, and receives the entry of
        ``QUANTITY_MAP`` for this class's ``quantity`` as extra keyword arguments.
        """
        return SlotDefinition(
            name=self._get_slot_name(),
            description=self.cls.doc,
            range=self._get_full_name(),
            **QUANTITY_MAP[self.cls.quantity]
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|