# nwb-linkml/nwb_linkml/adapters/classes.py

"""
Adapters to linkML classes
"""
import pdb
import re
from abc import abstractmethod
from typing import List, Optional
from nwb_schema_language import Dataset, Group, ReferenceDtype, CompoundDtype, DTypeType
from nwb_linkml.adapters.adapter import Adapter, BuildResult
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
from nwb_linkml.maps import QUANTITY_MAP
from nwb_linkml.lang_elements import Arraylike

CAMEL_TO_SNAKE = re.compile('((?<=[a-z0-9])[A-Z]|(?!^)[A-Z](?=[a-z]))')
"""
Pattern that captures each capital letter starting a new word in a
camel-cased name: a capital directly after a lowercase letter or digit, or a
capital that is not at the start of the string and is followed by a lowercase
letter.

courtesy of: https://stackoverflow.com/a/12867228
"""


def camel_to_snake(name:str) -> str:
    """
    Rewrite a camel-cased identifier as snake case.

    An underscore is inserted in front of every capital matched by
    :data:`CAMEL_TO_SNAKE`, then the whole string is lowercased, so runs of
    capitals stay together (``NWBFile`` becomes ``nwb_file``).

    courtesy of: https://stackoverflow.com/a/12867228

    Args:
        name: the camel-cased string to convert

    Returns:
        The snake-cased, all-lowercase version of ``name``
    """
    underscored = CAMEL_TO_SNAKE.sub(r'_\1', name)
    return underscored.lower()

class ClassAdapter(Adapter):
    """
    Abstract adapter to class-like things in linkml, holds methods common to
    both DatasetAdapter and GroupAdapter
    """
    # the schema-language dataset or group that this adapter translates
    cls: Dataset | Group
    # adapter of the class this one is nested inside, None when there is none
    parent: Optional['ClassAdapter'] = None

    @abstractmethod
    def build(self) -> BuildResult:
        """
        Make this abstract so it can't be instantiated directly.

        Subclasses call :meth:`.build_base` to get the basics true of both groups and datasets
        """

    def build_base(self, extra_attrs: Optional[List[SlotDefinition]]=None) -> BuildResult:
        """
        Build the basic class and attributes before adding any specific
        modifications for groups or datasets.

        Args:
            extra_attrs: additional slots to attach to the generated class,
                placed after its own attributes and the ``name`` slot. A single
                :class:`SlotDefinition` is also accepted and is wrapped in a list.

        Returns:
            A :class:`BuildResult` containing the generated class and, when
            this adapter has a parent, the slot made by :meth:`.build_self_slot`.
        """
        # Classes with a parent use the full name,
        # classes without one use the attribute name
        if self.parent is not None:
            name = self._get_full_name()
        else:
            name = self._get_attr_name()

        # Get vanilla top-level attributes, then add the name slot and any extras
        attrs = self.build_attrs(self.cls)
        attrs.append(self.build_name_slot())
        if extra_attrs is not None:
            if isinstance(extra_attrs, SlotDefinition):
                extra_attrs = [extra_attrs]
            attrs.extend(extra_attrs)

        cls = ClassDefinition(
            name=name,
            is_a=self.cls.neurodata_type_inc,
            description=self.cls.doc,
            attributes=attrs,
        )

        # A class with a parent also yields a slot so the parent can refer to it
        slots = []
        if self.parent is not None:
            slots.append(self.build_self_slot())

        return BuildResult(
            classes=[cls],
            slots=slots
        )

    def build_attrs(self, cls: Dataset | Group) -> List[SlotDefinition]:
        """
        Make one slot for each attribute declared on ``cls``.

        Args:
            cls: the dataset or group whose attributes should be converted

        Returns:
            A new list of slots, one per attribute, each with its range given by
            :meth:`.handle_dtype`. Empty when ``cls`` declares no attributes.
        """
        # ``attributes`` may be None rather than an empty list; treat both as "none"
        return [
            SlotDefinition(
                name=attr.name,
                description=attr.doc,
                range=self.handle_dtype(attr.dtype),
            ) for attr in (cls.attributes or [])
        ]

    def _get_full_name(self) -> str:
        """The full name of the object in the generated linkml

        Distinct from 'name' which is the thing that's used to define position in
        a hierarchical data setting

        Returns:
            ``neurodata_type_def`` when set; otherwise ``name`` prefixed by the
            parent's full name and ``__``; otherwise ``neurodata_type_inc``.

        Raises:
            ValueError: if none of the three naming fields are usable
        """
        if self.cls.neurodata_type_def:
            return self.cls.neurodata_type_def

        if self.cls.name is not None:
            # not necessarily a unique name, so we combine parent names
            name_parts = []
            if self.parent is not None:
                name_parts.append(self.parent._get_full_name())
            name_parts.append(self.cls.name)
            return '__'.join(name_parts)

        if self.cls.neurodata_type_inc is not None:
            # again, this is against the schema, but is common
            return self.cls.neurodata_type_inc

        raise ValueError('Not sure what our name is!')

    def _get_attr_name(self) -> str:
        """
        Get the name to use as the attribute name,
        again distinct from the actual name of the instantiated object

        Returns:
            The first usable value among ``neurodata_type_def``, ``name`` and
            ``neurodata_type_inc``, in that order. No parent prefix is added.

        Raises:
            ValueError: if none of the three naming fields are usable
        """
        if self.cls.neurodata_type_def:
            return self.cls.neurodata_type_def

        if self.cls.name is not None:
            # we do have a unique name
            return self.cls.name

        if self.cls.neurodata_type_inc:
            return self.cls.neurodata_type_inc

        raise ValueError(f'Class has no name!: {self.cls}')

    def handle_dtype(self, dtype: DTypeType | None) -> str:
        """
        Translate a schema-language dtype into the range for a slot.

        Args:
            dtype: a reference dtype, a list of compound dtypes, a flat dtype,
                or ``None`` / an empty list when no dtype was given

        Returns:
            The reference's ``target_type`` for reference dtypes, ``'AnyType'``
            for missing or compound dtypes, and ``dtype`` itself otherwise.
        """
        if isinstance(dtype, ReferenceDtype):
            return dtype.target_type

        if dtype is None or dtype == []:
            # Some ill-defined datasets are "abstract" despite that not being in the schema language
            return 'AnyType'

        if isinstance(dtype, list) and isinstance(dtype[0], CompoundDtype):
            # there is precisely one class that uses compound dtypes:
            # TimeSeriesReferenceVectorData
            # compoundDtypes are able to define a ragged table according to the schema
            # but are used in this single case equivalently to attributes.
            # so we'll... uh... treat them as slots.
            # TODO: compound dtypes are not translated yet, fall back to AnyType
            return 'AnyType'

        # flat dtype
        return dtype

    def build_name_slot(self) -> SlotDefinition:
        """
        If a class has a name, then that name should be a slot with a
        fixed value.

        If a class does not have a name, then name should be a required attribute

        References:
            https://github.com/NeurodataWithoutBorders/nwb-schema/issues/552#issuecomment-1700319001

        Returns:
            A required, string-ranged slot called ``name``. When the class has
            a name, the slot also carries it as ``ifabsent`` and ``equals_string``.
        """
        if self.cls.name:
            return SlotDefinition(
                name='name',
                required=True,
                ifabsent=self.cls.name,
                equals_string=self.cls.name,
                range='string'
            )

        return SlotDefinition(
            name='name',
            required=True,
            range='string'
        )

    def build_self_slot(self) -> SlotDefinition:
        """
        If we are a child class, we make a slot so our parent can refer to us

        Returns:
            A slot named by :meth:`._get_attr_name` whose range is
            :meth:`._get_full_name`, with the extra keyword arguments that
            ``QUANTITY_MAP`` holds for this class's ``quantity``.
        """
        return SlotDefinition(
            name=self._get_attr_name(),
            description=self.cls.doc,
            range=self._get_full_name(),
            **QUANTITY_MAP[self.cls.quantity]
        )
Actually generating some translations at this point 2023-08-22 04:43:02 +00:00			`"""`
			`Adapters to linkML classes`
			`"""`
recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00			`import pdb`
Need to go home, in the middle of refactoring group and dataset as well as implementing the name property correctly 2023-08-31 07:01:43 +00:00			`import re`
			`from abc import abstractmethod`
recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00			`from typing import List, Optional`
I believe that's a full translation or at least all the semantics are present. it's not pretty by any stretch of the imagination 2023-08-25 07:22:47 +00:00			`from nwb_schema_language import Dataset, Group, ReferenceDtype, CompoundDtype, DTypeType`
recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00			`from nwb_linkml.adapters.adapter import Adapter, BuildResult`
Actually generating some translations at this point 2023-08-22 04:43:02 +00:00			`from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition`
I believe that's a full translation or at least all the semantics are present. it's not pretty by any stretch of the imagination 2023-08-25 07:22:47 +00:00			`from nwb_linkml.maps import QUANTITY_MAP`
			`from nwb_linkml.lang_elements import Arraylike`
Actually generating some translations at this point 2023-08-22 04:43:02 +00:00
Need to go home, in the middle of refactoring group and dataset as well as implementing the name property correctly 2023-08-31 07:01:43 +00:00			`CAMEL_TO_SNAKE = re.compile('((?<=[a-z0-9])[A-Z]\|(?!^)[A-Z](?=[a-z]))')`
			`"""`
			`Convert camel case to snake case`

			`courtesy of: https://stackoverflow.com/a/12867228`
			`"""`

			`def camel_to_snake(name:str) -> str:`
			`"""`
			`Convert camel case to snake case`

			`courtesy of: https://stackoverflow.com/a/12867228`
			`"""`
			`return CAMEL_TO_SNAKE.sub(r'_\1', name).lower()`

Actually generating some translations at this point 2023-08-22 04:43:02 +00:00			`class ClassAdapter(Adapter):`
			`"""`
Need to go home, in the middle of refactoring group and dataset as well as implementing the name property correctly 2023-08-31 07:01:43 +00:00			`Abstract adapter to class-like things in linkml, holds methods common to`
			`both DatasetAdapter and GroupAdapter`
Actually generating some translations at this point 2023-08-22 04:43:02 +00:00			`"""`
			`cls: Dataset \| Group`
recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00			`parent: Optional['ClassAdapter'] = None`

Need to go home, in the middle of refactoring group and dataset as well as implementing the name property correctly 2023-08-31 07:01:43 +00:00			`@abstractmethod`
			`def build(self) -> BuildResult:`
			`"""`
			`Make this abstract so it can't be instantiated directly.`

			Subclasses call :meth:`.build_base` to get the basics true of both groups and datasets
			`"""`


			`def build_base(self, extra_attrs: Optional[List[SlotDefinition]]=None) -> BuildResult:`
			`"""`
			`Build the basic class and attributes before adding any specific`
			`modifications for groups or datasets.`
			`"""`

			`# Build this class`
			`#name = self._get_full_name()`
			`if self.parent is not None:`
			`name = self._get_full_name()`
			`else:`
			`name = self._get_attr_name()`

			`# Get vanilla top-level attributes`
			`attrs = self.build_attrs(self.cls)`
			`name_slot = self.build_name_slot()`
			`attrs.append(name_slot)`
			`if extra_attrs is not None:`
			`if isinstance(extra_attrs, SlotDefinition):`
			`extra_attrs = [extra_attrs]`
			`attrs.extend(extra_attrs)`

			`cls = ClassDefinition(`
			`name = name,`
			`is_a = self.cls.neurodata_type_inc,`
			`description=self.cls.doc,`
			`attributes=attrs,`
			`)`

			`slots = []`
			`if self.parent is not None:`
			`slots.append(self.build_self_slot())`

			`res = BuildResult(`
			`classes = [cls],`
			`slots = slots`
			`)`

			`return res`

			`def build_attrs(self, cls: Dataset \| Group) -> List[SlotDefinition]:`
			`attrs = [`
			`SlotDefinition(`
			`name=attr.name,`
			`description=attr.doc,`
			`range=self.handle_dtype(attr.dtype),`
			`) for attr in cls.attributes`
			`]`

			`return attrs`

recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00			`def _get_full_name(self) -> str:`
			`"""The full name of the object in the generated linkml`
Actually generating some translations at this point 2023-08-22 04:43:02 +00:00
Need to go home, in the middle of refactoring group and dataset as well as implementing the name property correctly 2023-08-31 07:01:43 +00:00			`Distinct from 'name' which is the thing that's used to define position in`
			`a hierarchical data setting`
			`"""`
Actually generating some translations at this point 2023-08-22 04:43:02 +00:00			`if self.cls.neurodata_type_def:`
			`name = self.cls.neurodata_type_def`
recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00			`elif self.cls.name is not None:`
			`# not necessarily a unique name, so we combine parent names`
			`name_parts = []`
			`if self.parent is not None:`
			`name_parts.append(self.parent._get_full_name())`

			`name_parts.append(self.cls.name)`
Cleaner code generation, npytyping type hints for arrays - split off generated subclasses into "include" files, not sure if that's good, but in any case we have them separable now. - more work on cleanly un-nesting scalar and 1D-vector data into attributes and lists, respectively - brought the pydantic generator in-repo to do a bunch of overrides 2023-08-29 05:16:58 +00:00			`name = '__'.join(name_parts)`
recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00			`elif self.cls.neurodata_type_inc is not None:`
			`# again, this is against the schema, but is common`
			`name = self.cls.neurodata_type_inc`
Actually generating some translations at this point 2023-08-22 04:43:02 +00:00			`else:`
recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00			`raise ValueError('Not sure what our name is!')`


			`return name`

Need to go home, in the middle of refactoring group and dataset as well as implementing the name property correctly 2023-08-31 07:01:43 +00:00			`def _get_attr_name(self) -> str:`
recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00			`"""`
Need to go home, in the middle of refactoring group and dataset as well as implementing the name property correctly 2023-08-31 07:01:43 +00:00			`Get the name to use as the attribute name,`
			`again distinct from the actual name of the instantiated object`
recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00			`"""`
			`# return self._get_full_name()`
			`name = None`
			`if self.cls.neurodata_type_def:`
Need to go home, in the middle of refactoring group and dataset as well as implementing the name property correctly 2023-08-31 07:01:43 +00:00			`#name = camel_to_snake(self.cls.neurodata_type_def)`
recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00			`name = self.cls.neurodata_type_def`
			`elif self.cls.name is not None:`
			`# we do have a unique name`
Actually generating some translations at this point 2023-08-22 04:43:02 +00:00			`name = self.cls.name`
recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00			`elif self.cls.neurodata_type_inc:`
Need to go home, in the middle of refactoring group and dataset as well as implementing the name property correctly 2023-08-31 07:01:43 +00:00			`#name = camel_to_snake(self.cls.neurodata_type_inc)`
recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00			`name = self.cls.neurodata_type_inc`

			`if name is None:`
			`raise ValueError(f'Class has no name!: {self.cls}')`

			`return name`
Actually generating some translations at this point 2023-08-22 04:43:02 +00:00
I believe that's a full translation or at least all the semantics are present. it's not pretty by any stretch of the imagination 2023-08-25 07:22:47 +00:00			`def handle_dtype(self, dtype: DTypeType \| None) -> str:`
recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00			`if isinstance(dtype, ReferenceDtype):`
			`return dtype.target_type`
I believe that's a full translation or at least all the semantics are present. it's not pretty by any stretch of the imagination 2023-08-25 07:22:47 +00:00			`elif dtype is None or dtype == []:`
			`# Some ill-defined datasets are "abstract" despite that not being in the schema language`
			`return 'AnyType'`
			`elif isinstance(dtype, list) and isinstance(dtype[0], CompoundDtype):`
			`# there is precisely one class that uses compound dtypes:`
			`# TimeSeriesReferenceVectorData`
			`# compoundDtypes are able to define a ragged table according to the schema`
			`# but are used in this single case equivalently to attributes.`
			`# so we'll... uh... treat them as slots.`
			`# TODO`
			`return 'AnyType'`
			`#raise NotImplementedError('got distracted, need to implement')`

recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00			`else:`
I believe that's a full translation or at least all the semantics are present. it's not pretty by any stretch of the imagination 2023-08-25 07:22:47 +00:00			`# flat dtype`
recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00			`return dtype`

Need to go home, in the middle of refactoring group and dataset as well as implementing the name property correctly 2023-08-31 07:01:43 +00:00			`def build_name_slot(self) -> SlotDefinition:`
			`"""`
			`If a class has a name, then that name should be a slot with a`
			`fixed value.`
Actually generating some translations at this point 2023-08-22 04:43:02 +00:00
Need to go home, in the middle of refactoring group and dataset as well as implementing the name property correctly 2023-08-31 07:01:43 +00:00			`If a class does not have a name, then name should be a required attribute`

			`References:`
			`https://github.com/NeurodataWithoutBorders/nwb-schema/issues/552#issuecomment-1700319001`

			`Returns:`
recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00
			`"""`
Need to go home, in the middle of refactoring group and dataset as well as implementing the name property correctly 2023-08-31 07:01:43 +00:00			`if self.cls.name:`
			`name_slot = SlotDefinition(`
			`name='name',`
			`required=True,`
			`ifabsent=self.cls.name,`
			`equals_string=self.cls.name,`
			`range='string'`
			`)`
			`else:`
			`name_slot = SlotDefinition(`
			`name='name',`
			`required=True,`
			`range='string'`
			`)`
			`return name_slot`
recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00
Need to go home, in the middle of refactoring group and dataset as well as implementing the name property correctly 2023-08-31 07:01:43 +00:00			`def build_self_slot(self) -> SlotDefinition:`
recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00			`"""`
Need to go home, in the middle of refactoring group and dataset as well as implementing the name property correctly 2023-08-31 07:01:43 +00:00			`If we are a child class, we make a slot so our parent can refer to us`
			`"""`
			`return SlotDefinition(`
			`name=self._get_attr_name(),`
			`description=self.cls.doc,`
			`range=self._get_full_name(),`
			`**QUANTITY_MAP[self.cls.quantity]`
			`)`
recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00




I believe that's a full translation or at least all the semantics are present. it's not pretty by any stretch of the imagination 2023-08-25 07:22:47 +00:00
recursion works, more formal build method in adapters 2023-08-24 02:56:09 +00:00