From da6d0d860897ee17f832311004f8bb59cb001d13 Mon Sep 17 00:00:00 2001
From: sneakers-the-rat
Date: Mon, 5 Aug 2024 18:41:38 -0700
Subject: [PATCH] correctly handle attributes

---
 nwb_linkml/src/nwb_linkml/adapters/adapter.py |  35 +++-
 .../src/nwb_linkml/adapters/attribute.py      | 194 ++++++++++++++++++
 nwb_linkml/src/nwb_linkml/adapters/classes.py |  47 +----
 nwb_linkml/src/nwb_linkml/adapters/dataset.py |  67 ++----
 nwb_linkml/src/nwb_linkml/maps/dtype.py       |  32 +++
 scripts/generate_core.py                      |  11 +-
 6 files changed, 290 insertions(+), 96 deletions(-)
 create mode 100644 nwb_linkml/src/nwb_linkml/adapters/attribute.py

diff --git a/nwb_linkml/src/nwb_linkml/adapters/adapter.py b/nwb_linkml/src/nwb_linkml/adapters/adapter.py
index 561df65..6823bb0 100644
--- a/nwb_linkml/src/nwb_linkml/adapters/adapter.py
+++ b/nwb_linkml/src/nwb_linkml/adapters/adapter.py
@@ -26,7 +26,7 @@ from linkml_runtime.linkml_model import (
 )
 from pydantic import BaseModel
 
-from nwb_schema_language import Attribute, Dataset, Group, Schema
+from nwb_schema_language import Attribute, Dataset, Group, Schema, CompoundDtype
 
 if sys.version_info.minor >= 11:
     from typing import TypeVarTuple, Unpack
@@ -238,3 +238,36 @@ class Adapter(BaseModel):
         for item in self.walk(input):
             if any([type(item) is atype for atype in get_type]):
                 yield item
+
+
+def is_1d(cls: Dataset | Attribute) -> bool:
+    """
+    Check if the values of a dataset are 1-dimensional.
+
+    Specifically:
+    * a single-layer dim/shape list of length 1, or
+    * a nested dim/shape list containing a single spec of length 1
+    """
+    return (
+        not any([isinstance(dim, list) for dim in cls.dims]) and len(cls.dims) == 1
+    ) or (  # nested list
+        all([isinstance(dim, list) for dim in cls.dims])
+        and len(cls.dims) == 1
+        and len(cls.dims[0]) == 1
+    )
+
+
+def is_compound(cls: Dataset) -> bool:
+    """Check if dataset has a compound dtype"""
+    return (
+        isinstance(cls.dtype, list)
+        and len(cls.dtype) > 0
+        and isinstance(cls.dtype[0], CompoundDtype)
+    )
+
+
+def has_attrs(cls: Dataset) -> bool:
+    """
+    Check if a dataset has attributes, none of which carry a fixed ``value``
+    """
+    return len(cls.attributes) > 0 and all([not a.value for a in cls.attributes])
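
For readers skimming the diff: the three predicates added to adapter.py behave as sketched below. This is illustrative only, not part of the patch; the constructor fields (`name`, `doc`, `dtype`, `dims`, `shape`, `attributes`) are assumptions about the `nwb_schema_language` models.

```python
# Minimal sketch of the new predicates (assumed model fields, illustrative only)
from nwb_linkml.adapters.adapter import is_1d, is_compound, has_attrs
from nwb_schema_language import Attribute, Dataset

flat = Attribute(name="unit", doc="unit of measure", dtype="text",
                 dims=["n"], shape=[None])
print(is_1d(flat))    # True: one flat dim spec of length 1

nested = Attribute(name="unit", doc="unit of measure", dtype="text",
                   dims=[["n"]], shape=[[None]])
print(is_1d(nested))  # True: a single nested spec, itself of length 1

ds = Dataset(name="data", doc="values", dtype="float32",
             attributes=[Attribute(name="unit", doc="u", dtype="text")])
print(has_attrs(ds))    # True: attributes present, none carries a fixed value
print(is_compound(ds))  # False: dtype is not a list of CompoundDtype
```
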
diff --git a/nwb_linkml/src/nwb_linkml/adapters/attribute.py b/nwb_linkml/src/nwb_linkml/adapters/attribute.py
new file mode 100644
index 0000000..c7ebd19
--- /dev/null
+++ b/nwb_linkml/src/nwb_linkml/adapters/attribute.py
@@ -0,0 +1,194 @@
+"""
+Adapters for attribute types
+"""
+
+from abc import abstractmethod
+from typing import ClassVar, Optional, TypedDict, Type
+
+from linkml_runtime.linkml_model.meta import SlotDefinition
+
+from nwb_linkml.adapters.array import ArrayAdapter
+from nwb_linkml.adapters.adapter import BuildResult, is_1d, Adapter
+from nwb_linkml.maps import Map
+from nwb_linkml.maps.dtype import handle_dtype
+from nwb_schema_language import Attribute
+
+
+def _make_ifabsent(val: str | int | float | None) -> str | None:
+    if val is None:
+        return None
+    elif isinstance(val, str):
+        return f"string({val})"
+    elif isinstance(val, int):
+        return f"integer({val})"
+    elif isinstance(val, float):
+        return f"float({val})"
+    else:
+        return str(val)
+
+
+class AttrDefaults(TypedDict):
+    equals_string: str | None
+    equals_number: float | int | None
+    ifabsent: str | None
+
+
+class AttributeMap(Map):
+
+    @classmethod
+    def handle_defaults(cls, attr: Attribute) -> AttrDefaults:
+        """
+        Construct arguments for linkml slot default metaslots from nwb schema lang attribute props
+        """
+        equals_string = None
+        equals_number = None
+        default_value = None
+        if attr.value:
+            if isinstance(attr.value, (int, float)):
+                equals_number = attr.value
+            else:
+                equals_string = str(attr.value)
+
+        if equals_number:
+            default_value = _make_ifabsent(equals_number)
+        elif equals_string:
+            default_value = _make_ifabsent(equals_string)
+        elif attr.default_value:
+            default_value = _make_ifabsent(attr.default_value)
+
+        return AttrDefaults(
+            equals_string=equals_string, equals_number=equals_number, ifabsent=default_value
+        )
+
+    @classmethod
+    @abstractmethod
+    def check(cls, attr: Attribute) -> bool:
+        """
+        Check if this map applies
+        """
+        pass  # pragma: no cover
+
+    @classmethod
+    @abstractmethod
+    def apply(
+        cls, attr: Attribute, res: Optional[BuildResult] = None, name: Optional[str] = None
+    ) -> BuildResult:
+        """
+        Apply this mapping
+        """
+        pass  # pragma: no cover
+
+
+class MapScalar(AttributeMap):
+    """
+    Map a simple scalar value
+    """
+
+    @classmethod
+    def check(cls, attr: Attribute) -> bool:
+        """
+        Check if we are a scalar value!
+        """
+        return not attr.dims and not attr.shape
+
+    @classmethod
+    def apply(cls, attr: Attribute, res: Optional[BuildResult] = None) -> BuildResult:
+        """
+        Make a slot for us!
+        """
+        slot = SlotDefinition(
+            name=attr.name,
+            range=handle_dtype(attr.dtype),
+            description=attr.doc,
+            required=attr.required,
+            **cls.handle_defaults(attr),
+        )
+        return BuildResult(slots=[slot])
+
+
+class MapArray(AttributeMap):
+    """
+    Map an array value!
+    """
+
+    @classmethod
+    def check(cls, attr: Attribute) -> bool:
+        """
+        Check that we have some array specification!
+        """
+        return bool(attr.dims) or bool(attr.shape)
+
+    @classmethod
+    def apply(cls, attr: Attribute, res: Optional[BuildResult] = None) -> BuildResult:
+        """
+        Make a slot with an array expression!
+
+        If we're just a 1D array, use a list (set multivalued: true).
+        If more than that, make an array descriptor
+        """
+        expressions = {}
+        multivalued = False
+        if is_1d(attr):
+            multivalued = True
+        else:
+            # ---------------------------------
+            # SPECIAL CASE: Some old versions of HDMF don't have ``dims``, only shape
+            # ---------------------------------
+            shape = attr.shape
+            dims = attr.dims
+            if shape and not dims:
+                dims = ["null"] * len(shape)
+
+            array_adapter = ArrayAdapter(dims, shape)
+            expressions = array_adapter.make_slot()
+
+        slot = SlotDefinition(
+            name=attr.name,
+            range=handle_dtype(attr.dtype),
+            multivalued=multivalued,
+            description=attr.doc,
+            required=attr.required,
+            **expressions,
+            **cls.handle_defaults(attr),
+        )
+        return BuildResult(slots=[slot])
+
+
+class AttributeAdapter(Adapter):
+    """
+    Create slot definitions from nwb schema language attributes
+    """
+
+    TYPE: ClassVar[Type] = Attribute
+
+    cls: Attribute
+
+    def build(self) -> "BuildResult":
+        """
+        Build the slot definition; every attribute should match exactly one map.
+        """
+        map = self.match()
+        return map.apply(self.cls)
+
+    def match(self) -> Optional[Type[AttributeMap]]:
+        """
+        Find the map class that applies to this attribute
+
+        Returns:
+            :class:`.AttributeMap`
+
+        Raises:
+            RuntimeError - if more than one map matches
+        """
+        # find a map to use
+        matches = [m for m in AttributeMap.__subclasses__() if m.check(self.cls)]
+
+        if len(matches) > 1:  # pragma: no cover
+            raise RuntimeError(
+                "Only one map should apply to an attribute, you need to refactor the maps!"
+                f" Got maps: {matches}"
+            )
+        elif len(matches) == 0:
+            return None
+        else:
+            return matches[0]
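
The `ifabsent` strings produced by `handle_defaults` use LinkML's typed default syntax (`string(...)`, `integer(...)`, `float(...)`), and a fixed `value` doubles as an equality constraint. A hypothetical end-to-end use of the new adapter, again assuming the `Attribute` model accepts these fields:

```python
# Illustrative usage sketch, not part of the patch
from nwb_linkml.adapters.attribute import AttributeAdapter
from nwb_schema_language import Attribute

attr = Attribute(name="unit", doc="unit of measurement", dtype="text", value="seconds")
slot = AttributeAdapter(cls=attr).build().slots[0]
# The fixed value becomes both an equality constraint and a typed default:
# slot.equals_string == "seconds"
# slot.ifabsent == "string(seconds)"
```
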
diff --git a/nwb_linkml/src/nwb_linkml/adapters/classes.py b/nwb_linkml/src/nwb_linkml/adapters/classes.py
index fcdf43c..054a401 100644
--- a/nwb_linkml/src/nwb_linkml/adapters/classes.py
+++ b/nwb_linkml/src/nwb_linkml/adapters/classes.py
@@ -9,9 +9,10 @@ from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
 from pydantic import field_validator
 
 from nwb_linkml.adapters.adapter import Adapter, BuildResult
+from nwb_linkml.adapters.attribute import AttributeAdapter
 from nwb_linkml.maps import QUANTITY_MAP
 from nwb_linkml.maps.naming import camel_to_snake
-from nwb_schema_language import CompoundDtype, Dataset, DTypeType, FlatDtype, Group, ReferenceDtype
+from nwb_schema_language import Dataset, Group
 
 T = TypeVar("T", bound=Type[Dataset] | Type[Group])
 TI = TypeVar("TI", bound=Dataset | Group)
@@ -118,16 +119,9 @@ class ClassAdapter(Adapter):
         Returns:
             list[:class:`.SlotDefinition`]
         """
-        attrs = [
-            SlotDefinition(
-                name=attr.name,
-                description=attr.doc,
-                range=self.handle_dtype(attr.dtype),
-            )
-            for attr in cls.attributes
-        ]
-
-        return attrs
+        results = [AttributeAdapter(cls=attr).build() for attr in cls.attributes]
+        slots = [r.slots[0] for r in results]
+        return slots
 
     def _get_full_name(self) -> str:
         """The full name of the object in the generated linkml
@@ -205,37 +199,6 @@ class ClassAdapter(Adapter):
 
         return name
 
-    @classmethod
-    def handle_dtype(cls, dtype: DTypeType | None) -> str:
-        """
-        Get the string form of a dtype
-
-        Args:
-            dtype (:class:`.DTypeType`): Dtype to stringify
-
-        Returns:
-            str
-        """
-        if isinstance(dtype, ReferenceDtype):
-            return dtype.target_type
-        elif dtype is None or dtype == []:
-            # Some ill-defined datasets are "abstract" despite that not being in the schema language
-            return "AnyType"
-        elif isinstance(dtype, FlatDtype):
-            return dtype.value
-        elif isinstance(dtype, list) and isinstance(dtype[0], CompoundDtype):
-            # there is precisely one class that uses compound dtypes:
-            # TimeSeriesReferenceVectorData
-            # compoundDtypes are able to define a ragged table according to the schema
-            # but are used in this single case equivalently to attributes.
-            # so we'll... uh... treat them as slots.
-            # TODO
-            return "AnyType"
-
-        else:
-            # flat dtype
-            return dtype
-
     def build_name_slot(self) -> SlotDefinition:
         """
         If a class has a name, then that name should be a slot with a
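
`build_attrs` now yields exactly one slot per attribute by delegating to the adapter above; the old inline `SlotDefinition` construction (which ignored defaults and array specs) is gone. A sketch of the delegation, with the same assumed model fields as earlier:

```python
# Illustrative only, not part of the patch
from nwb_linkml.adapters.attribute import AttributeAdapter
from nwb_schema_language import Attribute

attributes = [
    Attribute(name="unit", doc="unit of measure", dtype="text"),
    Attribute(name="rate", doc="sampling rate in Hz", dtype="float32", value=30.0),
]
# Each AttributeAdapter returns a BuildResult whose single slot is collected
slots = [AttributeAdapter(cls=a).build().slots[0] for a in attributes]
# slots[1].equals_number == 30.0 and slots[1].ifabsent == "float(30.0)"
```
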
diff --git a/nwb_linkml/src/nwb_linkml/adapters/dataset.py b/nwb_linkml/src/nwb_linkml/adapters/dataset.py
index e9268cb..bcbd109 100644
--- a/nwb_linkml/src/nwb_linkml/adapters/dataset.py
+++ b/nwb_linkml/src/nwb_linkml/adapters/dataset.py
@@ -7,13 +7,13 @@ from typing import ClassVar, Optional, Type
 
 from linkml_runtime.linkml_model.meta import ArrayExpression, SlotDefinition
 
-from nwb_linkml.adapters.adapter import BuildResult
+from nwb_linkml.adapters.adapter import BuildResult, is_1d, is_compound, has_attrs
 from nwb_linkml.adapters.array import ArrayAdapter
 from nwb_linkml.adapters.classes import ClassAdapter
 from nwb_linkml.maps import QUANTITY_MAP, Map
-from nwb_linkml.maps.dtype import flat_to_linkml
+from nwb_linkml.maps.dtype import flat_to_linkml, handle_dtype
 from nwb_linkml.maps.naming import camel_to_snake
-from nwb_schema_language import CompoundDtype, Dataset
+from nwb_schema_language import Dataset
 
 
 class DatasetMap(Map):
@@ -106,7 +106,7 @@ class MapScalar(DatasetMap):
             this_slot = SlotDefinition(
                 name=cls.name,
                 description=cls.doc,
-                range=ClassAdapter.handle_dtype(cls.dtype),
+                range=handle_dtype(cls.dtype),
                 **QUANTITY_MAP[cls.quantity],
             )
             res = BuildResult(slots=[this_slot])
@@ -203,9 +203,7 @@ class MapScalarAttributes(DatasetMap):
         """
         Map to a scalar attribute with an adjoining "value" slot
         """
-        value_slot = SlotDefinition(
-            name="value", range=ClassAdapter.handle_dtype(cls.dtype), required=True
-        )
+        value_slot = SlotDefinition(name="value", range=handle_dtype(cls.dtype), required=True)
         res.classes[0].attributes["value"] = value_slot
         return res
@@ -271,7 +269,7 @@ class MapListlike(DatasetMap):
           * - ``dtype``
             - ``Class``
         """
-        dtype = ClassAdapter.handle_dtype(cls.dtype)
+        dtype = handle_dtype(cls.dtype)
         return (
             cls.neurodata_type_inc != "VectorData"
             and is_1d(cls)
@@ -289,7 +287,7 @@ class MapListlike(DatasetMap):
         slot = SlotDefinition(
             name="value",
             multivalued=True,
-            range=ClassAdapter.handle_dtype(cls.dtype),
+            range=handle_dtype(cls.dtype),
             description=cls.doc,
             required=cls.quantity not in ("*", "?"),
             annotations=[{"source_type": "reference"}],
@@ -378,7 +376,7 @@ class MapArraylike(DatasetMap):
             - ``False``
         """
-        dtype = ClassAdapter.handle_dtype(cls.dtype)
+        dtype = handle_dtype(cls.dtype)
         return (
             cls.name
             and (all([cls.dims, cls.shape]) or cls.neurodata_type_inc == "VectorData")
@@ -409,7 +407,7 @@ class MapArraylike(DatasetMap):
             SlotDefinition(
                 name=name,
                 multivalued=False,
-                range=ClassAdapter.handle_dtype(cls.dtype),
+                range=handle_dtype(cls.dtype),
                 description=cls.doc,
                 required=cls.quantity not in ("*", "?"),
                 **expressions,
@@ -513,7 +511,7 @@ class MapArrayLikeAttributes(DatasetMap):
         """
         Check that we're an array with some additional metadata
         """
-        dtype = ClassAdapter.handle_dtype(cls.dtype)
+        dtype = handle_dtype(cls.dtype)
         return (
             all([cls.dims, cls.shape])
             and cls.neurodata_type_inc != "VectorData"
@@ -532,9 +530,7 @@ class MapArrayLikeAttributes(DatasetMap):
         array_adapter = ArrayAdapter(cls.dims, cls.shape)
         expressions = array_adapter.make_slot()
         # make a slot for the arraylike class
-        array_slot = SlotDefinition(
-            name="value", range=ClassAdapter.handle_dtype(cls.dtype), **expressions
-        )
+        array_slot = SlotDefinition(name="value", range=handle_dtype(cls.dtype), **expressions)
         res.classes[0].attributes.update({"value": array_slot})
         return res
@@ -596,7 +592,7 @@ class MapVectorClassRange(DatasetMap):
         Check that we are a VectorData object without any additional attributes
         with a dtype that refers to another class
         """
-        dtype = ClassAdapter.handle_dtype(cls.dtype)
+        dtype = handle_dtype(cls.dtype)
         return (
             cls.neurodata_type_inc == "VectorData"
             and cls.name
@@ -617,7 +613,7 @@ class MapVectorClassRange(DatasetMap):
             name=cls.name,
             description=cls.doc,
             multivalued=True,
-            range=ClassAdapter.handle_dtype(cls.dtype),
+            range=handle_dtype(cls.dtype),
             required=cls.quantity not in ("*", "?"),
         )
         res = BuildResult(slots=[this_slot])
@@ -672,7 +668,7 @@ class MapVectorClassRange(DatasetMap):
 #         this_slot = SlotDefinition(
 #             name=cls.name,
 #             description=cls.doc,
-#             range=ClassAdapter.handle_dtype(cls.dtype),
+#             range=handle_dtype(cls.dtype),
 #             multivalued=True,
 #         )
 #         # No need to make a class for us, so we replace the existing build results
@@ -783,7 +779,7 @@ class MapCompoundDtype(DatasetMap):
             slots[a_dtype.name] = SlotDefinition(
                 name=a_dtype.name,
                 description=a_dtype.doc,
-                range=ClassAdapter.handle_dtype(a_dtype.dtype),
+                range=handle_dtype(a_dtype.dtype),
                 **QUANTITY_MAP[cls.quantity],
             )
         res.classes[0].attributes.update(slots)
@@ -836,36 +832,3 @@ class DatasetAdapter(ClassAdapter):
             return None
         else:
             return matches[0]
-
-
-def is_1d(cls: Dataset) -> bool:
-    """
-    Check if the values of a dataset are 1-dimensional.
-
-    Specifically:
-    * a single-layer dim/shape list of length 1, or
-    * a nested dim/shape list where every nested spec is of length 1
-    """
-    return (
-        not any([isinstance(dim, list) for dim in cls.dims]) and len(cls.dims) == 1
-    ) or (  # nested list
-        all([isinstance(dim, list) for dim in cls.dims])
-        and len(cls.dims) == 1
-        and len(cls.dims[0]) == 1
-    )
-
-
-def is_compound(cls: Dataset) -> bool:
-    """Check if dataset has a compound dtype"""
-    return (
-        isinstance(cls.dtype, list)
-        and len(cls.dtype) > 0
-        and isinstance(cls.dtype[0], CompoundDtype)
-    )
-
-
-def has_attrs(cls: Dataset) -> bool:
-    """
-    Check if a dataset has any attributes at all without defaults
-    """
-    return len(cls.attributes) > 0 and all([not a.value for a in cls.attributes])
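
The helpers imported from adapter.py keep the dataset maps from competing over the same dataset; for instance, a compound-dtype dataset should be routed to `MapCompoundDtype` rather than to the array maps. A sketch, assuming the `CompoundDtype` model accepts these fields:

```python
# Illustrative only, not part of the patch
from nwb_linkml.adapters.adapter import is_compound
from nwb_schema_language import CompoundDtype, Dataset

ragged = Dataset(
    name="timeseries_index",
    doc="index into a timeseries",
    dtype=[
        CompoundDtype(name="idx_start", doc="start index", dtype="int32"),
        CompoundDtype(name="count", doc="number of samples", dtype="int32"),
    ],
)
print(is_compound(ragged))  # True: routed to MapCompoundDtype, not the array maps
```
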
diff --git a/nwb_linkml/src/nwb_linkml/maps/dtype.py b/nwb_linkml/src/nwb_linkml/maps/dtype.py
index 9a7756f..0cc7c79 100644
--- a/nwb_linkml/src/nwb_linkml/maps/dtype.py
+++ b/nwb_linkml/src/nwb_linkml/maps/dtype.py
@@ -7,6 +7,7 @@ from typing import Any, Type
 
 import nptyping
 import numpy as np
+from nwb_schema_language import CompoundDtype, DTypeType, FlatDtype, ReferenceDtype
 
 flat_to_linkml = {
     "float": "float",
@@ -185,3 +186,34 @@ def struct_from_dtype(dtype: np.dtype) -> Type[nptyping.Structure]:
     struct_pieces = [f"{k}: {flat_to_nptyping[v[0].name]}" for k, v in dtype.fields.items()]
     struct_dtype = ", ".join(struct_pieces)
     return nptyping.Structure[struct_dtype]
+
+
+def handle_dtype(dtype: DTypeType | None) -> str:
+    """
+    Get the string form of a dtype
+
+    Args:
+        dtype (:class:`.DTypeType`): Dtype to stringify
+
+    Returns:
+        str
+    """
+    if isinstance(dtype, ReferenceDtype):
+        return dtype.target_type
+    elif dtype is None or dtype == []:
+        # Some ill-defined datasets are "abstract" despite that not being in the schema language
+        return "AnyType"
+    elif isinstance(dtype, FlatDtype):
+        return dtype.value
+    elif isinstance(dtype, list) and isinstance(dtype[0], CompoundDtype):
+        # there is precisely one class that uses compound dtypes:
+        # TimeSeriesReferenceVectorData
+        # compoundDtypes are able to define a ragged table according to the schema
+        # but are used in this single case equivalently to attributes.
+        # so we'll... uh... treat them as slots.
+        # TODO
+        return "AnyType"
+
+    else:
+        # flat dtype
+        return dtype
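
`handle_dtype` now lives in maps/dtype.py so both the dataset and attribute adapters share one stringification path instead of reaching into `ClassAdapter`. Its expected behavior, sketched from the branches above (the `reftype` argument is an assumption about the `ReferenceDtype` model):

```python
# Illustrative only, not part of the patch
from nwb_linkml.maps.dtype import handle_dtype
from nwb_schema_language import ReferenceDtype

print(handle_dtype(None))     # "AnyType": under-specified dtypes stay maximally open
print(handle_dtype("int32"))  # "int32": flat string dtypes pass through unchanged
ref = ReferenceDtype(target_type="VectorData", reftype="object")
print(handle_dtype(ref))      # "VectorData": references resolve to their target class
```
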
diff --git a/scripts/generate_core.py b/scripts/generate_core.py
index 826bf30..6cbb83f 100644
--- a/scripts/generate_core.py
+++ b/scripts/generate_core.py
@@ -1,6 +1,8 @@
 import shutil
 import os
+import sys
 import traceback
+from pdb import post_mortem
 from argparse import ArgumentParser
 from pathlib import Path
 
@@ -53,6 +55,7 @@ def generate_versions(
     dry_run: bool = False,
     repo: GitRepo = NWB_CORE_REPO,
     hdmf_only=False,
+    pdb=False,
 ):
     """
     Generate linkml models for all versions
@@ -128,6 +131,11 @@ def generate_versions(
                     build_progress.update(pydantic_task, action="Built Pydantic")
 
             except Exception as e:
+                if pdb:
+                    live.stop()
+                    post_mortem()
+                    sys.exit(1)
+
                 build_progress.stop_task(linkml_task)
                 if linkml_task is not None:
                     build_progress.update(linkml_task, action="[bold red]LinkML Build Failed")
@@ -205,6 +213,7 @@ def parser() -> ArgumentParser:
         ),
         action="store_true",
     )
+    parser.add_argument("--pdb", help="Launch debugger on an error", action="store_true")
     return parser
 
 
@@ -222,7 +231,7 @@ def main():
         generate_core_yaml(args.yaml, args.dry_run, args.hdmf)
         generate_core_pydantic(args.yaml, args.pydantic, args.dry_run)
     else:
-        generate_versions(args.yaml, args.pydantic, args.dry_run, repo, args.hdmf)
+        generate_versions(args.yaml, args.pydantic, args.dry_run, repo, args.hdmf, pdb=args.pdb)
 
 
 if __name__ == "__main__":
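
The new `--pdb` flag wires the standard library's post-mortem debugger into the build loop; stopping the `live` display first keeps the debugger prompt usable. The equivalent pattern in isolation (`risky_build` is a hypothetical stand-in for the build calls):

```python
# Illustrative only, not part of the patch
import sys
from pdb import post_mortem

try:
    risky_build()  # hypothetical stand-in for the linkml/pydantic build steps
except Exception:
    post_mortem()  # inspect the live traceback interactively
    sys.exit(1)    # then exit non-zero, as the patched script does
```

Invocation would look like `python scripts/generate_core.py --pdb`.
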