From 994b79e0f2126d2aa04d48e83ddcfb010f4ab52f Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Mon, 12 Aug 2024 19:25:12 -0700 Subject: [PATCH] actually no that's stupid, linkml handles inheritance except for the one special case of compound dtypes which aren't a thing in linkml and are here used exclusively for 1d vectors. --- nwb_linkml/src/nwb_linkml/adapters/dataset.py | 13 +++--- .../src/nwb_linkml/adapters/namespaces.py | 41 +------------------ scripts/generate_core.py | 15 ++++--- 3 files changed, 16 insertions(+), 53 deletions(-) diff --git a/nwb_linkml/src/nwb_linkml/adapters/dataset.py b/nwb_linkml/src/nwb_linkml/adapters/dataset.py index 2490ef5..ef5eb61 100644 --- a/nwb_linkml/src/nwb_linkml/adapters/dataset.py +++ b/nwb_linkml/src/nwb_linkml/adapters/dataset.py @@ -742,6 +742,10 @@ class MapCompoundDtype(DatasetMap): We render them just as a class with each of the dtypes as slots - they are typically used by other datasets to create a table. + Since there is exactly one class (``TimeSeriesReferenceVectorData``) that uses compound dtypes + meaningfully, we just hardcode the behavior of inheriting the array shape from the VectorData + parent classes. Otherwise, linkml schemas correctly propagate the ``value`` property. + Eg. ``base.TimeSeriesReferenceVectorData`` .. code-block:: yaml @@ -784,24 +788,17 @@ class MapCompoundDtype(DatasetMap): Make a new class for this dtype, using its sub-dtypes as fields, and use it as the range for the parent class """ - # all the slots share the same ndarray spec if there is one - array = {} - if cls.dims or cls.shape: - array_adapter = ArrayAdapter(cls.dims, cls.shape) - array = array_adapter.make_slot() - slots = {} for a_dtype in cls.dtype: slots[a_dtype.name] = SlotDefinition( name=a_dtype.name, description=a_dtype.doc, range=handle_dtype(a_dtype.dtype), + array=ArrayExpression(exact_number_dimensions=1), **QUANTITY_MAP[cls.quantity], - **array, ) res.classes[0].attributes.update(slots) - # the compound dtype replaces the ``value`` slot, if present if "value" in res.classes[0].attributes: del res.classes[0].attributes["value"] return res diff --git a/nwb_linkml/src/nwb_linkml/adapters/namespaces.py b/nwb_linkml/src/nwb_linkml/adapters/namespaces.py index 59194e4..266906e 100644 --- a/nwb_linkml/src/nwb_linkml/adapters/namespaces.py +++ b/nwb_linkml/src/nwb_linkml/adapters/namespaces.py @@ -19,7 +19,7 @@ from nwb_linkml.adapters.adapter import Adapter, BuildResult from nwb_linkml.adapters.schema import SchemaAdapter from nwb_linkml.lang_elements import NwbLangSchema from nwb_linkml.ui import AdapterProgress -from nwb_schema_language import Namespaces, Group, Dataset +from nwb_schema_language import Namespaces class NamespacesAdapter(Adapter): @@ -196,45 +196,6 @@ class NamespacesAdapter(Adapter): return self - @model_validator(mode="after") - def _populate_inheritance(self): - """ - ensure properties from `neurodata_type_inc` are propaged through to inheriting classes. 
- - This seems super expensive but we'll optimize for perf later if that proves to be the case - """ - # don't use walk_types here so we can replace the objects as we mutate them - for sch in self.schemas: - for i, group in enumerate(sch.groups): - if getattr(group, "neurodata_type_inc", None) is not None: - merged_attrs = self._merge_inheritance(group) - sch.groups[i] = Group(**merged_attrs) - for i, dataset in enumerate(sch.datasets): - if getattr(dataset, "neurodata_type_inc", None) is not None: - merged_attrs = self._merge_inheritance(dataset) - sch.datasets[i] = Dataset(**merged_attrs) - return self - - def _merge_inheritance(self, obj: Group | Dataset) -> dict: - obj_dict = obj.model_dump(exclude_none=True) - if obj.neurodata_type_inc: - name = obj.neurodata_type_def if obj.neurodata_type_def else obj.name - self.logger.debug(f"Merging {name} with {obj.neurodata_type_inc}") - # there must be only one type with this name - parent: Group | Dataset = next( - self.walk_field_values(self, "neurodata_type_def", obj.neurodata_type_inc) - ) - if obj.neurodata_type_def == "TimeSeriesReferenceVectorData": - pdb.set_trace() - parent_dict = copy(self._merge_inheritance(parent)) - # children don't inherit the type_def - del parent_dict["neurodata_type_def"] - # overwrite with child values - parent_dict.update(obj_dict) - return parent_dict - - return obj_dict - def to_yaml(self, base_dir: Path) -> None: """ Build the schemas, saving them to ``yaml`` files according to diff --git a/scripts/generate_core.py b/scripts/generate_core.py index 35faf43..af33c37 100644 --- a/scripts/generate_core.py +++ b/scripts/generate_core.py @@ -104,14 +104,19 @@ def generate_versions( repo.tag = version build_progress.update(linkml_task, advance=1, action="Load Namespaces") - # first load the core namespace - core_ns = io.load_namespace_adapter(repo.namespace_file) if repo.namespace == NWB_CORE_REPO: - # then the hdmf-common namespace + # first load HDMF common hdmf_common_ns = io.load_namespace_adapter( repo.temp_directory / "hdmf-common-schema" / "common" / "namespace.yaml" ) - core_ns.imported.append(hdmf_common_ns) + # then load nwb core + core_ns = io.load_namespace_adapter( + repo.namespace_file, imported=[hdmf_common_ns] + ) + + else: + # otherwise just load HDMF + core_ns = io.load_namespace_adapter(repo.namespace_file) build_progress.update(linkml_task, advance=1, action="Build LinkML") @@ -169,7 +174,7 @@ def generate_versions( # import the most recent version of the schemaz we built latest_version = sorted( - (pydantic_path / "pydantic" / "core").iterdir(), key=os.path.getmtime + (pydantic_path / "pydantic" / "core").glob('v*'), key=os.path.getmtime )[-1] # make inits to use the schema! we don't usually do this in the
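
Illustration (not part of the patch): a minimal sketch of what the new ``MapCompoundDtype`` branch amounts to - each field of a compound dtype becomes its own 1-D slot, and the inherited ``value`` slot is dropped. Hard-coding ``exact_number_dimensions=1`` is safe only because, as the commit message notes, compound dtypes are used exclusively for 1-d vectors. The sketch assumes a linkml-runtime recent enough to ship ``ArrayExpression``; ``CompoundField``, the toy ``dtype_to_range`` mapping, and the example field docs are hypothetical stand-ins for nwb_linkml's compound-dtype model and ``handle_dtype`` helper, not the project's real API.

.. code-block:: python

    from typing import NamedTuple

    from linkml_runtime.linkml_model.meta import ArrayExpression, SlotDefinition


    class CompoundField(NamedTuple):
        """Stand-in for one entry of an NWB compound dtype."""

        name: str
        doc: str
        dtype: str


    def compound_dtype_to_slots(fields: list[CompoundField]) -> dict[str, SlotDefinition]:
        """Build one 1-D slot per compound-dtype field, replacing the single ``value`` slot."""
        dtype_to_range = {"int32": "integer", "object": "TimeSeries"}  # toy mapping
        return {
            f.name: SlotDefinition(
                name=f.name,
                description=f.doc,
                range=dtype_to_range.get(f.dtype, "AnyType"),
                # every field of the compound dtype is a column, i.e. a 1-D vector
                array=ArrayExpression(exact_number_dimensions=1),
            )
            for f in fields
        }


    if __name__ == "__main__":
        # example fields in the spirit of TimeSeriesReferenceVectorData; docs are illustrative
        fields = [
            CompoundField("idx_start", "start index into the referenced TimeSeries", "int32"),
            CompoundField("count", "number of samples referenced", "int32"),
            CompoundField("timeseries", "the referenced TimeSeries object", "object"),
        ]
        for name, slot in compound_dtype_to_slots(fields).items():
            print(name, slot.array.exact_number_dimensions)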