From 749703e0779790c072500bfba8c65a742be2b0d4 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Fri, 13 Sep 2024 02:42:01 -0700 Subject: [PATCH] partially functioning rolldown, but getting some wonky results - missing attributes in nested inheritance, and the models are now extremely noisy, creating the same fields over and over even when they aren't overridden or modified by the child class. Need to redo the rolldown, make it less generic, don't dump to dicts, merge in a more targeted way. --- nwb_linkml/src/nwb_linkml/adapters/adapter.py | 8 +- nwb_linkml/src/nwb_linkml/adapters/dataset.py | 37 +- .../src/nwb_linkml/adapters/namespaces.py | 78 +++-- nwb_linkml/src/nwb_linkml/io/schema.py | 2 +- nwb_linkml/src/nwb_linkml/plot.py | 2 +- nwb_linkml/src/nwb_linkml/providers/linkml.py | 2 +- nwb_linkml/src/nwb_linkml/util.py | 73 ++++ .../test_adapters/test_adapter_namespaces.py | 42 ++- .../models/pydantic/hdmf_common/__init__.py | 1 + .../datamodel/nwb_schema_pydantic.py | 315 ++++++++++++++---- .../src/nwb_schema_language/generator.py | 20 +- 11 files changed, 489 insertions(+), 91 deletions(-) create mode 100644 nwb_linkml/src/nwb_linkml/util.py diff --git a/nwb_linkml/src/nwb_linkml/adapters/adapter.py b/nwb_linkml/src/nwb_linkml/adapters/adapter.py index f7b4f2f..cb16165 100644 --- a/nwb_linkml/src/nwb_linkml/adapters/adapter.py +++ b/nwb_linkml/src/nwb_linkml/adapters/adapter.py @@ -17,7 +17,7 @@ from linkml_runtime.linkml_model import ( SlotDefinition, TypeDefinition, ) -from pydantic import BaseModel +from pydantic import BaseModel, PrivateAttr from nwb_linkml.logging import init_logger from nwb_schema_language import Attribute, CompoundDtype, Dataset, Group, Schema @@ -103,6 +103,7 @@ class Adapter(BaseModel): _logger: Optional[Logger] = None _debug: Optional[bool] = None + _nwb_classes: dict[str, Dataset | Group] = PrivateAttr(default_factory=dict) @property def debug(self) -> bool: @@ -135,7 +136,10 @@ class Adapter(BaseModel): Convenience wrapper 
class MapNVectorData(DatasetMap):
    """
    Special case for the anonymous ``VectorData`` columns of ``DynamicTable``.

    ``DynamicTable`` declares its extra columns as an unnamed dataset that includes
    ``VectorData`` with a ``*`` quantity, similar to the case handled by
    :class:`.MapNVectors` .

    Those columns are handled by the :mod:`.includes.hdmf` module mixin classes instead,
    so to avoid generating a pointless slot and class this map matches that case
    and builds nothing for it.
    """

    @classmethod
    def check(c, cls: Dataset) -> bool:
        """
        Check for being an unnamed, multivalued dataset that includes ``VectorData``

        Args:
            cls (:class:`.Dataset`): The dataset to test

        Returns:
            bool: ``True`` only when the dataset has no ``name``, declares no
            ``neurodata_type_def``, includes exactly ``"VectorData"``,
            and has a quantity of ``*`` or ``+``
        """
        # Comparing against the literal already rules out an unset/empty
        # ``neurodata_type_inc``; a separate truthiness operand would leak ``None`` or ``""``
        # out of a method annotated as returning ``bool``.
        return (
            cls.name is None
            and cls.neurodata_type_def is None
            and cls.neurodata_type_inc == "VectorData"
            and cls.quantity in ("*", "+")
        )

    @classmethod
    def apply(
        c, cls: Dataset, res: Optional[BuildResult] = None, name: Optional[str] = None
    ) -> BuildResult:
        """
        Return an empty :class:`.BuildResult`: nothing is generated for this dataset

        Args:
            cls (:class:`.Dataset`): The matched dataset (unused)
            res (:class:`.BuildResult`): Any previously built result (unused, not carried over)
            name (str): Optional name override (unused)

        Returns:
            :class:`.BuildResult`: A fresh result constructed with no arguments
        """
        return BuildResult()
""" - self.populate_imports() + self._populate_imports() self._roll_down_inheritance() for i in self.imported: @@ -175,7 +175,7 @@ class NamespacesAdapter(Adapter): self._completed = True - def _roll_down_inheritance(self): + def _roll_down_inheritance(self) -> None: """ nwb-schema-language inheritance doesn't work like normal python inheritance - instead of inheriting everything at the 'top level' of a class, it also @@ -184,21 +184,59 @@ class NamespacesAdapter(Adapter): References: https://github.com/NeurodataWithoutBorders/pynwb/issues/1954 """ - pass + for cls in self.walk_types(self, (Group, Dataset)): + if not cls.neurodata_type_inc: + continue - def inheritance_graph(self) -> nx.DiGraph: - """ - Make a graph of all ``neurodata_types`` in the namespace and imports such that - each node contains the group or dataset it describes, - and has directed edges pointing at all the classes that inherit from it. + # get parents + parent = self.get(cls.neurodata_type_inc) + parents = [parent] + while parent.neurodata_type_inc: + parent = self.get(parent.neurodata_type_inc) + parents.insert(0, parent) + parents.append(cls) - In the case that the inheriting class does not itself have a ``neurodata_type_def``, - it is - """ - g = nx.DiGraph() - for sch in self.all_schemas(): - for cls in sch.created_classes: - pass + # merge and cast + # note that we don't want to exclude_none in the model dump here, + # if the child class has a field completely unset, we want to inherit it + # from the parent without rolling it down - we are only rolling down + # the things that need to be modified/merged in the child + new_cls: dict = {} + for parent in parents: + new_cls = merge_dicts( + new_cls, + parent.model_dump(exclude_unset=True), + list_key="name", + exclude=["neurodata_type_def"], + ) + new_cls: Group | Dataset = type(cls)(**new_cls) + new_cls.parent = cls.parent + + # reinsert + if new_cls.parent: + if isinstance(cls, Dataset): + 
new_cls.parent.datasets[new_cls.parent.datasets.index(cls)] = new_cls + else: + new_cls.parent.groups[new_cls.parent.groups.index(cls)] = new_cls + else: + # top level class, need to go and find it + found = False + for schema in self.all_schemas(): + if isinstance(cls, Dataset): + if cls in schema.datasets: + schema.datasets[schema.datasets.index(cls)] = new_cls + found = True + break + else: + if cls in schema.groups: + schema.groups[schema.groups.index(cls)] = new_cls + found = True + break + if not found: + raise KeyError( + f"Unable to find source schema for {cls} when reinserting after rolling" + " down!" + ) def find_type_source(self, name: str) -> SchemaAdapter: """ @@ -238,7 +276,7 @@ class NamespacesAdapter(Adapter): else: raise KeyError(f"No schema found that define {name}") - def populate_imports(self) -> "NamespacesAdapter": + def _populate_imports(self) -> "NamespacesAdapter": """ Populate the imports that are needed for each schema file @@ -338,5 +376,5 @@ class NamespacesAdapter(Adapter): for sch in self.schemas: yield sch for imported in self.imported: - for sch in imported: + for sch in imported.schemas: yield sch diff --git a/nwb_linkml/src/nwb_linkml/io/schema.py b/nwb_linkml/src/nwb_linkml/io/schema.py index 8f960c7..065d0d3 100644 --- a/nwb_linkml/src/nwb_linkml/io/schema.py +++ b/nwb_linkml/src/nwb_linkml/io/schema.py @@ -131,7 +131,7 @@ def load_namespace_adapter( else: adapter = NamespacesAdapter(namespaces=namespaces, schemas=sch) - adapter.populate_imports() + adapter.complete_namespaces() return adapter diff --git a/nwb_linkml/src/nwb_linkml/plot.py b/nwb_linkml/src/nwb_linkml/plot.py index e4cb4c9..e08f536 100644 --- a/nwb_linkml/src/nwb_linkml/plot.py +++ b/nwb_linkml/src/nwb_linkml/plot.py @@ -85,7 +85,7 @@ def make_node( def make_graph(namespaces: "NamespacesAdapter", recurse: bool = True) -> List[CytoElement]: - namespaces.populate_imports() + namespaces.complete_namespaces() nodes = [] element: Namespace | Group | Dataset 
print("walking graph") diff --git a/nwb_linkml/src/nwb_linkml/providers/linkml.py b/nwb_linkml/src/nwb_linkml/providers/linkml.py index fe8dec5..c106389 100644 --- a/nwb_linkml/src/nwb_linkml/providers/linkml.py +++ b/nwb_linkml/src/nwb_linkml/providers/linkml.py @@ -127,7 +127,7 @@ class LinkMLProvider(Provider): for schema_needs in adapter.needed_imports.values(): for needed in schema_needs: adapter.imported.append(ns_adapters[needed]) - adapter.populate_imports() + adapter.complete_namespaces() # then do the build res = {} diff --git a/nwb_linkml/src/nwb_linkml/util.py b/nwb_linkml/src/nwb_linkml/util.py new file mode 100644 index 0000000..ca85357 --- /dev/null +++ b/nwb_linkml/src/nwb_linkml/util.py @@ -0,0 +1,73 @@ +""" +The much maligned junk drawer +""" + + +def merge_dicts( + source: dict, target: dict, list_key: str | None = None, exclude: list[str] | None = None +) -> dict: + """ + Deeply merge nested dictionaries, replacing already-declared keys rather than + e.g. merging lists as well + + Args: + source (dict): source dictionary + target (dict): target dictionary (values merged over source) + list_key (str | None): Optional: if present, merge lists of dicts using this to + identify matching dicts + exclude: (list[str] | None): Optional: if present, exclude keys from parent. 
+ + References: + https://stackoverflow.com/a/20666342/13113166 + + """ + if exclude is None: + exclude = [] + ret = {k: v for k, v in source.items() if k not in exclude} + for key, value in target.items(): + if key not in ret: + ret[key] = value + elif isinstance(value, dict): + if key in ret: + ret[key] = merge_dicts(ret[key], value, list_key, exclude) + else: + ret[key] = value + elif isinstance(value, list) and list_key and all([isinstance(v, dict) for v in value]): + src_keys = {v[list_key]: ret[key].index(v) for v in ret.get(key, {}) if list_key in v} + target_keys = {v[list_key]: value.index(v) for v in value if list_key in v} + + # all dicts not in target + # screwy double iteration to preserve dict order + new_val = [ + ret[key][src_keys[k]] + for k in src_keys + if k in set(src_keys.keys()) - set(target_keys.keys()) + ] + # all dicts not in source + new_val.extend( + [ + value[target_keys[k]] + for k in target_keys + if k in set(target_keys.keys()) - set(src_keys.keys()) + ] + ) + # merge dicts in both + new_val.extend( + [ + merge_dicts(ret[key][src_keys[k]], value[target_keys[k]], list_key, exclude) + for k in target_keys + if k in set(src_keys.keys()).intersection(set(target_keys.keys())) + ] + ) + new_val = sorted(new_val, key=lambda i: i[list_key]) + # add any dicts that don't have the list_key + # they can't be merged since they can't be matched + new_val.extend([v for v in ret.get(key, {}) if list_key not in v]) + new_val.extend([v for v in value if list_key not in v]) + + ret[key] = new_val + + else: + ret[key] = value + + return ret diff --git a/nwb_linkml/tests/test_adapters/test_adapter_namespaces.py b/nwb_linkml/tests/test_adapters/test_adapter_namespaces.py index 768669b..2052778 100644 --- a/nwb_linkml/tests/test_adapters/test_adapter_namespaces.py +++ b/nwb_linkml/tests/test_adapters/test_adapter_namespaces.py @@ -1,7 +1,9 @@ -import pytest from pathlib import Path + +import pytest + from nwb_linkml.adapters import NamespacesAdapter, 
SchemaAdapter -from nwb_schema_language import Attribute, Group, Namespace, Dataset, Namespaces, Schema, FlatDtype +from nwb_schema_language import Attribute, Dataset, FlatDtype, Group, Namespace, Namespaces, Schema @pytest.mark.parametrize( @@ -20,7 +22,7 @@ def test_find_type_source(nwb_core_fixture, class_name, schema_file, namespace_n def test_populate_imports(nwb_core_fixture): - nwb_core_fixture.populate_imports() + nwb_core_fixture._populate_imports() schema: SchemaAdapter assert len(nwb_core_fixture.schemas) > 0 for schema in nwb_core_fixture.schemas: @@ -97,14 +99,15 @@ def test_roll_down_inheritance(): neurodata_type_def="Child", neurodata_type_inc="Parent", doc="child", - attributes=[Attribute(name="a", doc="a")], + attributes=[Attribute(name="a", doc="a", value="z")], datasets=[ Dataset( name="data", doc="data again", - attributes=[Attribute(name="a", doc="c", value="z"), Attribute(name="c", doc="c")], - ) + attributes=[Attribute(name="c", doc="c", value="z"), Attribute(name="e", doc="e")], + ), ], + groups=[Group(name="untyped_child", neurodata_type_inc="Parent", doc="untyped child")], ) child_sch = Schema(source="child.yaml") child_ns = Namespaces( @@ -130,3 +133,30 @@ def test_roll_down_inheritance(): child_ns_adapter.complete_namespaces() child = child_ns_adapter.get("Child") + # overrides simple attrs + assert child.doc == "child" + # gets unassigned parent attrs + assert "b" in [attr.name for attr in child.attributes] + # overrides values while preserving remaining values when set + attr_a = [attr for attr in child.attributes if attr.name == "a"][0] + assert attr_a.value == "z" + assert attr_a.dims == parent_cls.attributes[0].dims + assert [attr.value for attr in child.attributes if attr.name == "a"][0] == "z" + + # preserve unset values in child datasets + assert child.datasets[0].dtype == parent_cls.datasets[0].dtype + assert child.datasets[0].dims == parent_cls.datasets[0].dims + # gets undeclared attrs in child datasets + assert "d" in 
[attr.name for attr in child.datasets[0].attributes] + # overrides set values in child datasets while preserving unset + c_attr = [attr for attr in child.datasets[0].attributes if attr.name == "c"][0] + assert c_attr.value == "z" + assert c_attr.dtype == FlatDtype.int32 + # preserves new attrs + assert "e" in [attr.name for attr in child.datasets[0].attributes] + + # neurodata_type_def is not included in untyped children + assert child.groups[0].neurodata_type_def is None + # we don't set any of the attrs from the parent class here because we don't override them, + # so we don't need to merge them, and we don't want to clutter our linkml models unnecessarily + assert child.groups[0].attributes is None diff --git a/nwb_models/src/nwb_models/models/pydantic/hdmf_common/__init__.py b/nwb_models/src/nwb_models/models/pydantic/hdmf_common/__init__.py index e69de29..8b13789 100644 --- a/nwb_models/src/nwb_models/models/pydantic/hdmf_common/__init__.py +++ b/nwb_models/src/nwb_models/models/pydantic/hdmf_common/__init__.py @@ -0,0 +1 @@ + diff --git a/nwb_schema_language/src/nwb_schema_language/datamodel/nwb_schema_pydantic.py b/nwb_schema_language/src/nwb_schema_language/datamodel/nwb_schema_pydantic.py index d1bbac3..ca7e8be 100644 --- a/nwb_schema_language/src/nwb_schema_language/datamodel/nwb_schema_pydantic.py +++ b/nwb_schema_language/src/nwb_schema_language/datamodel/nwb_schema_pydantic.py @@ -81,7 +81,10 @@ linkml_meta = LinkMLMeta( "see_also": ["https://p2p_ld.github.io/nwb-schema-language"], "settings": { "email": {"setting_key": "email", "setting_value": "\\S+@\\S+{\\.\\w}+"}, - "protected_string": {"setting_key": "protected_string", "setting_value": "^[A-Za-z_][A-Za-z0-9_]*$"}, + "protected_string": { + "setting_key": "protected_string", + "setting_value": "^[A-Za-z_][A-Za-z0-9_]*$", + }, }, "source_file": "/Users/jonny/git/p2p-ld/nwb-linkml/nwb_schema_language/src/nwb_schema_language/schema/nwb_schema_language.yaml", "title": "nwb-schema-language", @@ -180,7 
+183,15 @@ class Namespace(ConfiguredBaseModel): json_schema_extra={ "linkml_meta": { "alias": "doc", - "domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Schema", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], } }, ) @@ -189,7 +200,14 @@ class Namespace(ConfiguredBaseModel): json_schema_extra={ "linkml_meta": { "alias": "name", - "domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"}, } }, @@ -199,7 +217,9 @@ class Namespace(ConfiguredBaseModel): description="""Optional string with extended full name for the namespace.""", json_schema_extra={"linkml_meta": {"alias": "full_name", "domain_of": ["Namespace"]}}, ) - version: str = Field(..., json_schema_extra={"linkml_meta": {"alias": "version", "domain_of": ["Namespace"]}}) + version: str = Field( + ..., json_schema_extra={"linkml_meta": {"alias": "version", "domain_of": ["Namespace"]}} + ) date: Optional[datetime] = Field( None, description="""Date that a namespace was last modified or released""", @@ -215,7 +235,13 @@ class Namespace(ConfiguredBaseModel): author: List[str] | str = Field( ..., description="""List of strings with the names of the authors of the namespace.""", - json_schema_extra={"linkml_meta": {"alias": "author", "domain_of": ["Namespace"], "slot_uri": "schema:author"}}, + json_schema_extra={ + "linkml_meta": { + "alias": "author", + "domain_of": ["Namespace"], + "slot_uri": "schema:author", + } + }, ) contact: List[str] | str = Field( ..., @@ -238,10 +264,13 @@ class Namespace(ConfiguredBaseModel): class Namespaces(ConfiguredBaseModel): - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}) + linkml_meta: ClassVar[LinkMLMeta] = 
LinkMLMeta( + {"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"} + ) namespaces: Optional[List[Namespace]] = Field( - None, json_schema_extra={"linkml_meta": {"alias": "namespaces", "domain_of": ["Namespaces"]}} + None, + json_schema_extra={"linkml_meta": {"alias": "namespaces", "domain_of": ["Namespaces"]}}, ) @@ -252,29 +281,51 @@ class Schema(ConfiguredBaseModel): "rules": [ { "description": "If namespace is absent, source is required", - "postconditions": {"slot_conditions": {"source": {"name": "source", "required": True}}}, + "postconditions": { + "slot_conditions": {"source": {"name": "source", "required": True}} + }, "preconditions": { - "slot_conditions": {"namespace": {"name": "namespace", "value_presence": "ABSENT"}} + "slot_conditions": { + "namespace": {"name": "namespace", "value_presence": "ABSENT"} + } }, }, { "description": "If source is absent, namespace is required.", - "postconditions": {"slot_conditions": {"namespace": {"name": "namespace", "required": True}}}, - "preconditions": {"slot_conditions": {"source": {"name": "source", "value_presence": "ABSENT"}}}, + "postconditions": { + "slot_conditions": {"namespace": {"name": "namespace", "required": True}} + }, + "preconditions": { + "slot_conditions": { + "source": {"name": "source", "value_presence": "ABSENT"} + } + }, }, { "description": "If namespace is present, source is cannot be", - "postconditions": {"slot_conditions": {"source": {"name": "source", "value_presence": "ABSENT"}}}, + "postconditions": { + "slot_conditions": { + "source": {"name": "source", "value_presence": "ABSENT"} + } + }, "preconditions": { - "slot_conditions": {"namespace": {"name": "namespace", "value_presence": "PRESENT"}} + "slot_conditions": { + "namespace": {"name": "namespace", "value_presence": "PRESENT"} + } }, }, { "description": "If source is present, namespace cannot be.", "postconditions": { - "slot_conditions": {"namespace": {"name": "namespace", "value_presence": "ABSENT"}} + "slot_conditions": 
{ + "namespace": {"name": "namespace", "value_presence": "ABSENT"} + } + }, + "preconditions": { + "slot_conditions": { + "source": {"name": "source", "value_presence": "PRESENT"} + } }, - "preconditions": {"slot_conditions": {"source": {"name": "source", "value_presence": "PRESENT"}}}, }, ], } @@ -311,14 +362,24 @@ class Schema(ConfiguredBaseModel): json_schema_extra={ "linkml_meta": { "alias": "doc", - "domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Schema", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], } }, ) class Group(ConfiguredBaseModel, ParentizeMixin): - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}) + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( + {"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"} + ) neurodata_type_def: Optional[str] = Field( None, @@ -347,7 +408,14 @@ class Group(ConfiguredBaseModel, ParentizeMixin): json_schema_extra={ "linkml_meta": { "alias": "name", - "domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"}, } }, @@ -368,7 +436,15 @@ class Group(ConfiguredBaseModel, ParentizeMixin): json_schema_extra={ "linkml_meta": { "alias": "doc", - "domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Schema", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], } }, ) @@ -380,21 +456,32 @@ class Group(ConfiguredBaseModel, ParentizeMixin): "any_of": [{"minimum_value": 1, "range": "integer"}, {"range": "QuantityEnum"}], "domain_of": ["Group", "Link", "Dataset"], "ifabsent": "int(1)", - "todos": ["logic to check that the corresponding class can only be " 
"implemented quantity times."], + "todos": [ + "logic to check that the corresponding class can only be " + "implemented quantity times." + ], } }, ) linkable: Optional[bool] = Field( - None, json_schema_extra={"linkml_meta": {"alias": "linkable", "domain_of": ["Group", "Dataset"]}} + None, + json_schema_extra={"linkml_meta": {"alias": "linkable", "domain_of": ["Group", "Dataset"]}}, ) attributes: Optional[List[Attribute]] = Field( - None, json_schema_extra={"linkml_meta": {"alias": "attributes", "domain_of": ["Group", "Dataset"]}} + None, + json_schema_extra={ + "linkml_meta": {"alias": "attributes", "domain_of": ["Group", "Dataset"]} + }, ) datasets: Optional[List[Dataset]] = Field( - None, json_schema_extra={"linkml_meta": {"alias": "datasets", "domain_of": ["Group", "Datasets"]}} + None, + json_schema_extra={ + "linkml_meta": {"alias": "datasets", "domain_of": ["Group", "Datasets"]} + }, ) groups: Optional[List[Group]] = Field( - None, json_schema_extra={"linkml_meta": {"alias": "groups", "domain_of": ["Group", "Groups"]}} + None, + json_schema_extra={"linkml_meta": {"alias": "groups", "domain_of": ["Group", "Groups"]}}, ) links: Optional[List[Link]] = Field( None, json_schema_extra={"linkml_meta": {"alias": "links", "domain_of": ["Group"]}} @@ -403,27 +490,41 @@ class Group(ConfiguredBaseModel, ParentizeMixin): None, exclude=True, description="""The parent group that contains this dataset or group""", - json_schema_extra={"linkml_meta": {"alias": "parent", "domain_of": ["Group", "Attribute", "Dataset"]}}, + json_schema_extra={ + "linkml_meta": {"alias": "parent", "domain_of": ["Group", "Attribute", "Dataset"]} + }, ) class Groups(ConfiguredBaseModel): - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}) + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( + {"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"} + ) groups: Optional[List[Group]] = Field( - None, json_schema_extra={"linkml_meta": 
{"alias": "groups", "domain_of": ["Group", "Groups"]}} + None, + json_schema_extra={"linkml_meta": {"alias": "groups", "domain_of": ["Group", "Groups"]}}, ) class Link(ConfiguredBaseModel): - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}) + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( + {"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"} + ) name: Optional[str] = Field( None, json_schema_extra={ "linkml_meta": { "alias": "name", - "domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"}, } }, @@ -434,14 +535,24 @@ class Link(ConfiguredBaseModel): json_schema_extra={ "linkml_meta": { "alias": "doc", - "domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Schema", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], } }, ) target_type: str = Field( ..., description="""Describes the neurodata_type of the target that the reference points to""", - json_schema_extra={"linkml_meta": {"alias": "target_type", "domain_of": ["Link", "ReferenceDtype"]}}, + json_schema_extra={ + "linkml_meta": {"alias": "target_type", "domain_of": ["Link", "ReferenceDtype"]} + }, ) quantity: Optional[Union[QuantityEnum, int]] = Field( "1", @@ -451,27 +562,39 @@ class Link(ConfiguredBaseModel): "any_of": [{"minimum_value": 1, "range": "integer"}, {"range": "QuantityEnum"}], "domain_of": ["Group", "Link", "Dataset"], "ifabsent": "int(1)", - "todos": ["logic to check that the corresponding class can only be " "implemented quantity times."], + "todos": [ + "logic to check that the corresponding class can only be " + "implemented quantity times." 
+ ], } }, ) class Datasets(ConfiguredBaseModel): - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}) + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( + {"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"} + ) datasets: Optional[List[Dataset]] = Field( - None, json_schema_extra={"linkml_meta": {"alias": "datasets", "domain_of": ["Group", "Datasets"]}} + None, + json_schema_extra={ + "linkml_meta": {"alias": "datasets", "domain_of": ["Group", "Datasets"]} + }, ) class ReferenceDtype(ConfiguredBaseModel): - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}) + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( + {"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"} + ) target_type: str = Field( ..., description="""Describes the neurodata_type of the target that the reference points to""", - json_schema_extra={"linkml_meta": {"alias": "target_type", "domain_of": ["Link", "ReferenceDtype"]}}, + json_schema_extra={ + "linkml_meta": {"alias": "target_type", "domain_of": ["Link", "ReferenceDtype"]} + }, ) reftype: Optional[ReftypeOptions] = Field( None, @@ -501,7 +624,14 @@ class CompoundDtype(ConfiguredBaseModel): json_schema_extra={ "linkml_meta": { "alias": "name", - "domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"}, } }, @@ -512,7 +642,15 @@ class CompoundDtype(ConfiguredBaseModel): json_schema_extra={ "linkml_meta": { "alias": "doc", - "domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Schema", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], } }, ) @@ -535,8 +673,12 @@ class DtypeMixin(ConfiguredBaseModel): "mixin": 
True, "rules": [ { - "postconditions": {"slot_conditions": {"dtype": {"multivalued": False, "name": "dtype"}}}, - "preconditions": {"slot_conditions": {"dtype": {"name": "dtype", "range": "FlatDtype"}}}, + "postconditions": { + "slot_conditions": {"dtype": {"multivalued": False, "name": "dtype"}} + }, + "preconditions": { + "slot_conditions": {"dtype": {"name": "dtype", "range": "FlatDtype"}} + }, } ], } @@ -547,7 +689,11 @@ class DtypeMixin(ConfiguredBaseModel): json_schema_extra={ "linkml_meta": { "alias": "dtype", - "any_of": [{"range": "FlatDtype"}, {"range": "CompoundDtype"}, {"range": "ReferenceDtype"}], + "any_of": [ + {"range": "FlatDtype"}, + {"range": "CompoundDtype"}, + {"range": "ReferenceDtype"}, + ], "domain_of": ["CompoundDtype", "DtypeMixin"], } }, @@ -571,7 +717,14 @@ class Attribute(DtypeMixin): json_schema_extra={ "linkml_meta": { "alias": "name", - "domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"}, } }, @@ -611,12 +764,16 @@ class Attribute(DtypeMixin): value: Optional[Any] = Field( None, description="""Optional constant, fixed value for the attribute.""", - json_schema_extra={"linkml_meta": {"alias": "value", "domain_of": ["Attribute", "Dataset"]}}, + json_schema_extra={ + "linkml_meta": {"alias": "value", "domain_of": ["Attribute", "Dataset"]} + }, ) default_value: Optional[Any] = Field( None, description="""Optional default value for variable-valued attributes.""", - json_schema_extra={"linkml_meta": {"alias": "default_value", "domain_of": ["Attribute", "Dataset"]}}, + json_schema_extra={ + "linkml_meta": {"alias": "default_value", "domain_of": ["Attribute", "Dataset"]} + }, ) doc: str = Field( ..., @@ -624,14 +781,24 @@ class Attribute(DtypeMixin): json_schema_extra={ "linkml_meta": { "alias": "doc", - "domain_of": 
["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Schema", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], } }, ) required: Optional[bool] = Field( True, description="""Optional boolean key describing whether the attribute is required. Default value is True.""", - json_schema_extra={"linkml_meta": {"alias": "required", "domain_of": ["Attribute"], "ifabsent": "true"}}, + json_schema_extra={ + "linkml_meta": {"alias": "required", "domain_of": ["Attribute"], "ifabsent": "true"} + }, ) parent: Optional[Union[Dataset, Group]] = Field( None, @@ -650,7 +817,11 @@ class Attribute(DtypeMixin): json_schema_extra={ "linkml_meta": { "alias": "dtype", - "any_of": [{"range": "FlatDtype"}, {"range": "CompoundDtype"}, {"range": "ReferenceDtype"}], + "any_of": [ + {"range": "FlatDtype"}, + {"range": "CompoundDtype"}, + {"range": "ReferenceDtype"}, + ], "domain_of": ["CompoundDtype", "DtypeMixin"], } }, @@ -689,7 +860,14 @@ class Dataset(ConfiguredBaseModel, ParentizeMixin): json_schema_extra={ "linkml_meta": { "alias": "name", - "domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"}, } }, @@ -739,12 +917,16 @@ class Dataset(ConfiguredBaseModel, ParentizeMixin): value: Optional[Any] = Field( None, description="""Optional constant, fixed value for the attribute.""", - json_schema_extra={"linkml_meta": {"alias": "value", "domain_of": ["Attribute", "Dataset"]}}, + json_schema_extra={ + "linkml_meta": {"alias": "value", "domain_of": ["Attribute", "Dataset"]} + }, ) default_value: Optional[Any] = Field( None, description="""Optional default value for variable-valued attributes.""", - json_schema_extra={"linkml_meta": {"alias": "default_value", "domain_of": ["Attribute", "Dataset"]}}, + 
json_schema_extra={ + "linkml_meta": {"alias": "default_value", "domain_of": ["Attribute", "Dataset"]} + }, ) doc: str = Field( ..., @@ -752,7 +934,15 @@ class Dataset(ConfiguredBaseModel, ParentizeMixin): json_schema_extra={ "linkml_meta": { "alias": "doc", - "domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Schema", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], } }, ) @@ -764,28 +954,41 @@ class Dataset(ConfiguredBaseModel, ParentizeMixin): "any_of": [{"minimum_value": 1, "range": "integer"}, {"range": "QuantityEnum"}], "domain_of": ["Group", "Link", "Dataset"], "ifabsent": "int(1)", - "todos": ["logic to check that the corresponding class can only be " "implemented quantity times."], + "todos": [ + "logic to check that the corresponding class can only be " + "implemented quantity times." + ], } }, ) linkable: Optional[bool] = Field( - None, json_schema_extra={"linkml_meta": {"alias": "linkable", "domain_of": ["Group", "Dataset"]}} + None, + json_schema_extra={"linkml_meta": {"alias": "linkable", "domain_of": ["Group", "Dataset"]}}, ) attributes: Optional[List[Attribute]] = Field( - None, json_schema_extra={"linkml_meta": {"alias": "attributes", "domain_of": ["Group", "Dataset"]}} + None, + json_schema_extra={ + "linkml_meta": {"alias": "attributes", "domain_of": ["Group", "Dataset"]} + }, ) parent: Optional[Group] = Field( None, exclude=True, description="""The parent group that contains this dataset or group""", - json_schema_extra={"linkml_meta": {"alias": "parent", "domain_of": ["Group", "Attribute", "Dataset"]}}, + json_schema_extra={ + "linkml_meta": {"alias": "parent", "domain_of": ["Group", "Attribute", "Dataset"]} + }, ) dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field( None, json_schema_extra={ "linkml_meta": { "alias": "dtype", - "any_of": [{"range": "FlatDtype"}, {"range": "CompoundDtype"}, {"range": 
"ReferenceDtype"}], + "any_of": [ + {"range": "FlatDtype"}, + {"range": "CompoundDtype"}, + {"range": "ReferenceDtype"}, + ], "domain_of": ["CompoundDtype", "DtypeMixin"], } }, diff --git a/nwb_schema_language/src/nwb_schema_language/generator.py b/nwb_schema_language/src/nwb_schema_language/generator.py index eefad6b..38519a4 100644 --- a/nwb_schema_language/src/nwb_schema_language/generator.py +++ b/nwb_schema_language/src/nwb_schema_language/generator.py @@ -1,5 +1,9 @@ -from pathlib import Path +""" +Customization of linkml pydantic generator +""" + from dataclasses import dataclass +from pathlib import Path from linkml.generators.pydanticgen import PydanticGenerator from linkml.generators.pydanticgen.build import ClassResult @@ -9,9 +13,10 @@ from pydantic import BaseModel, model_validator class ParentizeMixin(BaseModel): + """Mixin to populate the parent field for nested datasets and groups""" @model_validator(mode="after") - def parentize(self): + def parentize(self) -> BaseModel: """Set the parent attribute for all our fields they have one""" for field_name in self.model_fields: if field_name == "parent": @@ -28,6 +33,9 @@ class ParentizeMixin(BaseModel): @dataclass class NWBSchemaLangGenerator(PydanticGenerator): + """ + Customization of linkml pydantic generator + """ def __init__(self, *args, **kwargs): kwargs["injected_classes"] = [ParentizeMixin] @@ -38,12 +46,18 @@ class NWBSchemaLangGenerator(PydanticGenerator): super().__init__(*args, **kwargs) def after_generate_class(self, cls: ClassResult, sv: SchemaView) -> ClassResult: + """ + Add the ParentizeMixin to the bases of Dataset and Group + """ if cls.cls.name in ("Dataset", "Group"): cls.cls.bases = ["ConfiguredBaseModel", "ParentizeMixin"] return cls -def generate(): +def generate() -> None: + """ + Generate pydantic models for nwb_schema_language + """ schema = Path(__file__).parent / "schema" / "nwb_schema_language.yaml" output = Path(__file__).parent / "datamodel" / "nwb_schema_pydantic.py" 
generator = NWBSchemaLangGenerator(schema=schema)