diff --git a/nwb_linkml/src/nwb_linkml/adapters/adapter.py b/nwb_linkml/src/nwb_linkml/adapters/adapter.py index f7b4f2f..cb16165 100644 --- a/nwb_linkml/src/nwb_linkml/adapters/adapter.py +++ b/nwb_linkml/src/nwb_linkml/adapters/adapter.py @@ -17,7 +17,7 @@ from linkml_runtime.linkml_model import ( SlotDefinition, TypeDefinition, ) -from pydantic import BaseModel +from pydantic import BaseModel, PrivateAttr from nwb_linkml.logging import init_logger from nwb_schema_language import Attribute, CompoundDtype, Dataset, Group, Schema @@ -103,6 +103,7 @@ class Adapter(BaseModel): _logger: Optional[Logger] = None _debug: Optional[bool] = None + _nwb_classes: dict[str, Dataset | Group] = PrivateAttr(default_factory=dict) @property def debug(self) -> bool: @@ -135,7 +136,10 @@ class Adapter(BaseModel): Convenience wrapper around :meth:`.walk_field_values` """ - return next(self.walk_field_values(self, "neurodata_type_def", name)) + if name not in self._nwb_classes: + cls = next(self.walk_field_values(self, "neurodata_type_def", name)) + self._nwb_classes[name] = cls + return self._nwb_classes[name] def get_model_with_field(self, field: str) -> Generator[Union[Group, Dataset], None, None]: """ diff --git a/nwb_linkml/src/nwb_linkml/adapters/dataset.py b/nwb_linkml/src/nwb_linkml/adapters/dataset.py index f0b0053..39d4450 100644 --- a/nwb_linkml/src/nwb_linkml/adapters/dataset.py +++ b/nwb_linkml/src/nwb_linkml/adapters/dataset.py @@ -616,7 +616,8 @@ class MapNVectors(DatasetMap): DynamicTable (and the slot VectorData where this is called for) is handled specially and just dropped, because we handle the possibility for - arbitrary extra VectorData in the :mod:`nwb_linkml.includes.hdmf` module mixin classes. + arbitrary extra VectorData in the :mod:`nwb_linkml.includes.hdmf` module mixin classes + (see :class:`.MapNVectorData` ). So really this is just a handler for the `Images` case """ @@ -652,6 +653,40 @@ class MapNVectors(DatasetMap): return res +class MapNVectorData(DatasetMap): + """ + An extremely special case just for DynamicTable: + DynamicTable indicates that all of its extra columns are ``VectorData`` with an + unnamed, * quantity dataset similar to the case of :class:`.MapNVectors` . + + We handle this with the :mod:`.includes.hdmf` module mixin classes instead, + and so to avoid generating a pointless slot and class, + we just catch that case and return nothing. + """ + + @classmethod + def check(c, cls: Dataset) -> bool: + """ + Check for being an unnamed multivalued vector class that IS VectorData + """ + return ( + cls.name is None + and cls.neurodata_type_def is None + and cls.neurodata_type_inc + and cls.neurodata_type_inc == "VectorData" + and cls.quantity in ("*", "+") + ) + + @classmethod + def apply( + c, cls: Dataset, res: Optional[BuildResult] = None, name: Optional[str] = None + ) -> BuildResult: + """ + Return ... nothing + """ + return BuildResult() + + class MapCompoundDtype(DatasetMap): """ A ``dtype`` declared as an array of types that function effectively as a row in a table. diff --git a/nwb_linkml/src/nwb_linkml/adapters/namespaces.py b/nwb_linkml/src/nwb_linkml/adapters/namespaces.py index 96d653e..78e3027 100644 --- a/nwb_linkml/src/nwb_linkml/adapters/namespaces.py +++ b/nwb_linkml/src/nwb_linkml/adapters/namespaces.py @@ -14,13 +14,13 @@ from typing import Dict, Generator, List, Optional from linkml_runtime.dumpers import yaml_dumper from linkml_runtime.linkml_model import Annotation, SchemaDefinition from pydantic import Field, model_validator -import networkx as nx from nwb_linkml.adapters.adapter import Adapter, BuildResult from nwb_linkml.adapters.schema import SchemaAdapter from nwb_linkml.lang_elements import NwbLangSchema from nwb_linkml.ui import AdapterProgress -from nwb_schema_language import Namespaces +from nwb_linkml.util import merge_dicts +from nwb_schema_language import Dataset, Group, Namespaces class NamespacesAdapter(Adapter): @@ -156,7 +156,7 @@ class NamespacesAdapter(Adapter): break return self - def complete_namespaces(self): + def complete_namespaces(self) -> None: """ After loading the namespace, and after any imports have been added afterwards, this must be called to complete the definitions of the contained schema objects. @@ -167,7 +167,7 @@ class NamespacesAdapter(Adapter): It **is** automatically called if it hasn't been already by the :meth:`.build` method. """ - self.populate_imports() + self._populate_imports() self._roll_down_inheritance() for i in self.imported: @@ -175,7 +175,7 @@ class NamespacesAdapter(Adapter): self._completed = True - def _roll_down_inheritance(self): + def _roll_down_inheritance(self) -> None: """ nwb-schema-language inheritance doesn't work like normal python inheritance - instead of inheriting everything at the 'top level' of a class, it also @@ -184,21 +184,59 @@ class NamespacesAdapter(Adapter): References: https://github.com/NeurodataWithoutBorders/pynwb/issues/1954 """ - pass + for cls in self.walk_types(self, (Group, Dataset)): + if not cls.neurodata_type_inc: + continue - def inheritance_graph(self) -> nx.DiGraph: - """ - Make a graph of all ``neurodata_types`` in the namespace and imports such that - each node contains the group or dataset it describes, - and has directed edges pointing at all the classes that inherit from it. + # get parents + parent = self.get(cls.neurodata_type_inc) + parents = [parent] + while parent.neurodata_type_inc: + parent = self.get(parent.neurodata_type_inc) + parents.insert(0, parent) + parents.append(cls) - In the case that the inheriting class does not itself have a ``neurodata_type_def``, - it is - """ - g = nx.DiGraph() - for sch in self.all_schemas(): - for cls in sch.created_classes: - pass + # merge and cast + # note that we don't want to exclude_none in the model dump here, + # if the child class has a field completely unset, we want to inherit it + # from the parent without rolling it down - we are only rolling down + # the things that need to be modified/merged in the child + new_cls: dict = {} + for parent in parents: + new_cls = merge_dicts( + new_cls, + parent.model_dump(exclude_unset=True), + list_key="name", + exclude=["neurodata_type_def"], + ) + new_cls: Group | Dataset = type(cls)(**new_cls) + new_cls.parent = cls.parent + + # reinsert + if new_cls.parent: + if isinstance(cls, Dataset): + new_cls.parent.datasets[new_cls.parent.datasets.index(cls)] = new_cls + else: + new_cls.parent.groups[new_cls.parent.groups.index(cls)] = new_cls + else: + # top level class, need to go and find it + found = False + for schema in self.all_schemas(): + if isinstance(cls, Dataset): + if cls in schema.datasets: + schema.datasets[schema.datasets.index(cls)] = new_cls + found = True + break + else: + if cls in schema.groups: + schema.groups[schema.groups.index(cls)] = new_cls + found = True + break + if not found: + raise KeyError( + f"Unable to find source schema for {cls} when reinserting after rolling" + " down!" + ) def find_type_source(self, name: str) -> SchemaAdapter: """ @@ -238,7 +276,7 @@ class NamespacesAdapter(Adapter): else: raise KeyError(f"No schema found that define {name}") - def populate_imports(self) -> "NamespacesAdapter": + def _populate_imports(self) -> "NamespacesAdapter": """ Populate the imports that are needed for each schema file @@ -338,5 +376,5 @@ class NamespacesAdapter(Adapter): for sch in self.schemas: yield sch for imported in self.imported: - for sch in imported: + for sch in imported.schemas: yield sch diff --git a/nwb_linkml/src/nwb_linkml/io/schema.py b/nwb_linkml/src/nwb_linkml/io/schema.py index 8f960c7..065d0d3 100644 --- a/nwb_linkml/src/nwb_linkml/io/schema.py +++ b/nwb_linkml/src/nwb_linkml/io/schema.py @@ -131,7 +131,7 @@ def load_namespace_adapter( else: adapter = NamespacesAdapter(namespaces=namespaces, schemas=sch) - adapter.populate_imports() + adapter.complete_namespaces() return adapter diff --git a/nwb_linkml/src/nwb_linkml/plot.py b/nwb_linkml/src/nwb_linkml/plot.py index e4cb4c9..e08f536 100644 --- a/nwb_linkml/src/nwb_linkml/plot.py +++ b/nwb_linkml/src/nwb_linkml/plot.py @@ -85,7 +85,7 @@ def make_node( def make_graph(namespaces: "NamespacesAdapter", recurse: bool = True) -> List[CytoElement]: - namespaces.populate_imports() + namespaces.complete_namespaces() nodes = [] element: Namespace | Group | Dataset print("walking graph") diff --git a/nwb_linkml/src/nwb_linkml/providers/linkml.py b/nwb_linkml/src/nwb_linkml/providers/linkml.py index fe8dec5..c106389 100644 --- a/nwb_linkml/src/nwb_linkml/providers/linkml.py +++ b/nwb_linkml/src/nwb_linkml/providers/linkml.py @@ -127,7 +127,7 @@ class LinkMLProvider(Provider): for schema_needs in adapter.needed_imports.values(): for needed in schema_needs: adapter.imported.append(ns_adapters[needed]) - adapter.populate_imports() + adapter.complete_namespaces() # then do the build res = {} diff --git a/nwb_linkml/src/nwb_linkml/util.py b/nwb_linkml/src/nwb_linkml/util.py new file mode 100644 index 0000000..ca85357 --- /dev/null +++ b/nwb_linkml/src/nwb_linkml/util.py @@ -0,0 +1,73 @@ +""" +The much maligned junk drawer +""" + + +def merge_dicts( + source: dict, target: dict, list_key: str | None = None, exclude: list[str] | None = None +) -> dict: + """ + Deeply merge nested dictionaries, replacing already-declared keys rather than + e.g. merging lists as well + + Args: + source (dict): source dictionary + target (dict): target dictionary (values merged over source) + list_key (str | None): Optional: if present, merge lists of dicts using this to + identify matching dicts + exclude: (list[str] | None): Optional: if present, exclude keys from parent. + + References: + https://stackoverflow.com/a/20666342/13113166 + + """ + if exclude is None: + exclude = [] + ret = {k: v for k, v in source.items() if k not in exclude} + for key, value in target.items(): + if key not in ret: + ret[key] = value + elif isinstance(value, dict): + if key in ret: + ret[key] = merge_dicts(ret[key], value, list_key, exclude) + else: + ret[key] = value + elif isinstance(value, list) and list_key and all([isinstance(v, dict) for v in value]): + src_keys = {v[list_key]: ret[key].index(v) for v in ret.get(key, {}) if list_key in v} + target_keys = {v[list_key]: value.index(v) for v in value if list_key in v} + + # all dicts not in target + # screwy double iteration to preserve dict order + new_val = [ + ret[key][src_keys[k]] + for k in src_keys + if k in set(src_keys.keys()) - set(target_keys.keys()) + ] + # all dicts not in source + new_val.extend( + [ + value[target_keys[k]] + for k in target_keys + if k in set(target_keys.keys()) - set(src_keys.keys()) + ] + ) + # merge dicts in both + new_val.extend( + [ + merge_dicts(ret[key][src_keys[k]], value[target_keys[k]], list_key, exclude) + for k in target_keys + if k in set(src_keys.keys()).intersection(set(target_keys.keys())) + ] + ) + new_val = sorted(new_val, key=lambda i: i[list_key]) + # add any dicts that don't have the list_key + # they can't be merged since they can't be matched + new_val.extend([v for v in ret.get(key, {}) if list_key not in v]) + new_val.extend([v for v in value if list_key not in v]) + + ret[key] = new_val + + else: + ret[key] = value + + return ret diff --git a/nwb_linkml/tests/test_adapters/test_adapter_namespaces.py b/nwb_linkml/tests/test_adapters/test_adapter_namespaces.py index 768669b..2052778 100644 --- a/nwb_linkml/tests/test_adapters/test_adapter_namespaces.py +++ b/nwb_linkml/tests/test_adapters/test_adapter_namespaces.py @@ -1,7 +1,9 @@ -import pytest from pathlib import Path + +import pytest + from nwb_linkml.adapters import NamespacesAdapter, SchemaAdapter -from nwb_schema_language import Attribute, Group, Namespace, Dataset, Namespaces, Schema, FlatDtype +from nwb_schema_language import Attribute, Dataset, FlatDtype, Group, Namespace, Namespaces, Schema @pytest.mark.parametrize( @@ -20,7 +22,7 @@ def test_find_type_source(nwb_core_fixture, class_name, schema_file, namespace_n def test_populate_imports(nwb_core_fixture): - nwb_core_fixture.populate_imports() + nwb_core_fixture._populate_imports() schema: SchemaAdapter assert len(nwb_core_fixture.schemas) > 0 for schema in nwb_core_fixture.schemas: @@ -97,14 +99,15 @@ def test_roll_down_inheritance(): neurodata_type_def="Child", neurodata_type_inc="Parent", doc="child", - attributes=[Attribute(name="a", doc="a")], + attributes=[Attribute(name="a", doc="a", value="z")], datasets=[ Dataset( name="data", doc="data again", - attributes=[Attribute(name="a", doc="c", value="z"), Attribute(name="c", doc="c")], - ) + attributes=[Attribute(name="c", doc="c", value="z"), Attribute(name="e", doc="e")], + ), ], + groups=[Group(name="untyped_child", neurodata_type_inc="Parent", doc="untyped child")], ) child_sch = Schema(source="child.yaml") child_ns = Namespaces( @@ -130,3 +133,30 @@ def test_roll_down_inheritance(): child_ns_adapter.complete_namespaces() child = child_ns_adapter.get("Child") + # overrides simple attrs + assert child.doc == "child" + # gets unassigned parent attrs + assert "b" in [attr.name for attr in child.attributes] + # overrides values while preserving remaining values when set + attr_a = [attr for attr in child.attributes if attr.name == "a"][0] + assert attr_a.value == "z" + assert attr_a.dims == parent_cls.attributes[0].dims + assert [attr.value for attr in child.attributes if attr.name == "a"][0] == "z" + + # preserve unset values in child datasets + assert child.datasets[0].dtype == parent_cls.datasets[0].dtype + assert child.datasets[0].dims == parent_cls.datasets[0].dims + # gets undeclared attrs in child datasets + assert "d" in [attr.name for attr in child.datasets[0].attributes] + # overrides set values in child datasets while preserving unset + c_attr = [attr for attr in child.datasets[0].attributes if attr.name == "c"][0] + assert c_attr.value == "z" + assert c_attr.dtype == FlatDtype.int32 + # preserves new attrs + assert "e" in [attr.name for attr in child.datasets[0].attributes] + + # neurodata_type_def is not included in untyped children + assert child.groups[0].neurodata_type_def is None + # we don't set any of the attrs from the parent class here because we don't override them, + # so we don't need to merge them, and we don't want to clutter our linkml models unnecessarily + assert child.groups[0].attributes is None diff --git a/nwb_models/src/nwb_models/models/pydantic/hdmf_common/__init__.py b/nwb_models/src/nwb_models/models/pydantic/hdmf_common/__init__.py index e69de29..8b13789 100644 --- a/nwb_models/src/nwb_models/models/pydantic/hdmf_common/__init__.py +++ b/nwb_models/src/nwb_models/models/pydantic/hdmf_common/__init__.py @@ -0,0 +1 @@ + diff --git a/nwb_schema_language/src/nwb_schema_language/datamodel/nwb_schema_pydantic.py b/nwb_schema_language/src/nwb_schema_language/datamodel/nwb_schema_pydantic.py index d1bbac3..ca7e8be 100644 --- a/nwb_schema_language/src/nwb_schema_language/datamodel/nwb_schema_pydantic.py +++ b/nwb_schema_language/src/nwb_schema_language/datamodel/nwb_schema_pydantic.py @@ -81,7 +81,10 @@ linkml_meta = LinkMLMeta( "see_also": ["https://p2p_ld.github.io/nwb-schema-language"], "settings": { "email": {"setting_key": "email", "setting_value": "\\S+@\\S+{\\.\\w}+"}, - "protected_string": {"setting_key": "protected_string", "setting_value": "^[A-Za-z_][A-Za-z0-9_]*$"}, + "protected_string": { + "setting_key": "protected_string", + "setting_value": "^[A-Za-z_][A-Za-z0-9_]*$", + }, }, "source_file": "/Users/jonny/git/p2p-ld/nwb-linkml/nwb_schema_language/src/nwb_schema_language/schema/nwb_schema_language.yaml", "title": "nwb-schema-language", @@ -180,7 +183,15 @@ class Namespace(ConfiguredBaseModel): json_schema_extra={ "linkml_meta": { "alias": "doc", - "domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Schema", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], } }, ) @@ -189,7 +200,14 @@ class Namespace(ConfiguredBaseModel): json_schema_extra={ "linkml_meta": { "alias": "name", - "domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"}, } }, @@ -199,7 +217,9 @@ class Namespace(ConfiguredBaseModel): description="""Optional string with extended full name for the namespace.""", json_schema_extra={"linkml_meta": {"alias": "full_name", "domain_of": ["Namespace"]}}, ) - version: str = Field(..., json_schema_extra={"linkml_meta": {"alias": "version", "domain_of": ["Namespace"]}}) + version: str = Field( + ..., json_schema_extra={"linkml_meta": {"alias": "version", "domain_of": ["Namespace"]}} + ) date: Optional[datetime] = Field( None, description="""Date that a namespace was last modified or released""", @@ -215,7 +235,13 @@ class Namespace(ConfiguredBaseModel): author: List[str] | str = Field( ..., description="""List of strings with the names of the authors of the namespace.""", - json_schema_extra={"linkml_meta": {"alias": "author", "domain_of": ["Namespace"], "slot_uri": "schema:author"}}, + json_schema_extra={ + "linkml_meta": { + "alias": "author", + "domain_of": ["Namespace"], + "slot_uri": "schema:author", + } + }, ) contact: List[str] | str = Field( ..., @@ -238,10 +264,13 @@ class Namespace(ConfiguredBaseModel): class Namespaces(ConfiguredBaseModel): - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}) + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( + {"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"} + ) namespaces: Optional[List[Namespace]] = Field( - None, json_schema_extra={"linkml_meta": {"alias": "namespaces", "domain_of": ["Namespaces"]}} + None, + json_schema_extra={"linkml_meta": {"alias": "namespaces", "domain_of": ["Namespaces"]}}, ) @@ -252,29 +281,51 @@ class Schema(ConfiguredBaseModel): "rules": [ { "description": "If namespace is absent, source is required", - "postconditions": {"slot_conditions": {"source": {"name": "source", "required": True}}}, + "postconditions": { + "slot_conditions": {"source": {"name": "source", "required": True}} + }, "preconditions": { - "slot_conditions": {"namespace": {"name": "namespace", "value_presence": "ABSENT"}} + "slot_conditions": { + "namespace": {"name": "namespace", "value_presence": "ABSENT"} + } }, }, { "description": "If source is absent, namespace is required.", - "postconditions": {"slot_conditions": {"namespace": {"name": "namespace", "required": True}}}, - "preconditions": {"slot_conditions": {"source": {"name": "source", "value_presence": "ABSENT"}}}, + "postconditions": { + "slot_conditions": {"namespace": {"name": "namespace", "required": True}} + }, + "preconditions": { + "slot_conditions": { + "source": {"name": "source", "value_presence": "ABSENT"} + } + }, }, { "description": "If namespace is present, source is cannot be", - "postconditions": {"slot_conditions": {"source": {"name": "source", "value_presence": "ABSENT"}}}, + "postconditions": { + "slot_conditions": { + "source": {"name": "source", "value_presence": "ABSENT"} + } + }, "preconditions": { - "slot_conditions": {"namespace": {"name": "namespace", "value_presence": "PRESENT"}} + "slot_conditions": { + "namespace": {"name": "namespace", "value_presence": "PRESENT"} + } }, }, { "description": "If source is present, namespace cannot be.", "postconditions": { - "slot_conditions": {"namespace": {"name": "namespace", "value_presence": "ABSENT"}} + "slot_conditions": { + "namespace": {"name": "namespace", "value_presence": "ABSENT"} + } + }, + "preconditions": { + "slot_conditions": { + "source": {"name": "source", "value_presence": "PRESENT"} + } }, - "preconditions": {"slot_conditions": {"source": {"name": "source", "value_presence": "PRESENT"}}}, }, ], } @@ -311,14 +362,24 @@ class Schema(ConfiguredBaseModel): json_schema_extra={ "linkml_meta": { "alias": "doc", - "domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Schema", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], } }, ) class Group(ConfiguredBaseModel, ParentizeMixin): - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}) + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( + {"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"} + ) neurodata_type_def: Optional[str] = Field( None, @@ -347,7 +408,14 @@ class Group(ConfiguredBaseModel, ParentizeMixin): json_schema_extra={ "linkml_meta": { "alias": "name", - "domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"}, } }, @@ -368,7 +436,15 @@ class Group(ConfiguredBaseModel, ParentizeMixin): json_schema_extra={ "linkml_meta": { "alias": "doc", - "domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Schema", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], } }, ) @@ -380,21 +456,32 @@ class Group(ConfiguredBaseModel, ParentizeMixin): "any_of": [{"minimum_value": 1, "range": "integer"}, {"range": "QuantityEnum"}], "domain_of": ["Group", "Link", "Dataset"], "ifabsent": "int(1)", - "todos": ["logic to check that the corresponding class can only be " "implemented quantity times."], + "todos": [ + "logic to check that the corresponding class can only be " + "implemented quantity times." + ], } }, ) linkable: Optional[bool] = Field( - None, json_schema_extra={"linkml_meta": {"alias": "linkable", "domain_of": ["Group", "Dataset"]}} + None, + json_schema_extra={"linkml_meta": {"alias": "linkable", "domain_of": ["Group", "Dataset"]}}, ) attributes: Optional[List[Attribute]] = Field( - None, json_schema_extra={"linkml_meta": {"alias": "attributes", "domain_of": ["Group", "Dataset"]}} + None, + json_schema_extra={ + "linkml_meta": {"alias": "attributes", "domain_of": ["Group", "Dataset"]} + }, ) datasets: Optional[List[Dataset]] = Field( - None, json_schema_extra={"linkml_meta": {"alias": "datasets", "domain_of": ["Group", "Datasets"]}} + None, + json_schema_extra={ + "linkml_meta": {"alias": "datasets", "domain_of": ["Group", "Datasets"]} + }, ) groups: Optional[List[Group]] = Field( - None, json_schema_extra={"linkml_meta": {"alias": "groups", "domain_of": ["Group", "Groups"]}} + None, + json_schema_extra={"linkml_meta": {"alias": "groups", "domain_of": ["Group", "Groups"]}}, ) links: Optional[List[Link]] = Field( None, json_schema_extra={"linkml_meta": {"alias": "links", "domain_of": ["Group"]}} @@ -403,27 +490,41 @@ class Group(ConfiguredBaseModel, ParentizeMixin): None, exclude=True, description="""The parent group that contains this dataset or group""", - json_schema_extra={"linkml_meta": {"alias": "parent", "domain_of": ["Group", "Attribute", "Dataset"]}}, + json_schema_extra={ + "linkml_meta": {"alias": "parent", "domain_of": ["Group", "Attribute", "Dataset"]} + }, ) class Groups(ConfiguredBaseModel): - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}) + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( + {"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"} + ) groups: Optional[List[Group]] = Field( - None, json_schema_extra={"linkml_meta": {"alias": "groups", "domain_of": ["Group", "Groups"]}} + None, + json_schema_extra={"linkml_meta": {"alias": "groups", "domain_of": ["Group", "Groups"]}}, ) class Link(ConfiguredBaseModel): - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}) + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( + {"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"} + ) name: Optional[str] = Field( None, json_schema_extra={ "linkml_meta": { "alias": "name", - "domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"}, } }, @@ -434,14 +535,24 @@ class Link(ConfiguredBaseModel): json_schema_extra={ "linkml_meta": { "alias": "doc", - "domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Schema", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], } }, ) target_type: str = Field( ..., description="""Describes the neurodata_type of the target that the reference points to""", - json_schema_extra={"linkml_meta": {"alias": "target_type", "domain_of": ["Link", "ReferenceDtype"]}}, + json_schema_extra={ + "linkml_meta": {"alias": "target_type", "domain_of": ["Link", "ReferenceDtype"]} + }, ) quantity: Optional[Union[QuantityEnum, int]] = Field( "1", @@ -451,27 +562,39 @@ class Link(ConfiguredBaseModel): "any_of": [{"minimum_value": 1, "range": "integer"}, {"range": "QuantityEnum"}], "domain_of": ["Group", "Link", "Dataset"], "ifabsent": "int(1)", - "todos": ["logic to check that the corresponding class can only be " "implemented quantity times."], + "todos": [ + "logic to check that the corresponding class can only be " + "implemented quantity times." + ], } }, ) class Datasets(ConfiguredBaseModel): - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}) + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( + {"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"} + ) datasets: Optional[List[Dataset]] = Field( - None, json_schema_extra={"linkml_meta": {"alias": "datasets", "domain_of": ["Group", "Datasets"]}} + None, + json_schema_extra={ + "linkml_meta": {"alias": "datasets", "domain_of": ["Group", "Datasets"]} + }, ) class ReferenceDtype(ConfiguredBaseModel): - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}) + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( + {"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"} + ) target_type: str = Field( ..., description="""Describes the neurodata_type of the target that the reference points to""", - json_schema_extra={"linkml_meta": {"alias": "target_type", "domain_of": ["Link", "ReferenceDtype"]}}, + json_schema_extra={ + "linkml_meta": {"alias": "target_type", "domain_of": ["Link", "ReferenceDtype"]} + }, ) reftype: Optional[ReftypeOptions] = Field( None, @@ -501,7 +624,14 @@ class CompoundDtype(ConfiguredBaseModel): json_schema_extra={ "linkml_meta": { "alias": "name", - "domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"}, } }, @@ -512,7 +642,15 @@ class CompoundDtype(ConfiguredBaseModel): json_schema_extra={ "linkml_meta": { "alias": "doc", - "domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Schema", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], } }, ) @@ -535,8 +673,12 @@ class DtypeMixin(ConfiguredBaseModel): "mixin": True, "rules": [ { - "postconditions": {"slot_conditions": {"dtype": {"multivalued": False, "name": "dtype"}}}, - "preconditions": {"slot_conditions": {"dtype": {"name": "dtype", "range": "FlatDtype"}}}, + "postconditions": { + "slot_conditions": {"dtype": {"multivalued": False, "name": "dtype"}} + }, + "preconditions": { + "slot_conditions": {"dtype": {"name": "dtype", "range": "FlatDtype"}} + }, } ], } @@ -547,7 +689,11 @@ class DtypeMixin(ConfiguredBaseModel): json_schema_extra={ "linkml_meta": { "alias": "dtype", - "any_of": [{"range": "FlatDtype"}, {"range": "CompoundDtype"}, {"range": "ReferenceDtype"}], + "any_of": [ + {"range": "FlatDtype"}, + {"range": "CompoundDtype"}, + {"range": "ReferenceDtype"}, + ], "domain_of": ["CompoundDtype", "DtypeMixin"], } }, @@ -571,7 +717,14 @@ class Attribute(DtypeMixin): json_schema_extra={ "linkml_meta": { "alias": "name", - "domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"}, } }, @@ -611,12 +764,16 @@ class Attribute(DtypeMixin): value: Optional[Any] = Field( None, description="""Optional constant, fixed value for the attribute.""", - json_schema_extra={"linkml_meta": {"alias": "value", "domain_of": ["Attribute", "Dataset"]}}, + json_schema_extra={ + "linkml_meta": {"alias": "value", "domain_of": ["Attribute", "Dataset"]} + }, ) default_value: Optional[Any] = Field( None, description="""Optional default value for variable-valued attributes.""", - json_schema_extra={"linkml_meta": {"alias": "default_value", "domain_of": ["Attribute", "Dataset"]}}, + json_schema_extra={ + "linkml_meta": {"alias": "default_value", "domain_of": ["Attribute", "Dataset"]} + }, ) doc: str = Field( ..., @@ -624,14 +781,24 @@ class Attribute(DtypeMixin): json_schema_extra={ "linkml_meta": { "alias": "doc", - "domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Schema", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], } }, ) required: Optional[bool] = Field( True, description="""Optional boolean key describing whether the attribute is required. Default value is True.""", - json_schema_extra={"linkml_meta": {"alias": "required", "domain_of": ["Attribute"], "ifabsent": "true"}}, + json_schema_extra={ + "linkml_meta": {"alias": "required", "domain_of": ["Attribute"], "ifabsent": "true"} + }, ) parent: Optional[Union[Dataset, Group]] = Field( None, @@ -650,7 +817,11 @@ class Attribute(DtypeMixin): json_schema_extra={ "linkml_meta": { "alias": "dtype", - "any_of": [{"range": "FlatDtype"}, {"range": "CompoundDtype"}, {"range": "ReferenceDtype"}], + "any_of": [ + {"range": "FlatDtype"}, + {"range": "CompoundDtype"}, + {"range": "ReferenceDtype"}, + ], "domain_of": ["CompoundDtype", "DtypeMixin"], } }, @@ -689,7 +860,14 @@ class Dataset(ConfiguredBaseModel, ParentizeMixin): json_schema_extra={ "linkml_meta": { "alias": "name", - "domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"}, } }, @@ -739,12 +917,16 @@ class Dataset(ConfiguredBaseModel, ParentizeMixin): value: Optional[Any] = Field( None, description="""Optional constant, fixed value for the attribute.""", - json_schema_extra={"linkml_meta": {"alias": "value", "domain_of": ["Attribute", "Dataset"]}}, + json_schema_extra={ + "linkml_meta": {"alias": "value", "domain_of": ["Attribute", "Dataset"]} + }, ) default_value: Optional[Any] = Field( None, description="""Optional default value for variable-valued attributes.""", - json_schema_extra={"linkml_meta": {"alias": "default_value", "domain_of": ["Attribute", "Dataset"]}}, + json_schema_extra={ + "linkml_meta": {"alias": "default_value", "domain_of": ["Attribute", "Dataset"]} + }, ) doc: str = Field( ..., @@ -752,7 +934,15 @@ class Dataset(ConfiguredBaseModel, ParentizeMixin): json_schema_extra={ "linkml_meta": { "alias": "doc", - "domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], + "domain_of": [ + "Namespace", + "Schema", + "Group", + "Attribute", + "Link", + "Dataset", + "CompoundDtype", + ], } }, ) @@ -764,28 +954,41 @@ class Dataset(ConfiguredBaseModel, ParentizeMixin): "any_of": [{"minimum_value": 1, "range": "integer"}, {"range": "QuantityEnum"}], "domain_of": ["Group", "Link", "Dataset"], "ifabsent": "int(1)", - "todos": ["logic to check that the corresponding class can only be " "implemented quantity times."], + "todos": [ + "logic to check that the corresponding class can only be " + "implemented quantity times." + ], } }, ) linkable: Optional[bool] = Field( - None, json_schema_extra={"linkml_meta": {"alias": "linkable", "domain_of": ["Group", "Dataset"]}} + None, + json_schema_extra={"linkml_meta": {"alias": "linkable", "domain_of": ["Group", "Dataset"]}}, ) attributes: Optional[List[Attribute]] = Field( - None, json_schema_extra={"linkml_meta": {"alias": "attributes", "domain_of": ["Group", "Dataset"]}} + None, + json_schema_extra={ + "linkml_meta": {"alias": "attributes", "domain_of": ["Group", "Dataset"]} + }, ) parent: Optional[Group] = Field( None, exclude=True, description="""The parent group that contains this dataset or group""", - json_schema_extra={"linkml_meta": {"alias": "parent", "domain_of": ["Group", "Attribute", "Dataset"]}}, + json_schema_extra={ + "linkml_meta": {"alias": "parent", "domain_of": ["Group", "Attribute", "Dataset"]} + }, ) dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field( None, json_schema_extra={ "linkml_meta": { "alias": "dtype", - "any_of": [{"range": "FlatDtype"}, {"range": "CompoundDtype"}, {"range": "ReferenceDtype"}], + "any_of": [ + {"range": "FlatDtype"}, + {"range": "CompoundDtype"}, + {"range": "ReferenceDtype"}, + ], "domain_of": ["CompoundDtype", "DtypeMixin"], } }, diff --git a/nwb_schema_language/src/nwb_schema_language/generator.py b/nwb_schema_language/src/nwb_schema_language/generator.py index eefad6b..38519a4 100644 --- a/nwb_schema_language/src/nwb_schema_language/generator.py +++ b/nwb_schema_language/src/nwb_schema_language/generator.py @@ -1,5 +1,9 @@ -from pathlib import Path +""" +Customization of linkml pydantic generator +""" + from dataclasses import dataclass +from pathlib import Path from linkml.generators.pydanticgen import PydanticGenerator from linkml.generators.pydanticgen.build import ClassResult @@ -9,9 +13,10 @@ from pydantic import BaseModel, model_validator class ParentizeMixin(BaseModel): + """Mixin to populate the parent field for nested datasets and groups""" @model_validator(mode="after") - def parentize(self): + def parentize(self) -> BaseModel: """Set the parent attribute for all our fields they have one""" for field_name in self.model_fields: if field_name == "parent": @@ -28,6 +33,9 @@ class ParentizeMixin(BaseModel): @dataclass class NWBSchemaLangGenerator(PydanticGenerator): + """ + Customization of linkml pydantic generator + """ def __init__(self, *args, **kwargs): kwargs["injected_classes"] = [ParentizeMixin] @@ -38,12 +46,18 @@ class NWBSchemaLangGenerator(PydanticGenerator): super().__init__(*args, **kwargs) def after_generate_class(self, cls: ClassResult, sv: SchemaView) -> ClassResult: + """ + Add the ParentizeMixin to the bases of Dataset and Group + """ if cls.cls.name in ("Dataset", "Group"): cls.cls.bases = ["ConfiguredBaseModel", "ParentizeMixin"] return cls -def generate(): +def generate() -> None: + """ + Generate pydantic models for nwb_schema_language + """ schema = Path(__file__).parent / "schema" / "nwb_schema_language.yaml" output = Path(__file__).parent / "datamodel" / "nwb_schema_pydantic.py" generator = NWBSchemaLangGenerator(schema=schema)