Partially functioning rolldown, but it is producing some wonky results: attributes are missing in nested inheritance, and the models are now extremely noisy, re-creating the same fields over and over even when they aren't overridden or modified by the child class. Need to redo the rolldown: make it less generic, don't dump to dicts, and merge in a more targeted way.

2025-01-09 05:34:28 +00:00 · 2024-09-13 02:42:01 -07:00 · 2024-09-13 02:42:01 -07:00 · 749703e077
commit 749703e077
parent 880352d9a4
11 changed files with 489 additions and 91 deletions
--- a/nwb_linkml/src/nwb_linkml/adapters/adapter.py
+++ b/nwb_linkml/src/nwb_linkml/adapters/adapter.py
@ -17,7 +17,7 @@ from linkml_runtime.linkml_model import (
    SlotDefinition,
    TypeDefinition,
 )
-from pydantic import BaseModel
+from pydantic import BaseModel, PrivateAttr

 from nwb_linkml.logging import init_logger
 from nwb_schema_language import Attribute, CompoundDtype, Dataset, Group, Schema
@ -103,6 +103,7 @@ class Adapter(BaseModel):

    _logger: Optional[Logger] = None
    _debug: Optional[bool] = None
+    _nwb_classes: dict[str, Dataset | Group] = PrivateAttr(default_factory=dict)

    @property
    def debug(self) -> bool:
@ -135,7 +136,10 @@ class Adapter(BaseModel):

        Convenience wrapper around :meth:`.walk_field_values`
        """
-        return next(self.walk_field_values(self, "neurodata_type_def", name))
+        if name not in self._nwb_classes:
+            cls = next(self.walk_field_values(self, "neurodata_type_def", name))
+            self._nwb_classes[name] = cls
+        return self._nwb_classes[name]

    def get_model_with_field(self, field: str) -> Generator[Union[Group, Dataset], None, None]:
        """
--- a/nwb_linkml/src/nwb_linkml/adapters/dataset.py
+++ b/nwb_linkml/src/nwb_linkml/adapters/dataset.py
@ -616,7 +616,8 @@ class MapNVectors(DatasetMap):

    DynamicTable (and the slot VectorData where this is called for)
    is handled specially and just dropped, because we handle the possibility for
-    arbitrary extra VectorData in the :mod:`nwb_linkml.includes.hdmf` module mixin classes.
+    arbitrary extra VectorData in the :mod:`nwb_linkml.includes.hdmf` module mixin classes
+    (see :class:`.MapNVectorData` ).

    So really this is just a handler for the `Images` case
    """
@ -652,6 +653,40 @@ class MapNVectors(DatasetMap):
        return res


+class MapNVectorData(DatasetMap):
+    """
+    An extremely special case just for DynamicTable:
+    DynamicTable indicates that all of its extra columns are ``VectorData`` with an
+    unnamed, * quantity dataset similar to the case of :class:`.MapNVectors` .
+
+    We handle this with the :mod:`.includes.hdmf` module mixin classes instead,
+    and so to avoid generating a pointless slot and class,
+    we just catch that case and return nothing.
+    """
+
+    @classmethod
+    def check(c, cls: Dataset) -> bool:
+        """
+        Check for being an unnamed multivalued vector class that IS VectorData
+
+        Args:
+            cls (Dataset): the dataset to test
+
+        Returns:
+            bool: ``True`` only when the dataset has no ``name``, declares no
+            ``neurodata_type_def``, includes exactly ``VectorData``,
+            and has a quantity of ``*`` or ``+``
+        """
+        # every operand is a comparison, so the result is always a real ``bool``
+        # (an unset ``neurodata_type_inc`` simply fails the equality test)
+        return (
+            cls.name is None
+            and cls.neurodata_type_def is None
+            and cls.neurodata_type_inc == "VectorData"
+            and cls.quantity in ("*", "+")
+        )
+
+    @classmethod
+    def apply(
+        c, cls: Dataset, res: Optional[BuildResult] = None, name: Optional[str] = None
+    ) -> BuildResult:
+        """
+        Return ... nothing
+
+        All arguments are accepted to match the signature used by the other maps
+        and are ignored: the result is always a fresh, empty :class:`.BuildResult`
+        """
+        return BuildResult()
+
+
 class MapCompoundDtype(DatasetMap):
    """
    A ``dtype`` declared as an array of types that function effectively as a row in a table.
--- a/nwb_linkml/src/nwb_linkml/adapters/namespaces.py
+++ b/nwb_linkml/src/nwb_linkml/adapters/namespaces.py
@ -14,13 +14,13 @@ from typing import Dict, Generator, List, Optional
 from linkml_runtime.dumpers import yaml_dumper
 from linkml_runtime.linkml_model import Annotation, SchemaDefinition
 from pydantic import Field, model_validator
-import networkx as nx

 from nwb_linkml.adapters.adapter import Adapter, BuildResult
 from nwb_linkml.adapters.schema import SchemaAdapter
 from nwb_linkml.lang_elements import NwbLangSchema
 from nwb_linkml.ui import AdapterProgress
-from nwb_schema_language import Namespaces
+from nwb_linkml.util import merge_dicts
+from nwb_schema_language import Dataset, Group, Namespaces


 class NamespacesAdapter(Adapter):
@ -156,7 +156,7 @@ class NamespacesAdapter(Adapter):
                    break
        return self

-    def complete_namespaces(self):
+    def complete_namespaces(self) -> None:
        """
        After loading the namespace, and after any imports have been added afterwards,
        this must be called to complete the definitions of the contained schema objects.
@ -167,7 +167,7 @@ class NamespacesAdapter(Adapter):

        It **is** automatically called if it hasn't been already by the :meth:`.build` method.
        """
-        self.populate_imports()
+        self._populate_imports()
        self._roll_down_inheritance()

        for i in self.imported:
@ -175,7 +175,7 @@ class NamespacesAdapter(Adapter):

        self._completed = True

-    def _roll_down_inheritance(self):
+    def _roll_down_inheritance(self) -> None:
        """
        nwb-schema-language inheritance doesn't work like normal python inheritance -
        instead of inheriting everything at the 'top level' of a class, it also
@ -184,21 +184,59 @@ class NamespacesAdapter(Adapter):
        References:
            https://github.com/NeurodataWithoutBorders/pynwb/issues/1954
        """
-        pass
+        for cls in self.walk_types(self, (Group, Dataset)):
+            if not cls.neurodata_type_inc:
+                continue

-    def inheritance_graph(self) -> nx.DiGraph:
-        """
-        Make a graph of all ``neurodata_types`` in the namespace and imports such that
-        each node contains the group or dataset it describes,
-        and has directed edges pointing at all the classes that inherit from it.
+            # get parents
+            parent = self.get(cls.neurodata_type_inc)
+            parents = [parent]
+            while parent.neurodata_type_inc:
+                parent = self.get(parent.neurodata_type_inc)
+                parents.insert(0, parent)
+            parents.append(cls)

-        In the case that the inheriting class does not itself have a ``neurodata_type_def``,
-        it is
-        """
-        g = nx.DiGraph()
-        for sch in self.all_schemas():
-            for cls in sch.created_classes:
-                pass
+            # merge and cast
+            # note that we don't want to exclude_none in the model dump here,
+            # if the child class has a field completely unset, we want to inherit it
+            # from the parent without rolling it down - we are only rolling down
+            # the things that need to be modified/merged in the child
+            new_cls: dict = {}
+            for parent in parents:
+                new_cls = merge_dicts(
+                    new_cls,
+                    parent.model_dump(exclude_unset=True),
+                    list_key="name",
+                    exclude=["neurodata_type_def"],
+                )
+            new_cls: Group | Dataset = type(cls)(**new_cls)
+            new_cls.parent = cls.parent
+
+            # reinsert
+            if new_cls.parent:
+                if isinstance(cls, Dataset):
+                    new_cls.parent.datasets[new_cls.parent.datasets.index(cls)] = new_cls
+                else:
+                    new_cls.parent.groups[new_cls.parent.groups.index(cls)] = new_cls
+            else:
+                # top level class, need to go and find it
+                found = False
+                for schema in self.all_schemas():
+                    if isinstance(cls, Dataset):
+                        if cls in schema.datasets:
+                            schema.datasets[schema.datasets.index(cls)] = new_cls
+                            found = True
+                            break
+                    else:
+                        if cls in schema.groups:
+                            schema.groups[schema.groups.index(cls)] = new_cls
+                            found = True
+                            break
+                if not found:
+                    raise KeyError(
+                        f"Unable to find source schema for {cls} when reinserting after rolling"
+                        " down!"
+                    )

    def find_type_source(self, name: str) -> SchemaAdapter:
        """
@ -238,7 +276,7 @@ class NamespacesAdapter(Adapter):
        else:
            raise KeyError(f"No schema found that define {name}")

-    def populate_imports(self) -> "NamespacesAdapter":
+    def _populate_imports(self) -> "NamespacesAdapter":
        """
        Populate the imports that are needed for each schema file

@ -338,5 +376,5 @@ class NamespacesAdapter(Adapter):
        for sch in self.schemas:
            yield sch
        for imported in self.imported:
-            for sch in imported:
+            for sch in imported.schemas:
                yield sch
--- a/nwb_linkml/src/nwb_linkml/io/schema.py
+++ b/nwb_linkml/src/nwb_linkml/io/schema.py
@ -131,7 +131,7 @@ def load_namespace_adapter(
    else:
        adapter = NamespacesAdapter(namespaces=namespaces, schemas=sch)

-    adapter.populate_imports()
+    adapter.complete_namespaces()

    return adapter

--- a/nwb_linkml/src/nwb_linkml/plot.py
+++ b/nwb_linkml/src/nwb_linkml/plot.py
@ -85,7 +85,7 @@ def make_node(


 def make_graph(namespaces: "NamespacesAdapter", recurse: bool = True) -> List[CytoElement]:
-    namespaces.populate_imports()
+    namespaces.complete_namespaces()
    nodes = []
    element: Namespace | Group | Dataset
    print("walking graph")
--- a/nwb_linkml/src/nwb_linkml/providers/linkml.py
+++ b/nwb_linkml/src/nwb_linkml/providers/linkml.py
@ -127,7 +127,7 @@ class LinkMLProvider(Provider):
            for schema_needs in adapter.needed_imports.values():
                for needed in schema_needs:
                    adapter.imported.append(ns_adapters[needed])
-            adapter.populate_imports()
+            adapter.complete_namespaces()

        # then do the build
        res = {}
--- a/nwb_linkml/src/nwb_linkml/util.py
+++ b/nwb_linkml/src/nwb_linkml/util.py
@ -0,0 +1,73 @@
+"""
+The much maligned junk drawer
+"""
+
+
+def merge_dicts(
+    source: dict, target: dict, list_key: str | None = None, exclude: list[str] | None = None
+) -> dict:
+    """
+    Deeply merge ``target`` over ``source``, returning a new dictionary.
+
+    * Keys only present in one of the dictionaries are kept as-is.
+    * When both values are dictionaries, they are merged recursively.
+    * When both values are lists, ``list_key`` is given, and every item of the
+      ``target`` list is a dict, the lists are merged item-wise: dicts that share
+      the same value for ``list_key`` are merged recursively, all other keyed dicts
+      are kept, and the keyed dicts are sorted by their ``list_key`` value.
+      Items that can't be matched (no ``list_key``, or not a dict) are appended
+      after the keyed items, source items first.
+    * In every other case the value from ``target`` replaces the value from ``source``.
+
+    Neither ``source`` nor ``target`` is modified, but values that are not merged
+    are reused in the result rather than copied.
+
+    Args:
+        source (dict): source dictionary
+        target (dict): target dictionary (values merged over source)
+        list_key (str | None): Optional: if present, merge lists of dicts using this to
+            identify matching dicts
+        exclude: (list[str] | None): Optional: if present, drop these keys from
+            ``source`` (at every level of nesting) before merging.
+            Keys in ``target`` are never dropped.
+
+    Returns:
+        dict: the merged dictionary
+
+    References:
+        https://stackoverflow.com/a/20666342/13113166
+
+    """
+    if exclude is None:
+        exclude = []
+    ret = {k: v for k, v in source.items() if k not in exclude}
+    for key, value in target.items():
+        if key not in ret:
+            ret[key] = value
+            continue
+
+        current = ret[key]
+        if isinstance(value, dict) and isinstance(current, dict):
+            ret[key] = merge_dicts(current, value, list_key, exclude)
+        elif (
+            list_key
+            and isinstance(value, list)
+            and isinstance(current, list)
+            and all(isinstance(v, dict) for v in value)
+        ):
+            # index the items that can be matched by their ``list_key`` value.
+            # dicts preserve insertion order, so iteration order below follows
+            # the order of the original lists
+            src_by_key = {
+                v[list_key]: v for v in current if isinstance(v, dict) and list_key in v
+            }
+            target_by_key = {v[list_key]: v for v in value if list_key in v}
+
+            # all dicts not in target
+            new_val = [v for k, v in src_by_key.items() if k not in target_by_key]
+            # all dicts not in source
+            new_val.extend(v for k, v in target_by_key.items() if k not in src_by_key)
+            # merge dicts in both
+            new_val.extend(
+                merge_dicts(src_by_key[k], v, list_key, exclude)
+                for k, v in target_by_key.items()
+                if k in src_by_key
+            )
+            # keys are unique, so the leading flag only serves to place a ``None``
+            # key last instead of failing when comparing it against the other keys
+            new_val.sort(key=lambda i: (i[list_key] is None, i[list_key]))
+            # add any items that don't have the list_key
+            # they can't be merged since they can't be matched
+            new_val.extend(
+                v for v in current if not (isinstance(v, dict) and list_key in v)
+            )
+            new_val.extend(v for v in value if list_key not in v)
+
+            ret[key] = new_val
+        else:
+            # mismatched or scalar values: the target declaration wins
+            ret[key] = value
+
+    return ret
--- a/nwb_linkml/tests/test_adapters/test_adapter_namespaces.py
+++ b/nwb_linkml/tests/test_adapters/test_adapter_namespaces.py
@ -1,7 +1,9 @@
-import pytest
 from pathlib import Path
+
+import pytest
+
 from nwb_linkml.adapters import NamespacesAdapter, SchemaAdapter
-from nwb_schema_language import Attribute, Group, Namespace, Dataset, Namespaces, Schema, FlatDtype
+from nwb_schema_language import Attribute, Dataset, FlatDtype, Group, Namespace, Namespaces, Schema


@pytest.mark.parametrize(
@ -20,7 +22,7 @@ def test_find_type_source(nwb_core_fixture, class_name, schema_file, namespace_n


 def test_populate_imports(nwb_core_fixture):
-    nwb_core_fixture.populate_imports()
+    nwb_core_fixture._populate_imports()
    schema: SchemaAdapter
    assert len(nwb_core_fixture.schemas) > 0
    for schema in nwb_core_fixture.schemas:
@ -97,14 +99,15 @@ def test_roll_down_inheritance():
        neurodata_type_def="Child",
        neurodata_type_inc="Parent",
        doc="child",
-        attributes=[Attribute(name="a", doc="a")],
+        attributes=[Attribute(name="a", doc="a", value="z")],
        datasets=[
            Dataset(
                name="data",
                doc="data again",
-                attributes=[Attribute(name="a", doc="c", value="z"), Attribute(name="c", doc="c")],
-            )
+                attributes=[Attribute(name="c", doc="c", value="z"), Attribute(name="e", doc="e")],
+            ),
        ],
+        groups=[Group(name="untyped_child", neurodata_type_inc="Parent", doc="untyped child")],
    )
    child_sch = Schema(source="child.yaml")
    child_ns = Namespaces(
@ -130,3 +133,30 @@ def test_roll_down_inheritance():
    child_ns_adapter.complete_namespaces()

    child = child_ns_adapter.get("Child")
+    # overrides simple attrs
+    assert child.doc == "child"
+    # gets unassigned parent attrs
+    assert "b" in [attr.name for attr in child.attributes]
+    # overrides values while preserving remaining values when set
+    attr_a = [attr for attr in child.attributes if attr.name == "a"][0]
+    assert attr_a.value == "z"
+    assert attr_a.dims == parent_cls.attributes[0].dims
+    assert [attr.value for attr in child.attributes if attr.name == "a"][0] == "z"
+
+    # preserve unset values in child datasets
+    assert child.datasets[0].dtype == parent_cls.datasets[0].dtype
+    assert child.datasets[0].dims == parent_cls.datasets[0].dims
+    # gets undeclared attrs in child datasets
+    assert "d" in [attr.name for attr in child.datasets[0].attributes]
+    # overrides set values in child datasets while preserving unset
+    c_attr = [attr for attr in child.datasets[0].attributes if attr.name == "c"][0]
+    assert c_attr.value == "z"
+    assert c_attr.dtype == FlatDtype.int32
+    # preserves new attrs
+    assert "e" in [attr.name for attr in child.datasets[0].attributes]
+
+    # neurodata_type_def is not included in untyped children
+    assert child.groups[0].neurodata_type_def is None
+    # we don't set any of the attrs from the parent class here because we don't override them,
+    # so we don't need to merge them, and we don't want to clutter our linkml models unnecessarily
+    assert child.groups[0].attributes is None
--- a/nwb_models/src/nwb_models/models/pydantic/hdmf_common/init.py
+++ b/nwb_models/src/nwb_models/models/pydantic/hdmf_common/init.py
@ -0,0 +1 @@
+
--- a/nwb_schema_language/src/nwb_schema_language/datamodel/nwb_schema_pydantic.py
+++ b/nwb_schema_language/src/nwb_schema_language/datamodel/nwb_schema_pydantic.py
@ -81,7 +81,10 @@ linkml_meta = LinkMLMeta(
        "see_also": ["https://p2p_ld.github.io/nwb-schema-language"],
        "settings": {
            "email": {"setting_key": "email", "setting_value": "\\S+@\\S+{\\.\\w}+"},
-            "protected_string": {"setting_key": "protected_string", "setting_value": "^[A-Za-z_][A-Za-z0-9_]*$"},
+            "protected_string": {
+                "setting_key": "protected_string",
+                "setting_value": "^[A-Za-z_][A-Za-z0-9_]*$",
+            },
        },
        "source_file": "/Users/jonny/git/p2p-ld/nwb-linkml/nwb_schema_language/src/nwb_schema_language/schema/nwb_schema_language.yaml",
        "title": "nwb-schema-language",
@ -180,7 +183,15 @@ class Namespace(ConfiguredBaseModel):
        json_schema_extra={
            "linkml_meta": {
                "alias": "doc",
-                "domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
+                "domain_of": [
+                    "Namespace",
+                    "Schema",
+                    "Group",
+                    "Attribute",
+                    "Link",
+                    "Dataset",
+                    "CompoundDtype",
+                ],
            }
        },
    )
@ -189,7 +200,14 @@ class Namespace(ConfiguredBaseModel):
        json_schema_extra={
            "linkml_meta": {
                "alias": "name",
-                "domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
+                "domain_of": [
+                    "Namespace",
+                    "Group",
+                    "Attribute",
+                    "Link",
+                    "Dataset",
+                    "CompoundDtype",
+                ],
                "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
            }
        },
@ -199,7 +217,9 @@ class Namespace(ConfiguredBaseModel):
        description="""Optional string with extended full name for the namespace.""",
        json_schema_extra={"linkml_meta": {"alias": "full_name", "domain_of": ["Namespace"]}},
    )
-    version: str = Field(..., json_schema_extra={"linkml_meta": {"alias": "version", "domain_of": ["Namespace"]}})
+    version: str = Field(
+        ..., json_schema_extra={"linkml_meta": {"alias": "version", "domain_of": ["Namespace"]}}
+    )
    date: Optional[datetime] = Field(
        None,
        description="""Date that a namespace was last modified or released""",
@ -215,7 +235,13 @@ class Namespace(ConfiguredBaseModel):
    author: List[str] | str = Field(
        ...,
        description="""List of strings with the names of the authors of the namespace.""",
-        json_schema_extra={"linkml_meta": {"alias": "author", "domain_of": ["Namespace"], "slot_uri": "schema:author"}},
+        json_schema_extra={
+            "linkml_meta": {
+                "alias": "author",
+                "domain_of": ["Namespace"],
+                "slot_uri": "schema:author",
+            }
+        },
    )
    contact: List[str] | str = Field(
        ...,
@ -238,10 +264,13 @@ class Namespace(ConfiguredBaseModel):


 class Namespaces(ConfiguredBaseModel):
-    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"})
+    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
+        {"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}
+    )

    namespaces: Optional[List[Namespace]] = Field(
-        None, json_schema_extra={"linkml_meta": {"alias": "namespaces", "domain_of": ["Namespaces"]}}
+        None,
+        json_schema_extra={"linkml_meta": {"alias": "namespaces", "domain_of": ["Namespaces"]}},
    )


@ -252,29 +281,51 @@ class Schema(ConfiguredBaseModel):
            "rules": [
                {
                    "description": "If namespace is absent, source is required",
-                    "postconditions": {"slot_conditions": {"source": {"name": "source", "required": True}}},
+                    "postconditions": {
+                        "slot_conditions": {"source": {"name": "source", "required": True}}
+                    },
                    "preconditions": {
-                        "slot_conditions": {"namespace": {"name": "namespace", "value_presence": "ABSENT"}}
+                        "slot_conditions": {
+                            "namespace": {"name": "namespace", "value_presence": "ABSENT"}
+                        }
                    },
                },
                {
                    "description": "If source is absent, namespace is required.",
-                    "postconditions": {"slot_conditions": {"namespace": {"name": "namespace", "required": True}}},
-                    "preconditions": {"slot_conditions": {"source": {"name": "source", "value_presence": "ABSENT"}}},
+                    "postconditions": {
+                        "slot_conditions": {"namespace": {"name": "namespace", "required": True}}
+                    },
+                    "preconditions": {
+                        "slot_conditions": {
+                            "source": {"name": "source", "value_presence": "ABSENT"}
+                        }
+                    },
                },
                {
                    "description": "If namespace is present, source is cannot be",
-                    "postconditions": {"slot_conditions": {"source": {"name": "source", "value_presence": "ABSENT"}}},
+                    "postconditions": {
+                        "slot_conditions": {
+                            "source": {"name": "source", "value_presence": "ABSENT"}
+                        }
+                    },
                    "preconditions": {
-                        "slot_conditions": {"namespace": {"name": "namespace", "value_presence": "PRESENT"}}
+                        "slot_conditions": {
+                            "namespace": {"name": "namespace", "value_presence": "PRESENT"}
+                        }
                    },
                },
                {
                    "description": "If source is present, namespace cannot be.",
                    "postconditions": {
-                        "slot_conditions": {"namespace": {"name": "namespace", "value_presence": "ABSENT"}}
+                        "slot_conditions": {
+                            "namespace": {"name": "namespace", "value_presence": "ABSENT"}
+                        }
+                    },
+                    "preconditions": {
+                        "slot_conditions": {
+                            "source": {"name": "source", "value_presence": "PRESENT"}
+                        }
                    },
-                    "preconditions": {"slot_conditions": {"source": {"name": "source", "value_presence": "PRESENT"}}},
                },
            ],
        }
@ -311,14 +362,24 @@ class Schema(ConfiguredBaseModel):
        json_schema_extra={
            "linkml_meta": {
                "alias": "doc",
-                "domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
+                "domain_of": [
+                    "Namespace",
+                    "Schema",
+                    "Group",
+                    "Attribute",
+                    "Link",
+                    "Dataset",
+                    "CompoundDtype",
+                ],
            }
        },
    )


 class Group(ConfiguredBaseModel, ParentizeMixin):
-    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"})
+    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
+        {"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}
+    )

    neurodata_type_def: Optional[str] = Field(
        None,
@ -347,7 +408,14 @@ class Group(ConfiguredBaseModel, ParentizeMixin):
        json_schema_extra={
            "linkml_meta": {
                "alias": "name",
-                "domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
+                "domain_of": [
+                    "Namespace",
+                    "Group",
+                    "Attribute",
+                    "Link",
+                    "Dataset",
+                    "CompoundDtype",
+                ],
                "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
            }
        },
@ -368,7 +436,15 @@ class Group(ConfiguredBaseModel, ParentizeMixin):
        json_schema_extra={
            "linkml_meta": {
                "alias": "doc",
-                "domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
+                "domain_of": [
+                    "Namespace",
+                    "Schema",
+                    "Group",
+                    "Attribute",
+                    "Link",
+                    "Dataset",
+                    "CompoundDtype",
+                ],
            }
        },
    )
@ -380,21 +456,32 @@ class Group(ConfiguredBaseModel, ParentizeMixin):
                "any_of": [{"minimum_value": 1, "range": "integer"}, {"range": "QuantityEnum"}],
                "domain_of": ["Group", "Link", "Dataset"],
                "ifabsent": "int(1)",
-                "todos": ["logic to check that the corresponding class can only be " "implemented quantity times."],
+                "todos": [
+                    "logic to check that the corresponding class can only be "
+                    "implemented quantity times."
+                ],
            }
        },
    )
    linkable: Optional[bool] = Field(
-        None, json_schema_extra={"linkml_meta": {"alias": "linkable", "domain_of": ["Group", "Dataset"]}}
+        None,
+        json_schema_extra={"linkml_meta": {"alias": "linkable", "domain_of": ["Group", "Dataset"]}},
    )
    attributes: Optional[List[Attribute]] = Field(
-        None, json_schema_extra={"linkml_meta": {"alias": "attributes", "domain_of": ["Group", "Dataset"]}}
+        None,
+        json_schema_extra={
+            "linkml_meta": {"alias": "attributes", "domain_of": ["Group", "Dataset"]}
+        },
    )
    datasets: Optional[List[Dataset]] = Field(
-        None, json_schema_extra={"linkml_meta": {"alias": "datasets", "domain_of": ["Group", "Datasets"]}}
+        None,
+        json_schema_extra={
+            "linkml_meta": {"alias": "datasets", "domain_of": ["Group", "Datasets"]}
+        },
    )
    groups: Optional[List[Group]] = Field(
-        None, json_schema_extra={"linkml_meta": {"alias": "groups", "domain_of": ["Group", "Groups"]}}
+        None,
+        json_schema_extra={"linkml_meta": {"alias": "groups", "domain_of": ["Group", "Groups"]}},
    )
    links: Optional[List[Link]] = Field(
        None, json_schema_extra={"linkml_meta": {"alias": "links", "domain_of": ["Group"]}}
@ -403,27 +490,41 @@ class Group(ConfiguredBaseModel, ParentizeMixin):
        None,
        exclude=True,
        description="""The parent group that contains this dataset or group""",
-        json_schema_extra={"linkml_meta": {"alias": "parent", "domain_of": ["Group", "Attribute", "Dataset"]}},
+        json_schema_extra={
+            "linkml_meta": {"alias": "parent", "domain_of": ["Group", "Attribute", "Dataset"]}
+        },
    )


 class Groups(ConfiguredBaseModel):
-    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"})
+    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
+        {"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}
+    )

    groups: Optional[List[Group]] = Field(
-        None, json_schema_extra={"linkml_meta": {"alias": "groups", "domain_of": ["Group", "Groups"]}}
+        None,
+        json_schema_extra={"linkml_meta": {"alias": "groups", "domain_of": ["Group", "Groups"]}},
    )


 class Link(ConfiguredBaseModel):
-    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"})
+    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
+        {"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}
+    )

    name: Optional[str] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {
                "alias": "name",
-                "domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
+                "domain_of": [
+                    "Namespace",
+                    "Group",
+                    "Attribute",
+                    "Link",
+                    "Dataset",
+                    "CompoundDtype",
+                ],
                "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
            }
        },
@ -434,14 +535,24 @@ class Link(ConfiguredBaseModel):
        json_schema_extra={
            "linkml_meta": {
                "alias": "doc",
-                "domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
+                "domain_of": [
+                    "Namespace",
+                    "Schema",
+                    "Group",
+                    "Attribute",
+                    "Link",
+                    "Dataset",
+                    "CompoundDtype",
+                ],
            }
        },
    )
    target_type: str = Field(
        ...,
        description="""Describes the neurodata_type of the target that the reference points to""",
-        json_schema_extra={"linkml_meta": {"alias": "target_type", "domain_of": ["Link", "ReferenceDtype"]}},
+        json_schema_extra={
+            "linkml_meta": {"alias": "target_type", "domain_of": ["Link", "ReferenceDtype"]}
+        },
    )
    quantity: Optional[Union[QuantityEnum, int]] = Field(
        "1",
@ -451,27 +562,39 @@ class Link(ConfiguredBaseModel):
                "any_of": [{"minimum_value": 1, "range": "integer"}, {"range": "QuantityEnum"}],
                "domain_of": ["Group", "Link", "Dataset"],
                "ifabsent": "int(1)",
-                "todos": ["logic to check that the corresponding class can only be " "implemented quantity times."],
+                "todos": [
+                    "logic to check that the corresponding class can only be "
+                    "implemented quantity times."
+                ],
            }
        },
    )


 class Datasets(ConfiguredBaseModel):
-    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"})
+    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
+        {"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}
+    )

    datasets: Optional[List[Dataset]] = Field(
-        None, json_schema_extra={"linkml_meta": {"alias": "datasets", "domain_of": ["Group", "Datasets"]}}
+        None,
+        json_schema_extra={
+            "linkml_meta": {"alias": "datasets", "domain_of": ["Group", "Datasets"]}
+        },
    )


 class ReferenceDtype(ConfiguredBaseModel):
-    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"})
+    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
+        {"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}
+    )

    target_type: str = Field(
        ...,
        description="""Describes the neurodata_type of the target that the reference points to""",
-        json_schema_extra={"linkml_meta": {"alias": "target_type", "domain_of": ["Link", "ReferenceDtype"]}},
+        json_schema_extra={
+            "linkml_meta": {"alias": "target_type", "domain_of": ["Link", "ReferenceDtype"]}
+        },
    )
    reftype: Optional[ReftypeOptions] = Field(
        None,
@ -501,7 +624,14 @@ class CompoundDtype(ConfiguredBaseModel):
        json_schema_extra={
            "linkml_meta": {
                "alias": "name",
-                "domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
+                "domain_of": [
+                    "Namespace",
+                    "Group",
+                    "Attribute",
+                    "Link",
+                    "Dataset",
+                    "CompoundDtype",
+                ],
                "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
            }
        },
@ -512,7 +642,15 @@ class CompoundDtype(ConfiguredBaseModel):
        json_schema_extra={
            "linkml_meta": {
                "alias": "doc",
-                "domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
+                "domain_of": [
+                    "Namespace",
+                    "Schema",
+                    "Group",
+                    "Attribute",
+                    "Link",
+                    "Dataset",
+                    "CompoundDtype",
+                ],
            }
        },
    )
@ -535,8 +673,12 @@ class DtypeMixin(ConfiguredBaseModel):
            "mixin": True,
            "rules": [
                {
-                    "postconditions": {"slot_conditions": {"dtype": {"multivalued": False, "name": "dtype"}}},
-                    "preconditions": {"slot_conditions": {"dtype": {"name": "dtype", "range": "FlatDtype"}}},
+                    "postconditions": {
+                        "slot_conditions": {"dtype": {"multivalued": False, "name": "dtype"}}
+                    },
+                    "preconditions": {
+                        "slot_conditions": {"dtype": {"name": "dtype", "range": "FlatDtype"}}
+                    },
                }
            ],
        }
@ -547,7 +689,11 @@ class DtypeMixin(ConfiguredBaseModel):
        json_schema_extra={
            "linkml_meta": {
                "alias": "dtype",
-                "any_of": [{"range": "FlatDtype"}, {"range": "CompoundDtype"}, {"range": "ReferenceDtype"}],
+                "any_of": [
+                    {"range": "FlatDtype"},
+                    {"range": "CompoundDtype"},
+                    {"range": "ReferenceDtype"},
+                ],
                "domain_of": ["CompoundDtype", "DtypeMixin"],
            }
        },
@ -571,7 +717,14 @@ class Attribute(DtypeMixin):
        json_schema_extra={
            "linkml_meta": {
                "alias": "name",
-                "domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
+                "domain_of": [
+                    "Namespace",
+                    "Group",
+                    "Attribute",
+                    "Link",
+                    "Dataset",
+                    "CompoundDtype",
+                ],
                "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
            }
        },
@ -611,12 +764,16 @@ class Attribute(DtypeMixin):
    value: Optional[Any] = Field(
        None,
        description="""Optional constant, fixed value for the attribute.""",
-        json_schema_extra={"linkml_meta": {"alias": "value", "domain_of": ["Attribute", "Dataset"]}},
+        json_schema_extra={
+            "linkml_meta": {"alias": "value", "domain_of": ["Attribute", "Dataset"]}
+        },
    )
    default_value: Optional[Any] = Field(
        None,
        description="""Optional default value for variable-valued attributes.""",
-        json_schema_extra={"linkml_meta": {"alias": "default_value", "domain_of": ["Attribute", "Dataset"]}},
+        json_schema_extra={
+            "linkml_meta": {"alias": "default_value", "domain_of": ["Attribute", "Dataset"]}
+        },
    )
    doc: str = Field(
        ...,
@ -624,14 +781,24 @@ class Attribute(DtypeMixin):
        json_schema_extra={
            "linkml_meta": {
                "alias": "doc",
-                "domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
+                "domain_of": [
+                    "Namespace",
+                    "Schema",
+                    "Group",
+                    "Attribute",
+                    "Link",
+                    "Dataset",
+                    "CompoundDtype",
+                ],
            }
        },
    )
    required: Optional[bool] = Field(
        True,
        description="""Optional boolean key describing whether the attribute is required. Default value is True.""",
-        json_schema_extra={"linkml_meta": {"alias": "required", "domain_of": ["Attribute"], "ifabsent": "true"}},
+        json_schema_extra={
+            "linkml_meta": {"alias": "required", "domain_of": ["Attribute"], "ifabsent": "true"}
+        },
    )
    parent: Optional[Union[Dataset, Group]] = Field(
        None,
@ -650,7 +817,11 @@ class Attribute(DtypeMixin):
        json_schema_extra={
            "linkml_meta": {
                "alias": "dtype",
-                "any_of": [{"range": "FlatDtype"}, {"range": "CompoundDtype"}, {"range": "ReferenceDtype"}],
+                "any_of": [
+                    {"range": "FlatDtype"},
+                    {"range": "CompoundDtype"},
+                    {"range": "ReferenceDtype"},
+                ],
                "domain_of": ["CompoundDtype", "DtypeMixin"],
            }
        },
@ -689,7 +860,14 @@ class Dataset(ConfiguredBaseModel, ParentizeMixin):
        json_schema_extra={
            "linkml_meta": {
                "alias": "name",
-                "domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
+                "domain_of": [
+                    "Namespace",
+                    "Group",
+                    "Attribute",
+                    "Link",
+                    "Dataset",
+                    "CompoundDtype",
+                ],
                "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
            }
        },
@ -739,12 +917,16 @@ class Dataset(ConfiguredBaseModel, ParentizeMixin):
    value: Optional[Any] = Field(
        None,
        description="""Optional constant, fixed value for the attribute.""",
-        json_schema_extra={"linkml_meta": {"alias": "value", "domain_of": ["Attribute", "Dataset"]}},
+        json_schema_extra={
+            "linkml_meta": {"alias": "value", "domain_of": ["Attribute", "Dataset"]}
+        },
    )
    default_value: Optional[Any] = Field(
        None,
        description="""Optional default value for variable-valued attributes.""",
-        json_schema_extra={"linkml_meta": {"alias": "default_value", "domain_of": ["Attribute", "Dataset"]}},
+        json_schema_extra={
+            "linkml_meta": {"alias": "default_value", "domain_of": ["Attribute", "Dataset"]}
+        },
    )
    doc: str = Field(
        ...,
@ -752,7 +934,15 @@ class Dataset(ConfiguredBaseModel, ParentizeMixin):
        json_schema_extra={
            "linkml_meta": {
                "alias": "doc",
-                "domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
+                "domain_of": [
+                    "Namespace",
+                    "Schema",
+                    "Group",
+                    "Attribute",
+                    "Link",
+                    "Dataset",
+                    "CompoundDtype",
+                ],
            }
        },
    )
@ -764,28 +954,41 @@ class Dataset(ConfiguredBaseModel, ParentizeMixin):
                "any_of": [{"minimum_value": 1, "range": "integer"}, {"range": "QuantityEnum"}],
                "domain_of": ["Group", "Link", "Dataset"],
                "ifabsent": "int(1)",
-                "todos": ["logic to check that the corresponding class can only be " "implemented quantity times."],
+                "todos": [
+                    "logic to check that the corresponding class can only be "
+                    "implemented quantity times."
+                ],
            }
        },
    )
    linkable: Optional[bool] = Field(
-        None, json_schema_extra={"linkml_meta": {"alias": "linkable", "domain_of": ["Group", "Dataset"]}}
+        None,
+        json_schema_extra={"linkml_meta": {"alias": "linkable", "domain_of": ["Group", "Dataset"]}},
    )
    attributes: Optional[List[Attribute]] = Field(
-        None, json_schema_extra={"linkml_meta": {"alias": "attributes", "domain_of": ["Group", "Dataset"]}}
+        None,
+        json_schema_extra={
+            "linkml_meta": {"alias": "attributes", "domain_of": ["Group", "Dataset"]}
+        },
    )
    parent: Optional[Group] = Field(
        None,
        exclude=True,
        description="""The parent group that contains this dataset or group""",
-        json_schema_extra={"linkml_meta": {"alias": "parent", "domain_of": ["Group", "Attribute", "Dataset"]}},
+        json_schema_extra={
+            "linkml_meta": {"alias": "parent", "domain_of": ["Group", "Attribute", "Dataset"]}
+        },
    )
    dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {
                "alias": "dtype",
-                "any_of": [{"range": "FlatDtype"}, {"range": "CompoundDtype"}, {"range": "ReferenceDtype"}],
+                "any_of": [
+                    {"range": "FlatDtype"},
+                    {"range": "CompoundDtype"},
+                    {"range": "ReferenceDtype"},
+                ],
                "domain_of": ["CompoundDtype", "DtypeMixin"],
            }
        },
--- a/nwb_schema_language/src/nwb_schema_language/generator.py
+++ b/nwb_schema_language/src/nwb_schema_language/generator.py
@ -1,5 +1,9 @@
-from pathlib import Path
+"""
+Customization of linkml pydantic generator
+"""
+
 from dataclasses import dataclass
+from pathlib import Path

 from linkml.generators.pydanticgen import PydanticGenerator
 from linkml.generators.pydanticgen.build import ClassResult
@ -9,9 +13,10 @@ from pydantic import BaseModel, model_validator


 class ParentizeMixin(BaseModel):
+    """Mixin to populate the parent field for nested datasets and groups"""

    @model_validator(mode="after")
-    def parentize(self):
+    def parentize(self) -> BaseModel:
        """Set the parent attribute for all our fields if they have one"""
        for field_name in self.model_fields:
            if field_name == "parent":
@ -28,6 +33,9 @@ class ParentizeMixin(BaseModel):

@dataclass
 class NWBSchemaLangGenerator(PydanticGenerator):
+    """
+    Customization of linkml pydantic generator
+    """

    def __init__(self, *args, **kwargs):
        kwargs["injected_classes"] = [ParentizeMixin]
@ -38,12 +46,18 @@ class NWBSchemaLangGenerator(PydanticGenerator):
        super().__init__(*args, **kwargs)

    def after_generate_class(self, cls: ClassResult, sv: SchemaView) -> ClassResult:
+        """
+        Set the bases of the Dataset and Group classes to ConfiguredBaseModel and ParentizeMixin
+        """
        if cls.cls.name in ("Dataset", "Group"):
            cls.cls.bases = ["ConfiguredBaseModel", "ParentizeMixin"]
        return cls


-def generate():
+def generate() -> None:
+    """
+    Generate pydantic models for nwb_schema_language
+    """
    schema = Path(__file__).parent / "schema" / "nwb_schema_language.yaml"
    output = Path(__file__).parent / "datamodel" / "nwb_schema_pydantic.py"
    generator = NWBSchemaLangGenerator(schema=schema)