Partially functioning rolldown, but it is producing some wonky results: attributes are missing in nested inheritance, and the models are now extremely noisy, re-creating the same fields over and over even when they aren't overridden or modified by the child class. Need to redo the rolldown: make it less generic, don't dump to dicts, and merge in a more targeted way.

This commit is contained in:
sneakers-the-rat 2024-09-13 02:42:01 -07:00
parent 880352d9a4
commit 749703e077
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
11 changed files with 489 additions and 91 deletions

View file

@ -17,7 +17,7 @@ from linkml_runtime.linkml_model import (
SlotDefinition, SlotDefinition,
TypeDefinition, TypeDefinition,
) )
from pydantic import BaseModel from pydantic import BaseModel, PrivateAttr
from nwb_linkml.logging import init_logger from nwb_linkml.logging import init_logger
from nwb_schema_language import Attribute, CompoundDtype, Dataset, Group, Schema from nwb_schema_language import Attribute, CompoundDtype, Dataset, Group, Schema
@ -103,6 +103,7 @@ class Adapter(BaseModel):
_logger: Optional[Logger] = None _logger: Optional[Logger] = None
_debug: Optional[bool] = None _debug: Optional[bool] = None
_nwb_classes: dict[str, Dataset | Group] = PrivateAttr(default_factory=dict)
@property @property
def debug(self) -> bool: def debug(self) -> bool:
@ -135,7 +136,10 @@ class Adapter(BaseModel):
Convenience wrapper around :meth:`.walk_field_values` Convenience wrapper around :meth:`.walk_field_values`
""" """
return next(self.walk_field_values(self, "neurodata_type_def", name)) if name not in self._nwb_classes:
cls = next(self.walk_field_values(self, "neurodata_type_def", name))
self._nwb_classes[name] = cls
return self._nwb_classes[name]
def get_model_with_field(self, field: str) -> Generator[Union[Group, Dataset], None, None]: def get_model_with_field(self, field: str) -> Generator[Union[Group, Dataset], None, None]:
""" """

View file

@ -616,7 +616,8 @@ class MapNVectors(DatasetMap):
DynamicTable (and the slot VectorData where this is called for) DynamicTable (and the slot VectorData where this is called for)
is handled specially and just dropped, because we handle the possibility for is handled specially and just dropped, because we handle the possibility for
arbitrary extra VectorData in the :mod:`nwb_linkml.includes.hdmf` module mixin classes. arbitrary extra VectorData in the :mod:`nwb_linkml.includes.hdmf` module mixin classes
(see :class:`.MapNVectorData` ).
So really this is just a handler for the `Images` case So really this is just a handler for the `Images` case
""" """
@ -652,6 +653,40 @@ class MapNVectors(DatasetMap):
return res return res
class MapNVectorData(DatasetMap):
    """
    An extremely special case just for DynamicTable:
    DynamicTable indicates that all of its extra columns are ``VectorData`` with an
    unnamed, * quantity dataset similar to the case of :class:`.MapNVectors` .

    We handle this with the :mod:`.includes.hdmf` module mixin classes instead,
    and so to avoid generating a pointless slot and class,
    we just catch that case and return nothing.
    """

    @classmethod
    def check(c, cls: Dataset) -> bool:
        """
        Check for being an unnamed multivalued vector class that IS VectorData

        Returns:
            bool: ``True`` only when the dataset has no ``name``, no
            ``neurodata_type_def``, includes exactly ``"VectorData"``, and has a
            quantity of ``"*"`` or ``"+"``.
        """
        # Every term is a comparison, so the result is always a real ``bool``:
        # an unset ``neurodata_type_inc`` simply fails the equality test rather
        # than leaking ``None``/``""`` out of a short-circuited ``and`` chain.
        return (
            cls.name is None
            and cls.neurodata_type_def is None
            and cls.neurodata_type_inc == "VectorData"
            and cls.quantity in ("*", "+")
        )

    @classmethod
    def apply(
        c, cls: Dataset, res: Optional[BuildResult] = None, name: Optional[str] = None
    ) -> BuildResult:
        """
        Return ... nothing

        ``res`` and ``name`` are accepted but not used here;
        a fresh, empty :class:`.BuildResult` is always returned.
        """
        return BuildResult()
class MapCompoundDtype(DatasetMap): class MapCompoundDtype(DatasetMap):
""" """
A ``dtype`` declared as an array of types that function effectively as a row in a table. A ``dtype`` declared as an array of types that function effectively as a row in a table.

View file

@ -14,13 +14,13 @@ from typing import Dict, Generator, List, Optional
from linkml_runtime.dumpers import yaml_dumper from linkml_runtime.dumpers import yaml_dumper
from linkml_runtime.linkml_model import Annotation, SchemaDefinition from linkml_runtime.linkml_model import Annotation, SchemaDefinition
from pydantic import Field, model_validator from pydantic import Field, model_validator
import networkx as nx
from nwb_linkml.adapters.adapter import Adapter, BuildResult from nwb_linkml.adapters.adapter import Adapter, BuildResult
from nwb_linkml.adapters.schema import SchemaAdapter from nwb_linkml.adapters.schema import SchemaAdapter
from nwb_linkml.lang_elements import NwbLangSchema from nwb_linkml.lang_elements import NwbLangSchema
from nwb_linkml.ui import AdapterProgress from nwb_linkml.ui import AdapterProgress
from nwb_schema_language import Namespaces from nwb_linkml.util import merge_dicts
from nwb_schema_language import Dataset, Group, Namespaces
class NamespacesAdapter(Adapter): class NamespacesAdapter(Adapter):
@ -156,7 +156,7 @@ class NamespacesAdapter(Adapter):
break break
return self return self
def complete_namespaces(self): def complete_namespaces(self) -> None:
""" """
After loading the namespace, and after any imports have been added afterwards, After loading the namespace, and after any imports have been added afterwards,
this must be called to complete the definitions of the contained schema objects. this must be called to complete the definitions of the contained schema objects.
@ -167,7 +167,7 @@ class NamespacesAdapter(Adapter):
It **is** automatically called if it hasn't been already by the :meth:`.build` method. It **is** automatically called if it hasn't been already by the :meth:`.build` method.
""" """
self.populate_imports() self._populate_imports()
self._roll_down_inheritance() self._roll_down_inheritance()
for i in self.imported: for i in self.imported:
@ -175,7 +175,7 @@ class NamespacesAdapter(Adapter):
self._completed = True self._completed = True
def _roll_down_inheritance(self): def _roll_down_inheritance(self) -> None:
""" """
nwb-schema-language inheritance doesn't work like normal python inheritance - nwb-schema-language inheritance doesn't work like normal python inheritance -
instead of inheriting everything at the 'top level' of a class, it also instead of inheriting everything at the 'top level' of a class, it also
@ -184,21 +184,59 @@ class NamespacesAdapter(Adapter):
References: References:
https://github.com/NeurodataWithoutBorders/pynwb/issues/1954 https://github.com/NeurodataWithoutBorders/pynwb/issues/1954
""" """
pass for cls in self.walk_types(self, (Group, Dataset)):
if not cls.neurodata_type_inc:
continue
def inheritance_graph(self) -> nx.DiGraph: # get parents
""" parent = self.get(cls.neurodata_type_inc)
Make a graph of all ``neurodata_types`` in the namespace and imports such that parents = [parent]
each node contains the group or dataset it describes, while parent.neurodata_type_inc:
and has directed edges pointing at all the classes that inherit from it. parent = self.get(parent.neurodata_type_inc)
parents.insert(0, parent)
parents.append(cls)
In the case that the inheriting class does not itself have a ``neurodata_type_def``, # merge and cast
it is # note that we don't want to exclude_none in the model dump here,
""" # if the child class has a field completely unset, we want to inherit it
g = nx.DiGraph() # from the parent without rolling it down - we are only rolling down
for sch in self.all_schemas(): # the things that need to be modified/merged in the child
for cls in sch.created_classes: new_cls: dict = {}
pass for parent in parents:
new_cls = merge_dicts(
new_cls,
parent.model_dump(exclude_unset=True),
list_key="name",
exclude=["neurodata_type_def"],
)
new_cls: Group | Dataset = type(cls)(**new_cls)
new_cls.parent = cls.parent
# reinsert
if new_cls.parent:
if isinstance(cls, Dataset):
new_cls.parent.datasets[new_cls.parent.datasets.index(cls)] = new_cls
else:
new_cls.parent.groups[new_cls.parent.groups.index(cls)] = new_cls
else:
# top level class, need to go and find it
found = False
for schema in self.all_schemas():
if isinstance(cls, Dataset):
if cls in schema.datasets:
schema.datasets[schema.datasets.index(cls)] = new_cls
found = True
break
else:
if cls in schema.groups:
schema.groups[schema.groups.index(cls)] = new_cls
found = True
break
if not found:
raise KeyError(
f"Unable to find source schema for {cls} when reinserting after rolling"
" down!"
)
def find_type_source(self, name: str) -> SchemaAdapter: def find_type_source(self, name: str) -> SchemaAdapter:
""" """
@ -238,7 +276,7 @@ class NamespacesAdapter(Adapter):
else: else:
raise KeyError(f"No schema found that define {name}") raise KeyError(f"No schema found that define {name}")
def populate_imports(self) -> "NamespacesAdapter": def _populate_imports(self) -> "NamespacesAdapter":
""" """
Populate the imports that are needed for each schema file Populate the imports that are needed for each schema file
@ -338,5 +376,5 @@ class NamespacesAdapter(Adapter):
for sch in self.schemas: for sch in self.schemas:
yield sch yield sch
for imported in self.imported: for imported in self.imported:
for sch in imported: for sch in imported.schemas:
yield sch yield sch

View file

@ -131,7 +131,7 @@ def load_namespace_adapter(
else: else:
adapter = NamespacesAdapter(namespaces=namespaces, schemas=sch) adapter = NamespacesAdapter(namespaces=namespaces, schemas=sch)
adapter.populate_imports() adapter.complete_namespaces()
return adapter return adapter

View file

@ -85,7 +85,7 @@ def make_node(
def make_graph(namespaces: "NamespacesAdapter", recurse: bool = True) -> List[CytoElement]: def make_graph(namespaces: "NamespacesAdapter", recurse: bool = True) -> List[CytoElement]:
namespaces.populate_imports() namespaces.complete_namespaces()
nodes = [] nodes = []
element: Namespace | Group | Dataset element: Namespace | Group | Dataset
print("walking graph") print("walking graph")

View file

@ -127,7 +127,7 @@ class LinkMLProvider(Provider):
for schema_needs in adapter.needed_imports.values(): for schema_needs in adapter.needed_imports.values():
for needed in schema_needs: for needed in schema_needs:
adapter.imported.append(ns_adapters[needed]) adapter.imported.append(ns_adapters[needed])
adapter.populate_imports() adapter.complete_namespaces()
# then do the build # then do the build
res = {} res = {}

View file

@ -0,0 +1,73 @@
"""
The much maligned junk drawer
"""
def merge_dicts(
source: dict, target: dict, list_key: str | None = None, exclude: list[str] | None = None
) -> dict:
"""
Deeply merge nested dictionaries, replacing already-declared keys rather than
e.g. merging lists as well
Args:
source (dict): source dictionary
target (dict): target dictionary (values merged over source)
list_key (str | None): Optional: if present, merge lists of dicts using this to
identify matching dicts
exclude: (list[str] | None): Optional: if present, exclude keys from parent.
References:
https://stackoverflow.com/a/20666342/13113166
"""
if exclude is None:
exclude = []
ret = {k: v for k, v in source.items() if k not in exclude}
for key, value in target.items():
if key not in ret:
ret[key] = value
elif isinstance(value, dict):
if key in ret:
ret[key] = merge_dicts(ret[key], value, list_key, exclude)
else:
ret[key] = value
elif isinstance(value, list) and list_key and all([isinstance(v, dict) for v in value]):
src_keys = {v[list_key]: ret[key].index(v) for v in ret.get(key, {}) if list_key in v}
target_keys = {v[list_key]: value.index(v) for v in value if list_key in v}
# all dicts not in target
# screwy double iteration to preserve dict order
new_val = [
ret[key][src_keys[k]]
for k in src_keys
if k in set(src_keys.keys()) - set(target_keys.keys())
]
# all dicts not in source
new_val.extend(
[
value[target_keys[k]]
for k in target_keys
if k in set(target_keys.keys()) - set(src_keys.keys())
]
)
# merge dicts in both
new_val.extend(
[
merge_dicts(ret[key][src_keys[k]], value[target_keys[k]], list_key, exclude)
for k in target_keys
if k in set(src_keys.keys()).intersection(set(target_keys.keys()))
]
)
new_val = sorted(new_val, key=lambda i: i[list_key])
# add any dicts that don't have the list_key
# they can't be merged since they can't be matched
new_val.extend([v for v in ret.get(key, {}) if list_key not in v])
new_val.extend([v for v in value if list_key not in v])
ret[key] = new_val
else:
ret[key] = value
return ret

View file

@ -1,7 +1,9 @@
import pytest
from pathlib import Path from pathlib import Path
import pytest
from nwb_linkml.adapters import NamespacesAdapter, SchemaAdapter from nwb_linkml.adapters import NamespacesAdapter, SchemaAdapter
from nwb_schema_language import Attribute, Group, Namespace, Dataset, Namespaces, Schema, FlatDtype from nwb_schema_language import Attribute, Dataset, FlatDtype, Group, Namespace, Namespaces, Schema
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -20,7 +22,7 @@ def test_find_type_source(nwb_core_fixture, class_name, schema_file, namespace_n
def test_populate_imports(nwb_core_fixture): def test_populate_imports(nwb_core_fixture):
nwb_core_fixture.populate_imports() nwb_core_fixture._populate_imports()
schema: SchemaAdapter schema: SchemaAdapter
assert len(nwb_core_fixture.schemas) > 0 assert len(nwb_core_fixture.schemas) > 0
for schema in nwb_core_fixture.schemas: for schema in nwb_core_fixture.schemas:
@ -97,14 +99,15 @@ def test_roll_down_inheritance():
neurodata_type_def="Child", neurodata_type_def="Child",
neurodata_type_inc="Parent", neurodata_type_inc="Parent",
doc="child", doc="child",
attributes=[Attribute(name="a", doc="a")], attributes=[Attribute(name="a", doc="a", value="z")],
datasets=[ datasets=[
Dataset( Dataset(
name="data", name="data",
doc="data again", doc="data again",
attributes=[Attribute(name="a", doc="c", value="z"), Attribute(name="c", doc="c")], attributes=[Attribute(name="c", doc="c", value="z"), Attribute(name="e", doc="e")],
) ),
], ],
groups=[Group(name="untyped_child", neurodata_type_inc="Parent", doc="untyped child")],
) )
child_sch = Schema(source="child.yaml") child_sch = Schema(source="child.yaml")
child_ns = Namespaces( child_ns = Namespaces(
@ -130,3 +133,30 @@ def test_roll_down_inheritance():
child_ns_adapter.complete_namespaces() child_ns_adapter.complete_namespaces()
child = child_ns_adapter.get("Child") child = child_ns_adapter.get("Child")
# overrides simple attrs
assert child.doc == "child"
# gets unassigned parent attrs
assert "b" in [attr.name for attr in child.attributes]
# overrides values while preserving remaining values when set
attr_a = [attr for attr in child.attributes if attr.name == "a"][0]
assert attr_a.value == "z"
assert attr_a.dims == parent_cls.attributes[0].dims
assert [attr.value for attr in child.attributes if attr.name == "a"][0] == "z"
# preserve unset values in child datasets
assert child.datasets[0].dtype == parent_cls.datasets[0].dtype
assert child.datasets[0].dims == parent_cls.datasets[0].dims
# gets undeclared attrs in child datasets
assert "d" in [attr.name for attr in child.datasets[0].attributes]
# overrides set values in child datasets while preserving unset
c_attr = [attr for attr in child.datasets[0].attributes if attr.name == "c"][0]
assert c_attr.value == "z"
assert c_attr.dtype == FlatDtype.int32
# preserves new attrs
assert "e" in [attr.name for attr in child.datasets[0].attributes]
# neurodata_type_def is not included in untyped children
assert child.groups[0].neurodata_type_def is None
# we don't set any of the attrs from the parent class here because we don't override them,
# so we don't need to merge them, and we don't want to clutter our linkml models unnecessarily
assert child.groups[0].attributes is None

View file

@ -81,7 +81,10 @@ linkml_meta = LinkMLMeta(
"see_also": ["https://p2p_ld.github.io/nwb-schema-language"], "see_also": ["https://p2p_ld.github.io/nwb-schema-language"],
"settings": { "settings": {
"email": {"setting_key": "email", "setting_value": "\\S+@\\S+{\\.\\w}+"}, "email": {"setting_key": "email", "setting_value": "\\S+@\\S+{\\.\\w}+"},
"protected_string": {"setting_key": "protected_string", "setting_value": "^[A-Za-z_][A-Za-z0-9_]*$"}, "protected_string": {
"setting_key": "protected_string",
"setting_value": "^[A-Za-z_][A-Za-z0-9_]*$",
},
}, },
"source_file": "/Users/jonny/git/p2p-ld/nwb-linkml/nwb_schema_language/src/nwb_schema_language/schema/nwb_schema_language.yaml", "source_file": "/Users/jonny/git/p2p-ld/nwb-linkml/nwb_schema_language/src/nwb_schema_language/schema/nwb_schema_language.yaml",
"title": "nwb-schema-language", "title": "nwb-schema-language",
@ -180,7 +183,15 @@ class Namespace(ConfiguredBaseModel):
json_schema_extra={ json_schema_extra={
"linkml_meta": { "linkml_meta": {
"alias": "doc", "alias": "doc",
"domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], "domain_of": [
"Namespace",
"Schema",
"Group",
"Attribute",
"Link",
"Dataset",
"CompoundDtype",
],
} }
}, },
) )
@ -189,7 +200,14 @@ class Namespace(ConfiguredBaseModel):
json_schema_extra={ json_schema_extra={
"linkml_meta": { "linkml_meta": {
"alias": "name", "alias": "name",
"domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], "domain_of": [
"Namespace",
"Group",
"Attribute",
"Link",
"Dataset",
"CompoundDtype",
],
"structured_pattern": {"interpolated": True, "syntax": "{protected_string}"}, "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
} }
}, },
@ -199,7 +217,9 @@ class Namespace(ConfiguredBaseModel):
description="""Optional string with extended full name for the namespace.""", description="""Optional string with extended full name for the namespace.""",
json_schema_extra={"linkml_meta": {"alias": "full_name", "domain_of": ["Namespace"]}}, json_schema_extra={"linkml_meta": {"alias": "full_name", "domain_of": ["Namespace"]}},
) )
version: str = Field(..., json_schema_extra={"linkml_meta": {"alias": "version", "domain_of": ["Namespace"]}}) version: str = Field(
..., json_schema_extra={"linkml_meta": {"alias": "version", "domain_of": ["Namespace"]}}
)
date: Optional[datetime] = Field( date: Optional[datetime] = Field(
None, None,
description="""Date that a namespace was last modified or released""", description="""Date that a namespace was last modified or released""",
@ -215,7 +235,13 @@ class Namespace(ConfiguredBaseModel):
author: List[str] | str = Field( author: List[str] | str = Field(
..., ...,
description="""List of strings with the names of the authors of the namespace.""", description="""List of strings with the names of the authors of the namespace.""",
json_schema_extra={"linkml_meta": {"alias": "author", "domain_of": ["Namespace"], "slot_uri": "schema:author"}}, json_schema_extra={
"linkml_meta": {
"alias": "author",
"domain_of": ["Namespace"],
"slot_uri": "schema:author",
}
},
) )
contact: List[str] | str = Field( contact: List[str] | str = Field(
..., ...,
@ -238,10 +264,13 @@ class Namespace(ConfiguredBaseModel):
class Namespaces(ConfiguredBaseModel): class Namespaces(ConfiguredBaseModel):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}) linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
{"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}
)
namespaces: Optional[List[Namespace]] = Field( namespaces: Optional[List[Namespace]] = Field(
None, json_schema_extra={"linkml_meta": {"alias": "namespaces", "domain_of": ["Namespaces"]}} None,
json_schema_extra={"linkml_meta": {"alias": "namespaces", "domain_of": ["Namespaces"]}},
) )
@ -252,29 +281,51 @@ class Schema(ConfiguredBaseModel):
"rules": [ "rules": [
{ {
"description": "If namespace is absent, source is required", "description": "If namespace is absent, source is required",
"postconditions": {"slot_conditions": {"source": {"name": "source", "required": True}}}, "postconditions": {
"slot_conditions": {"source": {"name": "source", "required": True}}
},
"preconditions": { "preconditions": {
"slot_conditions": {"namespace": {"name": "namespace", "value_presence": "ABSENT"}} "slot_conditions": {
"namespace": {"name": "namespace", "value_presence": "ABSENT"}
}
}, },
}, },
{ {
"description": "If source is absent, namespace is required.", "description": "If source is absent, namespace is required.",
"postconditions": {"slot_conditions": {"namespace": {"name": "namespace", "required": True}}}, "postconditions": {
"preconditions": {"slot_conditions": {"source": {"name": "source", "value_presence": "ABSENT"}}}, "slot_conditions": {"namespace": {"name": "namespace", "required": True}}
},
"preconditions": {
"slot_conditions": {
"source": {"name": "source", "value_presence": "ABSENT"}
}
},
}, },
{ {
"description": "If namespace is present, source is cannot be", "description": "If namespace is present, source is cannot be",
"postconditions": {"slot_conditions": {"source": {"name": "source", "value_presence": "ABSENT"}}}, "postconditions": {
"slot_conditions": {
"source": {"name": "source", "value_presence": "ABSENT"}
}
},
"preconditions": { "preconditions": {
"slot_conditions": {"namespace": {"name": "namespace", "value_presence": "PRESENT"}} "slot_conditions": {
"namespace": {"name": "namespace", "value_presence": "PRESENT"}
}
}, },
}, },
{ {
"description": "If source is present, namespace cannot be.", "description": "If source is present, namespace cannot be.",
"postconditions": { "postconditions": {
"slot_conditions": {"namespace": {"name": "namespace", "value_presence": "ABSENT"}} "slot_conditions": {
"namespace": {"name": "namespace", "value_presence": "ABSENT"}
}
},
"preconditions": {
"slot_conditions": {
"source": {"name": "source", "value_presence": "PRESENT"}
}
}, },
"preconditions": {"slot_conditions": {"source": {"name": "source", "value_presence": "PRESENT"}}},
}, },
], ],
} }
@ -311,14 +362,24 @@ class Schema(ConfiguredBaseModel):
json_schema_extra={ json_schema_extra={
"linkml_meta": { "linkml_meta": {
"alias": "doc", "alias": "doc",
"domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], "domain_of": [
"Namespace",
"Schema",
"Group",
"Attribute",
"Link",
"Dataset",
"CompoundDtype",
],
} }
}, },
) )
class Group(ConfiguredBaseModel, ParentizeMixin): class Group(ConfiguredBaseModel, ParentizeMixin):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}) linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
{"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}
)
neurodata_type_def: Optional[str] = Field( neurodata_type_def: Optional[str] = Field(
None, None,
@ -347,7 +408,14 @@ class Group(ConfiguredBaseModel, ParentizeMixin):
json_schema_extra={ json_schema_extra={
"linkml_meta": { "linkml_meta": {
"alias": "name", "alias": "name",
"domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], "domain_of": [
"Namespace",
"Group",
"Attribute",
"Link",
"Dataset",
"CompoundDtype",
],
"structured_pattern": {"interpolated": True, "syntax": "{protected_string}"}, "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
} }
}, },
@ -368,7 +436,15 @@ class Group(ConfiguredBaseModel, ParentizeMixin):
json_schema_extra={ json_schema_extra={
"linkml_meta": { "linkml_meta": {
"alias": "doc", "alias": "doc",
"domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], "domain_of": [
"Namespace",
"Schema",
"Group",
"Attribute",
"Link",
"Dataset",
"CompoundDtype",
],
} }
}, },
) )
@ -380,21 +456,32 @@ class Group(ConfiguredBaseModel, ParentizeMixin):
"any_of": [{"minimum_value": 1, "range": "integer"}, {"range": "QuantityEnum"}], "any_of": [{"minimum_value": 1, "range": "integer"}, {"range": "QuantityEnum"}],
"domain_of": ["Group", "Link", "Dataset"], "domain_of": ["Group", "Link", "Dataset"],
"ifabsent": "int(1)", "ifabsent": "int(1)",
"todos": ["logic to check that the corresponding class can only be " "implemented quantity times."], "todos": [
"logic to check that the corresponding class can only be "
"implemented quantity times."
],
} }
}, },
) )
linkable: Optional[bool] = Field( linkable: Optional[bool] = Field(
None, json_schema_extra={"linkml_meta": {"alias": "linkable", "domain_of": ["Group", "Dataset"]}} None,
json_schema_extra={"linkml_meta": {"alias": "linkable", "domain_of": ["Group", "Dataset"]}},
) )
attributes: Optional[List[Attribute]] = Field( attributes: Optional[List[Attribute]] = Field(
None, json_schema_extra={"linkml_meta": {"alias": "attributes", "domain_of": ["Group", "Dataset"]}} None,
json_schema_extra={
"linkml_meta": {"alias": "attributes", "domain_of": ["Group", "Dataset"]}
},
) )
datasets: Optional[List[Dataset]] = Field( datasets: Optional[List[Dataset]] = Field(
None, json_schema_extra={"linkml_meta": {"alias": "datasets", "domain_of": ["Group", "Datasets"]}} None,
json_schema_extra={
"linkml_meta": {"alias": "datasets", "domain_of": ["Group", "Datasets"]}
},
) )
groups: Optional[List[Group]] = Field( groups: Optional[List[Group]] = Field(
None, json_schema_extra={"linkml_meta": {"alias": "groups", "domain_of": ["Group", "Groups"]}} None,
json_schema_extra={"linkml_meta": {"alias": "groups", "domain_of": ["Group", "Groups"]}},
) )
links: Optional[List[Link]] = Field( links: Optional[List[Link]] = Field(
None, json_schema_extra={"linkml_meta": {"alias": "links", "domain_of": ["Group"]}} None, json_schema_extra={"linkml_meta": {"alias": "links", "domain_of": ["Group"]}}
@ -403,27 +490,41 @@ class Group(ConfiguredBaseModel, ParentizeMixin):
None, None,
exclude=True, exclude=True,
description="""The parent group that contains this dataset or group""", description="""The parent group that contains this dataset or group""",
json_schema_extra={"linkml_meta": {"alias": "parent", "domain_of": ["Group", "Attribute", "Dataset"]}}, json_schema_extra={
"linkml_meta": {"alias": "parent", "domain_of": ["Group", "Attribute", "Dataset"]}
},
) )
class Groups(ConfiguredBaseModel): class Groups(ConfiguredBaseModel):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}) linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
{"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}
)
groups: Optional[List[Group]] = Field( groups: Optional[List[Group]] = Field(
None, json_schema_extra={"linkml_meta": {"alias": "groups", "domain_of": ["Group", "Groups"]}} None,
json_schema_extra={"linkml_meta": {"alias": "groups", "domain_of": ["Group", "Groups"]}},
) )
class Link(ConfiguredBaseModel): class Link(ConfiguredBaseModel):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}) linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
{"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}
)
name: Optional[str] = Field( name: Optional[str] = Field(
None, None,
json_schema_extra={ json_schema_extra={
"linkml_meta": { "linkml_meta": {
"alias": "name", "alias": "name",
"domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], "domain_of": [
"Namespace",
"Group",
"Attribute",
"Link",
"Dataset",
"CompoundDtype",
],
"structured_pattern": {"interpolated": True, "syntax": "{protected_string}"}, "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
} }
}, },
@ -434,14 +535,24 @@ class Link(ConfiguredBaseModel):
json_schema_extra={ json_schema_extra={
"linkml_meta": { "linkml_meta": {
"alias": "doc", "alias": "doc",
"domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], "domain_of": [
"Namespace",
"Schema",
"Group",
"Attribute",
"Link",
"Dataset",
"CompoundDtype",
],
} }
}, },
) )
target_type: str = Field( target_type: str = Field(
..., ...,
description="""Describes the neurodata_type of the target that the reference points to""", description="""Describes the neurodata_type of the target that the reference points to""",
json_schema_extra={"linkml_meta": {"alias": "target_type", "domain_of": ["Link", "ReferenceDtype"]}}, json_schema_extra={
"linkml_meta": {"alias": "target_type", "domain_of": ["Link", "ReferenceDtype"]}
},
) )
quantity: Optional[Union[QuantityEnum, int]] = Field( quantity: Optional[Union[QuantityEnum, int]] = Field(
"1", "1",
@ -451,27 +562,39 @@ class Link(ConfiguredBaseModel):
"any_of": [{"minimum_value": 1, "range": "integer"}, {"range": "QuantityEnum"}], "any_of": [{"minimum_value": 1, "range": "integer"}, {"range": "QuantityEnum"}],
"domain_of": ["Group", "Link", "Dataset"], "domain_of": ["Group", "Link", "Dataset"],
"ifabsent": "int(1)", "ifabsent": "int(1)",
"todos": ["logic to check that the corresponding class can only be " "implemented quantity times."], "todos": [
"logic to check that the corresponding class can only be "
"implemented quantity times."
],
} }
}, },
) )
class Datasets(ConfiguredBaseModel): class Datasets(ConfiguredBaseModel):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}) linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
{"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}
)
datasets: Optional[List[Dataset]] = Field( datasets: Optional[List[Dataset]] = Field(
None, json_schema_extra={"linkml_meta": {"alias": "datasets", "domain_of": ["Group", "Datasets"]}} None,
json_schema_extra={
"linkml_meta": {"alias": "datasets", "domain_of": ["Group", "Datasets"]}
},
) )
class ReferenceDtype(ConfiguredBaseModel): class ReferenceDtype(ConfiguredBaseModel):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}) linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
{"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"}
)
target_type: str = Field( target_type: str = Field(
..., ...,
description="""Describes the neurodata_type of the target that the reference points to""", description="""Describes the neurodata_type of the target that the reference points to""",
json_schema_extra={"linkml_meta": {"alias": "target_type", "domain_of": ["Link", "ReferenceDtype"]}}, json_schema_extra={
"linkml_meta": {"alias": "target_type", "domain_of": ["Link", "ReferenceDtype"]}
},
) )
reftype: Optional[ReftypeOptions] = Field( reftype: Optional[ReftypeOptions] = Field(
None, None,
@ -501,7 +624,14 @@ class CompoundDtype(ConfiguredBaseModel):
json_schema_extra={ json_schema_extra={
"linkml_meta": { "linkml_meta": {
"alias": "name", "alias": "name",
"domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], "domain_of": [
"Namespace",
"Group",
"Attribute",
"Link",
"Dataset",
"CompoundDtype",
],
"structured_pattern": {"interpolated": True, "syntax": "{protected_string}"}, "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
} }
}, },
@ -512,7 +642,15 @@ class CompoundDtype(ConfiguredBaseModel):
json_schema_extra={ json_schema_extra={
"linkml_meta": { "linkml_meta": {
"alias": "doc", "alias": "doc",
"domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], "domain_of": [
"Namespace",
"Schema",
"Group",
"Attribute",
"Link",
"Dataset",
"CompoundDtype",
],
} }
}, },
) )
@ -535,8 +673,12 @@ class DtypeMixin(ConfiguredBaseModel):
"mixin": True, "mixin": True,
"rules": [ "rules": [
{ {
"postconditions": {"slot_conditions": {"dtype": {"multivalued": False, "name": "dtype"}}}, "postconditions": {
"preconditions": {"slot_conditions": {"dtype": {"name": "dtype", "range": "FlatDtype"}}}, "slot_conditions": {"dtype": {"multivalued": False, "name": "dtype"}}
},
"preconditions": {
"slot_conditions": {"dtype": {"name": "dtype", "range": "FlatDtype"}}
},
} }
], ],
} }
@ -547,7 +689,11 @@ class DtypeMixin(ConfiguredBaseModel):
json_schema_extra={ json_schema_extra={
"linkml_meta": { "linkml_meta": {
"alias": "dtype", "alias": "dtype",
"any_of": [{"range": "FlatDtype"}, {"range": "CompoundDtype"}, {"range": "ReferenceDtype"}], "any_of": [
{"range": "FlatDtype"},
{"range": "CompoundDtype"},
{"range": "ReferenceDtype"},
],
"domain_of": ["CompoundDtype", "DtypeMixin"], "domain_of": ["CompoundDtype", "DtypeMixin"],
} }
}, },
@ -571,7 +717,14 @@ class Attribute(DtypeMixin):
json_schema_extra={ json_schema_extra={
"linkml_meta": { "linkml_meta": {
"alias": "name", "alias": "name",
"domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], "domain_of": [
"Namespace",
"Group",
"Attribute",
"Link",
"Dataset",
"CompoundDtype",
],
"structured_pattern": {"interpolated": True, "syntax": "{protected_string}"}, "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
} }
}, },
@ -611,12 +764,16 @@ class Attribute(DtypeMixin):
value: Optional[Any] = Field( value: Optional[Any] = Field(
None, None,
description="""Optional constant, fixed value for the attribute.""", description="""Optional constant, fixed value for the attribute.""",
json_schema_extra={"linkml_meta": {"alias": "value", "domain_of": ["Attribute", "Dataset"]}}, json_schema_extra={
"linkml_meta": {"alias": "value", "domain_of": ["Attribute", "Dataset"]}
},
) )
default_value: Optional[Any] = Field( default_value: Optional[Any] = Field(
None, None,
description="""Optional default value for variable-valued attributes.""", description="""Optional default value for variable-valued attributes.""",
json_schema_extra={"linkml_meta": {"alias": "default_value", "domain_of": ["Attribute", "Dataset"]}}, json_schema_extra={
"linkml_meta": {"alias": "default_value", "domain_of": ["Attribute", "Dataset"]}
},
) )
doc: str = Field( doc: str = Field(
..., ...,
@ -624,14 +781,24 @@ class Attribute(DtypeMixin):
json_schema_extra={ json_schema_extra={
"linkml_meta": { "linkml_meta": {
"alias": "doc", "alias": "doc",
"domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], "domain_of": [
"Namespace",
"Schema",
"Group",
"Attribute",
"Link",
"Dataset",
"CompoundDtype",
],
} }
}, },
) )
required: Optional[bool] = Field( required: Optional[bool] = Field(
True, True,
description="""Optional boolean key describing whether the attribute is required. Default value is True.""", description="""Optional boolean key describing whether the attribute is required. Default value is True.""",
json_schema_extra={"linkml_meta": {"alias": "required", "domain_of": ["Attribute"], "ifabsent": "true"}}, json_schema_extra={
"linkml_meta": {"alias": "required", "domain_of": ["Attribute"], "ifabsent": "true"}
},
) )
parent: Optional[Union[Dataset, Group]] = Field( parent: Optional[Union[Dataset, Group]] = Field(
None, None,
@ -650,7 +817,11 @@ class Attribute(DtypeMixin):
json_schema_extra={ json_schema_extra={
"linkml_meta": { "linkml_meta": {
"alias": "dtype", "alias": "dtype",
"any_of": [{"range": "FlatDtype"}, {"range": "CompoundDtype"}, {"range": "ReferenceDtype"}], "any_of": [
{"range": "FlatDtype"},
{"range": "CompoundDtype"},
{"range": "ReferenceDtype"},
],
"domain_of": ["CompoundDtype", "DtypeMixin"], "domain_of": ["CompoundDtype", "DtypeMixin"],
} }
}, },
@ -689,7 +860,14 @@ class Dataset(ConfiguredBaseModel, ParentizeMixin):
json_schema_extra={ json_schema_extra={
"linkml_meta": { "linkml_meta": {
"alias": "name", "alias": "name",
"domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], "domain_of": [
"Namespace",
"Group",
"Attribute",
"Link",
"Dataset",
"CompoundDtype",
],
"structured_pattern": {"interpolated": True, "syntax": "{protected_string}"}, "structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
} }
}, },
@ -739,12 +917,16 @@ class Dataset(ConfiguredBaseModel, ParentizeMixin):
value: Optional[Any] = Field( value: Optional[Any] = Field(
None, None,
description="""Optional constant, fixed value for the attribute.""", description="""Optional constant, fixed value for the attribute.""",
json_schema_extra={"linkml_meta": {"alias": "value", "domain_of": ["Attribute", "Dataset"]}}, json_schema_extra={
"linkml_meta": {"alias": "value", "domain_of": ["Attribute", "Dataset"]}
},
) )
default_value: Optional[Any] = Field( default_value: Optional[Any] = Field(
None, None,
description="""Optional default value for variable-valued attributes.""", description="""Optional default value for variable-valued attributes.""",
json_schema_extra={"linkml_meta": {"alias": "default_value", "domain_of": ["Attribute", "Dataset"]}}, json_schema_extra={
"linkml_meta": {"alias": "default_value", "domain_of": ["Attribute", "Dataset"]}
},
) )
doc: str = Field( doc: str = Field(
..., ...,
@ -752,7 +934,15 @@ class Dataset(ConfiguredBaseModel, ParentizeMixin):
json_schema_extra={ json_schema_extra={
"linkml_meta": { "linkml_meta": {
"alias": "doc", "alias": "doc",
"domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"], "domain_of": [
"Namespace",
"Schema",
"Group",
"Attribute",
"Link",
"Dataset",
"CompoundDtype",
],
} }
}, },
) )
@ -764,28 +954,41 @@ class Dataset(ConfiguredBaseModel, ParentizeMixin):
"any_of": [{"minimum_value": 1, "range": "integer"}, {"range": "QuantityEnum"}], "any_of": [{"minimum_value": 1, "range": "integer"}, {"range": "QuantityEnum"}],
"domain_of": ["Group", "Link", "Dataset"], "domain_of": ["Group", "Link", "Dataset"],
"ifabsent": "int(1)", "ifabsent": "int(1)",
"todos": ["logic to check that the corresponding class can only be " "implemented quantity times."], "todos": [
"logic to check that the corresponding class can only be "
"implemented quantity times."
],
} }
}, },
) )
linkable: Optional[bool] = Field( linkable: Optional[bool] = Field(
None, json_schema_extra={"linkml_meta": {"alias": "linkable", "domain_of": ["Group", "Dataset"]}} None,
json_schema_extra={"linkml_meta": {"alias": "linkable", "domain_of": ["Group", "Dataset"]}},
) )
attributes: Optional[List[Attribute]] = Field( attributes: Optional[List[Attribute]] = Field(
None, json_schema_extra={"linkml_meta": {"alias": "attributes", "domain_of": ["Group", "Dataset"]}} None,
json_schema_extra={
"linkml_meta": {"alias": "attributes", "domain_of": ["Group", "Dataset"]}
},
) )
parent: Optional[Group] = Field( parent: Optional[Group] = Field(
None, None,
exclude=True, exclude=True,
description="""The parent group that contains this dataset or group""", description="""The parent group that contains this dataset or group""",
json_schema_extra={"linkml_meta": {"alias": "parent", "domain_of": ["Group", "Attribute", "Dataset"]}}, json_schema_extra={
"linkml_meta": {"alias": "parent", "domain_of": ["Group", "Attribute", "Dataset"]}
},
) )
dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field( dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(
None, None,
json_schema_extra={ json_schema_extra={
"linkml_meta": { "linkml_meta": {
"alias": "dtype", "alias": "dtype",
"any_of": [{"range": "FlatDtype"}, {"range": "CompoundDtype"}, {"range": "ReferenceDtype"}], "any_of": [
{"range": "FlatDtype"},
{"range": "CompoundDtype"},
{"range": "ReferenceDtype"},
],
"domain_of": ["CompoundDtype", "DtypeMixin"], "domain_of": ["CompoundDtype", "DtypeMixin"],
} }
}, },

View file

@ -1,5 +1,9 @@
from pathlib import Path """
Customization of linkml pydantic generator
"""
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path
from linkml.generators.pydanticgen import PydanticGenerator from linkml.generators.pydanticgen import PydanticGenerator
from linkml.generators.pydanticgen.build import ClassResult from linkml.generators.pydanticgen.build import ClassResult
@ -9,9 +13,10 @@ from pydantic import BaseModel, model_validator
class ParentizeMixin(BaseModel): class ParentizeMixin(BaseModel):
"""Mixin to populate the parent field for nested datasets and groups"""
@model_validator(mode="after") @model_validator(mode="after")
def parentize(self): def parentize(self) -> BaseModel:
"""Set the parent attribute for all our fields they have one""" """Set the parent attribute for all our fields they have one"""
for field_name in self.model_fields: for field_name in self.model_fields:
if field_name == "parent": if field_name == "parent":
@ -28,6 +33,9 @@ class ParentizeMixin(BaseModel):
@dataclass @dataclass
class NWBSchemaLangGenerator(PydanticGenerator): class NWBSchemaLangGenerator(PydanticGenerator):
"""
Customization of linkml pydantic generator
"""
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
kwargs["injected_classes"] = [ParentizeMixin] kwargs["injected_classes"] = [ParentizeMixin]
@ -38,12 +46,18 @@ class NWBSchemaLangGenerator(PydanticGenerator):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
def after_generate_class(self, cls: ClassResult, sv: SchemaView) -> ClassResult: def after_generate_class(self, cls: ClassResult, sv: SchemaView) -> ClassResult:
"""
Add the ParentizeMixin to the bases of Dataset and Group
"""
if cls.cls.name in ("Dataset", "Group"): if cls.cls.name in ("Dataset", "Group"):
cls.cls.bases = ["ConfiguredBaseModel", "ParentizeMixin"] cls.cls.bases = ["ConfiguredBaseModel", "ParentizeMixin"]
return cls return cls
def generate(): def generate() -> None:
"""
Generate pydantic models for nwb_schema_language
"""
schema = Path(__file__).parent / "schema" / "nwb_schema_language.yaml" schema = Path(__file__).parent / "schema" / "nwb_schema_language.yaml"
output = Path(__file__).parent / "datamodel" / "nwb_schema_pydantic.py" output = Path(__file__).parent / "datamodel" / "nwb_schema_pydantic.py"
generator = NWBSchemaLangGenerator(schema=schema) generator = NWBSchemaLangGenerator(schema=schema)