working complete, strict validating io :)

This commit is contained in:
sneakers-the-rat 2024-09-26 01:02:16 -07:00
parent 886d3db860
commit f9f1d49fca
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
8 changed files with 180 additions and 74 deletions

View file

@@ -354,3 +354,40 @@ def defaults(cls: Dataset | Attribute) -> dict:
ret["ifabsent"] = cls.default_value
return ret
def is_container(group: Group) -> bool:
"""
Check if a group is a container group.
i.e. a group that...
* has no name
* has a multivalued quantity (``'*'``)
* has a ``neurodata_type_inc``
* has no ``neurodata_type_def``
* has no sub-groups
* has no datasets
* has no attributes
Examples:
.. code-block:: yaml
- name: templates
groups:
- neurodata_type_inc: TimeSeries
doc: TimeSeries objects containing template data of presented stimuli.
quantity: '*'
- neurodata_type_inc: Images
doc: Images objects containing images of presented stimuli.
quantity: '*'
"""
return (
not group.name
and group.quantity == "*"
and group.neurodata_type_inc
and not group.neurodata_type_def
and not group.datasets
and not group.groups
and not group.attributes
)
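# --- Editor's illustration (not part of the commit): assuming the nwb_schema_language
# ``Group`` model accepts these fields as keywords, the YAML example in the docstring
# maps onto ``is_container`` roughly like so ---
templates = Group(
    name="templates",
    doc="Hypothetical container group holding stimulus templates.",
    groups=[
        Group(
            neurodata_type_inc="TimeSeries",
            quantity="*",
            doc="TimeSeries objects containing template data of presented stimuli.",
        ),
        Group(
            neurodata_type_inc="Images",
            quantity="*",
            doc="Images objects containing images of presented stimuli.",
        ),
    ],
)
assert not is_container(templates)  # named and has subgroups, so not a container itself
assert all(is_container(child) for child in templates.groups)  # anonymous '*' includes are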

View file

@@ -2,11 +2,11 @@
Adapter for NWB groups to linkml Classes
"""
from typing import List, Type
from typing import Type
from linkml_runtime.linkml_model import SlotDefinition
from nwb_linkml.adapters.adapter import BuildResult
from nwb_linkml.adapters.adapter import BuildResult, is_container
from nwb_linkml.adapters.classes import ClassAdapter
from nwb_linkml.adapters.dataset import DatasetAdapter
from nwb_linkml.maps import QUANTITY_MAP
@@ -45,19 +45,21 @@ class GroupAdapter(ClassAdapter):
):
return self.handle_container_slot(self.cls)
nested_res = self.build_subclasses()
# add links
links = self.build_links()
nested_res = self.build_datasets()
nested_res += self.build_groups()
nested_res += self.build_links()
nested_res += self.build_containers()
nested_res += self.build_special_cases()
# we don't propagate slots up to the next level since they are meant for this
# level (ie. a way to refer to our children)
res = self.build_base(extra_attrs=nested_res.slots + links)
res = self.build_base(extra_attrs=nested_res.slots)
# we do propagate classes tho
res.classes.extend(nested_res.classes)
return res
def build_links(self) -> List[SlotDefinition]:
def build_links(self) -> BuildResult:
"""
Build links specified in the ``links`` field as slots that refer to other
classes, with an additional annotation specifying that they are in fact links.
@@ -66,7 +68,7 @@ class GroupAdapter(ClassAdapter):
file hierarchy as a string.
"""
if not self.cls.links:
return []
return BuildResult()
annotations = [{"tag": "source_type", "value": "link"}]
@@ -83,7 +85,7 @@ class GroupAdapter(ClassAdapter):
)
for link in self.cls.links
]
return slots
return BuildResult(slots=slots)
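# --- Editor's sketch (not from the diff; the link name is illustrative): a single
# ``links`` entry now comes back as a one-slot BuildResult rather than a bare list,
# carrying the ``source_type: link`` annotation built above ---
link_result = BuildResult(
    slots=[
        SlotDefinition(
            name="imaging_plane",
            annotations=[{"tag": "source_type", "value": "link"}],
        )
    ]
)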
def handle_container_group(self, cls: Group) -> BuildResult:
"""
@@ -129,7 +131,7 @@ class GroupAdapter(ClassAdapter):
# We are a top-level container class like ProcessingModule
base = self.build_base()
# remove all the attributes and replace with child slot
base.classes[0].attributes.append(slot)
base.classes[0].attributes.update({slot.name: slot})
return base
def handle_container_slot(self, cls: Group) -> BuildResult:
@@ -167,30 +169,88 @@ class GroupAdapter(ClassAdapter):
return BuildResult(slots=[slot])
def build_subclasses(self) -> BuildResult:
def build_datasets(self) -> BuildResult:
"""
Build nested groups and datasets
Create ClassDefinitions for each, but then also create SlotDefinitions that
will be used as attributes linking the main class to the subclasses
Datasets are simple, they are terminal classes, and all logic
for creating slots vs. classes is handled by the adapter class
"""
# Datasets are simple, they are terminal classes, and all logic
# for creating slots vs. classes is handled by the adapter class
dataset_res = BuildResult()
if self.cls.datasets:
for dset in self.cls.datasets:
dset_adapter = DatasetAdapter(cls=dset, parent=self)
dataset_res += dset_adapter.build()
return dataset_res
def build_groups(self) -> BuildResult:
"""
Build subgroups, excluding pure container subgroups
"""
group_res = BuildResult()
if self.cls.groups:
for group in self.cls.groups:
if is_container(group):
continue
group_adapter = GroupAdapter(cls=group, parent=self)
group_res += group_adapter.build()
res = dataset_res + group_res
return group_res
def build_containers(self) -> BuildResult:
"""
Build all container types into a single ``value`` slot
"""
res = BuildResult()
if not self.cls.groups:
return res
containers = [grp for grp in self.cls.groups if is_container(grp)]
if not containers:
return res
if len(containers) == 1:
range = {"range": containers[0].neurodata_type_inc}
description = containers[0].doc
else:
range = {"any_of": [{"range": subcls.neurodata_type_inc} for subcls in containers]}
description = "\n\n".join([grp.doc for grp in containers])
slot = SlotDefinition(
name="value",
multivalued=True,
inlined=True,
inlined_as_list=False,
description=description,
**range,
)
if self.debug: # pragma: no cover - only used in development
slot.annotations["group_adapter"] = {
"tag": "slot_adapter",
"value": "container_value_slot",
}
res.slots = [slot]
return res
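# --- Editor's sketch (not part of the diff; descriptions taken from the ``templates``
# example earlier): two anonymous container subgroups collapse into one ``value`` slot ---
value_slot = SlotDefinition(
    name="value",
    multivalued=True,
    inlined=True,
    inlined_as_list=False,
    description=(
        "TimeSeries objects containing template data of presented stimuli.\n\n"
        "Images objects containing images of presented stimuli."
    ),
    any_of=[{"range": "TimeSeries"}, {"range": "Images"}],
)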
def build_special_cases(self) -> BuildResult:
"""
Special cases, at this point just for NWBFile, which has
extra ``.specloc`` and ``specifications`` attrs
"""
res = BuildResult()
if self.cls.neurodata_type_def == "NWBFile":
res.slots = [
SlotDefinition(
name="specifications",
range="dict",
description="Nested dictionary of schema specifications",
),
]
return res
def build_self_slot(self) -> SlotDefinition:

View file

@@ -15,7 +15,7 @@ from linkml.generators import PydanticGenerator
from linkml.generators.pydanticgen.array import ArrayRepresentation, NumpydanticArray
from linkml.generators.pydanticgen.build import ClassResult, SlotResult
from linkml.generators.pydanticgen.pydanticgen import SplitMode
from linkml.generators.pydanticgen.template import Import, Imports, PydanticModule
from linkml.generators.pydanticgen.template import Import, Imports, PydanticModule, ObjectImport
from linkml_runtime.linkml_model.meta import (
ArrayExpression,
SchemaDefinition,
@@ -30,6 +30,7 @@ from nwb_linkml.includes.base import (
BASEMODEL_COERCE_CHILD,
BASEMODEL_COERCE_VALUE,
BASEMODEL_GETITEM,
BASEMODEL_EXTRA_TO_VALUE,
)
from nwb_linkml.includes.hdmf import (
DYNAMIC_TABLE_IMPORTS,
@@ -58,9 +59,15 @@ class NWBPydanticGenerator(PydanticGenerator):
BASEMODEL_COERCE_VALUE,
BASEMODEL_CAST_WITH_VALUE,
BASEMODEL_COERCE_CHILD,
BASEMODEL_EXTRA_TO_VALUE,
)
split: bool = True
imports: list[Import] = field(default_factory=lambda: [Import(module="numpy", alias="np")])
imports: list[Import] = field(
default_factory=lambda: [
Import(module="numpy", alias="np"),
Import(module="pydantic", objects=[ObjectImport(name="model_validator")]),
]
)
schema_map: Optional[Dict[str, SchemaDefinition]] = None
"""See :meth:`.LinkMLProvider.build` for usage - a list of specific versions to import from"""

View file

@@ -3,7 +3,7 @@ Modifications to the ConfiguredBaseModel used by all generated classes
"""
BASEMODEL_GETITEM = """
def __getitem__(self, val: Union[int, slice]) -> Any:
def __getitem__(self, val: Union[int, slice, str]) -> Any:
\"\"\"Try and get a value from value or "data" if we have it\"\"\"
if hasattr(self, "value") and self.value is not None:
return self.value[val]
@@ -64,3 +64,23 @@ BASEMODEL_COERCE_CHILD = """
pass
return v
"""
BASEMODEL_EXTRA_TO_VALUE = """
@model_validator(mode="before")
@classmethod
def gather_extra_to_value(cls, v: Any, handler) -> Any:
\"\"\"
For classes that don't allow extra fields and have a value slot,
pack those extra kwargs into ``value``
\"\"\"
if cls.model_config["extra"] == "forbid" and "value" in cls.model_fields and isinstance(v, dict):
extras = {key:val for key,val in v.items() if key not in cls.model_fields}
if extras:
for k in extras:
del v[k]
if "value" in v:
v["value"].update(extras)
else:
v["value"] = extras
return v
"""

View file

@@ -35,7 +35,7 @@ import h5py
import networkx as nx
import numpy as np
from numpydantic.interface.hdf5 import H5ArrayPath
from pydantic import BaseModel, ValidationError
from pydantic import BaseModel
from tqdm import tqdm
from nwb_linkml.maps.hdf5 import (
@@ -166,24 +166,28 @@
raise TypeError(f"Nodes can only be h5py Datasets and Groups, got {obj}")
if "neurodata_type" in obj.attrs:
# SPECIAL CASE: ignore `.specloc`
if ".specloc" in args:
del args[".specloc"]
model = provider.get_class(obj.attrs["namespace"], obj.attrs["neurodata_type"])
try:
# try:
return model(**args)
except ValidationError as e1:
# try to restack extra fields into ``value``
if "value" in model.model_fields:
value_dict = {
key: val for key, val in args.items() if key not in model.model_fields
}
for k in value_dict:
del args[k]
args["value"] = value_dict
try:
return model(**args)
except Exception as e2:
raise e2 from e1
else:
raise e1
# except ValidationError as e1:
# # try to restack extra fields into ``value``
# if "value" in model.model_fields:
# value_dict = {
# key: val for key, val in args.items() if key not in model.model_fields
# }
# for k in value_dict:
# del args[k]
# args["value"] = value_dict
# try:
# return model(**args)
# except Exception as e2:
# raise e2 from e1
# else:
# raise e1
else:
if "name" in args:

View file

@@ -39,6 +39,10 @@ def _make_dtypes() -> List[TypeDefinition]:
repr=linkml_reprs.get(nwbtype, None),
)
DTypeTypes.append(atype)
# a dict type!
DTypeTypes.append(TypeDefinition(name="dict", repr="dict"))
return DTypeTypes

View file

@@ -80,7 +80,7 @@ def test_position(read_nwbfile, read_pynwb):
py_trials = read_pynwb.trials.to_dataframe()
pd.testing.assert_frame_equal(py_trials, trials)
spatial = read_nwbfile.processing["behavior"].Position.SpatialSeries
spatial = read_nwbfile.processing["behavior"]["Position"]["SpatialSeries"]
py_spatial = read_pynwb.processing["behavior"]["Position"]["SpatialSeries"]
_compare_attrs(spatial, py_spatial)
assert np.array_equal(spatial[:], py_spatial.data[:])

View file

@@ -19,37 +19,6 @@ from nwb_linkml.providers import LinkMLProvider, PydanticProvider
from nwb_linkml.providers.git import NWB_CORE_REPO, HDMF_COMMON_REPO, GitRepo
from nwb_linkml.io import schema as io
def generate_core_yaml(output_path: Path, dry_run: bool = False, hdmf_only: bool = False):
"""Just build the latest version of the core schema"""
core = io.load_nwb_core(hdmf_only=hdmf_only)
built_schemas = core.build().schemas
for schema in built_schemas:
output_file = output_path / (schema.name + ".yaml")
if not dry_run:
yaml_dumper.dump(schema, output_file)
def generate_core_pydantic(yaml_path: Path, output_path: Path, dry_run: bool = False):
"""Just generate the latest version of the core schema"""
for schema in yaml_path.glob("*.yaml"):
python_name = schema.stem.replace(".", "_").replace("-", "_")
pydantic_file = (output_path / python_name).with_suffix(".py")
generator = NWBPydanticGenerator(
str(schema),
pydantic_version="2",
emit_metadata=True,
gen_classvars=True,
gen_slots=True,
)
gen_pydantic = generator.serialize()
if not dry_run:
with open(pydantic_file, "w") as pfile:
pfile.write(gen_pydantic)
def make_tmp_dir(clear: bool = False) -> Path:
# use a directory underneath this one as the temporary directory rather than
# the default hidden one
@@ -68,6 +37,7 @@ def generate_versions(
dry_run: bool = False,
repo: GitRepo = NWB_CORE_REPO,
pdb=False,
latest: bool = False,
):
"""
Generate linkml models for all versions
@@ -82,8 +52,13 @@
failed_versions = {}
if latest:
versions = [repo.namespace.versions[-1]]
else:
versions = repo.namespace.versions
overall_progress = Progress()
overall_task = overall_progress.add_task("All Versions", total=len(NWB_CORE_REPO.versions))
overall_task = overall_progress.add_task("All Versions", total=len(versions))
build_progress = Progress(
TextColumn(
@@ -100,7 +75,7 @@ def generate_versions(
linkml_task = None
pydantic_task = None
for version in repo.namespace.versions:
for version in versions:
# build linkml
try:
# check out the version (this should also refresh the hdmf-common schema)
@@ -251,11 +226,10 @@ def main():
if not args.dry_run:
args.yaml.mkdir(exist_ok=True)
args.pydantic.mkdir(exist_ok=True)
if args.latest:
generate_core_yaml(args.yaml, args.dry_run)
generate_core_pydantic(args.yaml, args.pydantic, args.dry_run)
else:
generate_versions(args.yaml, args.pydantic, args.dry_run, repo, pdb=args.pdb)
generate_versions(
args.yaml, args.pydantic, args.dry_run, repo, pdb=args.pdb, latest=args.latest
)
if __name__ == "__main__":