Unify naming of derived child groups, arrays, and values so that they are all named "value". Add link building. Fix erroneous collapsing of anonymous and named container groups.

sneakers-the-rat 2024-08-05 16:05:44 -07:00
parent e72c860edd
commit b3b5b9d793
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
5 changed files with 156 additions and 105 deletions

View file

@ -289,8 +289,31 @@ When generating pydantic models we...
There are several different ways to create references between objects in nwb/hdmf:
- ...
- [`links`](https://schema-language.readthedocs.io/en/latest/description.html#sec-link-spec) are group-level
properties that can reference other groups or datasets like this:
```yaml
links:
- name: Link name
doc: Required string with the description of the link
target_type: Type of target
quantity: Optional quantity identifier for the group (default=1).
```
- [Reference `dtype`](https://schema-language.readthedocs.io/en/latest/description.html#reference-dtype)s are
dataset- and attribute-level properties that can reference both other objects and regions within other objects:
```yaml
dtype:
target_type: ElectrodeGroup
reftype: object
```
- Implicitly, hdmf creates references between objects according to naming conventions, e.g.
an attribute/dataset that is a `VectorIndex` named `mydata_index` will be linked to a `VectorData`
object named `mydata`.
- There is currently a note in the schema language docs that there will be an additional
[Relationships](https://schema-language.readthedocs.io/en/latest/description.html#relationships) system
that explicitly models relationships, but it is unclear how that would differ from references.
We represent all of these by referring directly to the object type, preserving the source type
in an annotation when necessary.
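As a rough illustration of that approach (a sketch, not the adapter's actual output; the slot name and description below are invented), a dataset with the reference `dtype` example above would become a LinkML slot whose range is the target class, with the original source type preserved in an annotation:
```python
from linkml_runtime.linkml_model import SlotDefinition

# Hypothetical slot for a dataset whose dtype is an object reference to ElectrodeGroup.
# Only the range + source_type annotation pattern mirrors the adapter code in this
# commit; the name and description are placeholders.
electrode_group = SlotDefinition(
    name="electrode_group",
    description="Reference to the ElectrodeGroup this row belongs to.",
    range="ElectrodeGroup",
    annotations=[{"tag": "source_type", "value": "reference"}],
)
```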
## LinkML to Everything

View file

@ -216,8 +216,8 @@ class MapListlike(DatasetMap):
Used exactly once in the core schema, in ``ImageReferences`` -
an array of references to other ``Image`` datasets. We ignore the
usual array structure and unnest the implicit array into a slot names from the
target type rather than the oddly-named ``num_images`` dimension so that
usual array structure and unnest the implicit array into a slot named "value"
rather than the oddly-named ``num_images`` dimension so that
ultimately in the pydantic model we get a nicely behaved single-level list.
Examples:
@ -245,7 +245,7 @@ class MapListlike(DatasetMap):
name: name
range: string
required: true
image:
value:
name: image
description: Ordered dataset of references to Image objects.
multivalued: true
@ -286,15 +286,15 @@ class MapListlike(DatasetMap):
"""
Map to a list of the given class
"""
dtype = camel_to_snake(ClassAdapter.handle_dtype(cls.dtype))
slot = SlotDefinition(
name=dtype,
name="value",
multivalued=True,
range=ClassAdapter.handle_dtype(cls.dtype),
description=cls.doc,
required=cls.quantity not in ("*", "?"),
annotations=[{"source_type": "reference"}],
)
res.classes[0].attributes[dtype] = slot
res.classes[0].attributes["value"] = slot
return res
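For orientation, a minimal sketch of what this buys us downstream (not the actual generated code; the real `Image` and `ImageReferences` models carry more fields and validators):
```python
from typing import List

from pydantic import BaseModel, Field


class Image(BaseModel):
    """Stand-in for the generated Image model."""
    name: str


class ImageReferences(BaseModel):
    """Sketch of the generated model: the reference array collapses into a
    single-level list in a slot named 'value'."""
    name: str
    value: List[Image] = Field(
        ..., description="Ordered dataset of references to Image objects."
    )
```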
@ -533,9 +533,9 @@ class MapArrayLikeAttributes(DatasetMap):
expressions = array_adapter.make_slot()
# make a slot for the arraylike class
array_slot = SlotDefinition(
name="array", range=ClassAdapter.handle_dtype(cls.dtype), **expressions
name="value", range=ClassAdapter.handle_dtype(cls.dtype), **expressions
)
res.classes[0].attributes.update({"array": array_slot})
res.classes[0].attributes.update({"value": array_slot})
return res
@ -572,7 +572,7 @@ class MapClassRange(DatasetMap):
name=cls.name,
description=cls.doc,
range=f"{cls.neurodata_type_inc}",
annotations=[{"named": True}],
annotations=[{"named": True}, {"source_type": "neurodata_type_inc"}],
**QUANTITY_MAP[cls.quantity],
)
res = BuildResult(slots=[this_slot])
@ -686,17 +686,28 @@ class MapNVectors(DatasetMap):
Most commonly: ``VectorData`` is subclassed without a name and with a '*' quantity to indicate
arbitrary columns.
Used twice:
- Images
- DynamicTable (and all its uses)
DynamicTable (and the VectorData slot this is called for)
is handled specially and just dropped, because we handle the possibility of
arbitrary extra VectorData in the :mod:`nwb_linkml.includes.hdmf` module mixin classes.
So really this is just a handler for the `Images` case.
"""
@classmethod
def check(c, cls: Dataset) -> bool:
"""
Check for being an unnamed multivalued vector class
Check for being an unnamed multivalued vector class that isn't VectorData
"""
return (
cls.name is None
and cls.neurodata_type_def is None
and cls.neurodata_type_inc
and cls.neurodata_type_inc != "VectorData"
and cls.quantity in ("*", "+")
)
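A quick sanity sketch of what this check matches (assuming the `Dataset` class from `nwb_schema_language` and that `MapNVectors` is importable from `nwb_linkml.adapters.dataset`; both module paths are assumptions, not shown in this diff):
```python
from nwb_schema_language import Dataset

from nwb_linkml.adapters.dataset import MapNVectors

# Unnamed, multivalued, and not VectorData: matched by this check.
images = Dataset(neurodata_type_inc="Image", quantity="*", doc="Images of presented stimuli.")
# Unnamed VectorData is deliberately excluded; the hdmf mixin classes handle it instead.
columns = Dataset(neurodata_type_inc="VectorData", quantity="*", doc="Arbitrary extra columns.")

assert MapNVectors.check(images)
assert not MapNVectors.check(columns)
```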

View file

@ -2,7 +2,7 @@
Adapter for NWB groups to linkml Classes
"""
from typing import Type
from typing import Type, List
from linkml_runtime.linkml_model import SlotDefinition
@ -28,25 +28,13 @@ class GroupAdapter(ClassAdapter):
Do the translation, yielding the BuildResult
"""
# Handle container groups with only * quantity unnamed groups
if len(self.cls.groups) > 0 and all(
[self._check_if_container(g) for g in self.cls.groups]
if (
len(self.cls.groups) > 0
and not self.cls.links
and all([self._check_if_container(g) for g in self.cls.groups])
): # and \
# self.parent is not None:
return self.handle_container_group(self.cls)
# Or you can have groups like /intervals where there are some named groups, and some unnamed
# but they all have the same type
elif (
len(self.cls.groups) > 0
and all(
[
g.neurodata_type_inc == self.cls.groups[0].neurodata_type_inc
for g in self.cls.groups
]
)
and self.cls.groups[0].neurodata_type_inc is not None
and all([g.quantity in ("?", "*") for g in self.cls.groups])
):
return self.handle_container_group(self.cls)
# handle if we are a terminal container group without making a new class
if (
@ -58,17 +46,42 @@ class GroupAdapter(ClassAdapter):
return self.handle_container_slot(self.cls)
nested_res = self.build_subclasses()
# add links
links = self.build_links()
# we don't propagate slots up to the next level since they are meant for this
# level (ie. a way to refer to our children)
res = self.build_base(extra_attrs=nested_res.slots)
res = self.build_base(extra_attrs=nested_res.slots + links)
# we do propagate classes tho
res.classes.extend(nested_res.classes)
return res
def build_links(self) -> List[SlotDefinition]:
"""
Build links specified in the ``links`` field as slots that refer to other
classes, with an additional annotation specifying that they are in fact links.
Link slots can take either the object itself or the path to that object in the
file hierarchy as a string.
"""
if not self.cls.links:
return []
slots = [
SlotDefinition(
name=link.name,
any_of=[{"range": link.target_type}, {"range": "string"}],
annotations=[{"tag": "source_type", "value": "link"}],
**QUANTITY_MAP[link.quantity],
)
for link in self.cls.links
]
return slots
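To make the mapping concrete, a hypothetical link spec like `{name: stimulus, target_type: TimeSeries, quantity: '?'}` would come out of `build_links` as roughly the following (a sketch, assuming `QUANTITY_MAP` expands `'?'` to `required=False, multivalued=False`):
```python
from linkml_runtime.linkml_model import SlotDefinition

# Sketch only: the link name and target type are hypothetical.
stimulus = SlotDefinition(
    name="stimulus",
    # the link can be satisfied by the object itself or by its path in the file
    any_of=[{"range": "TimeSeries"}, {"range": "string"}],
    annotations=[{"tag": "source_type", "value": "link"}],
    required=False,
    multivalued=False,
)
```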
def handle_container_group(self, cls: Group) -> BuildResult:
"""
Make a special LinkML `children` slot that can
Make a special LinkML `value` slot that can
have any number of the objects that are of `neurodata_type_inc` class
Examples:
@ -84,14 +97,11 @@ class GroupAdapter(ClassAdapter):
doc: Images objects containing images of presented stimuli.
quantity: '*'
Args:
children (List[:class:`.Group`]): Child groups
"""
# don't build subgroups as their own classes, just make a slot
# that can contain them
name = cls.name if self.cls.name else "children"
name = cls.name if self.cls.name else "value"
slot = SlotDefinition(
name=name,

View file

@ -7,12 +7,12 @@ import pytest
from nwb_linkml.models.pydantic.core.v2_7_0.namespace import (
ElectricalSeries,
ElectrodeGroup,
NWBFileGeneralExtracellularEphysElectrodes,
ExtracellularEphysElectrodes,
)
@pytest.fixture()
def electrical_series() -> Tuple["ElectricalSeries", "NWBFileGeneralExtracellularEphysElectrodes"]:
def electrical_series() -> Tuple["ElectricalSeries", "ExtracellularEphysElectrodes"]:
"""
Demo electrical series with adjoining electrodes
"""
@ -27,7 +27,7 @@ def electrical_series() -> Tuple["ElectricalSeries", "NWBFileGeneralExtracellula
)
# make electrodes tables
electrodes = NWBFileGeneralExtracellularEphysElectrodes(
electrodes = ExtracellularEphysElectrodes(
id=np.arange(0, n_electrodes),
x=np.arange(0, n_electrodes),
y=np.arange(n_electrodes, n_electrodes * 2),

View file

@ -17,44 +17,53 @@ from nwb_linkml.providers import LinkMLProvider, PydanticProvider
from nwb_linkml.providers.git import NWB_CORE_REPO, HDMF_COMMON_REPO, GitRepo
from nwb_linkml.io import schema as io
def generate_core_yaml(output_path:Path, dry_run:bool=False, hdmf_only:bool=False):
def generate_core_yaml(output_path: Path, dry_run: bool = False, hdmf_only: bool = False):
"""Just build the latest version of the core schema"""
core = io.load_nwb_core(hdmf_only=hdmf_only)
built_schemas = core.build().schemas
for schema in built_schemas:
output_file = output_path / (schema.name + '.yaml')
output_file = output_path / (schema.name + ".yaml")
if not dry_run:
yaml_dumper.dump(schema, output_file)
def generate_core_pydantic(yaml_path:Path, output_path:Path, dry_run:bool=False):
def generate_core_pydantic(yaml_path: Path, output_path: Path, dry_run: bool = False):
"""Just generate the latest version of the core schema"""
for schema in yaml_path.glob('*.yaml'):
python_name = schema.stem.replace('.', '_').replace('-', '_')
pydantic_file = (output_path / python_name).with_suffix('.py')
for schema in yaml_path.glob("*.yaml"):
python_name = schema.stem.replace(".", "_").replace("-", "_")
pydantic_file = (output_path / python_name).with_suffix(".py")
generator = NWBPydanticGenerator(
str(schema),
pydantic_version='2',
pydantic_version="2",
emit_metadata=True,
gen_classvars=True,
gen_slots=True
gen_slots=True,
)
gen_pydantic = generator.serialize()
if not dry_run:
with open(pydantic_file, 'w') as pfile:
with open(pydantic_file, "w") as pfile:
pfile.write(gen_pydantic)
def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, repo:GitRepo=NWB_CORE_REPO, hdmf_only=False):
def generate_versions(
yaml_path: Path,
pydantic_path: Path,
dry_run: bool = False,
repo: GitRepo = NWB_CORE_REPO,
hdmf_only=False,
):
"""
Generate linkml models for all versions
"""
#repo.clone(force=True)
# repo.clone(force=True)
repo.clone()
# use a directory underneath this one as the temporary directory rather than
# the default hidden one
tmp_dir = Path(__file__).parent / '__tmp__'
tmp_dir = Path(__file__).parent / "__tmp__"
if tmp_dir.exists():
shutil.rmtree(tmp_dir)
tmp_dir.mkdir()
@ -65,12 +74,14 @@ def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, re
failed_versions = {}
overall_progress = Progress()
overall_task = overall_progress.add_task('All Versions', total=len(NWB_CORE_REPO.versions))
overall_task = overall_progress.add_task("All Versions", total=len(NWB_CORE_REPO.versions))
build_progress = Progress(
TextColumn("[bold blue]{task.fields[name]} - [bold green]{task.fields[action]}",
table_column=Column(ratio=1)),
BarColumn(table_column=Column(ratio=1), bar_width=None)
TextColumn(
"[bold blue]{task.fields[name]} - [bold green]{task.fields[action]}",
table_column=Column(ratio=1),
),
BarColumn(table_column=Column(ratio=1), bar_width=None),
)
panel = Panel(Group(build_progress, overall_progress))
@ -84,7 +95,9 @@ def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, re
# build linkml
try:
# check out the version (this should also refresh the hdmf-common schema)
linkml_task = build_progress.add_task('', name=version, action='Checkout Version', total=3)
linkml_task = build_progress.add_task(
"", name=version, action="Checkout Version", total=3
)
repo.tag = version
build_progress.update(linkml_task, advance=1, action="Load Namespaces")
@ -92,35 +105,36 @@ def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, re
core_ns = io.load_namespace_adapter(repo.namespace_file)
if repo.namespace == NWB_CORE_REPO:
# then the hdmf-common namespace
hdmf_common_ns = io.load_namespace_adapter(repo.temp_directory / 'hdmf-common-schema' / 'common' / 'namespace.yaml')
hdmf_common_ns = io.load_namespace_adapter(
repo.temp_directory / "hdmf-common-schema" / "common" / "namespace.yaml"
)
core_ns.imported.append(hdmf_common_ns)
build_progress.update(linkml_task, advance=1, action="Build LinkML")
linkml_res = linkml_provider.build(core_ns)
build_progress.update(linkml_task, advance=1, action="Built LinkML")
# build pydantic
ns_files = [res.namespace for res in linkml_res.values()]
pydantic_task = build_progress.add_task('', name=version, action='', total=len(ns_files))
pydantic_task = build_progress.add_task(
"", name=version, action="", total=len(ns_files)
)
for schema in ns_files:
pbar_string = ' - '.join([schema.parts[-3], schema.parts[-2], schema.parts[-1]])
pbar_string = schema.parts[-3]
build_progress.update(pydantic_task, action=pbar_string)
pydantic_provider.build(schema, versions=core_ns.versions, split=True)
build_progress.update(pydantic_task, advance=1)
build_progress.update(pydantic_task, action='Built Pydantic')
build_progress.update(pydantic_task, action="Built Pydantic")
except Exception as e:
build_progress.stop_task(linkml_task)
if linkml_task is not None:
build_progress.update(linkml_task, action='[bold red]LinkML Build Failed')
build_progress.update(linkml_task, action="[bold red]LinkML Build Failed")
build_progress.stop_task(linkml_task)
if pydantic_task is not None:
build_progress.update(pydantic_task, action='[bold red]LinkML Build Failed')
build_progress.update(pydantic_task, action="[bold red]LinkML Build Failed")
build_progress.stop_task(pydantic_task)
failed_versions[version] = traceback.format_exception(e)
@ -131,67 +145,66 @@ def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, re
if not dry_run:
if hdmf_only:
shutil.rmtree(yaml_path / 'linkml' / 'hdmf_common')
shutil.rmtree(yaml_path / 'linkml' / 'hdmf_experimental')
shutil.rmtree(pydantic_path / 'pydantic' / 'hdmf_common')
shutil.rmtree(pydantic_path / 'pydantic' / 'hdmf_experimental')
shutil.move(tmp_dir / 'linkml' / 'hdmf_common', yaml_path / 'linkml')
shutil.move(tmp_dir / 'linkml' / 'hdmf_experimental', yaml_path / 'linkml')
shutil.move(tmp_dir / 'pydantic' / 'hdmf_common', pydantic_path / 'pydantic')
shutil.move(tmp_dir / 'pydantic' / 'hdmf_experimental', pydantic_path / 'pydantic')
shutil.rmtree(yaml_path / "linkml" / "hdmf_common")
shutil.rmtree(yaml_path / "linkml" / "hdmf_experimental")
shutil.rmtree(pydantic_path / "pydantic" / "hdmf_common")
shutil.rmtree(pydantic_path / "pydantic" / "hdmf_experimental")
shutil.move(tmp_dir / "linkml" / "hdmf_common", yaml_path / "linkml")
shutil.move(tmp_dir / "linkml" / "hdmf_experimental", yaml_path / "linkml")
shutil.move(tmp_dir / "pydantic" / "hdmf_common", pydantic_path / "pydantic")
shutil.move(tmp_dir / "pydantic" / "hdmf_experimental", pydantic_path / "pydantic")
else:
shutil.rmtree(yaml_path / 'linkml')
shutil.rmtree(pydantic_path / 'pydantic')
shutil.move(tmp_dir / 'linkml', yaml_path)
shutil.move(tmp_dir / 'pydantic', pydantic_path)
shutil.rmtree(yaml_path / "linkml")
shutil.rmtree(pydantic_path / "pydantic")
shutil.move(tmp_dir / "linkml", yaml_path)
shutil.move(tmp_dir / "pydantic", pydantic_path)
# import the most recent version of the schemas we built
latest_version = sorted((pydantic_path / 'pydantic' / 'core').iterdir(), key=os.path.getmtime)[-1]
latest_version = sorted(
(pydantic_path / "pydantic" / "core").iterdir(), key=os.path.getmtime
)[-1]
# make inits to use the schema! we don't usually do this in the
# provider class because we directly import the files there.
with open(pydantic_path / 'pydantic' / '__init__.py', 'w') as initfile:
initfile.write(' ')
with open(pydantic_path / "pydantic" / "__init__.py", "w") as initfile:
initfile.write(" ")
with open(pydantic_path / '__init__.py', 'w') as initfile:
initfile.write(f'from .pydantic.core.{latest_version.name}.namespace import *')
with open(pydantic_path / "__init__.py", "w") as initfile:
initfile.write(f"from .pydantic.core.{latest_version.name}.namespace import *")
finally:
if len(failed_versions) > 0:
print('Failed Building Versions:')
print("Failed Building Versions:")
print(failed_versions)
def parser() -> ArgumentParser:
parser = ArgumentParser('Generate all available versions of NWB core schema')
parser = ArgumentParser("Generate all available versions of NWB core schema")
parser.add_argument(
'--yaml',
"--yaml",
help="directory to export linkML schema to",
type=Path,
default=Path(__file__).parent.parent / 'nwb_linkml' / 'src' / 'nwb_linkml' / 'schema'
default=Path(__file__).parent.parent / "nwb_linkml" / "src" / "nwb_linkml" / "schema",
)
parser.add_argument(
'--pydantic',
"--pydantic",
help="directory to export pydantic models",
type=Path,
default=Path(__file__).parent.parent / 'nwb_linkml' / 'src' / 'nwb_linkml' / 'models'
default=Path(__file__).parent.parent / "nwb_linkml" / "src" / "nwb_linkml" / "models",
)
parser.add_argument("--hdmf", help="Only generate the HDMF namespaces", action="store_true")
parser.add_argument(
'--hdmf',
help="Only generate the HDMF namespaces",
action="store_true"
)
parser.add_argument(
'--latest',
"--latest",
help="Only generate the latest version of the core schemas.",
action="store_true"
action="store_true",
)
parser.add_argument(
'--dry-run',
help="Generate schema and pydantic models without moving them into the target directories, for testing purposes",
action='store_true'
"--dry-run",
help=(
"Generate schema and pydantic models without moving them into the target directories,"
" for testing purposes"
),
action="store_true",
)
return parser
@ -212,12 +225,6 @@ def main():
else:
generate_versions(args.yaml, args.pydantic, args.dry_run, repo, args.hdmf)
if __name__ == "__main__":
main()