Unify naming of derived child groups, arrays, and values so they are all named "value". Add link building. Fix erroneous collapsing of anonymous and named container groups.

This commit is contained in:
sneakers-the-rat 2024-08-05 16:05:44 -07:00
parent e72c860edd
commit b3b5b9d793
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
5 changed files with 156 additions and 105 deletions

View file

@@ -289,8 +289,31 @@ When generating pydantic models we...
 There are several different ways to create references between objects in nwb/hdmf:
-- ...
+- [`links`](https://schema-language.readthedocs.io/en/latest/description.html#sec-link-spec) are group-level
+  properties that can reference other groups or datasets like this:
+  ```yaml
+  links:
+  - name: Link name
+    doc: Required string with the description of the link
+    target_type: Type of target
+    quantity: Optional quantity identifier for the group (default=1).
+  ```
+- [Reference `dtype`](https://schema-language.readthedocs.io/en/latest/description.html#reference-dtype)s are
+  dataset- and attribute-level properties that can reference both other objects and regions within other objects:
+  ```yaml
+  dtype:
+    target_type: ElectrodeGroup
+    reftype: object
+  ```
+- Implicitly, hdmf creates references between objects according to some naming conventions, e.g.
+  an attribute/dataset that is a `VectorIndex` named `mydata_index` will be linked to a `VectorData`
+  object `mydata`.
+- There is currently a note in the schema language docs that there will be an additional
+  [Relationships](https://schema-language.readthedocs.io/en/latest/description.html#relationships) system
+  that explicitly models relationships, but it is unclear how that would differ from references.
+
+We represent all of these by directly referring to the object type, preserving the source type
+in an annotation when necessary.

 ## LinkML to Everything
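For concreteness, here is a minimal sketch (not the adapter itself) of the slot that the new `build_links` in the group adapter (diffed below) derives from a `links` entry; the `link` dict values and the `QUANTITY_MAP` stand-in are hypothetical:

```python
from linkml_runtime.linkml_model import SlotDefinition

# hypothetical stand-in for the adapters' QUANTITY_MAP ("?" = optional, single)
QUANTITY_MAP = {"?": {"required": False, "multivalued": False}}

# a links entry as parsed from the yaml above (hypothetical values)
link = {"name": "electrode_group", "target_type": "ElectrodeGroup", "quantity": "?"}

slot = SlotDefinition(
    name=link["name"],
    # a link slot accepts the referenced object itself, or its file path as a string
    any_of=[{"range": link["target_type"]}, {"range": "string"}],
    # preserve the source type so downstream tools know this was a link
    annotations=[{"tag": "source_type", "value": "link"}],
    **QUANTITY_MAP[link["quantity"]],
)
```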

View file

@@ -216,8 +216,8 @@ class MapListlike(DatasetMap):
     Used exactly once in the core schema, in ``ImageReferences`` -
     an array of references to other ``Image`` datasets. We ignore the
-    usual array structure and unnest the implicit array into a slot names from the
-    target type rather than the oddly-named ``num_images`` dimension so that
+    usual array structure and unnest the implicit array into a slot named "value"
+    rather than the oddly-named ``num_images`` dimension so that
     ultimately in the pydantic model we get a nicely behaved single-level list.

     Examples:
@@ -245,7 +245,7 @@ class MapListlike(DatasetMap):
           name: name
           range: string
           required: true
-        image:
+        value:
           name: image
           description: Ordered dataset of references to Image objects.
           multivalued: true
@@ -286,15 +286,15 @@ class MapListlike(DatasetMap):
        """
        Map to a list of the given class
        """
-       dtype = camel_to_snake(ClassAdapter.handle_dtype(cls.dtype))
        slot = SlotDefinition(
-           name=dtype,
+           name="value",
            multivalued=True,
            range=ClassAdapter.handle_dtype(cls.dtype),
            description=cls.doc,
            required=cls.quantity not in ("*", "?"),
+           annotations=[{"source_type": "reference"}],
        )
-       res.classes[0].attributes[dtype] = slot
+       res.classes[0].attributes["value"] = slot
        return res
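For intuition about what the renaming buys downstream, a hypothetical plain-pydantic sketch (not the generated model): the list always lives under a predictable `value` field rather than a dtype-derived name.

```python
from typing import List
from pydantic import BaseModel

class Image(BaseModel):
    name: str

class ImageReferences(BaseModel):
    name: str
    value: List[Image]  # previously named after the dtype, e.g. `image`

# a nicely behaved single-level list, addressable uniformly as `.value`
refs = ImageReferences(name="images", value=[Image(name="img0"), Image(name="img1")])
```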
@@ -533,9 +533,9 @@ class MapArrayLikeAttributes(DatasetMap):
        expressions = array_adapter.make_slot()
        # make a slot for the arraylike class
        array_slot = SlotDefinition(
-           name="array", range=ClassAdapter.handle_dtype(cls.dtype), **expressions
+           name="value", range=ClassAdapter.handle_dtype(cls.dtype), **expressions
        )
-       res.classes[0].attributes.update({"array": array_slot})
+       res.classes[0].attributes.update({"value": array_slot})
        return res
@@ -572,7 +572,7 @@ class MapClassRange(DatasetMap):
            name=cls.name,
            description=cls.doc,
            range=f"{cls.neurodata_type_inc}",
-           annotations=[{"named": True}],
+           annotations=[{"named": True}, {"source_type": "neurodata_type_inc"}],
            **QUANTITY_MAP[cls.quantity],
        )
        res = BuildResult(slots=[this_slot])
@@ -686,17 +686,28 @@ class MapNVectors(DatasetMap):
    Most commonly: ``VectorData`` is subclassed without a name and with a '*' quantity to indicate
    arbitrary columns.

+   Used twice:
+   - Images
+   - DynamicTable (and all its uses)
+
+   DynamicTable (and the slot VectorData where this is called for)
+   is handled specially and just dropped, because we handle the possibility for
+   arbitrary extra VectorData in the :mod:`nwb_linkml.includes.hdmf` module mixin classes.
+
+   So really this is just a handler for the `Images` case
    """

    @classmethod
    def check(c, cls: Dataset) -> bool:
        """
-       Check for being an unnamed multivalued vector class
+       Check for being an unnamed multivalued vector class that isn't VectorData
        """
        return (
            cls.name is None
            and cls.neurodata_type_def is None
            and cls.neurodata_type_inc
+           and cls.neurodata_type_inc != "VectorData"
            and cls.quantity in ("*", "+")
        )
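A self-contained sketch of the tightened predicate, with a dataclass standing in for `nwb_schema_language.Dataset`: anonymous multivalued `VectorData` no longer matches, leaving `Images` as the remaining case.

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class Dataset:  # stand-in for nwb_schema_language.Dataset
    name: Optional[str] = None
    neurodata_type_def: Optional[str] = None
    neurodata_type_inc: Optional[str] = None
    quantity: str = "1"

def check(cls: Dataset) -> bool:
    """Unnamed, typed by inclusion, multivalued, and not VectorData."""
    return (
        cls.name is None
        and cls.neurodata_type_def is None
        and cls.neurodata_type_inc is not None
        and cls.neurodata_type_inc != "VectorData"
        and cls.quantity in ("*", "+")
    )

assert check(Dataset(neurodata_type_inc="Image", quantity="*"))  # the Images case
assert not check(Dataset(neurodata_type_inc="VectorData", quantity="*"))  # dropped: handled by mixins
```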

View file

@@ -2,7 +2,7 @@
Adapter for NWB groups to linkml Classes
"""

-from typing import Type
+from typing import Type, List

from linkml_runtime.linkml_model import SlotDefinition
@@ -28,25 +28,13 @@ class GroupAdapter(ClassAdapter):
        Do the translation, yielding the BuildResult
        """
        # Handle container groups with only * quantity unnamed groups
-       if len(self.cls.groups) > 0 and all(
-           [self._check_if_container(g) for g in self.cls.groups]
+       if (
+           len(self.cls.groups) > 0
+           and not self.cls.links
+           and all([self._check_if_container(g) for g in self.cls.groups])
        ):  # and \
            # self.parent is not None:
            return self.handle_container_group(self.cls)

-       # Or you can have groups like /intervals where there are some named groups, and some unnamed
-       # but they all have the same type
-       elif (
-           len(self.cls.groups) > 0
-           and all(
-               [
-                   g.neurodata_type_inc == self.cls.groups[0].neurodata_type_inc
-                   for g in self.cls.groups
-               ]
-           )
-           and self.cls.groups[0].neurodata_type_inc is not None
-           and all([g.quantity in ("?", "*") for g in self.cls.groups])
-       ):
-           return self.handle_container_group(self.cls)
-
        # handle if we are a terminal container group without making a new class
        if (
@@ -58,17 +46,42 @@ class GroupAdapter(ClassAdapter):
            return self.handle_container_slot(self.cls)

        nested_res = self.build_subclasses()
+       # add links
+       links = self.build_links()
+
        # we don't propagate slots up to the next level since they are meant for this
        # level (ie. a way to refer to our children)
-       res = self.build_base(extra_attrs=nested_res.slots)
+       res = self.build_base(extra_attrs=nested_res.slots + links)
        # we do propagate classes tho
        res.classes.extend(nested_res.classes)

        return res

+   def build_links(self) -> List[SlotDefinition]:
+       """
+       Build links specified in the ``links`` field as slots that refer to other
+       classes, with an additional annotation specifying that they are in fact links.
+
+       Link slots can take either the object itself or the path to that object in the
+       file hierarchy as a string.
+       """
+       if not self.cls.links:
+           return []
+
+       slots = [
+           SlotDefinition(
+               name=link.name,
+               any_of=[{"range": link.target_type}, {"range": "string"}],
+               annotations=[{"tag": "source_type", "value": "link"}],
+               **QUANTITY_MAP[link.quantity],
+           )
+           for link in self.cls.links
+       ]
+       return slots
    def handle_container_group(self, cls: Group) -> BuildResult:
        """
-       Make a special LinkML `children` slot that can
+       Make a special LinkML `value` slot that can
        have any number of the objects that are of `neurodata_type_inc` class

        Examples:
@@ -84,14 +97,11 @@ class GroupAdapter(ClassAdapter):
                doc: Images objects containing images of presented stimuli.
                quantity: '*'

-       Args:
-           children (List[:class:`.Group`]): Child groups
        """
        # don't build subgroups as their own classes, just make a slot
        # that can contain them
-       name = cls.name if self.cls.name else "children"
+       name = cls.name if self.cls.name else "value"
        slot = SlotDefinition(
            name=name,
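Condensed, the corrected collapse test reads as below (a sketch over the spec fields, with the container check passed in rather than taken from `self`): a group becomes a `value` container slot only when it has child groups, no links, and every child is itself a container.

```python
from typing import Callable

def is_container_group(cls, check_if_container: Callable) -> bool:
    # collapse only pure containers: children present, no links,
    # and every child group is itself a container
    return (
        len(cls.groups) > 0
        and not cls.links
        and all(check_if_container(g) for g in cls.groups)
    )

# the derived slot is then named for the group, or "value" when anonymous:
# name = cls.name if cls.name else "value"
```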

View file

@@ -7,12 +7,12 @@ import pytest
from nwb_linkml.models.pydantic.core.v2_7_0.namespace import (
    ElectricalSeries,
    ElectrodeGroup,
-   NWBFileGeneralExtracellularEphysElectrodes,
+   ExtracellularEphysElectrodes,
)

@pytest.fixture()
-def electrical_series() -> Tuple["ElectricalSeries", "NWBFileGeneralExtracellularEphysElectrodes"]:
+def electrical_series() -> Tuple["ElectricalSeries", "ExtracellularEphysElectrodes"]:
    """
    Demo electrical series with adjoining electrodes
    """
@@ -27,7 +27,7 @@ def electrical_series() -> Tuple["ElectricalSeries", "NWBFileGeneralExtracellula
    )

    # make electrodes tables
-   electrodes = NWBFileGeneralExtracellularEphysElectrodes(
+   electrodes = ExtracellularEphysElectrodes(
        id=np.arange(0, n_electrodes),
        x=np.arange(0, n_electrodes),
        y=np.arange(n_electrodes, n_electrodes * 2),

View file

@@ -17,44 +17,53 @@ from nwb_linkml.providers import LinkMLProvider, PydanticProvider
from nwb_linkml.providers.git import NWB_CORE_REPO, HDMF_COMMON_REPO, GitRepo
from nwb_linkml.io import schema as io

-def generate_core_yaml(output_path:Path, dry_run:bool=False, hdmf_only:bool=False):
+def generate_core_yaml(output_path: Path, dry_run: bool = False, hdmf_only: bool = False):
    """Just build the latest version of the core schema"""
    core = io.load_nwb_core(hdmf_only=hdmf_only)
    built_schemas = core.build().schemas
    for schema in built_schemas:
-       output_file = output_path / (schema.name + '.yaml')
+       output_file = output_path / (schema.name + ".yaml")
        if not dry_run:
            yaml_dumper.dump(schema, output_file)

-def generate_core_pydantic(yaml_path:Path, output_path:Path, dry_run:bool=False):
+def generate_core_pydantic(yaml_path: Path, output_path: Path, dry_run: bool = False):
    """Just generate the latest version of the core schema"""
-   for schema in yaml_path.glob('*.yaml'):
-       python_name = schema.stem.replace('.', '_').replace('-', '_')
-       pydantic_file = (output_path / python_name).with_suffix('.py')
+   for schema in yaml_path.glob("*.yaml"):
+       python_name = schema.stem.replace(".", "_").replace("-", "_")
+       pydantic_file = (output_path / python_name).with_suffix(".py")
        generator = NWBPydanticGenerator(
            str(schema),
-           pydantic_version='2',
+           pydantic_version="2",
            emit_metadata=True,
            gen_classvars=True,
-           gen_slots=True
+           gen_slots=True,
        )
        gen_pydantic = generator.serialize()
        if not dry_run:
-           with open(pydantic_file, 'w') as pfile:
+           with open(pydantic_file, "w") as pfile:
                pfile.write(gen_pydantic)

-def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, repo:GitRepo=NWB_CORE_REPO, hdmf_only=False):
+def generate_versions(
+   yaml_path: Path,
+   pydantic_path: Path,
+   dry_run: bool = False,
+   repo: GitRepo = NWB_CORE_REPO,
+   hdmf_only=False,
+):
    """
    Generate linkml models for all versions
    """
-   #repo.clone(force=True)
+   # repo.clone(force=True)
    repo.clone()

    # use a directory underneath this one as the temporary directory rather than
    # the default hidden one
-   tmp_dir = Path(__file__).parent / '__tmp__'
+   tmp_dir = Path(__file__).parent / "__tmp__"
    if tmp_dir.exists():
        shutil.rmtree(tmp_dir)
    tmp_dir.mkdir()
@@ -65,12 +74,14 @@ def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, re
    failed_versions = {}

    overall_progress = Progress()
-   overall_task = overall_progress.add_task('All Versions', total=len(NWB_CORE_REPO.versions))
+   overall_task = overall_progress.add_task("All Versions", total=len(NWB_CORE_REPO.versions))
    build_progress = Progress(
-       TextColumn("[bold blue]{task.fields[name]} - [bold green]{task.fields[action]}",
-                  table_column=Column(ratio=1)),
-       BarColumn(table_column=Column(ratio=1), bar_width=None)
+       TextColumn(
+           "[bold blue]{task.fields[name]} - [bold green]{task.fields[action]}",
+           table_column=Column(ratio=1),
+       ),
+       BarColumn(table_column=Column(ratio=1), bar_width=None),
    )
    panel = Panel(Group(build_progress, overall_progress))
@@ -84,7 +95,9 @@ def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, re
            # build linkml
            try:
                # check out the version (this should also refresh the hdmf-common schema)
-               linkml_task = build_progress.add_task('', name=version, action='Checkout Version', total=3)
+               linkml_task = build_progress.add_task(
+                   "", name=version, action="Checkout Version", total=3
+               )
                repo.tag = version
                build_progress.update(linkml_task, advance=1, action="Load Namespaces")
@@ -92,35 +105,36 @@ def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, re
                core_ns = io.load_namespace_adapter(repo.namespace_file)
                if repo.namespace == NWB_CORE_REPO:
                    # then the hdmf-common namespace
-                   hdmf_common_ns = io.load_namespace_adapter(repo.temp_directory / 'hdmf-common-schema' / 'common' / 'namespace.yaml')
+                   hdmf_common_ns = io.load_namespace_adapter(
+                       repo.temp_directory / "hdmf-common-schema" / "common" / "namespace.yaml"
+                   )
                    core_ns.imported.append(hdmf_common_ns)

                build_progress.update(linkml_task, advance=1, action="Build LinkML")
                linkml_res = linkml_provider.build(core_ns)
                build_progress.update(linkml_task, advance=1, action="Built LinkML")

                # build pydantic
                ns_files = [res.namespace for res in linkml_res.values()]
-               pydantic_task = build_progress.add_task('', name=version, action='', total=len(ns_files))
+               pydantic_task = build_progress.add_task(
+                   "", name=version, action="", total=len(ns_files)
+               )
                for schema in ns_files:
-                   pbar_string = ' - '.join([schema.parts[-3], schema.parts[-2], schema.parts[-1]])
+                   pbar_string = schema.parts[-3]
                    build_progress.update(pydantic_task, action=pbar_string)
                    pydantic_provider.build(schema, versions=core_ns.versions, split=True)
                    build_progress.update(pydantic_task, advance=1)
-               build_progress.update(pydantic_task, action='Built Pydantic')
+               build_progress.update(pydantic_task, action="Built Pydantic")

            except Exception as e:
                build_progress.stop_task(linkml_task)
                if linkml_task is not None:
-                   build_progress.update(linkml_task, action='[bold red]LinkML Build Failed')
+                   build_progress.update(linkml_task, action="[bold red]LinkML Build Failed")
                    build_progress.stop_task(linkml_task)
                if pydantic_task is not None:
-                   build_progress.update(pydantic_task, action='[bold red]LinkML Build Failed')
+                   build_progress.update(pydantic_task, action="[bold red]LinkML Build Failed")
                    build_progress.stop_task(pydantic_task)
                failed_versions[version] = traceback.format_exception(e)
@@ -131,67 +145,66 @@ def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, re
        if not dry_run:
            if hdmf_only:
-               shutil.rmtree(yaml_path / 'linkml' / 'hdmf_common')
-               shutil.rmtree(yaml_path / 'linkml' / 'hdmf_experimental')
-               shutil.rmtree(pydantic_path / 'pydantic' / 'hdmf_common')
-               shutil.rmtree(pydantic_path / 'pydantic' / 'hdmf_experimental')
-               shutil.move(tmp_dir / 'linkml' / 'hdmf_common', yaml_path / 'linkml')
-               shutil.move(tmp_dir / 'linkml' / 'hdmf_experimental', yaml_path / 'linkml')
-               shutil.move(tmp_dir / 'pydantic' / 'hdmf_common', pydantic_path / 'pydantic')
-               shutil.move(tmp_dir / 'pydantic' / 'hdmf_experimental', pydantic_path / 'pydantic')
+               shutil.rmtree(yaml_path / "linkml" / "hdmf_common")
+               shutil.rmtree(yaml_path / "linkml" / "hdmf_experimental")
+               shutil.rmtree(pydantic_path / "pydantic" / "hdmf_common")
+               shutil.rmtree(pydantic_path / "pydantic" / "hdmf_experimental")
+               shutil.move(tmp_dir / "linkml" / "hdmf_common", yaml_path / "linkml")
+               shutil.move(tmp_dir / "linkml" / "hdmf_experimental", yaml_path / "linkml")
+               shutil.move(tmp_dir / "pydantic" / "hdmf_common", pydantic_path / "pydantic")
+               shutil.move(tmp_dir / "pydantic" / "hdmf_experimental", pydantic_path / "pydantic")
            else:
-               shutil.rmtree(yaml_path / 'linkml')
-               shutil.rmtree(pydantic_path / 'pydantic')
-               shutil.move(tmp_dir / 'linkml', yaml_path)
-               shutil.move(tmp_dir / 'pydantic', pydantic_path)
+               shutil.rmtree(yaml_path / "linkml")
+               shutil.rmtree(pydantic_path / "pydantic")
+               shutil.move(tmp_dir / "linkml", yaml_path)
+               shutil.move(tmp_dir / "pydantic", pydantic_path)

            # import the most recent version of the schemaz we built
-           latest_version = sorted((pydantic_path / 'pydantic' / 'core').iterdir(), key=os.path.getmtime)[-1]
+           latest_version = sorted(
+               (pydantic_path / "pydantic" / "core").iterdir(), key=os.path.getmtime
+           )[-1]

            # make inits to use the schema! we don't usually do this in the
            # provider class because we directly import the files there.
-           with open(pydantic_path / 'pydantic' / '__init__.py', 'w') as initfile:
-               initfile.write(' ')
-           with open(pydantic_path / '__init__.py', 'w') as initfile:
-               initfile.write(f'from .pydantic.core.{latest_version.name}.namespace import *')
+           with open(pydantic_path / "pydantic" / "__init__.py", "w") as initfile:
+               initfile.write(" ")
+           with open(pydantic_path / "__init__.py", "w") as initfile:
+               initfile.write(f"from .pydantic.core.{latest_version.name}.namespace import *")

    finally:
        if len(failed_versions) > 0:
-           print('Failed Building Versions:')
+           print("Failed Building Versions:")
            print(failed_versions)
def parser() -> ArgumentParser:
-   parser = ArgumentParser('Generate all available versions of NWB core schema')
+   parser = ArgumentParser("Generate all available versions of NWB core schema")
    parser.add_argument(
-       '--yaml',
+       "--yaml",
        help="directory to export linkML schema to",
        type=Path,
-       default=Path(__file__).parent.parent / 'nwb_linkml' / 'src' / 'nwb_linkml' / 'schema'
+       default=Path(__file__).parent.parent / "nwb_linkml" / "src" / "nwb_linkml" / "schema",
    )
    parser.add_argument(
-       '--pydantic',
+       "--pydantic",
        help="directory to export pydantic models",
        type=Path,
-       default=Path(__file__).parent.parent / 'nwb_linkml' / 'src' / 'nwb_linkml' / 'models'
+       default=Path(__file__).parent.parent / "nwb_linkml" / "src" / "nwb_linkml" / "models",
    )
-   parser.add_argument(
-       '--hdmf',
-       help="Only generate the HDMF namespaces",
-       action="store_true"
-   )
-   parser.add_argument(
-       '--latest',
+   parser.add_argument("--hdmf", help="Only generate the HDMF namespaces", action="store_true")
+   parser.add_argument(
+       "--latest",
        help="Only generate the latest version of the core schemas.",
-       action="store_true"
+       action="store_true",
    )
    parser.add_argument(
-       '--dry-run',
-       help="Generate schema and pydantic models without moving them into the target directories, for testing purposes",
-       action='store_true'
+       "--dry-run",
+       help=(
+           "Generate schema and pydantic models without moving them into the target directories,"
+           " for testing purposes"
+       ),
+       action="store_true",
    )
    return parser
@@ -212,12 +225,6 @@ def main():
    else:
        generate_versions(args.yaml, args.pydantic, args.dry_run, repo, args.hdmf)

if __name__ == "__main__":
    main()