From b3b5b9d793d7f5fff21bc8f2bc7dc6cb991da06e Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Mon, 5 Aug 2024 16:05:44 -0700 Subject: [PATCH] unify naming of derived child groups, arrays, and values as just all being named "value". Add link building. Fix erroneously collapsing anonymous and named container groups. --- docs/intro/translation.md | 25 ++- nwb_linkml/src/nwb_linkml/adapters/dataset.py | 31 ++-- nwb_linkml/src/nwb_linkml/adapters/group.py | 56 ++++--- nwb_linkml/tests/test_includes/test_hdmf.py | 6 +- scripts/generate_core.py | 143 +++++++++--------- 5 files changed, 156 insertions(+), 105 deletions(-) diff --git a/docs/intro/translation.md b/docs/intro/translation.md index 6170fee..a7dec83 100644 --- a/docs/intro/translation.md +++ b/docs/intro/translation.md @@ -289,8 +289,31 @@ When generating pydantic models we... There are several different ways to create references between objects in nwb/hdmf: -- ... +- [`links`](https://schema-language.readthedocs.io/en/latest/description.html#sec-link-spec) are group-level
+  properties that can reference other groups or datasets like this:
+  ```yaml
+  links:
+  - name: Link name
+    doc: Required string with the description of the link
+    target_type: Type of target
+    quantity: Optional quantity identifier for the group (default=1).
+  ```
+- [Reference `dtype`](https://schema-language.readthedocs.io/en/latest/description.html#reference-dtype)s are
+  dataset- and attribute-level properties that can reference both other objects and regions within other objects:
+  ```yaml
+  dtype:
+    target_type: ElectrodeGroup
+    reftype: object
+  ```
+- Implicitly, hdmf creates references between objects according to some naming conventions, e.g.
+  an attribute/dataset that is a `VectorIndex` named `mydata_index` will be linked to a `VectorData`
+  object `mydata`.
+- There is currently a note in the schema language docs that there will be an additional
+  [Relationships](https://schema-language.readthedocs.io/en/latest/description.html#relationships) system
+  that explicitly models relationships, but it is unclear how that would differ from references.
+We represent all of these by referring directly to the object type, preserving the source type
+in an annotation when necessary. ## LinkML to Everything diff --git a/nwb_linkml/src/nwb_linkml/adapters/dataset.py b/nwb_linkml/src/nwb_linkml/adapters/dataset.py index 8bc34b6..e9268cb 100644 --- a/nwb_linkml/src/nwb_linkml/adapters/dataset.py +++ b/nwb_linkml/src/nwb_linkml/adapters/dataset.py @@ -216,8 +216,8 @@ class MapListlike(DatasetMap): Used exactly once in the core schema, in ``ImageReferences`` - an array of references to other ``Image`` datasets. We ignore the - usual array structure and unnest the implicit array into a slot names from the - target type rather than the oddly-named ``num_images`` dimension so that + usual array structure and unnest the implicit array into a slot named "value" + rather than the oddly-named ``num_images`` dimension so that ultimately in the pydantic model we get a nicely behaved single-level list. Examples: @@ -245,7 +245,7 @@ class MapListlike(DatasetMap): name: name range: string required: true - image: + value: name: image description: Ordered dataset of references to Image objects. 
multivalued: true @@ -286,15 +286,15 @@ class MapListlike(DatasetMap): """ Map to a list of the given class """ - dtype = camel_to_snake(ClassAdapter.handle_dtype(cls.dtype)) slot = SlotDefinition( - name=dtype, + name="value", multivalued=True, range=ClassAdapter.handle_dtype(cls.dtype), description=cls.doc, required=cls.quantity not in ("*", "?"), + annotations=[{"source_type": "reference"}], ) - res.classes[0].attributes[dtype] = slot + res.classes[0].attributes["value"] = slot return res @@ -533,9 +533,9 @@ class MapArrayLikeAttributes(DatasetMap): expressions = array_adapter.make_slot() # make a slot for the arraylike class array_slot = SlotDefinition( - name="array", range=ClassAdapter.handle_dtype(cls.dtype), **expressions + name="value", range=ClassAdapter.handle_dtype(cls.dtype), **expressions ) - res.classes[0].attributes.update({"array": array_slot}) + res.classes[0].attributes.update({"value": array_slot}) return res @@ -572,7 +572,7 @@ class MapClassRange(DatasetMap): name=cls.name, description=cls.doc, range=f"{cls.neurodata_type_inc}", - annotations=[{"named": True}], + annotations=[{"named": True}, {"source_type": "neurodata_type_inc"}], **QUANTITY_MAP[cls.quantity], ) res = BuildResult(slots=[this_slot]) @@ -686,17 +686,28 @@ class MapNVectors(DatasetMap): Most commonly: ``VectorData`` is subclassed without a name and with a '*' quantity to indicate arbitrary columns. + + Used twice: + - Images + - DynamicTable (and all its uses) + + DynamicTable (and the slot VectorData where this is called for) + is handled specially and just dropped, because we handle the possibility for + arbitrary extra VectorData in the :mod:`nwb_linkml.includes.hdmf` module mixin classes. + + So really this is just a handler for the `Images` case """ @classmethod def check(c, cls: Dataset) -> bool: """ - Check for being an unnamed multivalued vector class + Check for being an unnamed multivalued vector class that isn't VectorData """ return ( cls.name is None and cls.neurodata_type_def is None and cls.neurodata_type_inc + and cls.neurodata_type_inc != "VectorData" and cls.quantity in ("*", "+") ) diff --git a/nwb_linkml/src/nwb_linkml/adapters/group.py b/nwb_linkml/src/nwb_linkml/adapters/group.py index 3b75487..451cb4c 100644 --- a/nwb_linkml/src/nwb_linkml/adapters/group.py +++ b/nwb_linkml/src/nwb_linkml/adapters/group.py @@ -2,7 +2,7 @@ Adapter for NWB groups to linkml Classes """ -from typing import Type +from typing import Type, List from linkml_runtime.linkml_model import SlotDefinition @@ -28,25 +28,13 @@ class GroupAdapter(ClassAdapter): Do the translation, yielding the BuildResult """ # Handle container groups with only * quantity unnamed groups - if len(self.cls.groups) > 0 and all( - [self._check_if_container(g) for g in self.cls.groups] + if ( + len(self.cls.groups) > 0 + and not self.cls.links + and all([self._check_if_container(g) for g in self.cls.groups]) ): # and \ # self.parent is not None: return self.handle_container_group(self.cls) - # Or you can have groups like /intervals where there are some named groups, and some unnamed - # but they all have the same type - elif ( - len(self.cls.groups) > 0 - and all( - [ - g.neurodata_type_inc == self.cls.groups[0].neurodata_type_inc - for g in self.cls.groups - ] - ) - and self.cls.groups[0].neurodata_type_inc is not None - and all([g.quantity in ("?", "*") for g in self.cls.groups]) - ): - return self.handle_container_group(self.cls) # handle if we are a terminal container group without making a new class if ( @@ -58,17 +46,42 @@ class 
GroupAdapter(ClassAdapter): return self.handle_container_slot(self.cls) nested_res = self.build_subclasses() + # add links + links = self.build_links() + # we don't propagate slots up to the next level since they are meant for this # level (ie. a way to refer to our children) - res = self.build_base(extra_attrs=nested_res.slots) + res = self.build_base(extra_attrs=nested_res.slots + links) # we do propagate classes tho res.classes.extend(nested_res.classes) return res + def build_links(self) -> List[SlotDefinition]: + """ + Build links specified in the ``links`` field as slots that refer to other + classes, with an additional annotation specifying that they are in fact links. + + Link slots can take either the object itself or the path to that object in the + file hierarchy as a string. + """ + if not self.cls.links: + return [] + + slots = [ + SlotDefinition( + name=link.name, + any_of=[{"range": link.target_type}, {"range": "string"}], + annotations=[{"tag": "source_type", "value": "link"}], + **QUANTITY_MAP[link.quantity], + ) + for link in self.cls.links + ] + return slots + def handle_container_group(self, cls: Group) -> BuildResult: """ - Make a special LinkML `children` slot that can + Make a special LinkML `value` slot that can have any number of the objects that are of `neurodata_type_inc` class Examples: @@ -84,14 +97,11 @@ class GroupAdapter(ClassAdapter): doc: Images objects containing images of presented stimuli. quantity: '*' - Args: - children (List[:class:`.Group`]): Child groups - """ # don't build subgroups as their own classes, just make a slot # that can contain them - name = cls.name if self.cls.name else "children" + name = cls.name if self.cls.name else "value" slot = SlotDefinition( name=name, diff --git a/nwb_linkml/tests/test_includes/test_hdmf.py b/nwb_linkml/tests/test_includes/test_hdmf.py index 26f5109..73f08ef 100644 --- a/nwb_linkml/tests/test_includes/test_hdmf.py +++ b/nwb_linkml/tests/test_includes/test_hdmf.py @@ -7,12 +7,12 @@ import pytest from nwb_linkml.models.pydantic.core.v2_7_0.namespace import ( ElectricalSeries, ElectrodeGroup, - NWBFileGeneralExtracellularEphysElectrodes, + ExtracellularEphysElectrodes, ) @pytest.fixture() -def electrical_series() -> Tuple["ElectricalSeries", "NWBFileGeneralExtracellularEphysElectrodes"]: +def electrical_series() -> Tuple["ElectricalSeries", "ExtracellularEphysElectrodes"]: """ Demo electrical series with adjoining electrodes """ @@ -27,7 +27,7 @@ def electrical_series() -> Tuple["ElectricalSeries", "NWBFileGeneralExtracellula ) # make electrodes tables - electrodes = NWBFileGeneralExtracellularEphysElectrodes( + electrodes = ExtracellularEphysElectrodes( id=np.arange(0, n_electrodes), x=np.arange(0, n_electrodes), y=np.arange(n_electrodes, n_electrodes * 2), diff --git a/scripts/generate_core.py b/scripts/generate_core.py index b447e00..7f86171 100644 --- a/scripts/generate_core.py +++ b/scripts/generate_core.py @@ -17,44 +17,53 @@ from nwb_linkml.providers import LinkMLProvider, PydanticProvider from nwb_linkml.providers.git import NWB_CORE_REPO, HDMF_COMMON_REPO, GitRepo from nwb_linkml.io import schema as io -def generate_core_yaml(output_path:Path, dry_run:bool=False, hdmf_only:bool=False): + +def generate_core_yaml(output_path: Path, dry_run: bool = False, hdmf_only: bool = False): """Just build the latest version of the core schema""" core = io.load_nwb_core(hdmf_only=hdmf_only) built_schemas = core.build().schemas for schema in built_schemas: - output_file = output_path / (schema.name + '.yaml') + 
output_file = output_path / (schema.name + ".yaml") if not dry_run: yaml_dumper.dump(schema, output_file) -def generate_core_pydantic(yaml_path:Path, output_path:Path, dry_run:bool=False): + +def generate_core_pydantic(yaml_path: Path, output_path: Path, dry_run: bool = False): """Just generate the latest version of the core schema""" - for schema in yaml_path.glob('*.yaml'): - python_name = schema.stem.replace('.', '_').replace('-', '_') - pydantic_file = (output_path / python_name).with_suffix('.py') + for schema in yaml_path.glob("*.yaml"): + python_name = schema.stem.replace(".", "_").replace("-", "_") + pydantic_file = (output_path / python_name).with_suffix(".py") generator = NWBPydanticGenerator( str(schema), - pydantic_version='2', + pydantic_version="2", emit_metadata=True, gen_classvars=True, - gen_slots=True + gen_slots=True, ) gen_pydantic = generator.serialize() if not dry_run: - with open(pydantic_file, 'w') as pfile: + with open(pydantic_file, "w") as pfile: pfile.write(gen_pydantic) -def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, repo:GitRepo=NWB_CORE_REPO, hdmf_only=False): + +def generate_versions( + yaml_path: Path, + pydantic_path: Path, + dry_run: bool = False, + repo: GitRepo = NWB_CORE_REPO, + hdmf_only=False, +): """ Generate linkml models for all versions """ - #repo.clone(force=True) + # repo.clone(force=True) repo.clone() # use a directory underneath this one as the temporary directory rather than # the default hidden one - tmp_dir = Path(__file__).parent / '__tmp__' + tmp_dir = Path(__file__).parent / "__tmp__" if tmp_dir.exists(): shutil.rmtree(tmp_dir) tmp_dir.mkdir() @@ -65,12 +74,14 @@ def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, re failed_versions = {} overall_progress = Progress() - overall_task = overall_progress.add_task('All Versions', total=len(NWB_CORE_REPO.versions)) + overall_task = overall_progress.add_task("All Versions", total=len(NWB_CORE_REPO.versions)) build_progress = Progress( - TextColumn("[bold blue]{task.fields[name]} - [bold green]{task.fields[action]}", - table_column=Column(ratio=1)), - BarColumn(table_column=Column(ratio=1), bar_width=None) + TextColumn( + "[bold blue]{task.fields[name]} - [bold green]{task.fields[action]}", + table_column=Column(ratio=1), + ), + BarColumn(table_column=Column(ratio=1), bar_width=None), ) panel = Panel(Group(build_progress, overall_progress)) @@ -84,7 +95,9 @@ def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, re # build linkml try: # check out the version (this should also refresh the hdmf-common schema) - linkml_task = build_progress.add_task('', name=version, action='Checkout Version', total=3) + linkml_task = build_progress.add_task( + "", name=version, action="Checkout Version", total=3 + ) repo.tag = version build_progress.update(linkml_task, advance=1, action="Load Namespaces") @@ -92,35 +105,36 @@ def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, re core_ns = io.load_namespace_adapter(repo.namespace_file) if repo.namespace == NWB_CORE_REPO: # then the hdmf-common namespace - hdmf_common_ns = io.load_namespace_adapter(repo.temp_directory / 'hdmf-common-schema' / 'common' / 'namespace.yaml') + hdmf_common_ns = io.load_namespace_adapter( + repo.temp_directory / "hdmf-common-schema" / "common" / "namespace.yaml" + ) core_ns.imported.append(hdmf_common_ns) build_progress.update(linkml_task, advance=1, action="Build LinkML") - linkml_res = linkml_provider.build(core_ns) 
build_progress.update(linkml_task, advance=1, action="Built LinkML") # build pydantic ns_files = [res.namespace for res in linkml_res.values()] - pydantic_task = build_progress.add_task('', name=version, action='', total=len(ns_files)) + pydantic_task = build_progress.add_task( + "", name=version, action="", total=len(ns_files) + ) for schema in ns_files: - pbar_string = ' - '.join([schema.parts[-3], schema.parts[-2], schema.parts[-1]]) + pbar_string = schema.parts[-3] build_progress.update(pydantic_task, action=pbar_string) pydantic_provider.build(schema, versions=core_ns.versions, split=True) build_progress.update(pydantic_task, advance=1) - build_progress.update(pydantic_task, action='Built Pydantic') - - + build_progress.update(pydantic_task, action="Built Pydantic") except Exception as e: build_progress.stop_task(linkml_task) if linkml_task is not None: - build_progress.update(linkml_task, action='[bold red]LinkML Build Failed') + build_progress.update(linkml_task, action="[bold red]LinkML Build Failed") build_progress.stop_task(linkml_task) if pydantic_task is not None: - build_progress.update(pydantic_task, action='[bold red]LinkML Build Failed') + build_progress.update(pydantic_task, action="[bold red]LinkML Build Failed") build_progress.stop_task(pydantic_task) failed_versions[version] = traceback.format_exception(e) @@ -131,67 +145,66 @@ def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, re if not dry_run: if hdmf_only: - shutil.rmtree(yaml_path / 'linkml' / 'hdmf_common') - shutil.rmtree(yaml_path / 'linkml' / 'hdmf_experimental') - shutil.rmtree(pydantic_path / 'pydantic' / 'hdmf_common') - shutil.rmtree(pydantic_path / 'pydantic' / 'hdmf_experimental') - shutil.move(tmp_dir / 'linkml' / 'hdmf_common', yaml_path / 'linkml') - shutil.move(tmp_dir / 'linkml' / 'hdmf_experimental', yaml_path / 'linkml') - shutil.move(tmp_dir / 'pydantic' / 'hdmf_common', pydantic_path / 'pydantic') - shutil.move(tmp_dir / 'pydantic' / 'hdmf_experimental', pydantic_path / 'pydantic') + shutil.rmtree(yaml_path / "linkml" / "hdmf_common") + shutil.rmtree(yaml_path / "linkml" / "hdmf_experimental") + shutil.rmtree(pydantic_path / "pydantic" / "hdmf_common") + shutil.rmtree(pydantic_path / "pydantic" / "hdmf_experimental") + shutil.move(tmp_dir / "linkml" / "hdmf_common", yaml_path / "linkml") + shutil.move(tmp_dir / "linkml" / "hdmf_experimental", yaml_path / "linkml") + shutil.move(tmp_dir / "pydantic" / "hdmf_common", pydantic_path / "pydantic") + shutil.move(tmp_dir / "pydantic" / "hdmf_experimental", pydantic_path / "pydantic") else: - shutil.rmtree(yaml_path / 'linkml') - shutil.rmtree(pydantic_path / 'pydantic') - shutil.move(tmp_dir / 'linkml', yaml_path) - shutil.move(tmp_dir / 'pydantic', pydantic_path) + shutil.rmtree(yaml_path / "linkml") + shutil.rmtree(pydantic_path / "pydantic") + shutil.move(tmp_dir / "linkml", yaml_path) + shutil.move(tmp_dir / "pydantic", pydantic_path) # import the most recent version of the schemaz we built - latest_version = sorted((pydantic_path / 'pydantic' / 'core').iterdir(), key=os.path.getmtime)[-1] + latest_version = sorted( + (pydantic_path / "pydantic" / "core").iterdir(), key=os.path.getmtime + )[-1] # make inits to use the schema! we don't usually do this in the # provider class because we directly import the files there. 
- with open(pydantic_path / 'pydantic' / '__init__.py', 'w') as initfile: - initfile.write(' ') + with open(pydantic_path / "pydantic" / "__init__.py", "w") as initfile: + initfile.write(" ") - with open(pydantic_path / '__init__.py', 'w') as initfile: - initfile.write(f'from .pydantic.core.{latest_version.name}.namespace import *') + with open(pydantic_path / "__init__.py", "w") as initfile: + initfile.write(f"from .pydantic.core.{latest_version.name}.namespace import *") finally: if len(failed_versions) > 0: - print('Failed Building Versions:') + print("Failed Building Versions:") print(failed_versions) - - def parser() -> ArgumentParser: - parser = ArgumentParser('Generate all available versions of NWB core schema') + parser = ArgumentParser("Generate all available versions of NWB core schema") parser.add_argument( - '--yaml', + "--yaml", help="directory to export linkML schema to", type=Path, - default=Path(__file__).parent.parent / 'nwb_linkml' / 'src' / 'nwb_linkml' / 'schema' + default=Path(__file__).parent.parent / "nwb_linkml" / "src" / "nwb_linkml" / "schema", ) parser.add_argument( - '--pydantic', + "--pydantic", help="directory to export pydantic models", type=Path, - default=Path(__file__).parent.parent / 'nwb_linkml' / 'src' / 'nwb_linkml' / 'models' + default=Path(__file__).parent.parent / "nwb_linkml" / "src" / "nwb_linkml" / "models", ) + parser.add_argument("--hdmf", help="Only generate the HDMF namespaces", action="store_true") parser.add_argument( - '--hdmf', - help="Only generate the HDMF namespaces", - action="store_true" - ) - parser.add_argument( - '--latest', + "--latest", help="Only generate the latest version of the core schemas.", - action="store_true" + action="store_true", ) parser.add_argument( - '--dry-run', - help="Generate schema and pydantic models without moving them into the target directories, for testing purposes", - action='store_true' + "--dry-run", + help=( + "Generate schema and pydantic models without moving them into the target directories," + " for testing purposes" + ), + action="store_true", ) return parser @@ -212,12 +225,6 @@ def main(): else: generate_versions(args.yaml, args.pydantic, args.dry_run, repo, args.hdmf) + if __name__ == "__main__": main() - - - - - - -
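
For illustration, here is a rough sketch of how a `links` entry in an NWB group spec maps onto the slot that the new `GroupAdapter.build_links` produces. The `device` link on `ElectrodeGroup` is used as an assumed example, and `required: true` is assumed to be what `QUANTITY_MAP` yields for the default quantity; the exact serialized YAML may differ from what the LinkML dumper emits.

```yaml
# Input: a `links` entry in an NWB group spec (assumed example: the Device link on ElectrodeGroup)
# links:
#   - name: device
#     doc: Link to the device that was used to record from this electrode group.
#     target_type: Device
#
# Output: roughly the slot that GroupAdapter.build_links builds for it
device:
  name: device
  required: true        # assumed result of QUANTITY_MAP for the default quantity
  any_of:
    - range: Device     # the link may hold the referenced object itself...
    - range: string     # ...or the path to that object in the file hierarchy
  annotations:
    source_type:
      tag: source_type
      value: link       # records that this slot originated from a `link`
```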