Unify naming of derived child groups, arrays, and values so they are all named "value". Add link building. Fix erroneous collapsing of anonymous and named container groups.

This commit is contained in:
sneakers-the-rat 2024-08-05 16:05:44 -07:00
parent e72c860edd
commit b3b5b9d793
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
5 changed files with 156 additions and 105 deletions

View file

@@ -289,8 +289,31 @@ When generating pydantic models we...
 There are several different ways to create references between objects in nwb/hdmf:
-- ...
+- [`links`](https://schema-language.readthedocs.io/en/latest/description.html#sec-link-spec) are group-level
+  properties that can reference other groups or datasets like this:
+  ```yaml
+  links:
+  - name: Link name
+    doc: Required string with the description of the link
+    target_type: Type of target
+    quantity: Optional quantity identifier for the group (default=1).
+  ```
+- [Reference `dtype`](https://schema-language.readthedocs.io/en/latest/description.html#reference-dtype)s are
+  dataset- and attribute-level properties that can reference both other objects and regions within other objects:
+  ```yaml
+  dtype:
+    target_type: ElectrodeGroup
+    reftype: object
+  ```
+- Implicitly, hdmf creates references between objects according to some naming conventions, e.g.
+  an attribute/dataset that is a `VectorIndex` named `mydata_index` will be linked to a `VectorData`
+  object `mydata`.
+- There is currently a note in the schema language docs that there will be an additional
+  [Relationships](https://schema-language.readthedocs.io/en/latest/description.html#relationships) system
+  that explicitly models relationships, but it is unclear how that would differ from references.
+
+We represent all of these by directly referring to the object type, preserving the source type
+in an annotation when necessary.

 ## LinkML to Everything
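For concreteness, here is a minimal sketch (not the adapter itself) of the slot that the new `build_links` in the group adapter (diffed below) derives from a `links` entry; the `link` dict values and the `QUANTITY_MAP` stand-in are hypothetical:

```python
from linkml_runtime.linkml_model import SlotDefinition

# hypothetical stand-in for the adapters' QUANTITY_MAP ("?" = optional, single)
QUANTITY_MAP = {"?": {"required": False, "multivalued": False}}

# a links entry as parsed from the yaml above (hypothetical values)
link = {"name": "electrode_group", "target_type": "ElectrodeGroup", "quantity": "?"}

slot = SlotDefinition(
    name=link["name"],
    # a link slot accepts the referenced object itself, or its file path as a string
    any_of=[{"range": link["target_type"]}, {"range": "string"}],
    # preserve the source type so downstream tools know this was a link
    annotations=[{"tag": "source_type", "value": "link"}],
    **QUANTITY_MAP[link["quantity"]],
)
```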

View file

@@ -216,8 +216,8 @@ class MapListlike(DatasetMap):
     Used exactly once in the core schema, in ``ImageReferences`` -
     an array of references to other ``Image`` datasets. We ignore the
-    usual array structure and unnest the implicit array into a slot names from the
-    target type rather than the oddly-named ``num_images`` dimension so that
+    usual array structure and unnest the implicit array into a slot named "value"
+    rather than the oddly-named ``num_images`` dimension so that
     ultimately in the pydantic model we get a nicely behaved single-level list.

     Examples:
@@ -245,7 +245,7 @@ class MapListlike(DatasetMap):
           name: name
           range: string
           required: true
-        image:
+        value:
           name: image
           description: Ordered dataset of references to Image objects.
           multivalued: true
@@ -286,15 +286,15 @@ class MapListlike(DatasetMap):
        """
        Map to a list of the given class
        """
-       dtype = camel_to_snake(ClassAdapter.handle_dtype(cls.dtype))
        slot = SlotDefinition(
-           name=dtype,
+           name="value",
            multivalued=True,
            range=ClassAdapter.handle_dtype(cls.dtype),
            description=cls.doc,
            required=cls.quantity not in ("*", "?"),
+           annotations=[{"source_type": "reference"}],
        )
-       res.classes[0].attributes[dtype] = slot
+       res.classes[0].attributes["value"] = slot
        return res
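For intuition about what the renaming buys downstream, a hypothetical plain-pydantic sketch (not the generated model): the list always lives under a predictable `value` field rather than a dtype-derived name.

```python
from typing import List
from pydantic import BaseModel

class Image(BaseModel):
    name: str

class ImageReferences(BaseModel):
    name: str
    value: List[Image]  # previously named after the dtype, e.g. `image`

# a nicely behaved single-level list, addressable uniformly as `.value`
refs = ImageReferences(name="images", value=[Image(name="img0"), Image(name="img1")])
```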
@@ -533,9 +533,9 @@ class MapArrayLikeAttributes(DatasetMap):
        expressions = array_adapter.make_slot()
        # make a slot for the arraylike class
        array_slot = SlotDefinition(
-           name="array", range=ClassAdapter.handle_dtype(cls.dtype), **expressions
+           name="value", range=ClassAdapter.handle_dtype(cls.dtype), **expressions
        )
-       res.classes[0].attributes.update({"array": array_slot})
+       res.classes[0].attributes.update({"value": array_slot})
        return res
@@ -572,7 +572,7 @@ class MapClassRange(DatasetMap):
            name=cls.name,
            description=cls.doc,
            range=f"{cls.neurodata_type_inc}",
-           annotations=[{"named": True}],
+           annotations=[{"named": True}, {"source_type": "neurodata_type_inc"}],
            **QUANTITY_MAP[cls.quantity],
        )
        res = BuildResult(slots=[this_slot])
@@ -686,17 +686,28 @@ class MapNVectors(DatasetMap):
    Most commonly: ``VectorData`` is subclassed without a name and with a '*' quantity to indicate
    arbitrary columns.

+   Used twice:
+   - Images
+   - DynamicTable (and all its uses)
+
+   DynamicTable (and the slot VectorData where this is called for)
+   is handled specially and just dropped, because we handle the possibility for
+   arbitrary extra VectorData in the :mod:`nwb_linkml.includes.hdmf` module mixin classes.
+
+   So really this is just a handler for the `Images` case
    """

    @classmethod
    def check(c, cls: Dataset) -> bool:
        """
-       Check for being an unnamed multivalued vector class
+       Check for being an unnamed multivalued vector class that isn't VectorData
        """
        return (
            cls.name is None
            and cls.neurodata_type_def is None
            and cls.neurodata_type_inc
+           and cls.neurodata_type_inc != "VectorData"
            and cls.quantity in ("*", "+")
        )
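A self-contained sketch of the tightened predicate, with a dataclass standing in for `nwb_schema_language.Dataset`: anonymous multivalued `VectorData` no longer matches, leaving `Images` as the remaining case.

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class Dataset:  # stand-in for nwb_schema_language.Dataset
    name: Optional[str] = None
    neurodata_type_def: Optional[str] = None
    neurodata_type_inc: Optional[str] = None
    quantity: str = "1"

def check(cls: Dataset) -> bool:
    """Unnamed, typed by inclusion, multivalued, and not VectorData."""
    return (
        cls.name is None
        and cls.neurodata_type_def is None
        and cls.neurodata_type_inc is not None
        and cls.neurodata_type_inc != "VectorData"
        and cls.quantity in ("*", "+")
    )

assert check(Dataset(neurodata_type_inc="Image", quantity="*"))  # the Images case
assert not check(Dataset(neurodata_type_inc="VectorData", quantity="*"))  # dropped: handled by mixins
```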

View file

@@ -2,7 +2,7 @@
Adapter for NWB groups to linkml Classes
"""

-from typing import Type
+from typing import Type, List

from linkml_runtime.linkml_model import SlotDefinition
@@ -28,25 +28,13 @@ class GroupAdapter(ClassAdapter):
        Do the translation, yielding the BuildResult
        """
        # Handle container groups with only * quantity unnamed groups
-       if len(self.cls.groups) > 0 and all(
-           [self._check_if_container(g) for g in self.cls.groups]
+       if (
+           len(self.cls.groups) > 0
+           and not self.cls.links
+           and all([self._check_if_container(g) for g in self.cls.groups])
        ):  # and \
            # self.parent is not None:
            return self.handle_container_group(self.cls)

-       # Or you can have groups like /intervals where there are some named groups, and some unnamed
-       # but they all have the same type
-       elif (
-           len(self.cls.groups) > 0
-           and all(
-               [
-                   g.neurodata_type_inc == self.cls.groups[0].neurodata_type_inc
-                   for g in self.cls.groups
-               ]
-           )
-           and self.cls.groups[0].neurodata_type_inc is not None
-           and all([g.quantity in ("?", "*") for g in self.cls.groups])
-       ):
-           return self.handle_container_group(self.cls)
-
        # handle if we are a terminal container group without making a new class
        if (
@@ -58,17 +46,42 @@ class GroupAdapter(ClassAdapter):
            return self.handle_container_slot(self.cls)

        nested_res = self.build_subclasses()
+       # add links
+       links = self.build_links()
+
        # we don't propagate slots up to the next level since they are meant for this
        # level (ie. a way to refer to our children)
-       res = self.build_base(extra_attrs=nested_res.slots)
+       res = self.build_base(extra_attrs=nested_res.slots + links)
        # we do propagate classes tho
        res.classes.extend(nested_res.classes)

        return res

+   def build_links(self) -> List[SlotDefinition]:
+       """
+       Build links specified in the ``links`` field as slots that refer to other
+       classes, with an additional annotation specifying that they are in fact links.
+
+       Link slots can take either the object itself or the path to that object in the
+       file hierarchy as a string.
+       """
+       if not self.cls.links:
+           return []
+
+       slots = [
+           SlotDefinition(
+               name=link.name,
+               any_of=[{"range": link.target_type}, {"range": "string"}],
+               annotations=[{"tag": "source_type", "value": "link"}],
+               **QUANTITY_MAP[link.quantity],
+           )
+           for link in self.cls.links
+       ]
+       return slots
    def handle_container_group(self, cls: Group) -> BuildResult:
        """
-       Make a special LinkML `children` slot that can
+       Make a special LinkML `value` slot that can
        have any number of the objects that are of `neurodata_type_inc` class

        Examples:
@@ -84,14 +97,11 @@ class GroupAdapter(ClassAdapter):
                doc: Images objects containing images of presented stimuli.
                quantity: '*'

-       Args:
-           children (List[:class:`.Group`]): Child groups
        """
        # don't build subgroups as their own classes, just make a slot
        # that can contain them
-       name = cls.name if self.cls.name else "children"
+       name = cls.name if self.cls.name else "value"
        slot = SlotDefinition(
            name=name,
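Condensed, the corrected collapse test reads as below (a sketch over the spec fields, with the container check passed in rather than taken from `self`): a group becomes a `value` container slot only when it has child groups, no links, and every child is itself a container.

```python
from typing import Callable

def is_container_group(cls, check_if_container: Callable) -> bool:
    # collapse only pure containers: children present, no links,
    # and every child group is itself a container
    return (
        len(cls.groups) > 0
        and not cls.links
        and all(check_if_container(g) for g in cls.groups)
    )

# the derived slot is then named for the group, or "value" when anonymous:
# name = cls.name if cls.name else "value"
```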

View file

@@ -7,12 +7,12 @@ import pytest
from nwb_linkml.models.pydantic.core.v2_7_0.namespace import (
    ElectricalSeries,
    ElectrodeGroup,
-   NWBFileGeneralExtracellularEphysElectrodes,
+   ExtracellularEphysElectrodes,
)

@pytest.fixture()
-def electrical_series() -> Tuple["ElectricalSeries", "NWBFileGeneralExtracellularEphysElectrodes"]:
+def electrical_series() -> Tuple["ElectricalSeries", "ExtracellularEphysElectrodes"]:
    """
    Demo electrical series with adjoining electrodes
    """
@@ -27,7 +27,7 @@ def electrical_series() -> Tuple["ElectricalSeries", "NWBFileGeneralExtracellula
    )

    # make electrodes tables
-   electrodes = NWBFileGeneralExtracellularEphysElectrodes(
+   electrodes = ExtracellularEphysElectrodes(
        id=np.arange(0, n_electrodes),
        x=np.arange(0, n_electrodes),
        y=np.arange(n_electrodes, n_electrodes * 2),

View file

@@ -17,44 +17,53 @@ from nwb_linkml.providers import LinkMLProvider, PydanticProvider
from nwb_linkml.providers.git import NWB_CORE_REPO, HDMF_COMMON_REPO, GitRepo
from nwb_linkml.io import schema as io

-def generate_core_yaml(output_path:Path, dry_run:bool=False, hdmf_only:bool=False):
+def generate_core_yaml(output_path: Path, dry_run: bool = False, hdmf_only: bool = False):
    """Just build the latest version of the core schema"""
    core = io.load_nwb_core(hdmf_only=hdmf_only)
    built_schemas = core.build().schemas
    for schema in built_schemas:
-       output_file = output_path / (schema.name + '.yaml')
+       output_file = output_path / (schema.name + ".yaml")
        if not dry_run:
            yaml_dumper.dump(schema, output_file)

-def generate_core_pydantic(yaml_path:Path, output_path:Path, dry_run:bool=False):
+def generate_core_pydantic(yaml_path: Path, output_path: Path, dry_run: bool = False):
    """Just generate the latest version of the core schema"""
-   for schema in yaml_path.glob('*.yaml'):
-       python_name = schema.stem.replace('.', '_').replace('-', '_')
-       pydantic_file = (output_path / python_name).with_suffix('.py')
+   for schema in yaml_path.glob("*.yaml"):
+       python_name = schema.stem.replace(".", "_").replace("-", "_")
+       pydantic_file = (output_path / python_name).with_suffix(".py")
        generator = NWBPydanticGenerator(
            str(schema),
-           pydantic_version='2',
+           pydantic_version="2",
            emit_metadata=True,
            gen_classvars=True,
-           gen_slots=True
+           gen_slots=True,
        )
        gen_pydantic = generator.serialize()
        if not dry_run:
-           with open(pydantic_file, 'w') as pfile:
+           with open(pydantic_file, "w") as pfile:
                pfile.write(gen_pydantic)

-def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, repo:GitRepo=NWB_CORE_REPO, hdmf_only=False):
+def generate_versions(
+   yaml_path: Path,
+   pydantic_path: Path,
+   dry_run: bool = False,
+   repo: GitRepo = NWB_CORE_REPO,
+   hdmf_only=False,
+):
    """
    Generate linkml models for all versions
    """
-   #repo.clone(force=True)
+   # repo.clone(force=True)
    repo.clone()

    # use a directory underneath this one as the temporary directory rather than
    # the default hidden one
-   tmp_dir = Path(__file__).parent / '__tmp__'
+   tmp_dir = Path(__file__).parent / "__tmp__"
    if tmp_dir.exists():
        shutil.rmtree(tmp_dir)
    tmp_dir.mkdir()
@@ -65,12 +74,14 @@ def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, re
    failed_versions = {}

    overall_progress = Progress()
-   overall_task = overall_progress.add_task('All Versions', total=len(NWB_CORE_REPO.versions))
+   overall_task = overall_progress.add_task("All Versions", total=len(NWB_CORE_REPO.versions))
    build_progress = Progress(
-       TextColumn("[bold blue]{task.fields[name]} - [bold green]{task.fields[action]}",
-                  table_column=Column(ratio=1)),
-       BarColumn(table_column=Column(ratio=1), bar_width=None)
+       TextColumn(
+           "[bold blue]{task.fields[name]} - [bold green]{task.fields[action]}",
+           table_column=Column(ratio=1),
+       ),
+       BarColumn(table_column=Column(ratio=1), bar_width=None),
    )
    panel = Panel(Group(build_progress, overall_progress))
@@ -84,7 +95,9 @@ def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, re
            # build linkml
            try:
                # check out the version (this should also refresh the hdmf-common schema)
-               linkml_task = build_progress.add_task('', name=version, action='Checkout Version', total=3)
+               linkml_task = build_progress.add_task(
+                   "", name=version, action="Checkout Version", total=3
+               )
                repo.tag = version
                build_progress.update(linkml_task, advance=1, action="Load Namespaces")
@@ -92,35 +105,36 @@ def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, re
                core_ns = io.load_namespace_adapter(repo.namespace_file)
                if repo.namespace == NWB_CORE_REPO:
                    # then the hdmf-common namespace
-                   hdmf_common_ns = io.load_namespace_adapter(repo.temp_directory / 'hdmf-common-schema' / 'common' / 'namespace.yaml')
+                   hdmf_common_ns = io.load_namespace_adapter(
+                       repo.temp_directory / "hdmf-common-schema" / "common" / "namespace.yaml"
+                   )
                    core_ns.imported.append(hdmf_common_ns)

                build_progress.update(linkml_task, advance=1, action="Build LinkML")
                linkml_res = linkml_provider.build(core_ns)
                build_progress.update(linkml_task, advance=1, action="Built LinkML")

                # build pydantic
                ns_files = [res.namespace for res in linkml_res.values()]
-               pydantic_task = build_progress.add_task('', name=version, action='', total=len(ns_files))
+               pydantic_task = build_progress.add_task(
+                   "", name=version, action="", total=len(ns_files)
+               )
                for schema in ns_files:
-                   pbar_string = ' - '.join([schema.parts[-3], schema.parts[-2], schema.parts[-1]])
+                   pbar_string = schema.parts[-3]
                    build_progress.update(pydantic_task, action=pbar_string)
                    pydantic_provider.build(schema, versions=core_ns.versions, split=True)
                    build_progress.update(pydantic_task, advance=1)
-               build_progress.update(pydantic_task, action='Built Pydantic')
+               build_progress.update(pydantic_task, action="Built Pydantic")

            except Exception as e:
                build_progress.stop_task(linkml_task)
                if linkml_task is not None:
-                   build_progress.update(linkml_task, action='[bold red]LinkML Build Failed')
+                   build_progress.update(linkml_task, action="[bold red]LinkML Build Failed")
                    build_progress.stop_task(linkml_task)
                if pydantic_task is not None:
-                   build_progress.update(pydantic_task, action='[bold red]LinkML Build Failed')
+                   build_progress.update(pydantic_task, action="[bold red]LinkML Build Failed")
                    build_progress.stop_task(pydantic_task)
                failed_versions[version] = traceback.format_exception(e)
@@ -131,67 +145,66 @@ def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, re
        if not dry_run:
            if hdmf_only:
-               shutil.rmtree(yaml_path / 'linkml' / 'hdmf_common')
-               shutil.rmtree(yaml_path / 'linkml' / 'hdmf_experimental')
-               shutil.rmtree(pydantic_path / 'pydantic' / 'hdmf_common')
-               shutil.rmtree(pydantic_path / 'pydantic' / 'hdmf_experimental')
-               shutil.move(tmp_dir / 'linkml' / 'hdmf_common', yaml_path / 'linkml')
-               shutil.move(tmp_dir / 'linkml' / 'hdmf_experimental', yaml_path / 'linkml')
-               shutil.move(tmp_dir / 'pydantic' / 'hdmf_common', pydantic_path / 'pydantic')
-               shutil.move(tmp_dir / 'pydantic' / 'hdmf_experimental', pydantic_path / 'pydantic')
+               shutil.rmtree(yaml_path / "linkml" / "hdmf_common")
+               shutil.rmtree(yaml_path / "linkml" / "hdmf_experimental")
+               shutil.rmtree(pydantic_path / "pydantic" / "hdmf_common")
+               shutil.rmtree(pydantic_path / "pydantic" / "hdmf_experimental")
+               shutil.move(tmp_dir / "linkml" / "hdmf_common", yaml_path / "linkml")
+               shutil.move(tmp_dir / "linkml" / "hdmf_experimental", yaml_path / "linkml")
+               shutil.move(tmp_dir / "pydantic" / "hdmf_common", pydantic_path / "pydantic")
+               shutil.move(tmp_dir / "pydantic" / "hdmf_experimental", pydantic_path / "pydantic")
            else:
-               shutil.rmtree(yaml_path / 'linkml')
-               shutil.rmtree(pydantic_path / 'pydantic')
-               shutil.move(tmp_dir / 'linkml', yaml_path)
-               shutil.move(tmp_dir / 'pydantic', pydantic_path)
+               shutil.rmtree(yaml_path / "linkml")
+               shutil.rmtree(pydantic_path / "pydantic")
+               shutil.move(tmp_dir / "linkml", yaml_path)
+               shutil.move(tmp_dir / "pydantic", pydantic_path)

            # import the most recent version of the schemaz we built
-           latest_version = sorted((pydantic_path / 'pydantic' / 'core').iterdir(), key=os.path.getmtime)[-1]
+           latest_version = sorted(
+               (pydantic_path / "pydantic" / "core").iterdir(), key=os.path.getmtime
+           )[-1]

            # make inits to use the schema! we don't usually do this in the
            # provider class because we directly import the files there.
-           with open(pydantic_path / 'pydantic' / '__init__.py', 'w') as initfile:
-               initfile.write(' ')
-           with open(pydantic_path / '__init__.py', 'w') as initfile:
-               initfile.write(f'from .pydantic.core.{latest_version.name}.namespace import *')
+           with open(pydantic_path / "pydantic" / "__init__.py", "w") as initfile:
+               initfile.write(" ")
+           with open(pydantic_path / "__init__.py", "w") as initfile:
+               initfile.write(f"from .pydantic.core.{latest_version.name}.namespace import *")

    finally:
        if len(failed_versions) > 0:
-           print('Failed Building Versions:')
+           print("Failed Building Versions:")
            print(failed_versions)
def parser() -> ArgumentParser:
-   parser = ArgumentParser('Generate all available versions of NWB core schema')
+   parser = ArgumentParser("Generate all available versions of NWB core schema")
    parser.add_argument(
-       '--yaml',
+       "--yaml",
        help="directory to export linkML schema to",
        type=Path,
-       default=Path(__file__).parent.parent / 'nwb_linkml' / 'src' / 'nwb_linkml' / 'schema'
+       default=Path(__file__).parent.parent / "nwb_linkml" / "src" / "nwb_linkml" / "schema",
    )
    parser.add_argument(
-       '--pydantic',
+       "--pydantic",
        help="directory to export pydantic models",
        type=Path,
-       default=Path(__file__).parent.parent / 'nwb_linkml' / 'src' / 'nwb_linkml' / 'models'
+       default=Path(__file__).parent.parent / "nwb_linkml" / "src" / "nwb_linkml" / "models",
    )
-   parser.add_argument(
-       '--hdmf',
-       help="Only generate the HDMF namespaces",
-       action="store_true"
-   )
-   parser.add_argument(
-       '--latest',
+   parser.add_argument("--hdmf", help="Only generate the HDMF namespaces", action="store_true")
+   parser.add_argument(
+       "--latest",
        help="Only generate the latest version of the core schemas.",
-       action="store_true"
+       action="store_true",
    )
    parser.add_argument(
-       '--dry-run',
-       help="Generate schema and pydantic models without moving them into the target directories, for testing purposes",
-       action='store_true'
+       "--dry-run",
+       help=(
+           "Generate schema and pydantic models without moving them into the target directories,"
+           " for testing purposes"
+       ),
+       action="store_true",
    )
    return parser
@@ -212,12 +225,6 @@ def main():
    else:
        generate_versions(args.yaml, args.pydantic, args.dry_run, repo, args.hdmf)

if __name__ == "__main__":
    main()