Unify naming of derived child groups, arrays, and values so that they are all named "value". Add link building. Fix erroneous collapsing of anonymous and named container groups.

sneakers-the-rat 2024-08-05 16:05:44 -07:00
parent e72c860edd
commit b3b5b9d793
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
5 changed files with 156 additions and 105 deletions

View file

@ -289,8 +289,31 @@ When generating pydantic models we...
There are several different ways to create references between objects in nwb/hdmf:
- ...
- [`links`](https://schema-language.readthedocs.io/en/latest/description.html#sec-link-spec) are group-level
properties that can reference other groups or datasets like this:
```yaml
links:
- name: Link name
doc: Required string with the description of the link
target_type: Type of target
quantity: Optional quantity identifier for the group (default=1).
```
- [Reference `dtype`](https://schema-language.readthedocs.io/en/latest/description.html#reference-dtype)s are
dataset- and attribute-level properties that can reference both other objects and regions within other objects:
```yaml
dtype:
target_type: ElectrodeGroup
reftype: object
```
- Implicitly, hdmf creates references between objects according to naming conventions, e.g.
an attribute/dataset that is a `VectorIndex` named `mydata_index` will be linked to a `VectorData`
object named `mydata`.
- There is currently a note in the schema language docs that there will be an additional
[Relationships](https://schema-language.readthedocs.io/en/latest/description.html#relationships) system
that explicitly models relationships, but it is unclear how that would differ from references.
We represent all of these by referring directly to the object type, preserving the source type
in an annotation when necessary.
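As a rough illustration of that approach (a sketch, not the adapter's actual output; the slot name and description below are invented), a dataset with the reference `dtype` example above would become a LinkML slot whose range is the target class, with the original source type preserved in an annotation:
```python
from linkml_runtime.linkml_model import SlotDefinition

# Hypothetical slot for a dataset whose dtype is an object reference to ElectrodeGroup.
# Only the range + source_type annotation pattern mirrors the adapter code in this
# commit; the name and description are placeholders.
electrode_group = SlotDefinition(
    name="electrode_group",
    description="Reference to the ElectrodeGroup this row belongs to.",
    range="ElectrodeGroup",
    annotations=[{"tag": "source_type", "value": "reference"}],
)
```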
## LinkML to Everything

View file

@ -216,8 +216,8 @@ class MapListlike(DatasetMap):
Used exactly once in the core schema, in ``ImageReferences`` -
an array of references to other ``Image`` datasets. We ignore the
usual array structure and unnest the implicit array into a slot names from the
target type rather than the oddly-named ``num_images`` dimension so that
usual array structure and unnest the implicit array into a slot named "value"
rather than the oddly-named ``num_images`` dimension so that
ultimately in the pydantic model we get a nicely behaved single-level list.
Examples:
@ -245,7 +245,7 @@ class MapListlike(DatasetMap):
name: name
range: string
required: true
image:
value:
name: image
description: Ordered dataset of references to Image objects.
multivalued: true
@ -286,15 +286,15 @@ class MapListlike(DatasetMap):
"""
Map to a list of the given class
"""
dtype = camel_to_snake(ClassAdapter.handle_dtype(cls.dtype))
slot = SlotDefinition(
name=dtype,
name="value",
multivalued=True,
range=ClassAdapter.handle_dtype(cls.dtype),
description=cls.doc,
required=cls.quantity not in ("*", "?"),
annotations=[{"source_type": "reference"}],
)
res.classes[0].attributes[dtype] = slot
res.classes[0].attributes["value"] = slot
return res
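For orientation, a minimal sketch of what this buys us downstream (not the actual generated code; the real `Image` and `ImageReferences` models carry more fields and validators):
```python
from typing import List

from pydantic import BaseModel, Field


class Image(BaseModel):
    """Stand-in for the generated Image model."""
    name: str


class ImageReferences(BaseModel):
    """Sketch of the generated model: the reference array collapses into a
    single-level list in a slot named 'value'."""
    name: str
    value: List[Image] = Field(
        ..., description="Ordered dataset of references to Image objects."
    )
```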
@ -533,9 +533,9 @@ class MapArrayLikeAttributes(DatasetMap):
expressions = array_adapter.make_slot()
# make a slot for the arraylike class
array_slot = SlotDefinition(
name="array", range=ClassAdapter.handle_dtype(cls.dtype), **expressions
name="value", range=ClassAdapter.handle_dtype(cls.dtype), **expressions
)
res.classes[0].attributes.update({"array": array_slot})
res.classes[0].attributes.update({"value": array_slot})
return res
@ -572,7 +572,7 @@ class MapClassRange(DatasetMap):
name=cls.name,
description=cls.doc,
range=f"{cls.neurodata_type_inc}",
annotations=[{"named": True}],
annotations=[{"named": True}, {"source_type": "neurodata_type_inc"}],
**QUANTITY_MAP[cls.quantity],
)
res = BuildResult(slots=[this_slot])
@ -686,17 +686,28 @@ class MapNVectors(DatasetMap):
Most commonly: ``VectorData`` is subclassed without a name and with a '*' quantity to indicate
arbitrary columns.
Used twice:
- Images
- DynamicTable (and all its uses)
DynamicTable (and the VectorData slot this is called for)
is handled specially and just dropped, because we handle the possibility of
arbitrary extra VectorData in the :mod:`nwb_linkml.includes.hdmf` module mixin classes.
So really this is just a handler for the `Images` case.
"""
@classmethod
def check(c, cls: Dataset) -> bool:
"""
Check for being an unnamed multivalued vector class
Check for being an unnamed multivalued vector class that isn't VectorData
"""
return (
cls.name is None
and cls.neurodata_type_def is None
and cls.neurodata_type_inc
and cls.neurodata_type_inc != "VectorData"
and cls.quantity in ("*", "+")
)
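A quick sanity sketch of what this check matches (assuming the `Dataset` class from `nwb_schema_language` and that `MapNVectors` is importable from `nwb_linkml.adapters.dataset`; both module paths are assumptions, not shown in this diff):
```python
from nwb_schema_language import Dataset

from nwb_linkml.adapters.dataset import MapNVectors

# Unnamed, multivalued, and not VectorData: matched by this check.
images = Dataset(neurodata_type_inc="Image", quantity="*", doc="Images of presented stimuli.")
# Unnamed VectorData is deliberately excluded; the hdmf mixin classes handle it instead.
columns = Dataset(neurodata_type_inc="VectorData", quantity="*", doc="Arbitrary extra columns.")

assert MapNVectors.check(images)
assert not MapNVectors.check(columns)
```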

View file

@ -2,7 +2,7 @@
Adapter for NWB groups to linkml Classes
"""
from typing import Type
from typing import Type, List
from linkml_runtime.linkml_model import SlotDefinition
@ -28,25 +28,13 @@ class GroupAdapter(ClassAdapter):
Do the translation, yielding the BuildResult
"""
# Handle container groups with only * quantity unnamed groups
if len(self.cls.groups) > 0 and all(
[self._check_if_container(g) for g in self.cls.groups]
if (
len(self.cls.groups) > 0
and not self.cls.links
and all([self._check_if_container(g) for g in self.cls.groups])
): # and \
# self.parent is not None:
return self.handle_container_group(self.cls)
# Or you can have groups like /intervals where there are some named groups, and some unnamed
# but they all have the same type
elif (
len(self.cls.groups) > 0
and all(
[
g.neurodata_type_inc == self.cls.groups[0].neurodata_type_inc
for g in self.cls.groups
]
)
and self.cls.groups[0].neurodata_type_inc is not None
and all([g.quantity in ("?", "*") for g in self.cls.groups])
):
return self.handle_container_group(self.cls)
# handle if we are a terminal container group without making a new class
if (
@ -58,17 +46,42 @@ class GroupAdapter(ClassAdapter):
return self.handle_container_slot(self.cls)
nested_res = self.build_subclasses()
# add links
links = self.build_links()
# we don't propagate slots up to the next level since they are meant for this
# level (ie. a way to refer to our children)
res = self.build_base(extra_attrs=nested_res.slots)
res = self.build_base(extra_attrs=nested_res.slots + links)
# we do propagate classes tho
res.classes.extend(nested_res.classes)
return res
def build_links(self) -> List[SlotDefinition]:
"""
Build links specified in the ``links`` field as slots that refer to other
classes, with an additional annotation specifying that they are in fact links.
Link slots can take either the object itself or the path to that object in the
file hierarchy as a string.
"""
if not self.cls.links:
return []
slots = [
SlotDefinition(
name=link.name,
any_of=[{"range": link.target_type}, {"range": "string"}],
annotations=[{"tag": "source_type", "value": "link"}],
**QUANTITY_MAP[link.quantity],
)
for link in self.cls.links
]
return slots
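To make the mapping concrete, a hypothetical link spec like `{name: stimulus, target_type: TimeSeries, quantity: '?'}` would come out of `build_links` as roughly the following (a sketch, assuming `QUANTITY_MAP` expands `'?'` to `required=False, multivalued=False`):
```python
from linkml_runtime.linkml_model import SlotDefinition

# Sketch only: the link name and target type are hypothetical.
stimulus = SlotDefinition(
    name="stimulus",
    # the link can be satisfied by the object itself or by its path in the file
    any_of=[{"range": "TimeSeries"}, {"range": "string"}],
    annotations=[{"tag": "source_type", "value": "link"}],
    required=False,
    multivalued=False,
)
```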
def handle_container_group(self, cls: Group) -> BuildResult:
"""
Make a special LinkML `children` slot that can
Make a special LinkML `value` slot that can
have any number of the objects that are of `neurodata_type_inc` class
Examples:
@ -84,14 +97,11 @@ class GroupAdapter(ClassAdapter):
doc: Images objects containing images of presented stimuli.
quantity: '*'
Args:
children (List[:class:`.Group`]): Child groups
"""
# don't build subgroups as their own classes, just make a slot
# that can contain them
name = cls.name if self.cls.name else "children"
name = cls.name if self.cls.name else "value"
slot = SlotDefinition(
name=name,

View file

@ -7,12 +7,12 @@ import pytest
from nwb_linkml.models.pydantic.core.v2_7_0.namespace import (
ElectricalSeries,
ElectrodeGroup,
NWBFileGeneralExtracellularEphysElectrodes,
ExtracellularEphysElectrodes,
)
@pytest.fixture()
def electrical_series() -> Tuple["ElectricalSeries", "NWBFileGeneralExtracellularEphysElectrodes"]:
def electrical_series() -> Tuple["ElectricalSeries", "ExtracellularEphysElectrodes"]:
"""
Demo electrical series with adjoining electrodes
"""
@ -27,7 +27,7 @@ def electrical_series() -> Tuple["ElectricalSeries", "NWBFileGeneralExtracellula
)
# make electrodes tables
electrodes = NWBFileGeneralExtracellularEphysElectrodes(
electrodes = ExtracellularEphysElectrodes(
id=np.arange(0, n_electrodes),
x=np.arange(0, n_electrodes),
y=np.arange(n_electrodes, n_electrodes * 2),

View file

@ -17,44 +17,53 @@ from nwb_linkml.providers import LinkMLProvider, PydanticProvider
from nwb_linkml.providers.git import NWB_CORE_REPO, HDMF_COMMON_REPO, GitRepo
from nwb_linkml.io import schema as io
def generate_core_yaml(output_path:Path, dry_run:bool=False, hdmf_only:bool=False):
def generate_core_yaml(output_path: Path, dry_run: bool = False, hdmf_only: bool = False):
"""Just build the latest version of the core schema"""
core = io.load_nwb_core(hdmf_only=hdmf_only)
built_schemas = core.build().schemas
for schema in built_schemas:
output_file = output_path / (schema.name + '.yaml')
output_file = output_path / (schema.name + ".yaml")
if not dry_run:
yaml_dumper.dump(schema, output_file)
def generate_core_pydantic(yaml_path:Path, output_path:Path, dry_run:bool=False):
def generate_core_pydantic(yaml_path: Path, output_path: Path, dry_run: bool = False):
"""Just generate the latest version of the core schema"""
for schema in yaml_path.glob('*.yaml'):
python_name = schema.stem.replace('.', '_').replace('-', '_')
pydantic_file = (output_path / python_name).with_suffix('.py')
for schema in yaml_path.glob("*.yaml"):
python_name = schema.stem.replace(".", "_").replace("-", "_")
pydantic_file = (output_path / python_name).with_suffix(".py")
generator = NWBPydanticGenerator(
str(schema),
pydantic_version='2',
pydantic_version="2",
emit_metadata=True,
gen_classvars=True,
gen_slots=True
gen_slots=True,
)
gen_pydantic = generator.serialize()
if not dry_run:
with open(pydantic_file, 'w') as pfile:
with open(pydantic_file, "w") as pfile:
pfile.write(gen_pydantic)
def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, repo:GitRepo=NWB_CORE_REPO, hdmf_only=False):
def generate_versions(
yaml_path: Path,
pydantic_path: Path,
dry_run: bool = False,
repo: GitRepo = NWB_CORE_REPO,
hdmf_only=False,
):
"""
Generate linkml models for all versions
"""
#repo.clone(force=True)
# repo.clone(force=True)
repo.clone()
# use a directory underneath this one as the temporary directory rather than
# the default hidden one
tmp_dir = Path(__file__).parent / '__tmp__'
tmp_dir = Path(__file__).parent / "__tmp__"
if tmp_dir.exists():
shutil.rmtree(tmp_dir)
tmp_dir.mkdir()
@ -65,12 +74,14 @@ def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, re
failed_versions = {}
overall_progress = Progress()
overall_task = overall_progress.add_task('All Versions', total=len(NWB_CORE_REPO.versions))
overall_task = overall_progress.add_task("All Versions", total=len(NWB_CORE_REPO.versions))
build_progress = Progress(
TextColumn("[bold blue]{task.fields[name]} - [bold green]{task.fields[action]}",
table_column=Column(ratio=1)),
BarColumn(table_column=Column(ratio=1), bar_width=None)
TextColumn(
"[bold blue]{task.fields[name]} - [bold green]{task.fields[action]}",
table_column=Column(ratio=1),
),
BarColumn(table_column=Column(ratio=1), bar_width=None),
)
panel = Panel(Group(build_progress, overall_progress))
@ -84,7 +95,9 @@ def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, re
# build linkml
try:
# check out the version (this should also refresh the hdmf-common schema)
linkml_task = build_progress.add_task('', name=version, action='Checkout Version', total=3)
linkml_task = build_progress.add_task(
"", name=version, action="Checkout Version", total=3
)
repo.tag = version
build_progress.update(linkml_task, advance=1, action="Load Namespaces")
@ -92,35 +105,36 @@ def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, re
core_ns = io.load_namespace_adapter(repo.namespace_file)
if repo.namespace == NWB_CORE_REPO:
# then the hdmf-common namespace
hdmf_common_ns = io.load_namespace_adapter(repo.temp_directory / 'hdmf-common-schema' / 'common' / 'namespace.yaml')
hdmf_common_ns = io.load_namespace_adapter(
repo.temp_directory / "hdmf-common-schema" / "common" / "namespace.yaml"
)
core_ns.imported.append(hdmf_common_ns)
build_progress.update(linkml_task, advance=1, action="Build LinkML")
linkml_res = linkml_provider.build(core_ns)
build_progress.update(linkml_task, advance=1, action="Built LinkML")
# build pydantic
ns_files = [res.namespace for res in linkml_res.values()]
pydantic_task = build_progress.add_task('', name=version, action='', total=len(ns_files))
pydantic_task = build_progress.add_task(
"", name=version, action="", total=len(ns_files)
)
for schema in ns_files:
pbar_string = ' - '.join([schema.parts[-3], schema.parts[-2], schema.parts[-1]])
pbar_string = schema.parts[-3]
build_progress.update(pydantic_task, action=pbar_string)
pydantic_provider.build(schema, versions=core_ns.versions, split=True)
build_progress.update(pydantic_task, advance=1)
build_progress.update(pydantic_task, action='Built Pydantic')
build_progress.update(pydantic_task, action="Built Pydantic")
except Exception as e:
build_progress.stop_task(linkml_task)
if linkml_task is not None:
build_progress.update(linkml_task, action='[bold red]LinkML Build Failed')
build_progress.update(linkml_task, action="[bold red]LinkML Build Failed")
build_progress.stop_task(linkml_task)
if pydantic_task is not None:
build_progress.update(pydantic_task, action='[bold red]LinkML Build Failed')
build_progress.update(pydantic_task, action="[bold red]LinkML Build Failed")
build_progress.stop_task(pydantic_task)
failed_versions[version] = traceback.format_exception(e)
@ -131,67 +145,66 @@ def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False, re
if not dry_run:
if hdmf_only:
shutil.rmtree(yaml_path / 'linkml' / 'hdmf_common')
shutil.rmtree(yaml_path / 'linkml' / 'hdmf_experimental')
shutil.rmtree(pydantic_path / 'pydantic' / 'hdmf_common')
shutil.rmtree(pydantic_path / 'pydantic' / 'hdmf_experimental')
shutil.move(tmp_dir / 'linkml' / 'hdmf_common', yaml_path / 'linkml')
shutil.move(tmp_dir / 'linkml' / 'hdmf_experimental', yaml_path / 'linkml')
shutil.move(tmp_dir / 'pydantic' / 'hdmf_common', pydantic_path / 'pydantic')
shutil.move(tmp_dir / 'pydantic' / 'hdmf_experimental', pydantic_path / 'pydantic')
shutil.rmtree(yaml_path / "linkml" / "hdmf_common")
shutil.rmtree(yaml_path / "linkml" / "hdmf_experimental")
shutil.rmtree(pydantic_path / "pydantic" / "hdmf_common")
shutil.rmtree(pydantic_path / "pydantic" / "hdmf_experimental")
shutil.move(tmp_dir / "linkml" / "hdmf_common", yaml_path / "linkml")
shutil.move(tmp_dir / "linkml" / "hdmf_experimental", yaml_path / "linkml")
shutil.move(tmp_dir / "pydantic" / "hdmf_common", pydantic_path / "pydantic")
shutil.move(tmp_dir / "pydantic" / "hdmf_experimental", pydantic_path / "pydantic")
else:
shutil.rmtree(yaml_path / 'linkml')
shutil.rmtree(pydantic_path / 'pydantic')
shutil.move(tmp_dir / 'linkml', yaml_path)
shutil.move(tmp_dir / 'pydantic', pydantic_path)
shutil.rmtree(yaml_path / "linkml")
shutil.rmtree(pydantic_path / "pydantic")
shutil.move(tmp_dir / "linkml", yaml_path)
shutil.move(tmp_dir / "pydantic", pydantic_path)
# import the most recent version of the schemas we built
latest_version = sorted((pydantic_path / 'pydantic' / 'core').iterdir(), key=os.path.getmtime)[-1]
latest_version = sorted(
(pydantic_path / "pydantic" / "core").iterdir(), key=os.path.getmtime
)[-1]
# make inits to use the schema! we don't usually do this in the
# provider class because we directly import the files there.
with open(pydantic_path / 'pydantic' / '__init__.py', 'w') as initfile:
initfile.write(' ')
with open(pydantic_path / "pydantic" / "__init__.py", "w") as initfile:
initfile.write(" ")
with open(pydantic_path / '__init__.py', 'w') as initfile:
initfile.write(f'from .pydantic.core.{latest_version.name}.namespace import *')
with open(pydantic_path / "__init__.py", "w") as initfile:
initfile.write(f"from .pydantic.core.{latest_version.name}.namespace import *")
finally:
if len(failed_versions) > 0:
print('Failed Building Versions:')
print("Failed Building Versions:")
print(failed_versions)
def parser() -> ArgumentParser:
parser = ArgumentParser('Generate all available versions of NWB core schema')
parser = ArgumentParser("Generate all available versions of NWB core schema")
parser.add_argument(
'--yaml',
"--yaml",
help="directory to export linkML schema to",
type=Path,
default=Path(__file__).parent.parent / 'nwb_linkml' / 'src' / 'nwb_linkml' / 'schema'
default=Path(__file__).parent.parent / "nwb_linkml" / "src" / "nwb_linkml" / "schema",
)
parser.add_argument(
'--pydantic',
"--pydantic",
help="directory to export pydantic models",
type=Path,
default=Path(__file__).parent.parent / 'nwb_linkml' / 'src' / 'nwb_linkml' / 'models'
default=Path(__file__).parent.parent / "nwb_linkml" / "src" / "nwb_linkml" / "models",
)
parser.add_argument("--hdmf", help="Only generate the HDMF namespaces", action="store_true")
parser.add_argument(
'--hdmf',
help="Only generate the HDMF namespaces",
action="store_true"
)
parser.add_argument(
'--latest',
"--latest",
help="Only generate the latest version of the core schemas.",
action="store_true"
action="store_true",
)
parser.add_argument(
'--dry-run',
help="Generate schema and pydantic models without moving them into the target directories, for testing purposes",
action='store_true'
"--dry-run",
help=(
"Generate schema and pydantic models without moving them into the target directories,"
" for testing purposes"
),
action="store_true",
)
return parser
@ -212,12 +225,6 @@ def main():
else:
generate_versions(args.yaml, args.pydantic, args.dry_run, repo, args.hdmf)
if __name__ == "__main__":
main()