v0.2.0 of nwb_schema_language - parentization

This commit is contained in:
sneakers-the-rat 2024-09-12 22:40:14 -07:00
parent f94a144d75
commit 880352d9a4
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
15 changed files with 893 additions and 114 deletions

View file

@@ -46,6 +46,10 @@ jobs:
run: pytest
working-directory: nwb_linkml
- name: Run nwb_schema_language Tests
run: pytest
working-directory: nwb_schema_language
- name: Coveralls Parallel
uses: coverallsapp/github-action@v2.3.0
if: runner.os != 'macOS'

View file

@@ -12,7 +12,7 @@ dependencies = [
"nwb-models>=0.2.0",
"pyyaml>=6.0",
"linkml-runtime>=1.7.7",
"nwb-schema-language>=0.1.3",
"nwb-schema-language>=0.2.0",
"rich>=13.5.2",
#"linkml>=1.7.10",
"linkml @ git+https://github.com/sneakers-the-rat/linkml@nwb-linkml",

View file

@@ -170,6 +170,10 @@ class Adapter(BaseModel):
# so skip to avoid combinatoric walking
if key == "imports" and type(input).__name__ == "SchemaAdapter":
continue
# nwb_schema_language objects have a reference to their parent,
# which causes cycles
if key == "parent":
continue
val = getattr(input, key)
yield (key, val)
if isinstance(val, (BaseModel, dict, list)):

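Since v0.2.0 gives every nwb-schema-language object a back-reference to its parent, a naive recursive walk would loop forever (child → parent → child → ...), which is what the new parent guard above avoids. A minimal sketch of the idea, using an illustrative Node model rather than the real Adapter API:

from typing import Iterator, List, Optional
from pydantic import BaseModel

class Node(BaseModel):
    name: str
    parent: Optional["Node"] = None
    children: List["Node"] = []

Node.model_rebuild()

def walk(node: Node) -> Iterator[Node]:
    yield node
    for key, val in node:  # pydantic models iterate as (field, value) pairs
        if key == "parent":  # following the back-reference would recurse forever
            continue
        if isinstance(val, list):
            for item in val:
                if isinstance(item, Node):
                    yield from walk(item)

root = Node(name="root", children=[Node(name="leaf")])
root.children[0].parent = root  # the cycle the guard protects against
assert [n.name for n in walk(root)] == ["root", "leaf"]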
View file

@@ -29,7 +29,7 @@ class GroupAdapter(ClassAdapter):
"""
# Handle container groups with only * quantity unnamed groups
if (
len(self.cls.groups) > 0
self.cls.groups
and not self.cls.links
and all([self._check_if_container(g) for g in self.cls.groups])
): # and \
@@ -38,8 +38,8 @@ class GroupAdapter(ClassAdapter):
# handle if we are a terminal container group without making a new class
if (
len(self.cls.groups) == 0
and len(self.cls.datasets) == 0
not self.cls.groups
and not self.cls.datasets
and self.cls.neurodata_type_inc is not None
and self.parent is not None
):
@@ -177,15 +177,17 @@ class GroupAdapter(ClassAdapter):
# Datasets are simple: they are terminal classes, and all logic
# for creating slots vs. classes is handled by the adapter class
dataset_res = BuildResult()
for dset in self.cls.datasets:
dset_adapter = DatasetAdapter(cls=dset, parent=self)
dataset_res += dset_adapter.build()
if self.cls.datasets:
for dset in self.cls.datasets:
dset_adapter = DatasetAdapter(cls=dset, parent=self)
dataset_res += dset_adapter.build()
group_res = BuildResult()
for group in self.cls.groups:
group_adapter = GroupAdapter(cls=group, parent=self)
group_res += group_adapter.build()
if self.cls.groups:
for group in self.cls.groups:
group_adapter = GroupAdapter(cls=group, parent=self)
group_res += group_adapter.build()
res = dataset_res + group_res

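The truthiness checks above replace the len() comparisons because in nwb-schema-language v0.2.0 the list-valued fields (groups, datasets, links) default to None rather than an empty list, and len(None) raises a TypeError. Truthiness handles both cases:

for empty in (None, []):
    assert not empty  # the new None default and [] are both falsy
# whereas len(None) raises: TypeError: object of type 'NoneType' has no len()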
View file

@@ -9,11 +9,12 @@ import contextlib
from copy import copy
from pathlib import Path
from pprint import pformat
from typing import Dict, List, Optional
from typing import Dict, Generator, List, Optional
from linkml_runtime.dumpers import yaml_dumper
from linkml_runtime.linkml_model import Annotation, SchemaDefinition
from pydantic import Field, model_validator
import networkx as nx
from nwb_linkml.adapters.adapter import Adapter, BuildResult
from nwb_linkml.adapters.schema import SchemaAdapter
@@ -31,6 +32,9 @@ class NamespacesAdapter(Adapter):
schemas: List[SchemaAdapter]
imported: List["NamespacesAdapter"] = Field(default_factory=list)
_completed: bool = False
"""whether we have run the :meth:`.complete_namespace` method"""
@classmethod
def from_yaml(cls, path: Path) -> "NamespacesAdapter":
"""
@@ -65,7 +69,7 @@ class NamespacesAdapter(Adapter):
needed_adapter = NamespacesAdapter.from_yaml(needed_source_ns)
ns_adapter.imported.append(needed_adapter)
ns_adapter.populate_imports()
ns_adapter.complete_namespaces()
return ns_adapter
@@ -76,6 +80,9 @@ class NamespacesAdapter(Adapter):
Build the NWB namespace to the LinkML Schema
"""
if not self._completed:
self.complete_namespaces()
sch_result = BuildResult()
for sch in self.schemas:
if progress is not None:
@@ -149,6 +156,50 @@ class NamespacesAdapter(Adapter):
break
return self
def complete_namespaces(self):
"""
After loading the namespace, and after any imports have been added,
this method must be called to complete the definitions of the contained schema objects.
This is not automatic because NWB doesn't have a formal dependency resolution system,
so it is often impossible to know which imports are needed until after the namespace
adapter has been instantiated.
It **is** called automatically by the :meth:`.build` method if it hasn't been run already.
"""
self.populate_imports()
self._roll_down_inheritance()
for i in self.imported:
i.complete_namespaces()
self._completed = True
def _roll_down_inheritance(self):
"""
nwb-schema-language inheritance doesn't work like normal Python inheritance -
rather than inheriting only the 'top level' properties of a class, a child
class recursively merges all properties from its parent objects.
References:
https://github.com/NeurodataWithoutBorders/pynwb/issues/1954
"""
pass
def inheritance_graph(self) -> nx.DiGraph:
"""
Make a graph of all ``neurodata_types`` in the namespace and imports such that
each node contains the group or dataset it describes,
and has directed edges pointing at all the classes that inherit from it.
In the case that the inheriting class does not itself have a ``neurodata_type_def``,
it is
"""
g = nx.DiGraph()
for sch in self.all_schemas():
for cls in sch.created_classes:
pass
# population of nodes and edges is not yet implemented,
# but return the graph so the annotated signature holds
return g
def find_type_source(self, name: str) -> SchemaAdapter:
"""
Given some neurodata_type_inc, find the schema that it's defined in.
@@ -279,3 +330,13 @@ class NamespacesAdapter(Adapter):
if name in sources:
return ns.name
return None
def all_schemas(self) -> Generator[SchemaAdapter, None, None]:
"""
Iterator over all schemas including imports
"""
for sch in self.schemas:
yield sch
for imported in self.imported:
yield from imported.all_schemas()

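A sketch of the resulting two-phase lifecycle (file paths here are hypothetical): load the namespaces, attach any imports that couldn't be resolved from adjacent files, then complete them - either explicitly or implicitly via build():

core = NamespacesAdapter.from_yaml(Path("core/nwb.namespace.yaml"))
ext = NamespacesAdapter.from_yaml(Path("ext/ndx-example.namespace.yaml"))
ext.imported.append(core)
ext.complete_namespaces()  # populate imports, roll down inheritance
result = ext.build()  # would call complete_namespaces() itself if skipped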
View file

@@ -1,6 +1,7 @@
import pytest
from nwb_linkml.adapters import SchemaAdapter
from pathlib import Path
from nwb_linkml.adapters import NamespacesAdapter, SchemaAdapter
from nwb_schema_language import Attribute, Group, Namespace, Dataset, Namespaces, Schema, FlatDtype
@pytest.mark.parametrize(
@@ -48,8 +49,7 @@ def test_skip_imports(nwb_core_fixture):
assert all([ns == "core" for ns in namespaces])
@pytest.mark.skip()
def test_populate_inheritance(nwb_core_fixture):
def test_roll_down_inheritance():
"""
Classes should receive and override the properties of their parents
when they have neurodata_type_inc
@@ -59,4 +59,74 @@ def test_populate_inheritance(nwb_core_fixture):
Returns:
"""
pass
parent_cls = Group(
neurodata_type_def="Parent",
doc="parent",
attributes=[
Attribute(name="a", dims=["a", "b"], shape=[1, 2], doc="a", value="a"),
Attribute(name="b", dims=["c", "d"], shape=[3, 4], doc="b", value="b"),
],
datasets=[
Dataset(
name="data",
dims=["a", "b"],
shape=[1, 2],
doc="data",
attributes=[
Attribute(name="c", dtype=FlatDtype.int32, doc="c"),
Attribute(name="d", dtype=FlatDtype.int32, doc="d"),
],
)
],
)
parent_sch = Schema(source="parent.yaml")
parent_ns = Namespaces(
namespaces=[
Namespace(
author="hey",
contact="sup",
name="parent",
doc="a parent",
version="1",
schema=[parent_sch],
)
]
)
child_cls = Group(
neurodata_type_def="Child",
neurodata_type_inc="Parent",
doc="child",
attributes=[Attribute(name="a", doc="a")],
datasets=[
Dataset(
name="data",
doc="data again",
attributes=[Attribute(name="a", doc="c", value="z"), Attribute(name="c", doc="c")],
)
],
)
child_sch = Schema(source="child.yaml")
child_ns = Namespaces(
namespaces=[
Namespace(
author="hey",
contact="sup",
name="child",
doc="a child",
version="1",
schema=[child_sch, Schema(namespace="parent")],
)
]
)
parent_schema_adapter = SchemaAdapter(path=Path("parent.yaml"), groups=[parent_cls])
parent_ns_adapter = NamespacesAdapter(namespaces=parent_ns, schemas=[parent_schema_adapter])
child_schema_adapter = SchemaAdapter(path=Path("child.yaml"), groups=[child_cls])
child_ns_adapter = NamespacesAdapter(
namespaces=child_ns, schemas=[child_schema_adapter], imported=[parent_ns_adapter]
)
child_ns_adapter.complete_namespaces()
child = child_ns_adapter.get("Child")

View file

@@ -6,7 +6,7 @@ SHELL := bash
.SUFFIXES:
.SECONDARY:
RUN = poetry run
RUN = pdm run
# get values from about.yaml file
SCHEMA_NAME = $(shell ${SHELL} ./utils/get-value.sh name)
SOURCE_SCHEMA_PATH = $(shell ${SHELL} ./utils/get-value.sh source_schema_path)
@@ -107,7 +107,7 @@ gen-project: $(PYMODEL)
$(RUN) gen-project ${GEN_PARGS} -d $(DEST) $(SOURCE_SCHEMA_PATH) && mv $(DEST)/*.py $(PYMODEL)
gen-pydantic: $(PYMODEL)
$(RUN) gen-pydantic $(SOURCE_SCHEMA_PATH) --pydantic_version 2 > $(PYMODEL)/nwb_schema_pydantic.py
$(RUN) generate_pydantic
$(RUN) run_patches --phase post_generation_pydantic
test: test-schema test-python test-examples

View file

@@ -9,7 +9,7 @@ dependencies = [
"linkml-runtime>=1.7.7",
"pydantic>=2.3.0",
]
version = "0.1.3"
version = "0.2.0"
description = "Translation of the nwb-schema-language to LinkML"
readme = "README.md"
@@ -20,6 +20,7 @@ documentation = "https://nwb-linkml.readthedocs.io"
[project.scripts]
run_patches = "nwb_schema_language.patches:main"
generate_pydantic = "nwb_schema_language.generator:generate"
[tool.pdm]
[tool.pdm.dev-dependencies]

View file

@@ -22,10 +22,10 @@ try:
DTypeType = Union[List[CompoundDtype], FlatDtype, ReferenceDtype]
except (NameError, RecursionError):
except (NameError, RecursionError) as e:
warnings.warn(
"Error importing pydantic classes, passing because we might be in the process of patching"
" them, but it is likely they are broken and you will be unable to use them!",
f" them, but it is likely they are broken and you will be unable to use them!\n{e}",
stacklevel=1,
)

View file

@@ -1,14 +1,13 @@
from __future__ import annotations
from datetime import datetime, date
from enum import Enum
from typing import List, Dict, Optional, Any, Union
from pydantic import BaseModel as BaseModel, Field
import sys
if sys.version_info >= (3, 8):
from typing import Literal
else:
from typing_extensions import Literal
import re
import sys
from datetime import date, datetime, time
from decimal import Decimal
from enum import Enum
from typing import Any, ClassVar, Dict, List, Literal, Optional, Union
from pydantic import BaseModel, ConfigDict, Field, RootModel, field_validator, model_validator
metamodel_version = "None"
@@ -16,11 +15,81 @@ version = "None"
class ConfiguredBaseModel(BaseModel):
model_config = ConfigDict(
validate_assignment=False,
validate_default=True,
extra="forbid",
arbitrary_types_allowed=True,
use_enum_values=True,
strict=False,
)
pass
class ReftypeOptions(str, Enum):
class LinkMLMeta(RootModel):
root: Dict[str, Any] = {}
model_config = ConfigDict(frozen=True)
def __getattr__(self, key: str):
return getattr(self.root, key)
def __getitem__(self, key: str):
return self.root[key]
def __setitem__(self, key: str, value):
self.root[key] = value
def __contains__(self, key: str) -> bool:
return key in self.root
class ParentizeMixin(BaseModel):
@model_validator(mode="after")
def parentize(self):
"""Set the parent attribute for all our fields they have one"""
for field_name in self.model_fields:
if field_name == "parent":
continue
field = getattr(self, field_name)
if not isinstance(field, list):
field = [field]
for item in field:
if hasattr(item, "parent"):
item.parent = self
return self
linkml_meta = LinkMLMeta(
{
"default_prefix": "nwb_schema_language",
"default_range": "string",
"description": "Translation of the nwb-schema-language to LinkML",
"id": "https://w3id.org/p2p_ld/nwb-schema-language",
"imports": ["linkml:types"],
"license": "GNU GPL v3.0",
"name": "nwb-schema-language",
"prefixes": {
"linkml": {"prefix_prefix": "linkml", "prefix_reference": "https://w3id.org/linkml/"},
"nwb_schema_language": {
"prefix_prefix": "nwb_schema_language",
"prefix_reference": "https://w3id.org/p2p_ld/nwb-schema-language/",
},
"schema": {"prefix_prefix": "schema", "prefix_reference": "http://schema.org/"},
},
"see_also": ["https://p2p_ld.github.io/nwb-schema-language"],
"settings": {
"email": {"setting_key": "email", "setting_value": "\\S+@\\S+{\\.\\w}+"},
"protected_string": {"setting_key": "protected_string", "setting_value": "^[A-Za-z_][A-Za-z0-9_]*$"},
},
"source_file": "/Users/jonny/git/p2p-ld/nwb-linkml/nwb_schema_language/src/nwb_schema_language/schema/nwb_schema_language.yaml",
"title": "nwb-schema-language",
}
)
class ReftypeOptions(str, Enum):
# Reference to another group or dataset of the given target_type
ref = "ref"
# Reference to another group or dataset of the given target_type
@@ -32,7 +101,6 @@ class ReftypeOptions(str, Enum):
class QuantityEnum(str, Enum):
# Zero or more instances, equivalent to zero_or_many
ASTERISK = "*"
# Zero or one instances, equivalent to zero_or_one
@@ -48,7 +116,6 @@ class QuantityEnum(str, Enum):
class FlatDtype(str, Enum):
# single precision floating point (32 bit)
float = "float"
# single precision floating point (32 bit)
@@ -100,164 +167,642 @@ class FlatDtype(str, Enum):
class Namespace(ConfiguredBaseModel):
doc: str = Field(..., description="""Description of corresponding object.""")
name: str = Field(...)
full_name: Optional[str] = Field(
None, description="""Optional string with extended full name for the namespace."""
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
{
"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language",
"slot_usage": {"name": {"name": "name", "required": True}},
}
)
version: str = Field(...)
doc: str = Field(
...,
description="""Description of corresponding object.""",
json_schema_extra={
"linkml_meta": {
"alias": "doc",
"domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
}
},
)
name: str = Field(
...,
json_schema_extra={
"linkml_meta": {
"alias": "name",
"domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
"structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
}
},
)
full_name: Optional[str] = Field(
None,
description="""Optional string with extended full name for the namespace.""",
json_schema_extra={"linkml_meta": {"alias": "full_name", "domain_of": ["Namespace"]}},
)
version: str = Field(..., json_schema_extra={"linkml_meta": {"alias": "version", "domain_of": ["Namespace"]}})
date: Optional[datetime] = Field(
None, description="""Date that a namespace was last modified or released"""
None,
description="""Date that a namespace was last modified or released""",
json_schema_extra={
"linkml_meta": {
"alias": "date",
"domain_of": ["Namespace"],
"examples": [{"value": "2017-04-25 17:14:13"}],
"slot_uri": "schema:dateModified",
}
},
)
author: List[str] | str = Field(
default_factory=list,
...,
description="""List of strings with the names of the authors of the namespace.""",
json_schema_extra={"linkml_meta": {"alias": "author", "domain_of": ["Namespace"], "slot_uri": "schema:author"}},
)
contact: List[str] | str = Field(
default_factory=list,
...,
description="""List of strings with the contact information for the authors. Ordering of the contacts should match the ordering of the authors.""",
json_schema_extra={
"linkml_meta": {
"alias": "contact",
"domain_of": ["Namespace"],
"slot_uri": "schema:email",
"structured_pattern": {"interpolated": True, "syntax": "{email}"},
}
},
)
schema_: Optional[List[Schema]] = Field(
None,
alias="schema",
default_factory=list,
description="""List of the schema to be included in this namespace.""",
json_schema_extra={"linkml_meta": {"alias": "schema_", "domain_of": ["Namespace"]}},
)
class Namespaces(ConfiguredBaseModel):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"})
namespaces: Optional[List[Namespace]] = Field(default_factory=list)
namespaces: Optional[List[Namespace]] = Field(
None, json_schema_extra={"linkml_meta": {"alias": "namespaces", "domain_of": ["Namespaces"]}}
)
class Schema(ConfiguredBaseModel):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
{
"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language",
"rules": [
{
"description": "If namespace is absent, source is required",
"postconditions": {"slot_conditions": {"source": {"name": "source", "required": True}}},
"preconditions": {
"slot_conditions": {"namespace": {"name": "namespace", "value_presence": "ABSENT"}}
},
},
{
"description": "If source is absent, namespace is required.",
"postconditions": {"slot_conditions": {"namespace": {"name": "namespace", "required": True}}},
"preconditions": {"slot_conditions": {"source": {"name": "source", "value_presence": "ABSENT"}}},
},
{
"description": "If namespace is present, source is cannot be",
"postconditions": {"slot_conditions": {"source": {"name": "source", "value_presence": "ABSENT"}}},
"preconditions": {
"slot_conditions": {"namespace": {"name": "namespace", "value_presence": "PRESENT"}}
},
},
{
"description": "If source is present, namespace cannot be.",
"postconditions": {
"slot_conditions": {"namespace": {"name": "namespace", "value_presence": "ABSENT"}}
},
"preconditions": {"slot_conditions": {"source": {"name": "source", "value_presence": "PRESENT"}}},
},
],
}
)
source: Optional[str] = Field(
None,
description="""describes the name of the YAML (or JSON) file with the schema specification. The schema files should be located in the same folder as the namespace file.""",
json_schema_extra={"linkml_meta": {"alias": "source", "domain_of": ["Schema"]}},
)
namespace: Optional[str] = Field(
None,
description="""describes a named reference to another namespace. In contrast to source, this is a reference by name to a known namespace (i.e., the namespace is resolved during the build and must point to an already existing namespace). This mechanism is used to allow, e.g., extension of a core namespace (here the NWB core namespace) without requiring hard paths to the files describing the core namespace. Either source or namespace must be specified, but not both.""",
json_schema_extra={"linkml_meta": {"alias": "namespace", "domain_of": ["Schema"]}},
)
title: Optional[str] = Field(
None, description="""a descriptive title for a file for documentation purposes."""
None,
description="""a descriptive title for a file for documentation purposes.""",
json_schema_extra={"linkml_meta": {"alias": "title", "domain_of": ["Schema"]}},
)
neurodata_types: Optional[List[Union[Dataset, Group]]] = Field(
default_factory=list,
None,
description="""an optional list of strings indicating which data types should be included from the given specification source or namespace. The default is null indicating that all data types should be included.""",
json_schema_extra={
"linkml_meta": {
"alias": "neurodata_types",
"any_of": [{"range": "Dataset"}, {"range": "Group"}],
"domain_of": ["Schema"],
}
},
)
doc: Optional[str] = Field(
None,
json_schema_extra={
"linkml_meta": {
"alias": "doc",
"domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
}
},
)
doc: Optional[str] = Field(None)
class Group(ConfiguredBaseModel):
class Group(ConfiguredBaseModel, ParentizeMixin):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"})
neurodata_type_def: Optional[str] = Field(
None,
description="""Used alongside neurodata_type_inc to indicate inheritance, naming, and mixins""",
json_schema_extra={
"linkml_meta": {
"alias": "neurodata_type_def",
"domain_of": ["Group", "Dataset"],
"structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
}
},
)
neurodata_type_inc: Optional[str] = Field(
None,
description="""Used alongside neurodata_type_def to indicate inheritance, naming, and mixins""",
json_schema_extra={
"linkml_meta": {
"alias": "neurodata_type_inc",
"domain_of": ["Group", "Dataset"],
"structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
}
},
)
name: Optional[str] = Field(
None,
json_schema_extra={
"linkml_meta": {
"alias": "name",
"domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
"structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
}
},
)
default_name: Optional[str] = Field(
None,
json_schema_extra={
"linkml_meta": {
"alias": "default_name",
"domain_of": ["Group", "Dataset"],
"structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
}
},
)
doc: str = Field(
...,
description="""Description of corresponding object.""",
json_schema_extra={
"linkml_meta": {
"alias": "doc",
"domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
}
},
)
quantity: Optional[Union[QuantityEnum, int]] = Field(
"1",
json_schema_extra={
"linkml_meta": {
"alias": "quantity",
"any_of": [{"minimum_value": 1, "range": "integer"}, {"range": "QuantityEnum"}],
"domain_of": ["Group", "Link", "Dataset"],
"ifabsent": "int(1)",
"todos": ["logic to check that the corresponding class can only be " "implemented quantity times."],
}
},
)
linkable: Optional[bool] = Field(
None, json_schema_extra={"linkml_meta": {"alias": "linkable", "domain_of": ["Group", "Dataset"]}}
)
attributes: Optional[List[Attribute]] = Field(
None, json_schema_extra={"linkml_meta": {"alias": "attributes", "domain_of": ["Group", "Dataset"]}}
)
datasets: Optional[List[Dataset]] = Field(
None, json_schema_extra={"linkml_meta": {"alias": "datasets", "domain_of": ["Group", "Datasets"]}}
)
groups: Optional[List[Group]] = Field(
None, json_schema_extra={"linkml_meta": {"alias": "groups", "domain_of": ["Group", "Groups"]}}
)
links: Optional[List[Link]] = Field(
None, json_schema_extra={"linkml_meta": {"alias": "links", "domain_of": ["Group"]}}
)
parent: Optional[Group] = Field(
None,
exclude=True,
description="""The parent group that contains this dataset or group""",
json_schema_extra={"linkml_meta": {"alias": "parent", "domain_of": ["Group", "Attribute", "Dataset"]}},
)
name: Optional[str] = Field(None)
default_name: Optional[str] = Field(None)
doc: str = Field(..., description="""Description of corresponding object.""")
quantity: Optional[Union[QuantityEnum, int]] = Field(1)
linkable: Optional[bool] = Field(None)
attributes: Optional[List[Attribute]] = Field(default_factory=list)
datasets: Optional[List[Dataset]] = Field(default_factory=list)
groups: Optional[List[Group]] = Field(default_factory=list)
links: Optional[List[Link]] = Field(default_factory=list)
class Groups(ConfiguredBaseModel):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"})
groups: Optional[List[Group]] = Field(default_factory=list)
groups: Optional[List[Group]] = Field(
None, json_schema_extra={"linkml_meta": {"alias": "groups", "domain_of": ["Group", "Groups"]}}
)
class Link(ConfiguredBaseModel):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"})
name: Optional[str] = Field(None)
doc: str = Field(..., description="""Description of corresponding object.""")
name: Optional[str] = Field(
None,
json_schema_extra={
"linkml_meta": {
"alias": "name",
"domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
"structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
}
},
)
doc: str = Field(
...,
description="""Description of corresponding object.""",
json_schema_extra={
"linkml_meta": {
"alias": "doc",
"domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
}
},
)
target_type: str = Field(
...,
description="""Describes the neurodata_type of the target that the reference points to""",
json_schema_extra={"linkml_meta": {"alias": "target_type", "domain_of": ["Link", "ReferenceDtype"]}},
)
quantity: Optional[Union[QuantityEnum, int]] = Field(
"1",
json_schema_extra={
"linkml_meta": {
"alias": "quantity",
"any_of": [{"minimum_value": 1, "range": "integer"}, {"range": "QuantityEnum"}],
"domain_of": ["Group", "Link", "Dataset"],
"ifabsent": "int(1)",
"todos": ["logic to check that the corresponding class can only be " "implemented quantity times."],
}
},
)
quantity: Optional[Union[QuantityEnum, int]] = Field(1)
class Datasets(ConfiguredBaseModel):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"})
datasets: Optional[List[Dataset]] = Field(default_factory=list)
datasets: Optional[List[Dataset]] = Field(
None, json_schema_extra={"linkml_meta": {"alias": "datasets", "domain_of": ["Group", "Datasets"]}}
)
class ReferenceDtype(ConfiguredBaseModel):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language"})
target_type: str = Field(
...,
description="""Describes the neurodata_type of the target that the reference points to""",
json_schema_extra={"linkml_meta": {"alias": "target_type", "domain_of": ["Link", "ReferenceDtype"]}},
)
reftype: Optional[ReftypeOptions] = Field(
None, description="""describes the kind of reference"""
None,
description="""describes the kind of reference""",
json_schema_extra={"linkml_meta": {"alias": "reftype", "domain_of": ["ReferenceDtype"]}},
)
class CompoundDtype(ConfiguredBaseModel):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
{
"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language",
"slot_usage": {
"dtype": {
"any_of": [{"range": "ReferenceDtype"}, {"range": "FlatDtype"}],
"multivalued": False,
"name": "dtype",
"required": True,
},
"name": {"name": "name", "required": True},
},
}
)
name: str = Field(...)
doc: str = Field(..., description="""Description of corresponding object.""")
dtype: Union[FlatDtype, ReferenceDtype] = Field(...)
name: str = Field(
...,
json_schema_extra={
"linkml_meta": {
"alias": "name",
"domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
"structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
}
},
)
doc: str = Field(
...,
description="""Description of corresponding object.""",
json_schema_extra={
"linkml_meta": {
"alias": "doc",
"domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
}
},
)
dtype: Union[FlatDtype, ReferenceDtype] = Field(
...,
json_schema_extra={
"linkml_meta": {
"alias": "dtype",
"any_of": [{"range": "ReferenceDtype"}, {"range": "FlatDtype"}],
"domain_of": ["CompoundDtype", "DtypeMixin"],
}
},
)
class DtypeMixin(ConfiguredBaseModel):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
{
"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language",
"mixin": True,
"rules": [
{
"postconditions": {"slot_conditions": {"dtype": {"multivalued": False, "name": "dtype"}}},
"preconditions": {"slot_conditions": {"dtype": {"name": "dtype", "range": "FlatDtype"}}},
}
],
}
)
dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(
default_factory=list
None,
json_schema_extra={
"linkml_meta": {
"alias": "dtype",
"any_of": [{"range": "FlatDtype"}, {"range": "CompoundDtype"}, {"range": "ReferenceDtype"}],
"domain_of": ["CompoundDtype", "DtypeMixin"],
}
},
)
class Attribute(DtypeMixin):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
{
"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language",
"mixins": ["DtypeMixin"],
"slot_usage": {
"name": {"name": "name", "required": True},
"parent": {"any_of": [{"range": "Group"}, {"range": "Dataset"}], "name": "parent"},
},
}
)
name: str = Field(...)
dims: Optional[List[Union[Any, str]]] = Field(None)
shape: Optional[List[Union[Any, int, str]]] = Field(None)
name: str = Field(
...,
json_schema_extra={
"linkml_meta": {
"alias": "name",
"domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
"structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
}
},
)
dims: Optional[List[Union[Any, str]]] = Field(
None,
json_schema_extra={
"linkml_meta": {
"alias": "dims",
"any_of": [{"range": "string"}, {"range": "AnyType"}],
"domain_of": ["Attribute", "Dataset"],
"todos": [
"Can't quite figure out how to allow an array of arrays - see "
"https://github.com/linkml/linkml/issues/895"
],
}
},
)
shape: Optional[List[Union[Any, int, str]]] = Field(
None,
json_schema_extra={
"linkml_meta": {
"alias": "shape",
"any_of": [
{"minimum_value": 1, "range": "integer"},
{"equals_string": "null", "range": "string"},
{"range": "AnyType"},
],
"domain_of": ["Attribute", "Dataset"],
"todos": [
"Can't quite figure out how to allow an array of arrays - see "
"https://github.com/linkml/linkml/issues/895"
],
}
},
)
value: Optional[Any] = Field(
None, description="""Optional constant, fixed value for the attribute."""
None,
description="""Optional constant, fixed value for the attribute.""",
json_schema_extra={"linkml_meta": {"alias": "value", "domain_of": ["Attribute", "Dataset"]}},
)
default_value: Optional[Any] = Field(
None, description="""Optional default value for variable-valued attributes."""
None,
description="""Optional default value for variable-valued attributes.""",
json_schema_extra={"linkml_meta": {"alias": "default_value", "domain_of": ["Attribute", "Dataset"]}},
)
doc: str = Field(
...,
description="""Description of corresponding object.""",
json_schema_extra={
"linkml_meta": {
"alias": "doc",
"domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
}
},
)
doc: str = Field(..., description="""Description of corresponding object.""")
required: Optional[bool] = Field(
True,
description="""Optional boolean key describing whether the attribute is required. Default value is True.""",
json_schema_extra={"linkml_meta": {"alias": "required", "domain_of": ["Attribute"], "ifabsent": "true"}},
)
parent: Optional[Union[Dataset, Group]] = Field(
None,
exclude=True,
description="""The parent group that contains this dataset or group""",
json_schema_extra={
"linkml_meta": {
"alias": "parent",
"any_of": [{"range": "Group"}, {"range": "Dataset"}],
"domain_of": ["Group", "Attribute", "Dataset"],
}
},
)
dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(
None,
json_schema_extra={
"linkml_meta": {
"alias": "dtype",
"any_of": [{"range": "FlatDtype"}, {"range": "CompoundDtype"}, {"range": "ReferenceDtype"}],
"domain_of": ["CompoundDtype", "DtypeMixin"],
}
},
)
dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(None)
class Dataset(DtypeMixin):
class Dataset(ConfiguredBaseModel, ParentizeMixin):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
{"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language", "mixins": ["DtypeMixin"]}
)
neurodata_type_def: Optional[str] = Field(
None,
description="""Used alongside neurodata_type_inc to indicate inheritance, naming, and mixins""",
json_schema_extra={
"linkml_meta": {
"alias": "neurodata_type_def",
"domain_of": ["Group", "Dataset"],
"structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
}
},
)
neurodata_type_inc: Optional[str] = Field(
None,
description="""Used alongside neurodata_type_def to indicate inheritance, naming, and mixins""",
json_schema_extra={
"linkml_meta": {
"alias": "neurodata_type_inc",
"domain_of": ["Group", "Dataset"],
"structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
}
},
)
name: Optional[str] = Field(
None,
json_schema_extra={
"linkml_meta": {
"alias": "name",
"domain_of": ["Namespace", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
"structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
}
},
)
default_name: Optional[str] = Field(
None,
json_schema_extra={
"linkml_meta": {
"alias": "default_name",
"domain_of": ["Group", "Dataset"],
"structured_pattern": {"interpolated": True, "syntax": "{protected_string}"},
}
},
)
dims: Optional[List[Union[Any, str]]] = Field(
None,
json_schema_extra={
"linkml_meta": {
"alias": "dims",
"any_of": [{"range": "string"}, {"range": "AnyType"}],
"domain_of": ["Attribute", "Dataset"],
"todos": [
"Can't quite figure out how to allow an array of arrays - see "
"https://github.com/linkml/linkml/issues/895"
],
}
},
)
shape: Optional[List[Union[Any, int, str]]] = Field(
None,
json_schema_extra={
"linkml_meta": {
"alias": "shape",
"any_of": [
{"minimum_value": 1, "range": "integer"},
{"equals_string": "null", "range": "string"},
{"range": "AnyType"},
],
"domain_of": ["Attribute", "Dataset"],
"todos": [
"Can't quite figure out how to allow an array of arrays - see "
"https://github.com/linkml/linkml/issues/895"
],
}
},
)
name: Optional[str] = Field(None)
default_name: Optional[str] = Field(None)
dims: Optional[List[Union[Any, str]]] = Field(None)
shape: Optional[List[Union[Any, int, str]]] = Field(None)
value: Optional[Any] = Field(
None, description="""Optional constant, fixed value for the attribute."""
None,
description="""Optional constant, fixed value for the attribute.""",
json_schema_extra={"linkml_meta": {"alias": "value", "domain_of": ["Attribute", "Dataset"]}},
)
default_value: Optional[Any] = Field(
None, description="""Optional default value for variable-valued attributes."""
None,
description="""Optional default value for variable-valued attributes.""",
json_schema_extra={"linkml_meta": {"alias": "default_value", "domain_of": ["Attribute", "Dataset"]}},
)
doc: str = Field(..., description="""Description of corresponding object.""")
quantity: Optional[Union[QuantityEnum, int]] = Field(1)
linkable: Optional[bool] = Field(None)
attributes: Optional[List[Attribute]] = Field(None)
dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(None)
doc: str = Field(
...,
description="""Description of corresponding object.""",
json_schema_extra={
"linkml_meta": {
"alias": "doc",
"domain_of": ["Namespace", "Schema", "Group", "Attribute", "Link", "Dataset", "CompoundDtype"],
}
},
)
quantity: Optional[Union[QuantityEnum, int]] = Field(
"1",
json_schema_extra={
"linkml_meta": {
"alias": "quantity",
"any_of": [{"minimum_value": 1, "range": "integer"}, {"range": "QuantityEnum"}],
"domain_of": ["Group", "Link", "Dataset"],
"ifabsent": "int(1)",
"todos": ["logic to check that the corresponding class can only be " "implemented quantity times."],
}
},
)
linkable: Optional[bool] = Field(
None, json_schema_extra={"linkml_meta": {"alias": "linkable", "domain_of": ["Group", "Dataset"]}}
)
attributes: Optional[List[Attribute]] = Field(
None, json_schema_extra={"linkml_meta": {"alias": "attributes", "domain_of": ["Group", "Dataset"]}}
)
parent: Optional[Group] = Field(
None,
exclude=True,
description="""The parent group that contains this dataset or group""",
json_schema_extra={"linkml_meta": {"alias": "parent", "domain_of": ["Group", "Attribute", "Dataset"]}},
)
dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(
None,
json_schema_extra={
"linkml_meta": {
"alias": "dtype",
"any_of": [{"range": "FlatDtype"}, {"range": "CompoundDtype"}, {"range": "ReferenceDtype"}],
"domain_of": ["CompoundDtype", "DtypeMixin"],
}
},
)
# Model rebuild
# see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model
Namespace.model_rebuild()
Namespaces.model_rebuild()
Schema.model_rebuild()
Group.model_rebuild()
Groups.model_rebuild()
Link.model_rebuild()
Datasets.model_rebuild()
ReferenceDtype.model_rebuild()
CompoundDtype.model_rebuild()
DtypeMixin.model_rebuild()
Attribute.model_rebuild()
Dataset.model_rebuild()

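Because the parent back-references are declared with exclude=True, they are stripped during serialization, so dumping a parentized tree does not recurse through the cycle. With the generated models:

from nwb_schema_language import Dataset, Group

group = Group(doc="a group", datasets=[Dataset(name="data", doc="a dataset")])
assert group.datasets[0].parent is group  # set by the ParentizeMixin validator
assert "parent" not in group.datasets[0].model_dump()  # excluded on dump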
View file

@@ -0,0 +1,52 @@
from pathlib import Path
from dataclasses import dataclass
from linkml.generators.pydanticgen import PydanticGenerator
from linkml.generators.pydanticgen.build import ClassResult
from linkml.generators.pydanticgen.template import Import, ObjectImport
from linkml_runtime import SchemaView
from pydantic import BaseModel, model_validator
class ParentizeMixin(BaseModel):
@model_validator(mode="after")
def parentize(self):
"""Set the parent attribute for all our fields they have one"""
for field_name in self.model_fields:
if field_name == "parent":
continue
field = getattr(self, field_name)
if not isinstance(field, list):
field = [field]
for item in field:
if hasattr(item, "parent"):
item.parent = self
return self
@dataclass
class NWBSchemaLangGenerator(PydanticGenerator):
def __init__(self, *args, **kwargs):
kwargs["injected_classes"] = [ParentizeMixin]
kwargs["imports"] = [
Import(module="pydantic", objects=[ObjectImport(name="model_validator")])
]
kwargs["black"] = True
super().__init__(*args, **kwargs)
def after_generate_class(self, cls: ClassResult, sv: SchemaView) -> ClassResult:
if cls.cls.name in ("Dataset", "Group"):
cls.cls.bases = ["ConfiguredBaseModel", "ParentizeMixin"]
return cls
def generate():
schema = Path(__file__).parent / "schema" / "nwb_schema_language.yaml"
output = Path(__file__).parent / "datamodel" / "nwb_schema_pydantic.py"
generator = NWBSchemaLangGenerator(schema=schema)
generated = generator.serialize()
with open(output, "w") as ofile:
ofile.write(generated)

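The subclass leans on two PydanticGenerator hooks: injected_classes, which copies the ParentizeMixin source verbatim into the generated module, and after_generate_class, which rewrites the bases of just Dataset and Group. Regeneration is then a single call to the module's entry point (exposed as the generate_pydantic script in pyproject.toml):

from nwb_schema_language.generator import generate

generate()  # renders the LinkML schema to datamodel/nwb_schema_pydantic.py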
View file

@@ -49,8 +49,15 @@ class Patch:
patch_schema_slot = Patch(
phase=Phases.post_generation_pydantic,
path=Path("src/nwb_schema_language/datamodel/nwb_schema_pydantic.py"),
match=r"\n\s*(schema:)(.*Field\()(.*)",
replacement=r'\n schema_:\2alias="schema", \3',
match=r"\n\s*(schema:)(.*Field\(\n\s*None,\n)(.*)",
replacement=r'\n schema_:\2 alias="schema",\n\3',
)
patch_schema_slot_no_newline = Patch(
phase=Phases.post_generation_pydantic,
path=Path("src/nwb_schema_language/datamodel/nwb_schema_pydantic.py"),
match=r"\n\s*(schema:)(.*Field\(None,)(.*)",
replacement=r'\n schema_:\2 alias="schema", \3',
)
patch_dtype_single_multiple = Patch(
@@ -74,6 +81,20 @@ patch_contact_single_multiple = Patch(
replacement="contact: List[str] | str",
)
patch_validate_assignment = Patch(
phase=Phases.post_generation_pydantic,
path=Path("src/nwb_schema_language/datamodel/nwb_schema_pydantic.py"),
match=r"validate_assignment=True",
replacement="validate_assignment=False",
)
patch_exclude_parent = Patch(
phase=Phases.post_generation_pydantic,
path=Path("src/nwb_schema_language/datamodel/nwb_schema_pydantic.py"),
match=r"(parent:.*Field\(\n\s*None,\n)(.*)",
replacement=r"\1 exclude=True,\n\2",
)
def run_patches(phase: Phases, verbose: bool = False) -> None:
"""

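Each Patch is a plain regex rewrite applied to the generated file after generation. As a rough illustration of what patch_exclude_parent does to a trimmed, hypothetical parent field definition:

import re

generated = (
    "    parent: Optional[Group] = Field(\n"
    "        None,\n"
    "        description=\"The parent group\",\n"
    "    )"
)
patched = re.sub(
    r"(parent:.*Field\(\n\s*None,\n)(.*)",
    r"\1        exclude=True,\n\2",
    generated,
)
assert "exclude=True," in patched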
View file

@@ -78,6 +78,7 @@ classes:
- datasets
- groups
- links
- parent
Groups:
slots:
@@ -94,9 +95,14 @@ classes:
- default_value
- doc
- required
- parent
slot_usage:
name:
required: true
parent:
any_of:
- range: Group
- range: Dataset
Link:
slots:
@@ -121,6 +127,7 @@ classes:
- quantity
- linkable
- attributes
- parent
Datasets:
slots:
@@ -177,7 +184,7 @@ slots:
description: Optional string with extended full name for the namespace.
version:
required: true
pattern: "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$"
# pattern: "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$"
date:
range: datetime
slot_uri: schema:dateModified
@@ -207,7 +214,6 @@ slots:
# schema
source:
description: describes the name of the YAML (or JSON) file with the schema specification. The schema files should be located in the same folder as the namespace file.
pattern: ".*\\.(yml|yaml|json)"
namespace:
description: describes a named reference to another namespace. In contrast to source, this is a reference by name to a known namespace (i.e., the namespace is resolved during the build and must point to an already existing namespace). This mechanism is used to allow, e.g., extension of a core namespace (here the NWB core namespace) without requiring hard paths to the files describing the core namespace. Either source or namespace must be specified, but not both.
namespaces:
@@ -312,6 +318,11 @@ slots:
description: describes the kind of reference
range: reftype_options
# extra - not defined in nwb-schema-language but useful when working with class objects
parent:
description: The parent group that contains this dataset or group
range: Group
required: false
enums:

View file

@@ -1,23 +0,0 @@
"""Data test."""
import os
import glob
import unittest
from linkml_runtime.loaders import yaml_loader
from nwb_schema_language.datamodel.nwb_schema_language import Namespaces
ROOT = os.path.join(os.path.dirname(__file__), "..")
DATA_DIR = os.path.join(ROOT, "src", "data", "tests")
EXAMPLE_FILES = glob.glob(os.path.join(DATA_DIR, "*.yaml"))
class TestData(unittest.TestCase):
"""Test data and datamodel."""
def test_namespaces(self):
"""Date test."""
namespace_file = [f for f in EXAMPLE_FILES if "namespace.yaml" in f][0]
obj = yaml_loader.load(namespace_file, target_class=Namespaces)
assert obj

View file

@@ -0,0 +1,31 @@
from nwb_schema_language import Group, Dataset, Attribute
def test_parentize_mixin():
"""
the parentize mixin should populate the "parent" attribute for applicable children
"""
dset_attr = Attribute(name="dset_attr", doc="")
dset = Dataset(
name="dataset", doc="", attributes=[dset_attr, {"name": "dict_based_attr", "doc": ""}]
)
group_attr = Attribute(name="group_attr", doc="")
group = Group(
name="group",
doc="",
attributes=[group_attr, {"name": "dict_based_attr", "doc": ""}],
datasets=[dset, {"name": "dict_based_dset", "doc": ""}],
)
assert dset_attr.parent is dset
assert dset.attributes[1].name == "dict_based_attr"
assert dset.attributes[1].parent is dset
assert dset.parent is group
assert group_attr.parent is group
assert group.attributes[1].name == "dict_based_attr"
assert group.attributes[1].parent is group
assert group.datasets[1].name == "dict_based_dset"
assert group.datasets[1].parent is group
dumped = group.model_dump()
assert "parent" not in dumped