mirror of
https://github.com/p2p-ld/nwb-linkml.git
synced 2025-01-09 13:44:27 +00:00
recursion works, more formal build method in adapters
This commit is contained in:
parent
170a424fb1
commit
a4806543ef
13 changed files with 321 additions and 28 deletions
|
@ -24,4 +24,20 @@ Steps:
|
|||
- new files: files not in the source domain
|
||||
- enum classes
|
||||
|
||||
- Rename items
|
||||
- Rename items
|
||||
|
||||
|
||||
## Translation choices
|
||||
|
||||
We aren't doing a 1:1 translation of NWB! The goal is to make something that is *import*
|
||||
backwards-compatible - i.e. we can read traditional NWB files - but not necessarily
|
||||
*export* for now. We will get to that eventually. NWB as it is now is highly tied to hdf5
|
||||
in multiple places - from the hdmf-common namespace to the nwb file classes,
|
||||
we want to instead abstract the structure of NWB so the schema can be used
|
||||
as a programming element (ie. labs can write their own schema extensions in yaml,
|
||||
generate pydantic modules for them, and they should Just Work TM) with various different
|
||||
storage backends.
|
||||
|
||||
- Don't try to emulate the nwb.file schema - it is basically a file layout that indicates
|
||||
what should go where. We are moving I/O out of the schema: storage layout is at a different level than the schema
|
||||
- Don't worry about most of hdmf-common: instead create sensible generics that can be implemented in different ways by different storage mediums
|
|
@ -1,11 +1,58 @@
|
|||
"""
|
||||
Base class for adapters
|
||||
"""
|
||||
from typing import List, Dict, Type, Generator, Any, Tuple
|
||||
from pydantic import BaseModel
|
||||
from abc import abstractmethod
|
||||
import warnings
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Dict, Type, Generator, Any, Tuple, Optional
|
||||
from pydantic import BaseModel, Field, validator
|
||||
from linkml_runtime.linkml_model import Element, SchemaDefinition, ClassDefinition, SlotDefinition, TypeDefinition
|
||||
|
||||
# SchemaDefClass = dataclass(SchemaDefinition).__pydantic_model__
|
||||
|
||||
@dataclass
class BuildResult:
    """
    Container for the linkML elements produced by an adapter's ``build`` method.

    Every field holds a list. A single non-list value passed for a field is
    wrapped in a one-element list by ``__post_init__``.

    Results can be merged with ``+`` (returns a new result, operands untouched)
    or ``+=`` (extends the left operand in place). When merging, schemas and
    classes whose ``name`` is already present on the left side are dropped;
    slots and types are concatenated as-is.
    """
    schemas: List[SchemaDefinition] = field(default_factory=list)
    classes: List[ClassDefinition] = field(default_factory=list)
    slots: List[SlotDefinition] = field(default_factory=list)
    types: List[TypeDefinition] = field(default_factory=list)

    def __post_init__(self):
        # allow a bare element to be given in place of a list
        for attr_name in ('schemas', 'classes', 'slots', 'types'):
            value = getattr(self, attr_name)
            if not isinstance(value, list):
                setattr(self, attr_name, [value])

    def _dedupe(self, ours, others):
        """
        Return the items of ``others`` whose ``name`` is not used by any item in ``ours``.

        Duplicates *within* ``others`` are not removed.
        """
        existing_names = [c.name for c in ours]
        return [o for o in others if o.name not in existing_names]

    def __iadd__(self, other: 'BuildResult') -> 'BuildResult':
        """
        Merge ``other`` into this result in place and return ``self``.
        """
        if not isinstance(other, BuildResult):
            return NotImplemented

        self.schemas.extend(self._dedupe(self.schemas, other.schemas))
        self.classes.extend(self._dedupe(self.classes, other.classes))
        self.slots.extend(other.slots)
        self.types.extend(other.types)
        return self

    def __add__(self, other: 'BuildResult') -> 'BuildResult':
        """
        Return a new result holding this result's items merged with ``other``'s.

        Neither operand is modified: the lists are shallow-copied before merging.
        """
        if not isinstance(other, BuildResult):
            return NotImplemented

        merged = BuildResult(
            schemas=list(self.schemas),
            classes=list(self.classes),
            slots=list(self.slots),
            types=list(self.types),
        )
        merged += other
        return merged
|
||||
|
||||
|
||||
class Adapter(BaseModel):
|
||||
pass
|
||||
@abstractmethod
|
||||
def build(self) -> 'BuildResult':
|
||||
"""
|
||||
Generate the corresponding linkML element for this adapter
|
||||
"""
|
||||
|
||||
def walk(self, input: BaseModel | list | dict):
|
||||
yield input
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
"""
|
||||
Adapters to linkML classes
|
||||
"""
|
||||
|
||||
from nwb_schema_language import Dataset, Group
|
||||
from nwb_linkml.adapters.adapter import Adapter
|
||||
import pdb
|
||||
from typing import List, Optional
|
||||
from nwb_schema_language import Dataset, Group, ReferenceDtype, DTypeType
|
||||
from nwb_linkml.adapters.adapter import Adapter, BuildResult
|
||||
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
|
||||
|
||||
|
||||
|
@ -12,25 +13,128 @@ class ClassAdapter(Adapter):
|
|||
Adapter to class-like things in linkml, including datasets and groups
|
||||
"""
|
||||
cls: Dataset | Group
|
||||
parent: Optional['ClassAdapter'] = None
|
||||
|
||||
def build(self) -> ClassDefinition:
|
||||
def _get_full_name(self) -> str:
|
||||
"""The full name of the object in the generated linkml
|
||||
|
||||
Distinct from 'name' which is the thing that's often used in """
|
||||
if self.cls.neurodata_type_def:
|
||||
name = self.cls.neurodata_type_def
|
||||
else:
|
||||
name = self.cls.name
|
||||
elif self.cls.name is not None:
|
||||
# not necessarily a unique name, so we combine parent names
|
||||
name_parts = []
|
||||
if self.parent is not None:
|
||||
name_parts.append(self.parent._get_full_name())
|
||||
|
||||
name_parts.append(self.cls.name)
|
||||
name = '_'.join(name_parts)
|
||||
elif self.cls.neurodata_type_inc is not None:
|
||||
# again, this is against the schema, but is common
|
||||
name = self.cls.neurodata_type_inc
|
||||
else:
|
||||
raise ValueError('Not sure what our name is!')
|
||||
|
||||
|
||||
return name
|
||||
|
||||
def _get_name(self) -> str:
    """
    Get the "regular" name, which is used as the name of the attr

    Resolution order: ``neurodata_type_def`` if truthy, then ``name`` if it is
    not ``None``, then ``neurodata_type_inc`` if truthy.

    Returns:
        str: the first of those values that is available

    Raises:
        ValueError: if none of the three yields a name
    """
    node = self.cls

    if node.neurodata_type_def:
        return node.neurodata_type_def

    if node.name is not None:
        # we do have a unique name
        return node.name

    if node.neurodata_type_inc:
        # group members can be anonymous? this violates the schema but is common
        return node.neurodata_type_inc

    raise ValueError(f'Class has no name!: {self.cls}')
|
||||
|
||||
def handle_dtype(self, dtype: DTypeType):
    """
    Resolve a dtype to the value used as a slot range.

    A ``ReferenceDtype`` is replaced by its ``target_type``; any other dtype
    is returned unchanged.
    """
    return dtype.target_type if isinstance(dtype, ReferenceDtype) else dtype
|
||||
|
||||
def build_attrs(self, cls: Dataset | Group) -> List[SlotDefinition]:
|
||||
attrs = [
|
||||
SlotDefinition(
|
||||
name=attr.name,
|
||||
description=attr.doc,
|
||||
|
||||
) for attr in self.cls.attributes
|
||||
range=self.handle_dtype(attr.dtype)
|
||||
) for attr in cls.attributes
|
||||
]
|
||||
|
||||
return attrs
|
||||
|
||||
def build_subclasses(self, cls: Dataset | Group) -> BuildResult:
    """
    Build nested groups and datasets

    For every nested dataset and group, a SlotDefinition linking the main
    class to the nested one is always created. The nested class itself is
    only built when it has a ``name`` or a ``neurodata_type_def``.

    Returns:
        BuildResult: the linking slots plus everything the nested builds produced
    """
    # wrap every nested dataset, then every nested group, with this adapter as parent
    children = [
        ClassAdapter(cls=member, parent=self)
        for member in (*cls.datasets, *cls.groups)
    ]

    result = BuildResult()
    for child in children:
        result.slots.append(
            SlotDefinition(
                name=child._get_name(),
                description=child.cls.doc,
                range=child._get_full_name()
            )
        )

        # an anonymous member that's just an inc only needs the slot,
        # since the class is defined elsewhere
        if child.cls.name is not None or child.cls.neurodata_type_def is not None:
            result += child.build()

    return result
|
||||
|
||||
|
||||
def build(self) -> BuildResult:
|
||||
|
||||
# Build this class
|
||||
if self.parent is not None:
|
||||
name = self._get_full_name()
|
||||
else:
|
||||
name = self._get_name()
|
||||
# if name == 'TimeSeries':
|
||||
# pdb.set_trace()
|
||||
|
||||
# Get vanilla top-level attributes
|
||||
attrs = self.build_attrs(self.cls)
|
||||
|
||||
# unnest and build subclasses in datasets and groups
|
||||
if isinstance(self.cls, Group):
|
||||
# only groups have sub-datasets and sub-groups
|
||||
nested_res = self.build_subclasses(self.cls)
|
||||
attrs.extend(nested_res.slots)
|
||||
else:
|
||||
nested_res = BuildResult()
|
||||
|
||||
cls = ClassDefinition(
|
||||
name = name,
|
||||
is_a = self.cls.neurodata_type_inc,
|
||||
description=self.cls.doc,
|
||||
attributes=attrs
|
||||
)
|
||||
return cls
|
||||
res = BuildResult(
|
||||
classes = [cls, *nested_res.classes]
|
||||
)
|
||||
|
||||
return res
|
|
@ -6,24 +6,57 @@ for extracting information and generating translated schema
|
|||
"""
|
||||
import pdb
|
||||
from typing import List, Optional
|
||||
from pydantic import BaseModel, Field, validator
|
||||
from pydantic import BaseModel, Field, validator, PrivateAttr
|
||||
from pprint import pformat
|
||||
from linkml_runtime.linkml_model import SchemaDefinition
|
||||
|
||||
from nwb_schema_language import Namespaces
|
||||
|
||||
from nwb_linkml.adapters.adapter import Adapter
|
||||
from nwb_linkml.adapters.adapter import Adapter, BuildResult
|
||||
from nwb_linkml.adapters.schema import SchemaAdapter
|
||||
from nwb_linkml.lang_elements import NwbLangSchema
|
||||
|
||||
class NamespacesAdapter(Adapter):
|
||||
namespaces: Namespaces
|
||||
schemas: List[SchemaAdapter]
|
||||
imported: List['NamespacesAdapter'] = Field(default_factory=list)
|
||||
|
||||
_imports_populated = PrivateAttr(False)
|
||||
|
||||
def __init__(self, **kwargs):
    """
    Initialise the model from ``kwargs``, then run ``_populate_schema_namespaces``.
    """
    super().__init__(**kwargs)
    self._populate_schema_namespaces()
|
||||
|
||||
def build(self) -> BuildResult:
    """
    Build every schema in this namespace collection, recursing into imported ones.

    Imports are populated first if they have not been already. The result
    holds the merged builds of this adapter's schemas and of all imported
    namespace adapters, the ``NwbLangSchema`` language elements (exactly once),
    and one top-level SchemaDefinition per namespace that imports that
    namespace's schemas.

    Returns:
        BuildResult: the merged result
    """
    if not self._imports_populated:
        self.populate_imports()

    sch_result = BuildResult()
    for sch in self.schemas:
        sch_result += sch.build()

    # recursive step
    for imported in self.imported:
        sch_result += imported.build()

    # add in monkeypatch nwb types. Every imported adapter's build() has
    # already appended them, so only add them when they aren't present yet
    # to avoid emitting the language schema twice
    if all(existing.name != NwbLangSchema.name for existing in sch_result.schemas):
        sch_result.schemas.append(NwbLangSchema)

    # now generate the top-level namespaces that import everything
    for ns in self.namespaces.namespaces:
        ns_schemas = [sch for sch in self.schemas if sch.namespace == ns.name]
        ns_schema = SchemaDefinition(
            name=ns.name,
            id=ns.name,
            description=ns.doc,
            version=ns.version,
            imports=[sch.name for sch in ns_schemas]
        )
        sch_result.schemas.append(ns_schema)

    return sch_result
|
||||
|
||||
def _populate_schema_namespaces(self):
|
||||
# annotate for each schema which namespace imports it
|
||||
for sch in self.schemas:
|
||||
|
@ -78,4 +111,10 @@ class NamespacesAdapter(Adapter):
|
|||
if depends_on not in sch.imports:
|
||||
sch.imports.append(depends_on)
|
||||
|
||||
# do so recursively
|
||||
for imported in self.imported:
|
||||
imported.populate_imports()
|
||||
|
||||
self._imports_populated = True
|
||||
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ from typing import Optional, List, TYPE_CHECKING
|
|||
from pathlib import Path
|
||||
from pydantic import Field
|
||||
|
||||
from nwb_linkml.adapters.adapter import Adapter
|
||||
from nwb_linkml.adapters.adapter import Adapter, BuildResult
|
||||
from nwb_linkml.adapters.classes import ClassAdapter
|
||||
if TYPE_CHECKING:
|
||||
from nwb_linkml.adapters.namespaces import NamespacesAdapter
|
||||
|
@ -47,7 +47,7 @@ class SchemaAdapter(Adapter):
|
|||
|
||||
return out_str
|
||||
|
||||
def build(self) -> SchemaDefinition:
|
||||
def build(self) -> BuildResult:
|
||||
"""
|
||||
Make the LinkML representation for this schema file
|
||||
|
||||
|
@ -59,16 +59,25 @@ class SchemaAdapter(Adapter):
|
|||
"""
|
||||
classes = [ClassAdapter(cls=dset) for dset in self.datasets]
|
||||
classes.extend(ClassAdapter(cls=group) for group in self.groups)
|
||||
built_classes = [c.build() for c in classes]
|
||||
built_results = None
|
||||
for cls in classes:
|
||||
if built_results is None:
|
||||
built_results = cls.build()
|
||||
else:
|
||||
built_results += cls.build()
|
||||
|
||||
|
||||
sch = SchemaDefinition(
|
||||
name = self.name,
|
||||
id = self.name,
|
||||
imports = [i.name for i in self.imports],
|
||||
classes=built_classes
|
||||
classes=built_results.classes,
|
||||
slots=built_results.slots,
|
||||
types=built_results.types
|
||||
)
|
||||
return sch
|
||||
# every schema needs the language elements
|
||||
sch.imports.append('nwb.language')
|
||||
return BuildResult(schemas=[sch])
|
||||
|
||||
|
||||
@property
|
||||
|
|
|
@ -253,7 +253,7 @@ groups:
|
|||
dtype: text
|
||||
doc: Description of this collection of images.
|
||||
datasets:
|
||||
- neurodata_type_inc: Image
|
||||
# - neurodata_type_inc: Image
|
||||
doc: Images stored in this collection.
|
||||
quantity: '+'
|
||||
- name: order_of_images
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import warnings
|
||||
from typing import List, Union
|
||||
try:
|
||||
from .datamodel.nwb_schema_pydantic import Namespace, \
|
||||
Namespaces, \
|
||||
|
@ -8,6 +9,9 @@ try:
|
|||
Link, \
|
||||
Dataset, \
|
||||
ReferenceDtype, \
|
||||
CompoundDtype
|
||||
CompoundDtype, \
|
||||
FlatDtype
|
||||
|
||||
DTypeType = Union[List[CompoundDtype], FlatDtype, ReferenceDtype]
|
||||
except NameError:
|
||||
warnings.warn('Error importing pydantic classes, passing because we might be in the process of patching them, but it is likely they are broken and you will be unable to use them!')
|
|
@ -1 +1,3 @@
|
|||
from .nwb_schema_language import *
|
||||
|
||||
# create additional derived
|
|
@ -219,6 +219,7 @@ class Dataset(NamingMixin, DtypeMixin):
|
|||
quantity: Optional[Union[QuantityEnum, int]] = Field(1)
|
||||
linkable: Optional[bool] = Field(None)
|
||||
attributes: Optional[List[Attribute]] = Field(default_factory=list)
|
||||
datasets: Optional[List[Dataset]] = Field(default_factory=list)
|
||||
dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(default_factory=list)
|
||||
|
||||
|
||||
|
|
|
@ -124,6 +124,7 @@ classes:
|
|||
- quantity
|
||||
- linkable
|
||||
- attributes
|
||||
- groups
|
||||
|
||||
Datasets:
|
||||
slots:
|
||||
|
|
51
poetry.lock
generated
51
poetry.lock
generated
|
@ -260,6 +260,20 @@ files = [
|
|||
{file = "fqdn-1.5.1.tar.gz", hash = "sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "future-fstrings"
|
||||
version = "1.2.0"
|
||||
description = "A backport of fstrings to python<3.6"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
||||
files = [
|
||||
{file = "future_fstrings-1.2.0-py2.py3-none-any.whl", hash = "sha256:90e49598b553d8746c4dc7d9442e0359d038c3039d802c91c0a55505da318c63"},
|
||||
{file = "future_fstrings-1.2.0.tar.gz", hash = "sha256:6cf41cbe97c398ab5a81168ce0dbb8ad95862d3caf23c21e4430627b90844089"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
rewrite = ["tokenize-rt (>=3)"]
|
||||
|
||||
[[package]]
|
||||
name = "graphviz"
|
||||
version = "0.20.1"
|
||||
|
@ -746,6 +760,24 @@ files = [
|
|||
{file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "networkx"
|
||||
version = "3.1"
|
||||
description = "Python package for creating and manipulating graphs and networks"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "networkx-3.1-py3-none-any.whl", hash = "sha256:4f33f68cb2afcf86f28a45f43efc27a9386b535d567d2127f8f61d51dec58d36"},
|
||||
{file = "networkx-3.1.tar.gz", hash = "sha256:de346335408f84de0eada6ff9fafafff9bcda11f0a0dfaa931133debb146ab61"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
default = ["matplotlib (>=3.4)", "numpy (>=1.20)", "pandas (>=1.3)", "scipy (>=1.8)"]
|
||||
developer = ["mypy (>=1.1)", "pre-commit (>=3.2)"]
|
||||
doc = ["nb2plots (>=0.6)", "numpydoc (>=1.5)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.13)", "sphinx (>=6.1)", "sphinx-gallery (>=0.12)", "texext (>=0.6.7)"]
|
||||
extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.10)", "sympy (>=1.10)"]
|
||||
test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "nwb-schema-language"
|
||||
version = "0.1.0"
|
||||
|
@ -1023,6 +1055,23 @@ pluggy = ">=0.12,<2.0"
|
|||
[package.extras]
|
||||
testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-depends"
|
||||
version = "1.0.1"
|
||||
description = "Tests that depend on other tests"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "pytest-depends-1.0.1.tar.gz", hash = "sha256:90a28e2b87b75b18abd128c94015248544acac20e4392e9921e5a86f93319dfe"},
|
||||
{file = "pytest_depends-1.0.1-py3-none-any.whl", hash = "sha256:a1df072bcc93d77aca3f0946903f5fed8af2d9b0056db1dfc9ed5ac164ab0642"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
colorama = "*"
|
||||
future-fstrings = "*"
|
||||
networkx = "*"
|
||||
pytest = ">=3"
|
||||
|
||||
[[package]]
|
||||
name = "pytest-logging"
|
||||
version = "2015.11.4"
|
||||
|
@ -1759,4 +1808,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.11"
|
||||
content-hash = "90f5eaedd0572c26dfac8a9f2fed0c8ec50f70a54b7bff03a43816f96cb60bb1"
|
||||
content-hash = "debbeeaba69d6afc3da329ccc76e0c2ae3124773b85a577a10cb5e673845a9e5"
|
||||
|
|
|
@ -22,6 +22,7 @@ linkml = "^1.5.7"
|
|||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pytest = "^7.4.0"
|
||||
pytest-depends = "^1.0.1"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
|
|
|
@ -1,8 +1,11 @@
|
|||
import pdb
|
||||
|
||||
import pytest
|
||||
import warnings
|
||||
|
||||
from .fixtures import nwb_core_fixture, tmp_output_dir
|
||||
from linkml_runtime.dumpers import yaml_dumper
|
||||
from linkml.generators import PydanticGenerator
|
||||
|
||||
from nwb_linkml.lang_elements import NwbLangSchema
|
||||
|
||||
|
@ -10,8 +13,25 @@ def test_generate_nwblang(tmp_output_dir):
|
|||
output_file = (tmp_output_dir / NwbLangSchema.name).with_suffix('.yml')
|
||||
yaml_dumper.dump(NwbLangSchema, output_file)
|
||||
|
||||
def test_generate_base(nwb_core_fixture, tmp_output_dir):
|
||||
schema = nwb_core_fixture.schemas[0].build()
|
||||
output_file = (tmp_output_dir / schema.name).with_suffix('.yml')
|
||||
warnings.warn(output_file)
|
||||
yaml_dumper.dump(schema, output_file)
|
||||
def test_generate_core(nwb_core_fixture, tmp_output_dir):
    """Build the core fixture and dump each resulting schema to ``<name>.yaml``."""
    build_result = nwb_core_fixture.build()
    for built_schema in build_result.schemas:
        target = tmp_output_dir / (built_schema.name + '.yaml')
        yaml_dumper.dump(built_schema, target)
|
||||
|
||||
@pytest.mark.depends(on=['test_generate_core'])
def test_generate_pydantic(tmp_output_dir):
    """Generate pydantic models from the dumped ``core.yaml`` and write them to ``core.py``."""
    source_yaml = tmp_output_dir / 'core.yaml'
    target_module = tmp_output_dir / 'core.py'

    serialized = PydanticGenerator(
        str(source_yaml),
        pydantic_version='1',
        emit_metadata=True,
        gen_classvars=True,
        gen_slots=True
    ).serialize()

    with open(target_module, 'w') as out:
        out.write(serialized)
|
||||
|
|
Loading…
Reference in a new issue