recursion works, more formal build method in adapters

sneakers-the-rat 2023-08-23 19:56:09 -07:00
parent 170a424fb1
commit a4806543ef
13 changed files with 321 additions and 28 deletions


@@ -25,3 +25,19 @@ Steps:
- enum classes
- Rename items

## Translation choices
We aren't doing a 1:1 translation of NWB! The goal is to make something that is *import*
backwards-compatible - i.e. we can read traditional NWB files - but not necessarily
*export* compatible for now. We will get to that eventually. NWB as it stands is tightly
tied to HDF5 in multiple places, from the hdmf-common namespace to the NWB file classes.
We want to instead abstract the structure of NWB so the schema can be used
as a programming element (i.e. labs can write their own schema extensions in YAML,
generate pydantic modules for them, and they should Just Work (TM)) with various different
storage backends.
- Don't try to emulate the nwb.file schema - it is basically a file layout that indicates
  what should go where. We are moving I/O out of the schema: storage layout is at a different level than the schema.
- Don't worry about most of hdmf-common: instead create sensible generics that can be
  implemented in different ways by different storage media (see the sketch below).
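
As a very rough sketch of the "schema as programming element" idea - everything below is
hypothetical (the extension name, class, and backend calls are made up for illustration,
not part of this commit):

    # Hypothetical sketch: a lab writes my_lab.extension.yaml, runs the generator,
    # and gets a pydantic model it can use with any storage backend.
    from typing import List
    from pydantic import BaseModel

    class LaserPulseSeries(BaseModel):   # hypothetical generated class
        name: str
        description: str
        timestamps: List[float]
        power_mw: List[float]

    pulses = LaserPulseSeries(
        name="pulses",
        description="stimulation pulses",
        timestamps=[0.0, 0.1, 0.2],
        power_mw=[5.0, 5.0, 5.0],
    )

    # The same object could then be handed to whichever backend a lab prefers -
    # storage layout lives outside the schema, e.g.:
    # hdf5_backend.write(pulses)   # hypothetical
    # zarr_backend.write(pulses)   # hypothetical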


@@ -1,11 +1,58 @@
"""
Base class for adapters
"""
from abc import abstractmethod
import warnings
from dataclasses import dataclass, field
from typing import List, Dict, Type, Generator, Any, Tuple, Optional
from pydantic import BaseModel, Field, validator
from linkml_runtime.linkml_model import Element, SchemaDefinition, ClassDefinition, SlotDefinition, TypeDefinition

# SchemaDefClass = dataclass(SchemaDefinition).__pydantic_model__

@dataclass
class BuildResult:
    schemas: List[SchemaDefinition] = field(default_factory=list)
    classes: List[ClassDefinition] = field(default_factory=list)
    slots: List[SlotDefinition] = field(default_factory=list)
    types: List[TypeDefinition] = field(default_factory=list)

    def __post_init__(self):
        for field in ('schemas', 'classes', 'slots', 'types'):
            attr = getattr(self, field)
            if not isinstance(attr, list):
                setattr(self, field, [attr])

    def _dedupe(self, ours, others):
        existing_names = [c.name for c in ours]
        others_dedupe = [o for o in others if o.name not in existing_names]
        return others_dedupe

    def __add__(self, other: 'BuildResult') -> 'BuildResult':
        # if not isinstance(other, 'BuildResult'):
        #     raise TypeError('Can only add two build results together')
        self.schemas.extend(self._dedupe(self.schemas, other.schemas))
        self.classes.extend(self._dedupe(self.classes, other.classes))
        # existing_names = [c.name for c in self.classes]
        # for newc in other.classes:
        #     if newc.name in existing_names:
        #         warnings.warn(f'Not creating duplicate class for {newc.name}')
        #         continue
        #     self.classes.append(newc)
        # self.classes.extend(other.classes)
        self.slots.extend(other.slots)
        self.types.extend(other.types)
        return self


class Adapter(BaseModel):
    @abstractmethod
    def build(self) -> 'BuildResult':
        """
        Generate the corresponding linkML element for this adapter
        """

    def walk(self, input: BaseModel | list | dict):
        yield input
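
A quick usage sketch (not part of the commit) of how `BuildResult.__add__` is meant to
behave, given the dataclass above: classes already present are deduped by name, and
addition mutates and returns the left-hand result:

    from linkml_runtime.linkml_model import ClassDefinition

    a = BuildResult(classes=ClassDefinition(name='TimeSeries'))    # coerced to a list in __post_init__
    b = BuildResult(classes=[ClassDefinition(name='TimeSeries'),   # duplicate name, will be dropped
                             ClassDefinition(name='Image')])

    merged = a + b
    assert [c.name for c in merged.classes] == ['TimeSeries', 'Image']
    assert merged is a                                             # __add__ extends `a` in place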


@@ -1,9 +1,10 @@
"""
Adapters to linkML classes
"""
import pdb
from typing import List, Optional
from nwb_schema_language import Dataset, Group, ReferenceDtype, DTypeType
from nwb_linkml.adapters.adapter import Adapter, BuildResult
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
@@ -12,25 +13,128 @@ class ClassAdapter(Adapter):
    Adapter to class-like things in linkml, including datasets and groups
    """
    cls: Dataset | Group
    parent: Optional['ClassAdapter'] = None

    def _get_full_name(self) -> str:
        """The full name of the object in the generated linkml

        Distinct from 'name' which is the thing that's often used in
        """
        if self.cls.neurodata_type_def:
            name = self.cls.neurodata_type_def
        elif self.cls.name is not None:
            # not necessarily a unique name, so we combine parent names
            name_parts = []
            if self.parent is not None:
                name_parts.append(self.parent._get_full_name())
            name_parts.append(self.cls.name)
            name = '_'.join(name_parts)
        elif self.cls.neurodata_type_inc is not None:
            # again, this is against the schema, but is common
            name = self.cls.neurodata_type_inc
        else:
            raise ValueError('Not sure what our name is!')

        return name

    def _get_name(self) -> str:
        """
        Get the "regular" name, which is used as the name of the attr

        Returns:

        """
        # return self._get_full_name()
        name = None
        if self.cls.neurodata_type_def:
            name = self.cls.neurodata_type_def
        elif self.cls.name is not None:
            # we do have a unique name
            name = self.cls.name
        elif self.cls.neurodata_type_inc:
            # group members can be anonymous? this violates the schema but is common
            name = self.cls.neurodata_type_inc

        if name is None:
            raise ValueError(f'Class has no name!: {self.cls}')

        return name

    def handle_dtype(self, dtype: DTypeType):
        if isinstance(dtype, ReferenceDtype):
            return dtype.target_type
        else:
            return dtype

    def build_attrs(self, cls: Dataset | Group) -> List[SlotDefinition]:
        attrs = [
            SlotDefinition(
                name=attr.name,
                description=attr.doc,
                range=self.handle_dtype(attr.dtype)
            ) for attr in cls.attributes
        ]
        return attrs

    def build_subclasses(self, cls: Dataset | Group) -> BuildResult:
        """
        Build nested groups and datasets

        Create ClassDefinitions for each, but then also create SlotDefinitions that
        will be used as attributes linking the main class to the subclasses
        """
        # build and flatten nested classes
        nested_classes = [ClassAdapter(cls=dset, parent=self) for dset in cls.datasets]
        nested_classes.extend([ClassAdapter(cls=grp, parent=self) for grp in cls.groups])
        nested_res = BuildResult()
        for subclass in nested_classes:
            this_slot = SlotDefinition(
                name=subclass._get_name(),
                description=subclass.cls.doc,
                range=subclass._get_full_name()
            )
            nested_res.slots.append(this_slot)

            if subclass.cls.name is None and subclass.cls.neurodata_type_def is None:
                # anonymous group that's just an inc, we only need the slot since the class is defined elsewhere
                continue

            this_build = subclass.build()
            nested_res += this_build

        return nested_res

    def build(self) -> BuildResult:
        # Build this class
        if self.parent is not None:
            name = self._get_full_name()
        else:
            name = self._get_name()

        # if name == 'TimeSeries':
        #     pdb.set_trace()

        # Get vanilla top-level attributes
        attrs = self.build_attrs(self.cls)

        # unnest and build subclasses in datasets and groups
        if isinstance(self.cls, Group):
            # only groups have sub-datasets and sub-groups
            nested_res = self.build_subclasses(self.cls)
            attrs.extend(nested_res.slots)
        else:
            nested_res = BuildResult()

        cls = ClassDefinition(
            name = name,
            is_a = self.cls.neurodata_type_inc,
            description=self.cls.doc,
            attributes=attrs
        )
        res = BuildResult(
            classes = [cls, *nested_res.classes]
        )
        return res
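
Roughly what the nested build is expected to produce - a sketch, not from the commit,
and the `Group`/`Dataset` constructor fields are assumed from nwb-schema-language:

    # Assumed nwb_schema_language constructors; field names taken from the schema language.
    from nwb_schema_language import Group, Dataset

    grp = Group(
        neurodata_type_def='MyContainer',
        doc='a group with one named sub-dataset',
        datasets=[Dataset(name='data', doc='the actual values')],
    )
    result = ClassAdapter(cls=grp).build()

    # Expected: a class for the group, a class for the nested dataset named
    # 'MyContainer_data' (parent full name + own name), and the group class gets a
    # 'data' slot whose range is 'MyContainer_data'.
    print([c.name for c in result.classes])   # ['MyContainer', 'MyContainer_data']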


@@ -6,24 +6,57 @@ for extracting information and generating translated schema
"""
import pdb
from typing import List, Optional
from pydantic import BaseModel, Field, validator, PrivateAttr
from pprint import pformat
from linkml_runtime.linkml_model import SchemaDefinition

from nwb_schema_language import Namespaces
from nwb_linkml.adapters.adapter import Adapter, BuildResult
from nwb_linkml.adapters.schema import SchemaAdapter
from nwb_linkml.lang_elements import NwbLangSchema

class NamespacesAdapter(Adapter):
    namespaces: Namespaces
    schemas: List[SchemaAdapter]
    imported: List['NamespacesAdapter'] = Field(default_factory=list)
    _imports_populated = PrivateAttr(False)

    def __init__(self, **kwargs):
        super(NamespacesAdapter, self).__init__(**kwargs)
        self._populate_schema_namespaces()

    def build(self) -> BuildResult:
        if not self._imports_populated:
            self.populate_imports()

        sch_result = BuildResult()
        for sch in self.schemas:
            sch_result += sch.build()
        # recursive step
        for imported in self.imported:
            imported_build = imported.build()
            sch_result += imported_build

        # add in monkeypatch nwb types
        sch_result.schemas.append(NwbLangSchema)

        # now generate the top-level namespaces that import everything
        for ns in self.namespaces.namespaces:
            ns_schemas = [sch for sch in self.schemas if sch.namespace == ns.name]
            ns_schema = SchemaDefinition(
                name = ns.name,
                id = ns.name,
                description = ns.doc,
                version = ns.version,
                imports = [sch.name for sch in ns_schemas]
            )
            sch_result.schemas.append(ns_schema)

        return sch_result

    def _populate_schema_namespaces(self):
        # annotate for each schema which namespace imports it
        for sch in self.schemas:
@@ -78,4 +111,10 @@ class NamespacesAdapter:
            if depends_on not in sch.imports:
                sch.imports.append(depends_on)

        # do so recursively
        for imported in self.imported:
            imported.populate_imports()

        self._imports_populated = True


@@ -7,7 +7,7 @@ from typing import Optional, List, TYPE_CHECKING
from pathlib import Path
from pydantic import Field
from nwb_linkml.adapters.adapter import Adapter, BuildResult
from nwb_linkml.adapters.classes import ClassAdapter
if TYPE_CHECKING:
    from nwb_linkml.adapters.namespaces import NamespacesAdapter
@@ -47,7 +47,7 @@ class SchemaAdapter(Adapter):
        return out_str

    def build(self) -> BuildResult:
        """
        Make the LinkML representation for this schema file
@@ -59,16 +59,25 @@ class SchemaAdapter(Adapter):
        """
        classes = [ClassAdapter(cls=dset) for dset in self.datasets]
        classes.extend(ClassAdapter(cls=group) for group in self.groups)
        built_results = None
        for cls in classes:
            if built_results is None:
                built_results = cls.build()
            else:
                built_results += cls.build()

        sch = SchemaDefinition(
            name = self.name,
            id = self.name,
            imports = [i.name for i in self.imports],
            classes=built_results.classes,
            slots=built_results.slots,
            types=built_results.types
        )
        # every schema needs the language elements
        sch.imports.append('nwb.language')
        return BuildResult(schemas=[sch])

    @property


@@ -253,7 +253,7 @@ groups:
    dtype: text
    doc: Description of this collection of images.
  datasets:
  # - neurodata_type_inc: Image
    doc: Images stored in this collection.
    quantity: '+'
  - name: order_of_images


@@ -1,4 +1,5 @@
import warnings
from typing import List, Union
try:
    from .datamodel.nwb_schema_pydantic import Namespace, \
        Namespaces, \
@@ -8,6 +9,9 @@ try:
        Link, \
        Dataset, \
        ReferenceDtype, \
        CompoundDtype, \
        FlatDtype

    DTypeType = Union[List[CompoundDtype], FlatDtype, ReferenceDtype]
except NameError:
    warnings.warn('Error importing pydantic classes, passing because we might be in the process of patching them, but it is likely they are broken and you will be unable to use them!')


@@ -1 +1,3 @@
from .nwb_schema_language import *

# create additional derived


@@ -219,6 +219,7 @@ class Dataset(NamingMixin, DtypeMixin):
    quantity: Optional[Union[QuantityEnum, int]] = Field(1)
    linkable: Optional[bool] = Field(None)
    attributes: Optional[List[Attribute]] = Field(default_factory=list)
    datasets: Optional[List[Dataset]] = Field(default_factory=list)
    dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(default_factory=list)


@@ -124,6 +124,7 @@ classes:
      - quantity
      - linkable
      - attributes
      - groups

  Datasets:
    slots:

poetry.lock (generated)

@@ -260,6 +260,20 @@ files = [
    {file = "fqdn-1.5.1.tar.gz", hash = "sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f"},
]

[[package]]
name = "future-fstrings"
version = "1.2.0"
description = "A backport of fstrings to python<3.6"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
    {file = "future_fstrings-1.2.0-py2.py3-none-any.whl", hash = "sha256:90e49598b553d8746c4dc7d9442e0359d038c3039d802c91c0a55505da318c63"},
    {file = "future_fstrings-1.2.0.tar.gz", hash = "sha256:6cf41cbe97c398ab5a81168ce0dbb8ad95862d3caf23c21e4430627b90844089"},
]

[package.extras]
rewrite = ["tokenize-rt (>=3)"]

[[package]]
name = "graphviz"
version = "0.20.1"
@@ -746,6 +760,24 @@ files = [
    {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
]

[[package]]
name = "networkx"
version = "3.1"
description = "Python package for creating and manipulating graphs and networks"
optional = false
python-versions = ">=3.8"
files = [
    {file = "networkx-3.1-py3-none-any.whl", hash = "sha256:4f33f68cb2afcf86f28a45f43efc27a9386b535d567d2127f8f61d51dec58d36"},
    {file = "networkx-3.1.tar.gz", hash = "sha256:de346335408f84de0eada6ff9fafafff9bcda11f0a0dfaa931133debb146ab61"},
]

[package.extras]
default = ["matplotlib (>=3.4)", "numpy (>=1.20)", "pandas (>=1.3)", "scipy (>=1.8)"]
developer = ["mypy (>=1.1)", "pre-commit (>=3.2)"]
doc = ["nb2plots (>=0.6)", "numpydoc (>=1.5)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.13)", "sphinx (>=6.1)", "sphinx-gallery (>=0.12)", "texext (>=0.6.7)"]
extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.10)", "sympy (>=1.10)"]
test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"]

[[package]]
name = "nwb-schema-language"
version = "0.1.0"
@@ -1023,6 +1055,23 @@ pluggy = ">=0.12,<2.0"

[package.extras]
testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]

[[package]]
name = "pytest-depends"
version = "1.0.1"
description = "Tests that depend on other tests"
optional = false
python-versions = "*"
files = [
    {file = "pytest-depends-1.0.1.tar.gz", hash = "sha256:90a28e2b87b75b18abd128c94015248544acac20e4392e9921e5a86f93319dfe"},
    {file = "pytest_depends-1.0.1-py3-none-any.whl", hash = "sha256:a1df072bcc93d77aca3f0946903f5fed8af2d9b0056db1dfc9ed5ac164ab0642"},
]

[package.dependencies]
colorama = "*"
future-fstrings = "*"
networkx = "*"
pytest = ">=3"

[[package]]
name = "pytest-logging"
version = "2015.11.4"
@@ -1759,4 +1808,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p

[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "debbeeaba69d6afc3da329ccc76e0c2ae3124773b85a577a10cb5e673845a9e5"


@@ -22,6 +22,7 @@ linkml = "^1.5.7"
[tool.poetry.group.dev.dependencies]
pytest = "^7.4.0"
pytest-depends = "^1.0.1"

[build-system]
requires = ["poetry-core"]


@@ -1,8 +1,11 @@
import pdb
import pytest
import warnings

from .fixtures import nwb_core_fixture, tmp_output_dir

from linkml_runtime.dumpers import yaml_dumper
from linkml.generators import PydanticGenerator

from nwb_linkml.lang_elements import NwbLangSchema
@@ -10,8 +13,25 @@ def test_generate_nwblang(tmp_output_dir):
    output_file = (tmp_output_dir / NwbLangSchema.name).with_suffix('.yml')
    yaml_dumper.dump(NwbLangSchema, output_file)

def test_generate_core(nwb_core_fixture, tmp_output_dir):
    schemas = nwb_core_fixture.build().schemas
    for schema in schemas:
        output_file = tmp_output_dir / (schema.name + '.yaml')
        yaml_dumper.dump(schema, output_file)

@pytest.mark.depends(on=['test_generate_core'])
def test_generate_pydantic(tmp_output_dir):
    core_file = tmp_output_dir / 'core.yaml'
    pydantic_file = tmp_output_dir / 'core.py'

    generator = PydanticGenerator(
        str(core_file),
        pydantic_version='1',
        emit_metadata=True,
        gen_classvars=True,
        gen_slots=True
    )
    gen_pydantic = generator.serialize()

    with open(pydantic_file, 'w') as pfile:
        pfile.write(gen_pydantic)