[tests] pydantic generator

This commit is contained in:
sneakers-the-rat 2023-10-05 21:22:00 -07:00
parent 75f39b4914
commit a0d96014e1
6 changed files with 506 additions and 13 deletions

View file

@ -1,7 +1,7 @@
# nwb-linkml # nwb-linkml
[![Documentation Status](https://readthedocs.org/projects/nwb-linkml/badge/?version=latest)](https://nwb-linkml.readthedocs.io/en/latest/?badge=latest) [![Documentation Status](https://readthedocs.org/projects/nwb-linkml/badge/?version=latest)](https://nwb-linkml.readthedocs.io/en/latest/?badge=latest)
[![Coverage Status](https://coveralls.io/repos/github/p2p-ld/nwb-linkml/badge.svg)](https://coveralls.io/github/p2p-ld/nwb-linkml)
Translating NWB schema language to linkml Translating NWB schema language to linkml

View file

@ -15,6 +15,13 @@ The `serialize` method
- Generates linkML Classes - Generates linkML Classes
- `generate_enums` runs first - `generate_enums` runs first
.. note::
This module is heinous. We have mostly copied and pasted the existing :class:`linkml.generators.PydanticGenerator`
and overridden what we need to make this work for NWB, but the source is...
a little messy. We will be tidying this up and trying to pull changes upstream,
but for now this is just our hacky little secret.
""" """
import pdb import pdb
from dataclasses import dataclass, field from dataclasses import dataclass, field
@ -218,11 +225,11 @@ class {{ c.name }}
@dataclass @dataclass
class NWBPydanticGenerator(PydanticGenerator): class NWBPydanticGenerator(PydanticGenerator):
SKIP_ENUM=('FlatDType',) SKIP_ENUM:Tuple[str]=('FlatDType',)
# SKIP_SLOTS=('VectorData',) # SKIP_SLOTS=('VectorData',)
SKIP_SLOTS=('',) SKIP_SLOTS:Tuple[str]=('',)
SKIP_CLASSES=('',) SKIP_CLASSES:Tuple[str]=('',)
INJECTED_FIELDS = ( INJECTED_FIELDS:Tuple[str] = (
'hdf5_path: Optional[str] = Field(None, description="The absolute path that this object is stored in an NWB file")', 'hdf5_path: Optional[str] = Field(None, description="The absolute path that this object is stored in an NWB file")',
'object_id: Optional[str] = Field(None, description="Unique UUID for each object")' 'object_id: Optional[str] = Field(None, description="Unique UUID for each object")'
) )
@ -231,7 +238,7 @@ class NWBPydanticGenerator(PydanticGenerator):
schema_map:Optional[Dict[str, SchemaDefinition]]=None schema_map:Optional[Dict[str, SchemaDefinition]]=None
versions:dict = None versions:dict = None
"""See :meth:`.LinkMLProvider.build` for usage - a list of specific versions to import from""" """See :meth:`.LinkMLProvider.build` for usage - a list of specific versions to import from"""
pydantic_version = "2"
def _locate_imports( def _locate_imports(
self, self,
@ -274,13 +281,12 @@ class NWBPydanticGenerator(PydanticGenerator):
def _get_namespace_imports(self, sv:SchemaView) -> Dict[str, List[str]]: def _get_namespace_imports(self, sv:SchemaView) -> Dict[str, List[str]]:
""" """
Get imports for namespace packages. For these we import all Get imports for namespace packages. For these we import all
the tree_root classes, ie. all the classes that are top-level classes rather than the tree_root classes, ie. all the classes that are top-level classes
rather than nested classes rather than nested classes
""" """
all_classes = sv.all_classes(imports=True) all_classes = sv.all_classes(imports=True)
needed_classes = [] needed_classes = []
for clsname, cls in all_classes.items(): for clsname, cls in all_classes.items():
#if cls.tree_root:
if cls.is_a != 'Arraylike': if cls.is_a != 'Arraylike':
needed_classes.append(clsname) needed_classes.append(clsname)
@ -764,12 +770,12 @@ class NWBPydanticGenerator(PydanticGenerator):
) )
return code return code
def compile_module(self, module_path:Path=None, **kwargs) -> ModuleType: # pragma: no cover - replaced with provider def compile_module(self, module_path:Path=None, module_name:str='test') -> ModuleType: # pragma: no cover - replaced with provider
""" """
Compiles generated python code to a module Compiles generated python code to a module
:return: :return:
""" """
pycode = self.serialize(**kwargs) pycode = self.serialize()
if module_path is not None: if module_path is not None:
module_path = Path(module_path) module_path = Path(module_path)
init_file = module_path / '__init__.py' init_file = module_path / '__init__.py'
@ -777,11 +783,11 @@ class NWBPydanticGenerator(PydanticGenerator):
ifile.write(' ') ifile.write(' ')
try: try:
return compile_python(pycode, module_path) return compile_python(pycode, module_path, module_name)
except NameError as e: except NameError as e:
raise e raise e
def compile_python(text_or_fn: str, package_path: Path = None) -> ModuleType: # pragma: no cover - replaced with provider def compile_python(text_or_fn: str, package_path: Path = None, module_name:str='test') -> ModuleType: # pragma: no cover - replaced with provider
""" """
Compile the text or file and return the resulting module Compile the text or file and return the resulting module
@param text_or_fn: Python text or file name that references python file @param text_or_fn: Python text or file name that references python file
@ -793,8 +799,9 @@ def compile_python(text_or_fn: str, package_path: Path = None) -> ModuleType: #
if package_path is None and python_txt != text_or_fn: if package_path is None and python_txt != text_or_fn:
package_path = Path(text_or_fn) package_path = Path(text_or_fn)
spec = compile(python_txt, '<string>', 'exec') spec = compile(python_txt, '<string>', 'exec')
module = ModuleType('test') module = ModuleType(module_name)
exec(spec, module.__dict__) exec(spec, module.__dict__)
sys.modules[module_name] = module
return module return module

View file

@ -1,8 +1,12 @@
import pytest import pytest
import os import os
from typing import NamedTuple
from linkml_runtime.dumpers import yaml_dumper
from nwb_linkml.io import schema as io from nwb_linkml.io import schema as io
from nwb_linkml.adapters.namespaces import NamespacesAdapter from nwb_linkml.adapters.namespaces import NamespacesAdapter
from linkml_runtime.linkml_model import SchemaDefinition, ClassDefinition, SlotDefinition, Prefix, TypeDefinition
import shutil import shutil
from pathlib import Path from pathlib import Path
@ -15,6 +19,30 @@ def tmp_output_dir() -> Path:
return path return path
@pytest.fixture(scope="function")
def tmp_output_dir_func(tmp_output_dir) -> Path:
    """
    Tmp output dir that gets cleared between every function.

    Cleans at the start rather than at cleanup in case the output is to be inspected.
    """
    # Use a function-specific subdirectory: the module-scoped fixture
    # ``tmp_output_dir_mod`` uses ``__tmp__``, and sharing that name would let
    # this fixture rmtree a directory the module-scoped fixture is still using
    # within the same session.
    subpath = tmp_output_dir / '__tmp_func__'
    if subpath.exists():
        shutil.rmtree(str(subpath))
    subpath.mkdir()
    return subpath
@pytest.fixture(scope="module")
def tmp_output_dir_mod(tmp_output_dir) -> Path:
    """
    Tmp output dir that gets cleared between every *module* (module-scoped).

    Cleans at the start rather than at cleanup in case the output is to be inspected.

    NOTE(review): this uses the same ``__tmp__`` subdirectory as the
    function-scoped variant above — mixing both fixtures in one session
    can clobber each other's output; confirm they are never used together.
    """
    subpath = tmp_output_dir / '__tmp__'
    if subpath.exists():
        shutil.rmtree(str(subpath))
    subpath.mkdir()
    return subpath
@pytest.fixture(autouse=True, scope='session') @pytest.fixture(autouse=True, scope='session')
def set_config_vars(tmp_output_dir): def set_config_vars(tmp_output_dir):
os.environ['NWB_LINKML_CACHE_DIR'] = str(tmp_output_dir) os.environ['NWB_LINKML_CACHE_DIR'] = str(tmp_output_dir)
@ -32,3 +60,183 @@ def nwb_core_fixture() -> NamespacesAdapter:
def data_dir() -> Path: def data_dir() -> Path:
path = Path(__file__).parent.resolve() / 'data' path = Path(__file__).parent.resolve() / 'data'
return path return path
class TestSchemas(NamedTuple):
    """
    Bundle of the three generated test schemas plus the paths each one is
    dumped to by the :func:`linkml_schema` fixture.

    NOTE(review): the ``Test`` prefix makes pytest attempt to collect this
    class as a test class; it will be skipped, but may emit a collection
    warning — consider renaming (e.g. ``SchemaFixture``) in a follow-up.
    """
    core: SchemaDefinition
    core_path: Path
    imported: SchemaDefinition
    imported_path: Path
    namespace: SchemaDefinition
    namespace_path: Path
@pytest.fixture(scope="module")
def linkml_schema(tmp_output_dir_mod) -> TestSchemas:
    """
    A test schema that includes

    - Two schemas, one importing from the other
    - Arraylike
    - Required/static "name" field
    - linkml metadata like tree_root
    - skipping classes

    The three schemas are built in memory, dumped to yaml under a
    ``test_schema`` directory inside the module-scoped tmp dir, and returned
    together with their paths.
    """
    test_schema_path = tmp_output_dir_mod / 'test_schema'
    test_schema_path.mkdir()

    core_path = test_schema_path / 'core.yaml'
    imported_path = test_schema_path / 'imported.yaml'
    namespace_path = test_schema_path / 'namespace.yaml'

    schema = TestSchemas(
        core_path=core_path,
        imported_path=imported_path,
        namespace_path=namespace_path,
        # main schema: imports "imported" and exercises the generator features
        core=SchemaDefinition(
            name="core",
            id="core",
            version="1.0.1",
            imports=["imported",'linkml:types'],
            default_prefix="core",
            prefixes={'linkml': Prefix('linkml','https://w3id.org/linkml')},
            description="Test core schema",
            classes=[
                ClassDefinition(
                    name="MainTopLevel",
                    description="The main class we are testing!",
                    is_a="MainThing",
                    tree_root=True,
                    attributes=[
                        # fixed identifier: required + ifabsent + equals_string
                        # should render as a frozen Literal field
                        SlotDefinition(
                            name="name",
                            description="A fixed property that should use Literal and be frozen",
                            range="string",
                            required=True,
                            ifabsent="string(toplevel)",
                            equals_string="toplevel",
                            identifier=True
                        ),
                        # points at the Arraylike companion class below
                        SlotDefinition(
                            name="array",
                            range="MainTopLevel__Array"
                        ),
                        SlotDefinition(
                            name="SkippableSlot",
                            description="A slot that was meant to be skipped!"
                        ),
                        SlotDefinition(
                            name="inline_dict",
                            description="This should be inlined as a dictionary despite this class having an identifier",
                            multivalued=True,
                            inlined=True,
                            inlined_as_list=False,
                            any_of=[{'range': 'OtherClass'}, {'range': 'StillAnotherClass'} ]
                        )
                    ]
                ),
                # Arraylike companion: dimensions x/y unbounded, z and a with
                # fixed cardinalities -- consumed by test_arraylike
                ClassDefinition(
                    name="MainTopLevel__Array",
                    description="Main class's array",
                    is_a="Arraylike",
                    attributes=[
                        SlotDefinition(
                            name="x",
                            range="numeric",
                            required=True
                        ),
                        SlotDefinition(
                            name="y",
                            range="numeric",
                            required=True
                        ),
                        SlotDefinition(
                            name="z",
                            range="numeric",
                            required=False,
                            maximum_cardinality=3,
                            minimum_cardinality=3
                        ),
                        SlotDefinition(
                            name="a",
                            range="numeric",
                            required=False,
                            minimum_cardinality=4,
                            maximum_cardinality=4
                        )
                    ]
                ),
                # target for the SKIP_CLASSES generator option (see test_skip)
                ClassDefinition(
                    name="skippable",
                    description="A class that lives to be skipped!",
                ),
                # two classes with identifiers, used by inline_dict's any_of
                ClassDefinition(
                    name="OtherClass",
                    description="Another class yno!",
                    attributes=[
                        SlotDefinition(
                            name="name",
                            range="string",
                            required=True,
                            identifier=True
                        )
                    ]
                ),
                ClassDefinition(
                    name="StillAnotherClass",
                    description="And yet another!",
                    attributes=[
                        SlotDefinition(
                            name="name",
                            range="string",
                            required=True,
                            identifier=True
                        )
                    ]
                )
            ],
            types=[
                TypeDefinition(
                    name="numeric",
                    typeof="float"
                )
            ]
        ),
        # schema imported by core: supplies MainThing and the abstract Arraylike
        imported=SchemaDefinition(
            name="imported",
            id="imported",
            version="1.4.5",
            default_prefix="core",
            imports=['linkml:types'],
            prefixes = {'linkml': Prefix('linkml', 'https://w3id.org/linkml')},
            classes = [
                ClassDefinition(
                    name="MainThing",
                    description="Class imported by our main thing class!",
                    attributes=[
                        SlotDefinition(
                            name="meta_slot",
                            range="string"
                        )
                    ]
                ),
                ClassDefinition(
                    name="Arraylike",
                    abstract=True
                )
            ]
        ),
        # namespace schema: defines no classes itself, flagged via the
        # `namespace` annotation so the generator re-exports all imports
        namespace=SchemaDefinition(
            name="namespace",
            id="namespace",
            version="1.1.1",
            default_prefix="namespace",
            annotations={'namespace': {'tag': 'namespace', 'value': 'True'}},
            description="A namespace package that should import all other classes",
            imports=['core', 'imported']
        )
    )

    yaml_dumper.dump(schema.core, schema.core_path)
    yaml_dumper.dump(schema.imported, schema.imported_path)
    yaml_dumper.dump(schema.namespace, schema.namespace_path)
    return schema

View file

@ -1,3 +1,11 @@
"""
Placeholder end-to-end tests for generating linkml translations and pydantic models.
Should be replaced with more specific unit and integration tests, but in place for now
to ensure that the basics of the whole thing operate -- not doing any actual data validation
here.
"""
import pdb import pdb
from pathlib import Path from pathlib import Path
from typing import Dict from typing import Dict

View file

@ -0,0 +1,270 @@
"""
Test custom features of the pydantic generator
Note that since this is largely a subclass, we don't test all of the functionality of the generator
because it's tested in the base linkml package.
"""
import pdb
import sys
import typing
import pytest
from typing import TypedDict, Optional
from types import ModuleType
import re
import numpy as np
from pydantic import BaseModel
from ..fixtures import tmp_output_dir, tmp_output_dir_mod, linkml_schema, TestSchemas
from nwb_linkml.generators.pydantic import NWBPydanticGenerator, compile_python
from nwb_linkml.types.ndarray import NDArrayMeta
class TestModules(TypedDict):
    """
    The three compiled modules produced from the test schemas, plus whether
    they were generated in ``split`` mode.

    NOTE(review): the ``Test`` prefix makes pytest attempt to collect this
    class; a TypedDict is skipped, but may emit a collection warning.
    """
    core: ModuleType
    imported: ModuleType
    namespace: ModuleType
    split: bool
def generate_and_import(linkml_schema:TestSchemas, split:bool, generator_kwargs:Optional[dict]=None) -> TestModules:
    """
    Generate pydantic models from the three test schemas, write the generated
    ``.py`` files alongside the yaml files, and import them as
    ``test_schema.core`` / ``test_schema.imported`` / ``test_schema.namespace``.

    Args:
        linkml_schema: fixture value carrying the schemas and their paths
        split: passed to the generator — whether imported classes are
            re-generated in each module (``False``) or imported (``True``)
        generator_kwargs: overrides/additions merged over the default
            generator arguments

    Returns:
        TestModules: the three compiled modules plus the ``split`` flag
    """
    if generator_kwargs is None:
        generator_kwargs = {}
    default_kwargs = {
        'split': split,
        'emit_metadata': True,
        'gen_slots': True,
        'pydantic_version': '2',
        **generator_kwargs
    }

    # serialize each schema and write the generated module next to its yaml
    for schema_path in (linkml_schema.core_path, linkml_schema.imported_path, linkml_schema.namespace_path):
        serialized = NWBPydanticGenerator(str(schema_path), **default_kwargs).serialize()
        schema_path.with_suffix('.py').write_text(serialized)

    # make the generated files importable as the `test_schema` package
    (linkml_schema.core_path.parent / '__init__.py').write_text(' ')
    package_root = str(linkml_schema.core_path.parents[1])
    if package_root not in sys.path:
        # guard against unbounded sys.path growth across repeated calls
        sys.path.append(package_root)

    core = compile_python(str(linkml_schema.core_path.with_suffix('.py')), module_name='test_schema.core')
    imported = compile_python(str(linkml_schema.imported_path.with_suffix('.py')), module_name='test_schema.imported')
    namespace = compile_python(str(linkml_schema.namespace_path.with_suffix('.py')),
                               module_name='test_schema.namespace')

    return TestModules(
        core=core,
        imported=imported,
        namespace=namespace,
        split=split
    )
@pytest.fixture(scope="module", params=['split', 'unsplit'])
def imported_schema(linkml_schema, request) -> TestModules:
    """
    Convenience fixture for testing non-core generator features without needing to re-generate and
    import every time.
    """
    split = request.param == 'split'
    yield generate_and_import(linkml_schema, split)

    # teardown: drop the generated modules so the next param regenerates cleanly
    for modname in ('test_schema.core', 'test_schema.imported', 'test_schema.namespace'):
        del sys.modules[modname]
def _model_correctness(modules:TestModules):
    """
    Shared assertions for model correctness.

    Only tests very basic things like type and existence;
    more specific tests are in their own test functions!
    """
    expected = (
        (modules['core'], ('MainTopLevel', 'Skippable', 'OtherClass', 'StillAnotherClass')),
        (modules['imported'], ('MainThing',)),
    )
    for module, classnames in expected:
        for classname in classnames:
            assert issubclass(getattr(module, classname), BaseModel)
def test_generate(linkml_schema):
    """
    Base case: we can generate pydantic models from linkml schema.

    Tests basic functionality of the serializer, including:

    - serialization
    - compilation (loading as a python module)
    - existence and correctness of attributes
    """
    modules = generate_and_import(linkml_schema, split=False)

    for key in ('core', 'imported', 'namespace'):
        assert isinstance(modules[key], ModuleType)
    _model_correctness(modules)

    # unsplit modules should have all the classes present, even if they aren't defined in it
    assert modules['core'].MainThing.__module__ == 'test_schema.core'
    assert issubclass(modules['core'].MainTopLevel, modules['core'].MainThing)

    # clean up so subsequent tests regenerate from scratch
    for modname in ('test_schema.core', 'test_schema.imported', 'test_schema.namespace'):
        del sys.modules[modname]
def test_generate_split(linkml_schema):
    """
    We can generate schema split into separate files.
    """
    modules = generate_and_import(linkml_schema, split=True)

    for key in ('core', 'imported', 'namespace'):
        assert isinstance(modules[key], ModuleType)
    _model_correctness(modules)

    # split modules have classes defined once and imported
    assert modules['core'].MainThing.__module__ == 'test_schema.imported'
    # can't assert subclass here because of the weird way relative imports work
    # when we don't actually import using normal python import machinery
    assert modules['core'].MainTopLevel.__mro__[1].__module__ == 'test_schema.imported'

    # clean up so subsequent tests regenerate from scratch
    for modname in ('test_schema.core', 'test_schema.imported', 'test_schema.namespace'):
        del sys.modules[modname]
def test_versions(linkml_schema):
    """
    We can use explicit versions that import from relative paths generated by
    SchemaProvider
    """
    # here all we do is check that we have the correct relative import, since we test
    # the actual generation of these path structures elsewhere in the provider tests
    generator = NWBPydanticGenerator(
        str(linkml_schema.core_path),
        versions={'imported': 'v4.2.0'}
    )
    core_str = generator.serialize()

    # the import should be like
    # from ...imported.v4_2_0.imported import (
    #     MainThing
    # )
    matches = re.findall(r'from \.\.\.imported\.v4_2_0.*?MainThing.*?\)', core_str, flags=re.DOTALL)
    assert len(matches) == 1
def test_arraylike(imported_schema):
    """
    Arraylike classes are converted to slots that specify nptyping arrays

    array: Optional[Union[
        NDArray[Shape["* x, * y"], Number],
        NDArray[Shape["* x, * y, 3 z"], Number],
        NDArray[Shape["* x, * y, 3 z, 4 a"], Number]
    ]] = Field(None)
    """
    # check that we have gotten an NDArray annotation and its shape is correct
    array = imported_schema['core'].MainTopLevel.model_fields['array'].annotation
    args = typing.get_args(array)
    # NOTE(review): `shape` is never compared against the annotation — the loop
    # only checks NDArrayMeta type, presence of shape args, and dtype; consider
    # asserting the Shape string actually equals `shape`.
    for i, shape in enumerate(('* x, * y', '* x, * y, 3 z', '* x, * y, 3 z, 4 a')):
        assert isinstance(args[i], NDArrayMeta)
        assert args[i].__args__[0].__args__
        assert args[i].__args__[1] == np.number

    # we shouldn't have an actual class for the array
    assert not hasattr(imported_schema['core'], 'MainTopLevel__Array')
    assert not hasattr(imported_schema['core'], 'MainTopLevelArray')
def test_inject_fields(imported_schema):
    """
    Our root model should have the special fields we injected
    """
    base = imported_schema['core'].ConfiguredBaseModel
    for injected_field in ('hdf5_path', 'object_id'):
        assert injected_field in base.model_fields
def test_linkml_meta(imported_schema):
    """
    We should be able to store some linkml metadata with our classes
    """
    meta = imported_schema['core'].LinkML_Meta
    assert 'tree_root' in meta.model_fields
    # identity checks rather than `== True` / `== False` (flake8 E712):
    # equality would also pass for truthy/falsy non-bools like 1 and 0
    assert imported_schema['core'].MainTopLevel.linkml_meta.default.tree_root is True
    assert imported_schema['core'].OtherClass.linkml_meta.default.tree_root is False
def test_skip(linkml_schema):
    """
    We can skip slots and classes
    """
    skip_kwargs = {
        'SKIP_SLOTS': ('SkippableSlot',),
        'SKIP_CLASSES': ('Skippable', 'skippable')
    }
    modules = generate_and_import(linkml_schema, split=False, generator_kwargs=skip_kwargs)

    assert not hasattr(modules['core'], 'Skippable')
    assert 'SkippableSlot' not in modules['core'].MainTopLevel.model_fields
def test_inline_with_identifier(imported_schema):
    """
    By default, if a class has an identifier attribute, it is inlined
    as a string rather than its class. We overrode that to be able to make dictionaries of collections
    """
    main = imported_schema['core'].MainTopLevel
    inline = main.model_fields['inline_dict'].annotation
    # annotation presumably looks like
    # Optional[Dict[str, Union[OtherClass, StillAnotherClass]]] — TODO confirm
    assert typing.get_origin(typing.get_args(inline)[0]) == dict
    # god i hate pythons typing interface
    # peel Optional -> Dict -> value Union to recover the two model classes
    otherclass, stillanother = typing.get_args(typing.get_args(typing.get_args(inline)[0])[1])
    assert otherclass is imported_schema['core'].OtherClass
    assert stillanother is imported_schema['core'].StillAnotherClass
def test_namespace(imported_schema):
    """
    Namespace schema imports all classes from the other schemas.
    """
    ns = imported_schema['namespace']

    # class name -> module it should be defined in when generated split
    expected_modules = {
        'MainThing': 'test_schema.imported',
        'Arraylike': 'test_schema.imported',
        'MainTopLevel': 'test_schema.core',
        'Skippable': 'test_schema.core',
        'OtherClass': 'test_schema.core',
        'StillAnotherClass': 'test_schema.core',
    }
    for classname, modname in expected_modules.items():
        assert hasattr(ns, classname)
        if imported_schema['split']:
            assert getattr(ns, classname).__module__ == modname