diff --git a/README.md b/README.md index 819566a..d4c763a 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # nwb-linkml [![Documentation Status](https://readthedocs.org/projects/nwb-linkml/badge/?version=latest)](https://nwb-linkml.readthedocs.io/en/latest/?badge=latest) - +[![Coverage Status](https://coveralls.io/repos/github/p2p-ld/nwb-linkml/badge.svg)](https://coveralls.io/github/p2p-ld/nwb-linkml) Translating NWB schema language to linkml diff --git a/nwb_linkml/src/nwb_linkml/generators/pydantic.py b/nwb_linkml/src/nwb_linkml/generators/pydantic.py index 4513178..cf66576 100644 --- a/nwb_linkml/src/nwb_linkml/generators/pydantic.py +++ b/nwb_linkml/src/nwb_linkml/generators/pydantic.py @@ -15,6 +15,13 @@ The `serialize` method - Generates linkML Classes - `generate_enums` runs first +.. note:: + + This module is heinous. We have mostly copied and pasted the existing :class:`linkml.generators.PydanticGenerator` + and overridden what we need to make this work for NWB, but the source is... + a little messy. We will be tidying this up and trying to pull changes upstream, + but for now this is just our hacky little secret. 
+ """ import pdb from dataclasses import dataclass, field @@ -218,11 +225,11 @@ class {{ c.name }} @dataclass class NWBPydanticGenerator(PydanticGenerator): - SKIP_ENUM=('FlatDType',) + SKIP_ENUM:Tuple[str]=('FlatDType',) # SKIP_SLOTS=('VectorData',) - SKIP_SLOTS=('',) - SKIP_CLASSES=('',) - INJECTED_FIELDS = ( + SKIP_SLOTS:Tuple[str]=('',) + SKIP_CLASSES:Tuple[str]=('',) + INJECTED_FIELDS:Tuple[str] = ( 'hdf5_path: Optional[str] = Field(None, description="The absolute path that this object is stored in an NWB file")', 'object_id: Optional[str] = Field(None, description="Unique UUID for each object")' ) @@ -231,7 +238,7 @@ class NWBPydanticGenerator(PydanticGenerator): schema_map:Optional[Dict[str, SchemaDefinition]]=None versions:dict = None """See :meth:`.LinkMLProvider.build` for usage - a list of specific versions to import from""" - + pydantic_version = "2" def _locate_imports( self, @@ -274,13 +281,12 @@ class NWBPydanticGenerator(PydanticGenerator): def _get_namespace_imports(self, sv:SchemaView) -> Dict[str, List[str]]: """ Get imports for namespace packages. For these we import all - the tree_root classes, ie. all the classes that are top-level classes rather than + the tree_root classes, ie. 
all the classes that are top-level classes rather than nested classes """ all_classes = sv.all_classes(imports=True) needed_classes = [] for clsname, cls in all_classes.items(): - #if cls.tree_root: if cls.is_a != 'Arraylike': needed_classes.append(clsname) @@ -764,12 +770,12 @@ class NWBPydanticGenerator(PydanticGenerator): ) return code - def compile_module(self, module_path:Path=None, **kwargs) -> ModuleType: # pragma: no cover - replaced with provider + def compile_module(self, module_path:Path=None, module_name:str='test') -> ModuleType: # pragma: no cover - replaced with provider """ Compiles generated python code to a module :return: """ - pycode = self.serialize(**kwargs) + pycode = self.serialize() if module_path is not None: module_path = Path(module_path) init_file = module_path / '__init__.py' @@ -777,11 +783,11 @@ class NWBPydanticGenerator(PydanticGenerator): ifile.write(' ') try: - return compile_python(pycode, module_path) + return compile_python(pycode, module_path, module_name) except NameError as e: raise e -def compile_python(text_or_fn: str, package_path: Path = None) -> ModuleType: # pragma: no cover - replaced with provider +def compile_python(text_or_fn: str, package_path: Path = None, module_name:str='test') -> ModuleType: # pragma: no cover - replaced with provider """ Compile the text or file and return the resulting module @param text_or_fn: Python text or file name that references python file @@ -793,8 +799,9 @@ def compile_python(text_or_fn: str, package_path: Path = None) -> ModuleType: # if package_path is None and python_txt != text_or_fn: package_path = Path(text_or_fn) spec = compile(python_txt, '', 'exec') - module = ModuleType('test') + module = ModuleType(module_name) exec(spec, module.__dict__) + sys.modules[module_name] = module return module diff --git a/nwb_linkml/tests/fixtures.py b/nwb_linkml/tests/fixtures.py index ca5e605..68be6e2 100644 --- a/nwb_linkml/tests/fixtures.py +++ b/nwb_linkml/tests/fixtures.py @@ -1,8 +1,12 
@@ import pytest import os +from typing import NamedTuple + +from linkml_runtime.dumpers import yaml_dumper from nwb_linkml.io import schema as io from nwb_linkml.adapters.namespaces import NamespacesAdapter +from linkml_runtime.linkml_model import SchemaDefinition, ClassDefinition, SlotDefinition, Prefix, TypeDefinition import shutil from pathlib import Path @@ -15,6 +19,30 @@ def tmp_output_dir() -> Path: return path +@pytest.fixture(scope="function") +def tmp_output_dir_func(tmp_output_dir) -> Path: + """ + tmp output dir that gets cleared between every function + cleans at the start rather than at cleanup in case the output is to be inspected + """ + subpath = tmp_output_dir / '__tmp__' + if subpath.exists(): + shutil.rmtree(str(subpath)) + subpath.mkdir() + return subpath + +@pytest.fixture(scope="module") +def tmp_output_dir_mod(tmp_output_dir) -> Path: + """ + tmp output dir that gets cleared between every module + cleans at the start rather than at cleanup in case the output is to be inspected + """ + subpath = tmp_output_dir / '__tmp__' + if subpath.exists(): + shutil.rmtree(str(subpath)) + subpath.mkdir() + return subpath + @pytest.fixture(autouse=True, scope='session') def set_config_vars(tmp_output_dir): os.environ['NWB_LINKML_CACHE_DIR'] = str(tmp_output_dir) @@ -32,3 +60,183 @@ def nwb_core_fixture() -> NamespacesAdapter: def data_dir() -> Path: path = Path(__file__).parent.resolve() / 'data' return path + +class TestSchemas(NamedTuple): + core: SchemaDefinition + core_path: Path + imported: SchemaDefinition + imported_path: Path + namespace: SchemaDefinition + namespace_path: Path + +@pytest.fixture(scope="module") +def linkml_schema(tmp_output_dir_mod) -> TestSchemas: + """ + A test schema that includes + + - Two schemas, one importing from the other + - Arraylike + - Required/static "name" field + - linkml metadata like tree_root + - skipping classes + """ + test_schema_path = tmp_output_dir_mod / 'test_schema' + test_schema_path.mkdir() + 
core_path = test_schema_path / 'core.yaml' + imported_path = test_schema_path / 'imported.yaml' + namespace_path = test_schema_path / 'namespace.yaml' + + schema = TestSchemas( + core_path=core_path, + imported_path=imported_path, + namespace_path=namespace_path, + core=SchemaDefinition( + name="core", + id="core", + version="1.0.1", + imports=["imported",'linkml:types'], + default_prefix="core", + prefixes={'linkml': Prefix('linkml','https://w3id.org/linkml')}, + description="Test core schema", + classes=[ + ClassDefinition( + name="MainTopLevel", + description="The main class we are testing!", + is_a="MainThing", + tree_root=True, + attributes=[ + SlotDefinition( + name="name", + description="A fixed property that should use Literal and be frozen", + range="string", + required=True, + ifabsent="string(toplevel)", + equals_string="toplevel", + identifier=True + ), + SlotDefinition( + name="array", + range="MainTopLevel__Array" + ), + SlotDefinition( + name="SkippableSlot", + description="A slot that was meant to be skipped!" 
+ ), + SlotDefinition( + name="inline_dict", + description="This should be inlined as a dictionary despite this class having an identifier", + multivalued=True, + inlined=True, + inlined_as_list=False, + any_of=[{'range': 'OtherClass'}, {'range': 'StillAnotherClass'} ] + ) + ] + ), + ClassDefinition( + name="MainTopLevel__Array", + description="Main class's array", + is_a="Arraylike", + attributes=[ + SlotDefinition( + name="x", + range="numeric", + required=True + ), + SlotDefinition( + name="y", + range="numeric", + required=True + ), + SlotDefinition( + name="z", + range="numeric", + required=False, + maximum_cardinality=3, + minimum_cardinality=3 + ), + SlotDefinition( + name="a", + range="numeric", + required=False, + minimum_cardinality=4, + maximum_cardinality=4 + ) + ] + ), + ClassDefinition( + name="skippable", + description="A class that lives to be skipped!", + + ), + ClassDefinition( + name="OtherClass", + description="Another class yno!", + attributes=[ + SlotDefinition( + name="name", + range="string", + required=True, + identifier=True + ) + ] + ), + ClassDefinition( + name="StillAnotherClass", + description="And yet another!", + attributes=[ + SlotDefinition( + name="name", + range="string", + required=True, + identifier=True + ) + ] + ) + ], + types=[ + TypeDefinition( + name="numeric", + typeof="float" + ) + ] + ), + imported=SchemaDefinition( + name="imported", + id="imported", + version="1.4.5", + default_prefix="core", + imports=['linkml:types'], + prefixes = {'linkml': Prefix('linkml', 'https://w3id.org/linkml')}, + classes = [ + ClassDefinition( + name="MainThing", + description="Class imported by our main thing class!", + attributes=[ + SlotDefinition( + name="meta_slot", + range="string" + ) + ] + ), + ClassDefinition( + name="Arraylike", + abstract=True + ) + ] + ), + namespace=SchemaDefinition( + name="namespace", + id="namespace", + version="1.1.1", + default_prefix="namespace", + annotations={'namespace': {'tag': 'namespace', 'value': 
'True'}}, + description="A namespace package that should import all other classes", + imports=['core', 'imported'] + ) + ) + yaml_dumper.dump(schema.core, schema.core_path) + yaml_dumper.dump(schema.imported, schema.imported_path) + yaml_dumper.dump(schema.namespace, schema.namespace_path) + return schema + diff --git a/nwb_linkml/tests/test_generate.py b/nwb_linkml/tests/test_generate.py index d950291..820039e 100644 --- a/nwb_linkml/tests/test_generate.py +++ b/nwb_linkml/tests/test_generate.py @@ -1,3 +1,11 @@ +""" +Placeholder end-to-end tests for generating linkml translations and pydantic models. + +Should be replaced with more specific unit and integration tests, but in place for now +to ensure that the basics of the whole thing operate -- not doing any actual data validation +here. +""" + import pdb from pathlib import Path from typing import Dict diff --git a/nwb_linkml/tests/test_generators/__init__.py b/nwb_linkml/tests/test_generators/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nwb_linkml/tests/test_generators/test_generator_pydantic.py b/nwb_linkml/tests/test_generators/test_generator_pydantic.py new file mode 100644 index 0000000..58af9e3 --- /dev/null +++ b/nwb_linkml/tests/test_generators/test_generator_pydantic.py @@ -0,0 +1,270 @@ +""" +Test custom features of the pydantic generator + +Note that since this is largely a subclass, we don't test all of the functionality of the generator +because it's tested in the base linkml package. 
+""" +import pdb +import sys +import typing + +import pytest +from typing import TypedDict, Optional +from types import ModuleType +import re +import numpy as np + +from pydantic import BaseModel + +from ..fixtures import tmp_output_dir, tmp_output_dir_mod, linkml_schema, TestSchemas + +from nwb_linkml.generators.pydantic import NWBPydanticGenerator, compile_python +from nwb_linkml.types.ndarray import NDArrayMeta + +class TestModules(TypedDict): + core: ModuleType + imported: ModuleType + namespace: ModuleType + split: bool + + +def generate_and_import(linkml_schema:TestSchemas, split:bool, generator_kwargs:Optional[dict]=None) -> TestModules: + if generator_kwargs is None: + generator_kwargs = {} + default_kwargs = { + 'split': split, + 'emit_metadata': True, + 'gen_slots': True, + 'pydantic_version': '2', + **generator_kwargs + } + + core_str = NWBPydanticGenerator( + str(linkml_schema.core_path), + **default_kwargs + ).serialize() + imported_str = NWBPydanticGenerator( + str(linkml_schema.imported_path), + **default_kwargs + ).serialize() + namespace_str = NWBPydanticGenerator( + str(linkml_schema.namespace_path), + **default_kwargs + ).serialize() + + with open(linkml_schema.core_path.with_suffix('.py'), 'w') as pfile: + pfile.write(core_str) + with open(linkml_schema.imported_path.with_suffix('.py'), 'w') as pfile: + pfile.write(imported_str) + with open(linkml_schema.namespace_path.with_suffix('.py'), 'w') as pfile: + pfile.write(namespace_str) + with open(linkml_schema.core_path.parent / '__init__.py', 'w') as pfile: + pfile.write(' ') + + sys.path.append(str(linkml_schema.core_path.parents[1])) + + core = compile_python(str(linkml_schema.core_path.with_suffix('.py')), module_name='test_schema.core') + imported = compile_python(str(linkml_schema.imported_path.with_suffix('.py')), module_name='test_schema.imported') + namespace = compile_python(str(linkml_schema.namespace_path.with_suffix('.py')), + module_name='test_schema.namespace') + + return 
TestModules( + core=core, + imported=imported, + namespace=namespace, + split=split + ) + +@pytest.fixture(scope="module", params=['split', 'unsplit']) +def imported_schema(linkml_schema, request) -> TestModules: + """ + Convenience fixture for testing non-core generator features without needing to re-generate and + import every time. + """ + if request.param == 'split': + split = True + else: + split = False + + yield generate_and_import(linkml_schema, split) + + del sys.modules['test_schema.core'] + del sys.modules['test_schema.imported'] + del sys.modules['test_schema.namespace'] + + + + + +def _model_correctness(modules:TestModules): + """ + Shared assertions for model correctness. + Only tests very basic things like type and existence, + more specific tests are in their own test functions! + """ + assert issubclass(modules['core'].MainTopLevel, BaseModel) + assert issubclass(modules['core'].Skippable, BaseModel) + assert issubclass(modules['core'].OtherClass, BaseModel) + assert issubclass(modules['core'].StillAnotherClass, BaseModel) + assert issubclass(modules['imported'].MainThing, BaseModel) + + +def test_generate(linkml_schema): + """ + Base case, we can generate pydantic models from linkml schema + + Tests basic functionality of serializer including + + - serialization + - compilation (loading as a python model) + - existence and correctness of attributes + """ + modules = generate_and_import(linkml_schema, split=False) + + assert isinstance(modules['core'], ModuleType) + assert isinstance(modules['imported'], ModuleType) + assert isinstance(modules['namespace'], ModuleType) + _model_correctness(modules) + + # unsplit modules should have all the classes present, even if they aren't defined in it + assert modules['core'].MainThing.__module__ == 'test_schema.core' + assert issubclass(modules['core'].MainTopLevel, modules['core'].MainThing) + del sys.modules['test_schema.core'] + del sys.modules['test_schema.imported'] + del 
sys.modules['test_schema.namespace'] + + +def test_generate_split(linkml_schema): + """ + We can generate schema split into separate files + """ + modules = generate_and_import(linkml_schema, split=True) + + assert isinstance(modules['core'], ModuleType) + assert isinstance(modules['imported'], ModuleType) + assert isinstance(modules['namespace'], ModuleType) + _model_correctness(modules) + + # split modules have classes defined once and imported + assert modules['core'].MainThing.__module__ == 'test_schema.imported' + # can't assert subclass here because of the weird way relative imports work + # when we don't actually import using normal python import machinery + assert modules['core'].MainTopLevel.__mro__[1].__module__ == 'test_schema.imported' + del sys.modules['test_schema.core'] + del sys.modules['test_schema.imported'] + del sys.modules['test_schema.namespace'] + +def test_versions(linkml_schema): + """ + We can use explicit versions that import from relative paths generated by + SchemaProvider + """ + # here all we do is check that we have the correct relative import, since we test + # the actual generation of these path structures elsewhere in the provider tests + + core_str = NWBPydanticGenerator( + str(linkml_schema.core_path), + versions={'imported': 'v4.2.0'} + ).serialize() + + # the import should be like + # from ...imported.v4_2_0.imported import ( + # MainThing + # ) + match = re.findall(r'from \.\.\.imported\.v4_2_0.*?MainThing.*?\)', core_str, flags=re.DOTALL) + assert len(match) == 1 + + +def test_arraylike(imported_schema): + """ + Arraylike classes are converted to slots that specify nptyping arrays + + array: Optional[Union[ + NDArray[Shape["* x, * y"], Number], + NDArray[Shape["* x, * y, 3 z"], Number], + NDArray[Shape["* x, * y, 3 z, 4 a"], Number] + ]] = Field(None) + """ + # check that we have gotten an NDArray annotation and its shape is correct + array = imported_schema['core'].MainTopLevel.model_fields['array'].annotation + args = 
typing.get_args(array) + for i, shape in enumerate(('* x, * y', '* x, * y, 3 z', '* x, * y, 3 z, 4 a')): + assert isinstance(args[i], NDArrayMeta) + assert args[i].__args__[0].__args__ + assert args[i].__args__[1] == np.number + + # we shouldn't have an actual class for the array + assert not hasattr(imported_schema['core'], 'MainTopLevel__Array') + assert not hasattr(imported_schema['core'], 'MainTopLevelArray') + + +def test_inject_fields(imported_schema): + """ + Our root model should have the special fields we injected + """ + base = imported_schema['core'].ConfiguredBaseModel + assert 'hdf5_path' in base.model_fields + assert 'object_id' in base.model_fields + + + +def test_linkml_meta(imported_schema): + """ + We should be able to store some linkml metadata with our classes + """ + meta = imported_schema['core'].LinkML_Meta + assert 'tree_root' in meta.model_fields + assert imported_schema['core'].MainTopLevel.linkml_meta.default.tree_root == True + assert imported_schema['core'].OtherClass.linkml_meta.default.tree_root == False + + + +def test_skip(linkml_schema): + """ + We can skip slots and classes + """ + modules = generate_and_import( + linkml_schema, split=False, + generator_kwargs={ + 'SKIP_SLOTS': ('SkippableSlot',), + 'SKIP_CLASSES': ('Skippable', 'skippable') + }) + assert not hasattr(modules['core'], 'Skippable') + assert 'SkippableSlot' not in modules['core'].MainTopLevel.model_fields + + + +def test_inline_with_identifier(imported_schema): + """ + By default, if a class has an identifier attribute, it is inlined + as a string rather than its class. 
We overrode that to be able to make dictionaries of collections + """ + main = imported_schema['core'].MainTopLevel + inline = main.model_fields['inline_dict'].annotation + assert typing.get_origin(typing.get_args(inline)[0]) == dict + # god i hate pythons typing interface + otherclass, stillanother = typing.get_args(typing.get_args(typing.get_args(inline)[0])[1]) + assert otherclass is imported_schema['core'].OtherClass + assert stillanother is imported_schema['core'].StillAnotherClass + + + +def test_namespace(imported_schema): + """ + Namespace schema imports all classes from the other schema + Returns: + + """ + ns = imported_schema['namespace'] + + for classname, modname in ( + ('MainThing', 'test_schema.imported'), + ('Arraylike','test_schema.imported'), + ('MainTopLevel','test_schema.core'), + ('Skippable','test_schema.core'), + ('OtherClass','test_schema.core'), + ('StillAnotherClass', 'test_schema.core') + ): + assert hasattr(ns, classname) + if imported_schema['split']: + assert getattr(ns, classname).__module__ == modname