From 2e87fa055677ae90a33e618a36e124f6b86131c1 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Fri, 8 Sep 2023 19:46:42 -0700 Subject: [PATCH] working generation from provider --- .../src/nwb_linkml/adapters/namespaces.py | 18 +- nwb_linkml/src/nwb_linkml/adapters/schema.py | 2 +- .../src/nwb_linkml/generators/pydantic.py | 13 +- nwb_linkml/src/nwb_linkml/maps/naming.py | 21 +- nwb_linkml/src/nwb_linkml/providers/schema.py | 223 +++++++++++------- nwb_linkml/tests/test_provider.py | 8 - nwb_linkml/tests/test_providers/__init__.py | 0 .../test_provider_git.py} | 0 .../test_providers/test_provider_schema.py | 17 ++ 9 files changed, 199 insertions(+), 103 deletions(-) delete mode 100644 nwb_linkml/tests/test_provider.py create mode 100644 nwb_linkml/tests/test_providers/__init__.py rename nwb_linkml/tests/{test_io/test_io_git.py => test_providers/test_provider_git.py} (100%) create mode 100644 nwb_linkml/tests/test_providers/test_provider_schema.py diff --git a/nwb_linkml/src/nwb_linkml/adapters/namespaces.py b/nwb_linkml/src/nwb_linkml/adapters/namespaces.py index 96b81a2..18df045 100644 --- a/nwb_linkml/src/nwb_linkml/adapters/namespaces.py +++ b/nwb_linkml/src/nwb_linkml/adapters/namespaces.py @@ -13,6 +13,7 @@ from pprint import pformat from linkml_runtime.linkml_model import SchemaDefinition from linkml_runtime.dumpers import yaml_dumper from time import sleep +from copy import copy from nwb_schema_language import Namespaces @@ -76,7 +77,6 @@ class NamespacesAdapter(Adapter): sch_result += sch.build() if progress is not None: progress.update(sch.namespace, advance=1) - sleep(1) # recursive step if not skip_imports: @@ -84,12 +84,19 @@ class NamespacesAdapter(Adapter): imported_build = imported.build(progress=progress) sch_result += imported_build - # add in monkeypatch nwb types - sch_result.schemas.append(NwbLangSchema) # now generate the top-level namespaces that import everything for ns in self.namespaces.namespaces: + + # add in monkeypatch nwb types + nwb_lang = copy(NwbLangSchema) + lang_schema_name = '.'.join([ns.name, 'nwb.language']) + nwb_lang.name = lang_schema_name + sch_result.schemas.append(nwb_lang) + ns_schemas = [sch.name for sch in self.schemas if sch.namespace == ns.name] + ns_schemas.append(lang_schema_name) + # also add imports bc, well, we need them if not skip_imports: ns_schemas.extend([ns.name for imported in self.imported for ns in imported.namespaces.namespaces]) @@ -179,6 +186,11 @@ class NamespacesAdapter(Adapter): """ Populate the imports that are needed for each schema file + This function adds a string version of imported schema assuming the + generated schema will live in the same directory. If the path to + the imported schema needs to be adjusted, that should happen elsewhere + (eg in :class:`.LinkMLProvider`) because we shouldn't know about + directory structure or anything like that here. """ for sch in self.schemas: for needs in sch.needed_imports: diff --git a/nwb_linkml/src/nwb_linkml/adapters/schema.py b/nwb_linkml/src/nwb_linkml/adapters/schema.py index a13961d..b676ae4 100644 --- a/nwb_linkml/src/nwb_linkml/adapters/schema.py +++ b/nwb_linkml/src/nwb_linkml/adapters/schema.py @@ -93,7 +93,7 @@ class SchemaAdapter(Adapter): types=res.types ) # every schema needs the language elements - sch.imports.append('nwb.language') + sch.imports.append('.'.join([self.namespace, 'nwb.language'])) return BuildResult(schemas=[sch]) def split_subclasses(self, classes: BuildResult) -> BuildResult: diff --git a/nwb_linkml/src/nwb_linkml/generators/pydantic.py b/nwb_linkml/src/nwb_linkml/generators/pydantic.py index bbefb37..f90d60d 100644 --- a/nwb_linkml/src/nwb_linkml/generators/pydantic.py +++ b/nwb_linkml/src/nwb_linkml/generators/pydantic.py @@ -17,7 +17,7 @@ The `serialize` method """ import pdb -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path from typing import List, Dict, Set, Tuple, Optional, TypedDict import os, sys @@ -193,8 +193,8 @@ class NWBPydanticGenerator(PydanticGenerator): SKIP_CLASSES=('',) # SKIP_CLASSES=('VectorData','VectorIndex') split:bool=True - schema_map:Dict[str, SchemaDefinition]=None - versions:List[dict] = None + schema_map:Optional[Dict[str, SchemaDefinition]]=None + versions:dict = None """See :meth:`.LinkMLProvider.build` for usage - a list of specific versions to import from""" @@ -220,8 +220,8 @@ class NWBPydanticGenerator(PydanticGenerator): if module_name == self.schema.name: continue - if self.versions and module_name in [v['name'] for v in self.versions]: - version = version_module_case([v['version'] for v in self.versions if v['name'] == module_name][0]) + if self.versions and module_name in self.versions: + version = version_module_case(self.versions[module_name]) local_mod_name = '....' + module_case(module_name) + '.' + version + '.' + 'namespace' else: @@ -537,7 +537,8 @@ class NWBPydanticGenerator(PydanticGenerator): sv: SchemaView sv = self.schemaview - sv.schema_map = self.schema_map + if self.schema_map is not None: + sv.schema_map = self.schema_map schema = sv.schema pyschema = SchemaDefinition( id=schema.id, diff --git a/nwb_linkml/src/nwb_linkml/maps/naming.py b/nwb_linkml/src/nwb_linkml/maps/naming.py index 9757bce..c95f8ad 100644 --- a/nwb_linkml/src/nwb_linkml/maps/naming.py +++ b/nwb_linkml/src/nwb_linkml/maps/naming.py @@ -1,3 +1,4 @@ +import pdb import re from pathlib import Path @@ -44,9 +45,19 @@ def relative_path(target: Path, origin: Path): References: - https://stackoverflow.com/a/71874881 """ - try: - return Path(target).resolve().relative_to(Path(origin).resolve()) - except ValueError as e: # target does not start with origin - # recursion with origin (eventually origin is root so try will succeed) - return Path('..').joinpath(relative_path(target, Path(origin).parent)) + def _relative_path(target:Path, origin:Path): + try: + return Path(target).resolve().relative_to(Path(origin).resolve()) + except ValueError as e: # target does not start with origin + # recursion with origin (eventually origin is root so try will succeed) + return Path('..').joinpath(_relative_path(target, Path(origin).parent)) + + try: + successful = Path(target).resolve().relative_to(Path(origin).resolve()) + return successful + except ValueError as e: # target does not start with origin + # recursion with origin (eventually origin is root so try will succeed) + relative = Path('..').joinpath(_relative_path(target, Path(origin).parent)) + # remove the first '..' because this thing freaking double counts + return Path(*relative.parts[1:]) diff --git a/nwb_linkml/src/nwb_linkml/providers/schema.py b/nwb_linkml/src/nwb_linkml/providers/schema.py index 8f9ca01..9e51c43 100644 --- a/nwb_linkml/src/nwb_linkml/providers/schema.py +++ b/nwb_linkml/src/nwb_linkml/providers/schema.py @@ -14,11 +14,13 @@ Relationship to other modules: """ import pdb from typing import Dict, TypedDict, List, Optional, Literal, TypeVar, Any, Dict +from types import ModuleType from pathlib import Path import os from abc import abstractmethod, ABC import warnings import importlib +import sys from linkml_runtime.linkml_model import SchemaDefinition, SchemaDefinitionName from linkml_runtime.dumpers import yaml_dumper @@ -34,9 +36,6 @@ from nwb_linkml.generators.pydantic import NWBPydanticGenerator from nwb_linkml.providers.git import DEFAULT_REPOS from nwb_linkml.ui import AdapterProgress -class NamespaceVersion(TypedDict): - namespace: str - version: str P = TypeVar('P') @@ -112,14 +111,16 @@ class Provider(ABC): if version is not None: version_path = namespace_path / version_module_case(version) - version_path.mkdir(exist_ok=True, parents=True) + #version_path.mkdir(exist_ok=True, parents=True) else: # or find the most recently built one versions = sorted(namespace_path.iterdir(), key=os.path.getmtime) + versions = [v for v in versions if v.is_dir() and v.name not in ('__pycache__')] if len(versions) == 0: raise FileNotFoundError('No version provided, and no existing schema found') version_path = versions[-1] + return version_path @@ -180,20 +181,21 @@ class LinkMLProvider(Provider): def build( self, ns_adapter: adapters.NamespacesAdapter, - versions: Optional[List[NamespaceVersion]] = None, + versions: Optional[dict] = None, dump: bool = True, ) -> Dict[str | SchemaDefinitionName, LinkMLSchemaBuild]: """ Arguments: namespaces (:class:`.NamespacesAdapter`): Adapter (populated with any necessary imported namespaces) to build - versions (List[NamespaceVersion]): List of specific versions to use - for cross-namespace imports. If none is provided, use the most recent version + versions (dict): Dict of specific versions to use + for cross-namespace imports. as ``{'namespace': 'version'}`` + If none is provided, use the most recent version available. dump (bool): If ``True`` (default), dump generated schema to YAML. otherwise just return """ - self._find_imports(ns_adapter, versions, populate=True) + #self._find_imports(ns_adapter, versions, populate=True) if self.verbose: progress = AdapterProgress(ns_adapter) #progress.start() @@ -212,16 +214,22 @@ class LinkMLProvider(Provider): build_result = {} namespace_sch = [sch for sch in built.schemas if 'namespace' in sch.annotations.keys()] + namespace_names = [sch.name for sch in namespace_sch] for ns_linkml in namespace_sch: version = ns_adapter.versions[ns_linkml.name] version_path = self.namespace_path(ns_linkml.name, version, allow_repo=False) + version_path.mkdir(exist_ok=True, parents=True) ns_file = version_path / 'namespace.yaml' + ns_linkml = self._fix_schema_imports(ns_linkml, ns_adapter, ns_file) yaml_dumper.dump(ns_linkml, ns_file) + # write the schemas for this namespace - ns_schema_names = [name.strip('.yaml') for name in ns_adapter.namespace_schemas(ns_linkml.name)] - other_schema = [sch for sch in built.schemas if sch.name in ns_schema_names] + other_schema = [sch for sch in built.schemas if sch.name.split('.')[0] == ns_linkml.name and sch not in namespace_sch] for sch in other_schema: output_file = version_path / (sch.name + '.yaml') + # fix the paths for intra-schema imports + sch = self._fix_schema_imports(sch, ns_adapter, output_file) + yaml_dumper.dump(sch, output_file) # make return result for just this namespace @@ -233,6 +241,20 @@ class LinkMLProvider(Provider): return build_result + def _fix_schema_imports(self, sch: SchemaDefinition, + ns_adapter: adapters.NamespacesAdapter, + output_file: Path) -> SchemaDefinition: + for animport in sch.imports: + if animport.split('.')[0] in ns_adapter.versions.keys(): + imported_path = self.namespace_path(animport.split('.')[0], ns_adapter.versions[animport.split('.')[0]]) / 'namespace' + rel_path = relative_path(imported_path, output_file) + if str(rel_path) == '.' or str(rel_path) == 'namespace': + # same directory, just keep the existing import + continue + idx = sch.imports.index(animport) + del sch.imports[idx] + sch.imports.insert(idx, str(rel_path)) + return sch def get(self, namespace: str, version: Optional[str] = None) -> SchemaView: """ Get a schema view over the namespace @@ -240,7 +262,9 @@ class LinkMLProvider(Provider): path = self.namespace_path(namespace, version) / 'namespace.yaml' if not path.exists(): path = self._find_source(namespace, version) - return SchemaView(path) + sv = SchemaView(path) + sv.path = path + return sv def _find_source(self, namespace:str, version: Optional[str] = None) -> Path: """Try and find the namespace if it exists in our default repository and build it!""" @@ -254,43 +278,42 @@ class LinkMLProvider(Provider): - - def _find_imports(self, - ns: adapters.NamespacesAdapter, - versions: Optional[List[NamespaceVersion]] = None, - populate: bool=True) -> Dict[str, List[str]]: - """ - Find relative paths to other linkml schema that need to be - imported, but lack an explicit source - - Arguments: - ns (:class:`.NamespacesAdapter`): Namespaces to find imports to - versions (List[:class:`.NamespaceVersion`]): Specific versions to import - populate (bool): If ``True`` (default), modify the namespace adapter to include the imports, - otherwise just return - - Returns: - dict of lists for relative paths to other schema namespaces - """ - import_paths = {} - for ns_name, needed_imports in ns.needed_imports.items(): - our_path = self.namespace_path(ns_name, ns.versions[ns_name]) / 'namespace.yaml' - import_paths[ns_name] = [] - for needed_import in needed_imports: - needed_version = None - if versions: - needed_versions = [v['version'] for v in versions if v['namespace'] == needed_import] - if len(needed_versions) > 0: - needed_version = needed_versions[0] - - version_path = self.namespace_path(needed_import, needed_version) / 'namespace.yaml' - import_paths[ns_name].append(str(relative_path(version_path, our_path))) - - if populate: - for sch in ns.schemas: - sch.imports.extend(import_paths) - - return import_paths + # + # def _find_imports(self, + # ns: adapters.NamespacesAdapter, + # versions: Optional[dict] = None, + # populate: bool=True) -> Dict[str, List[str]]: + # """ + # Find relative paths to other linkml schema that need to be + # imported, but lack an explicit source + # + # Arguments: + # ns (:class:`.NamespacesAdapter`): Namespaces to find imports to + # versions (dict): Specific versions to import + # populate (bool): If ``True`` (default), modify the namespace adapter to include the imports, + # otherwise just return + # + # Returns: + # dict of lists for relative paths to other schema namespaces + # """ + # import_paths = {} + # for ns_name, needed_imports in ns.needed_imports.items(): + # our_path = self.namespace_path(ns_name, ns.versions[ns_name], allow_repo=False) / 'namespace.yaml' + # import_paths[ns_name] = [] + # for needed_import in needed_imports: + # needed_version = None + # if versions: + # needed_version = versions.get(needed_import, None) + # + # version_path = self.namespace_path(needed_import, needed_version, allow_repo=False) / 'namespace.yaml' + # import_paths[ns_name].append(str(relative_path(version_path, our_path))) + # + # if populate: + # pdb.set_trace() + # for sch in ns.schemas: + # sch.imports.extend(import_paths[ns_name]) + # + # return import_paths class PydanticProvider(Provider): @@ -304,9 +327,24 @@ class PydanticProvider(Provider): self, namespace: str | Path, version: Optional[str] = None, - versions: Optional[List[NamespaceVersion]] = None, - dump: bool = True + versions: Optional[dict] = None, + dump: bool = True, + **kwargs ) -> str: + """ + + + Args: + namespace: + version: + versions: + dump: + **kwargs: Passed to :class:`.NWBPydanticGenerator` + + Returns: + + """ + if isinstance(namespace, str) and not (namespace.endswith('.yaml') or namespace.endswith('.yml')): # we're given a name of a namespace to build path = LinkMLProvider(path=self.config.cache_dir).namespace_path(namespace, version) / 'namespace.yaml' @@ -314,22 +352,67 @@ class PydanticProvider(Provider): # given a path to a namespace linkml yaml file path = Path(namespace) + default_kwargs = { + 'split': False, + 'emit_metadata': True, + 'gen_slots': True, + 'pydantic_version': '2' + } + default_kwargs.update(kwargs) + generator = NWBPydanticGenerator( str(path), - split=False, versions=versions, - emit_metadata=True, - gen_slots=True, - pydantic_version='2' + **default_kwargs ) serialized = generator.serialize() if dump: out_file = self.path / path.parts[-3] / path.parts[-2] / 'namespace.py' + out_file.parent.mkdir(parents=True,exist_ok=True) with open(out_file, 'w') as ofile: ofile.write(serialized) return serialized + @classmethod + def module_name(self, namespace:str, version:Optional[str]=None) -> str: + name_pieces = ['nwb_linkml', 'models', namespace] + if version is not None: + name_pieces.append(version_module_case(version)) + module_name = '.'.join(name_pieces) + return module_name + def import_module( + self, + namespace: str, + version: Optional[str] = None + ) -> ModuleType: + path = self.namespace_path(namespace, version) / 'namespace.py' + if not path.exists(): + raise ImportError(f'Module has not been built yet {path}') + module_name = self.module_name(namespace, version) + spec = importlib.util.spec_from_file_location(module_name, path) + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) + return module + + def get(self, namespace: str, version: Optional[str] = None) -> ModuleType: + module_name = self.module_name(namespace, version) + if module_name in sys.modules: + return sys.modules[module_name] + + try: + path = self.namespace_path(namespace, version) + except FileNotFoundError: + path = None + + if path is None or not path.exists(): + _ = self.build(namespace, version) + module = self.import_module(namespace, version) + return module + + + class SchemaProvider: """ @@ -342,30 +425,29 @@ class SchemaProvider: consistency. Store each generated schema in a directory structure indexed by - schema namespace name and a truncated hash of the loaded schema dictionaries - (not the hash of the .yaml file, since we are also provided schema in nwbfiles) + schema namespace name and version eg: cache_dir - linkml - nwb_core - - hash_532gn90f + - v0_2_0 - nwb.core.namespace.yaml - nwb.fore.file.yaml - ... - - hash_fuia082f + - v0_2_1 - nwb.core.namespace.yaml - ... - my_schema - - hash_t3tn908h + - v0_1_0 - ... - pydantic - nwb_core - - hash_532gn90f + - v0_2_0 - core.py - ... - - hash_fuia082f + - v0_2_1 - core.py - ... @@ -393,25 +475,6 @@ class SchemaProvider: - def generate_linkml( - self, - schemas:Dict[str, dict], - versions: Optional[List[NamespaceVersion]] = None - ): - """ - Generate linkml from loaded nwb schemas, either from yaml or from an - nwb file's ``/specifications`` group. - - Arguments: - schemas (dict): A dictionary of ``{'schema_name': {:schema_definition}}``. - The "namespace" schema should have the key ``namespace``, which is used - to infer version and schema name. Post-load maps should have already - been applied - versions (List[NamespaceVersion]): List of specific versions to use - for cross-namespace imports. If none is provided, use the most recent version - available. - """ - diff --git a/nwb_linkml/tests/test_provider.py b/nwb_linkml/tests/test_provider.py deleted file mode 100644 index e9d219a..0000000 --- a/nwb_linkml/tests/test_provider.py +++ /dev/null @@ -1,8 +0,0 @@ -import pytest - -from nwb_linkml.providers.schema import LinkMLProvider - -def test_linkml_provider(): - - provider = LinkMLProvider() - core = provider.get('core') \ No newline at end of file diff --git a/nwb_linkml/tests/test_providers/__init__.py b/nwb_linkml/tests/test_providers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nwb_linkml/tests/test_io/test_io_git.py b/nwb_linkml/tests/test_providers/test_provider_git.py similarity index 100% rename from nwb_linkml/tests/test_io/test_io_git.py rename to nwb_linkml/tests/test_providers/test_provider_git.py diff --git a/nwb_linkml/tests/test_providers/test_provider_schema.py b/nwb_linkml/tests/test_providers/test_provider_schema.py new file mode 100644 index 0000000..8895539 --- /dev/null +++ b/nwb_linkml/tests/test_providers/test_provider_schema.py @@ -0,0 +1,17 @@ +import pdb + +import pytest + +from nwb_linkml.providers.schema import LinkMLProvider, PydanticProvider + +def test_linkml_provider(): + + provider = LinkMLProvider() + core = provider.get('core') + +@pytest.mark.depends(on=['test_linkml_provider']) +def test_pydantic_provider(): + provider = PydanticProvider() + + core = provider.get('core') +