diff --git a/docs/api/nwb_linkml/io/git.md b/docs/api/nwb_linkml/providers/git.md
similarity index 55%
rename from docs/api/nwb_linkml/io/git.md
rename to docs/api/nwb_linkml/providers/git.md
index 26486d5..d1fbd97 100644
--- a/docs/api/nwb_linkml/io/git.md
+++ b/docs/api/nwb_linkml/providers/git.md
@@ -1,7 +1,7 @@
 # Git
 
 ```{eval-rst}
-.. automodule:: nwb_linkml.io.git
+.. automodule:: nwb_linkml.providers.git
     :members:
     :undoc-members:
 ```
\ No newline at end of file
diff --git a/docs/api/nwb_linkml/providers/schema.md b/docs/api/nwb_linkml/providers/schema.md
new file mode 100644
index 0000000..7d348a2
--- /dev/null
+++ b/docs/api/nwb_linkml/providers/schema.md
@@ -0,0 +1,7 @@
+# Schema
+
+```{eval-rst}
+.. automodule:: nwb_linkml.providers.schema
+    :members:
+    :undoc-members:
+```
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
index bab5698..c56983e 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -39,6 +39,7 @@ intersphinx_mapping = {
     'matplotlib': ('https://matplotlib.org/stable/', None),
     'numpy': ('https://numpy.org/doc/stable/', None),
     'pandas': ('https://pandas.pydata.org/docs/', None),
+    'pydantic': ('https://docs.pydantic.dev/latest/', None)
 }
 
 
diff --git a/nwb_linkml/src/nwb_linkml/lang_elements.py b/nwb_linkml/src/nwb_linkml/lang_elements.py
index 8b85be4..55dfa70 100644
--- a/nwb_linkml/src/nwb_linkml/lang_elements.py
+++ b/nwb_linkml/src/nwb_linkml/lang_elements.py
@@ -22,6 +22,11 @@ FlatDType = EnumDefinition(
 
 DTypeTypes = []
 for nwbtype, linkmltype in flat_to_linkml.items():
+    # skip the dtypes that are the same as the builtin linkml types (which should already exist)
+    # to avoid a recursion error
+    if linkmltype == nwbtype:
+        continue
+
     amin = None
     if nwbtype.startswith('uint'):
         amin = 0
diff --git a/nwb_linkml/src/nwb_linkml/providers/git.py b/nwb_linkml/src/nwb_linkml/providers/git.py
index bc96ad2..4a5d9b5 100644
--- a/nwb_linkml/src/nwb_linkml/providers/git.py
+++ b/nwb_linkml/src/nwb_linkml/providers/git.py
@@ -2,7 +2,7 @@
 Define and manage NWB namespaces in external repositories
 """
 import pdb
-from typing import Optional, Dict
+from typing import Optional, Dict, List
 import warnings
 from pathlib import Path
 import tempfile
@@ -18,6 +18,10 @@ class NamespaceRepo(BaseModel):
     name: str = Field(description="Short name used to refer to this namespace (usually equivalent to the name field within a namespaces NWB list)")
     repository: HttpUrl | DirectoryPath = Field(description="URL or local absolute path to the root repository")
     path: Path = Field(description="Relative path from the repository root to the namespace file")
+    versions: List[str] = Field(
+        description="Known versions for this namespace repository, correspond to commit hashes or git tags that can be checked out by :class:`.GitRepo`",
+        default_factory=list
+    )
 
     def provide_from_git(self, commit:str|None=None) -> Path:
         git = GitRepo(self, commit)
@@ -28,13 +32,15 @@
 NWB_CORE_REPO = NamespaceRepo(
     name="core",
     repository="https://github.com/NeurodataWithoutBorders/nwb-schema",
-    path=Path("core/nwb.namespace.yaml")
+    path=Path("core/nwb.namespace.yaml"),
+    versions=["2.0.1", "2.1.0", "2.2.0", "2.2.1", "2.2.2", "2.2.3", "2.2.4", "2.2.5", "2.3.0", "2.4.0", "2.5.0", "2.6.0"]
 )
 
 HDMF_COMMON_REPO = NamespaceRepo(
     name="hdmf-common",
     repository="https://github.com/hdmf-dev/hdmf-common-schema",
-    path=Path("common/namespace.yaml")
+    path=Path("common/namespace.yaml"),
+    versions=["1.1.0", "1.1.1", "1.1.2", "1.1.3", "1.2.0", "1.2.1", "1.3.0", "1.4.0", "1.5.0", "1.5.1", "1.6.0", "1.7.0", "1.8.0"]
 )
 
 DEFAULT_REPOS = {
@@ -104,7 +110,7 @@
         """
         The intended commit to check out.
 
-        If ``None``, use ``HEAD``
+        If ``None``: if :attr:`NamespaceRepo.versions`, use the last version. Otherwise use ``HEAD``
 
         Should match :prop:`.active_commit`, differs semantically in that it is used
         to set the active_commit, while :prop:`.active_commit` reads what commit is actually checked out
@@ -113,12 +119,80 @@
 
     @commit.setter
     def commit(self, commit:str|None):
+        # first get out of a potential detached head state
+        # that would cause a call to "HEAD" to fail in unexpected ways
+        if self.detached_head:
+            self._git_call('checkout', self.default_branch)
+
         if commit is None:
-            self._git_call('checkout', "HEAD")
+            if len(self.namespace.versions) > 0:
+                self._git_call('checkout', self.namespace.versions[-1])
+            else:
+                self._git_call('checkout', "HEAD")
         else:
            self._git_call('checkout', commit)
         self._commit = commit
 
+    @property
+    def tag(self) -> str:
+        """
+        Get/set the currently checked out repo tag.
+
+        Returns:
+            str: the result of ``git describe --tags``, which is
+            equal to the tag if it is checked out, otherwise it is the tag
+            plus some number of revisions and the short hash.
+
+        Examples:
+
+            >>> repo = GitRepo(NWB_CORE_REPO)
+            >>> repo.clone()
+            >>> # Check out a tag specifically
+            >>> repo.tag = "2.6.0"
+            >>> repo.tag
+            "2.6.0"
+            >>> # Now check out a commit some number after the tag.
+            >>> repo.commit = "gec0a879"
+            >>> repo.tag
+            "2.6.0-5-gec0a879"
+
+        """
+        res = self._git_call('describe', '--tags')
+        return res.stdout.decode('utf-8').strip()
+
+    @tag.setter
+    def tag(self, tag:str):
+        # first check that we have the most recent tags
+        self._git_call('fetch', '--all', '--tags')
+        self._git_call('checkout', f'tags/{tag}')
+        # error will be raised by _git_call if tag not found
+
+    @property
+    def default_branch(self) -> str:
+        """
+        Default branch as configured for this repository
+
+        Gotten from ``git symbolic-ref``
+        """
+        res = self._git_call('symbolic-ref', 'refs/remotes/origin/HEAD')
+        return res.stdout.decode('utf-8').strip().split('/')[-1]
+
+    @property
+    def detached_head(self) -> bool:
+        """
+        Detect if repo is in detached HEAD state that might need to be undone before
+        checking out eg. a HEAD commit.
+
+        Returns:
+            bool: ``True`` if in detached head mode, ``False`` otherwise
+        """
+        res = self._git_call('branch', '--show-current')
+        branch = res.stdout.decode('utf-8').strip()
+        if not branch:
+            return True
+        else:
+            return False
+
     def check(self) -> bool:
         """
         Check if the repository is already cloned and checked out
@@ -177,15 +251,16 @@
                 warnings.warn('Destination directory is not empty and does not pass checks for correctness! cleaning up')
                 self.cleanup()
             else:
-                # already have it
+                # already have it, just ensure commit and return
+
+                self.commit = self.commit
                 return
         elif self.temp_directory.exists():
             # exists but empty
             self.cleanup()
 
         res = subprocess.run(['git', 'clone', str(self.namespace.repository), str(self.temp_directory)])
-        if self.commit:
-            self.commit = self.commit
+        self.commit = self.commit
         if res.returncode != 0:
             raise GitError(f'Could not clone repository:\n{res.stderr}')
 
diff --git a/nwb_linkml/src/nwb_linkml/providers/schema.py b/nwb_linkml/src/nwb_linkml/providers/schema.py
index 9e51c43..16c2f84 100644
--- a/nwb_linkml/src/nwb_linkml/providers/schema.py
+++ b/nwb_linkml/src/nwb_linkml/providers/schema.py
@@ -22,6 +22,8 @@ import warnings
 import importlib
 import sys
 
+from pydantic import BaseModel
+
 from linkml_runtime.linkml_model import SchemaDefinition, SchemaDefinitionName
 from linkml_runtime.dumpers import yaml_dumper
 from linkml_runtime import SchemaView
@@ -42,6 +44,18 @@ P = TypeVar('P')
 class Provider(ABC):
     """
     Metaclass for different kind of providers!
+
+    Args:
+        path (:class:`pathlib.Path`): Override the temporary directory configured by
+            the environment-wide :class:`.Config` object as the base directory that the
+            subclasses provide to.
+        verbose (bool): If ``True``, print things like progress bars to stdout :)
+
+    Attributes:
+        config (:class:`.Config`): Configuration for the directories used by this
+            provider, unless overridden by ``path``
+        cache_dir (:class:`pathlib.Path`): The main cache directory under which the other
+            providers will store the things they provide
     """
     PROVIDES: str
     PROVIDES_CLASS: P = None
@@ -125,12 +139,54 @@
 
 
 class LinkMLSchemaBuild(TypedDict):
+    """Build result from :meth:`.LinkMLProvider.build`"""
     result: BuildResult
     version: str
     namespace: Path
 
 
 class LinkMLProvider(Provider):
+    """
+    Provider for conversions from nwb schema language to linkML.
+
+    By default, generate and manage a nest of temporary cache directories
+    (as configured by :class:`.Config`) for each version of a given namespace.
+
+    Like other :class:`.Provider` classes, this model is not a singleton but
+    behaves a bit like one in that when instantiated without arguments
+    it is stateless (except for configuration by environment-level variables).
+    So we don't use ``@classmethod``s here, but instantiating the class should remain
+    cheap.
+
+    Namespaces can be built from:
+
+    * namespace .yaml files: :meth:`.build_from_yaml`
+    * dictionaries, as are usually packaged in nwb files: :meth:`.build_from_dicts`
+
+    All of which feed into...
+    * :class:`~.adapters.NamespacesAdapter` used throughout the rest of ``nwb_linkml`` - :meth:`.build`
+
+    After a namespace is built, it can be accessed using :meth:`.LinkMLProvider.get`, which
+    can also be consumed by other providers, so a given namespace and version should only need
+    to be built once.
+
+    Note:
+        At the moment there is no checking (eg. by comparing hashes) of different sources that
+        purport to be a given version of a namespace. When ambiguous, the class prefers to
+        build sets of namespaces together and use the most recently built ones since there is no
+        formal system for linking versions of namespaced schemas in nwb schema language.
+
+    Examples:
+
+        >>> provider = LinkMLProvider()
+        >>> # Simplest case, get the core nwb schema from the default NWB core repo
+        >>> core = provider.get('core')
+        >>> # Get a specific version of the core schema
+        >>> core_other_version = provider.get('core', '2.2.0')
+        >>> # Build a custom schema and then get it
+        >>> # provider.build_from_yaml('myschema.yaml')
+        >>> # my_schema = provider.get('myschema')
+    """
     PROVIDES = 'linkml'
     PROVIDES_CLASS = SchemaDefinition
 
@@ -203,13 +259,8 @@
             built = ns_adapter.build(progress=progress)
         else:
             progress = None
-            built = ns_adapter.build()
-
-        # if progress is not None:
-        #     progress.stop()
-
         # write schemas to yaml files
         build_result = {}
 
@@ -257,7 +308,15 @@
 
         return sch
 
     def get(self, namespace: str, version: Optional[str] = None) -> SchemaView:
         """
-        Get a schema view over the namespace
+        Get a schema view over the namespace.
+
+        If a matching path for the namespace and version exists in the :attr:`.path`,
+        then return the SchemaView over that namespace.
+
+        Otherwise, try to find a source using our :data:`.providers.git.DEFAULT_REPOS`.
+
+        If none is found, then you need to build and cache the (probably custom) schema first with
+        :meth:`.build`
         """
         path = self.namespace_path(namespace, version) / 'namespace.yaml'
         if not path.exists():
@@ -276,47 +335,12 @@
 
         return res[namespace]['namespace']
 
-
-
-    #
-    # def _find_imports(self,
-    #                   ns: adapters.NamespacesAdapter,
-    #                   versions: Optional[dict] = None,
-    #                   populate: bool=True) -> Dict[str, List[str]]:
-    #     """
-    #     Find relative paths to other linkml schema that need to be
-    #     imported, but lack an explicit source
-    #
-    #     Arguments:
-    #         ns (:class:`.NamespacesAdapter`): Namespaces to find imports to
-    #         versions (dict): Specific versions to import
-    #         populate (bool): If ``True`` (default), modify the namespace adapter to include the imports,
-    #             otherwise just return
-    #
-    #     Returns:
-    #         dict of lists for relative paths to other schema namespaces
-    #     """
-    #     import_paths = {}
-    #     for ns_name, needed_imports in ns.needed_imports.items():
-    #         our_path = self.namespace_path(ns_name, ns.versions[ns_name], allow_repo=False) / 'namespace.yaml'
-    #         import_paths[ns_name] = []
-    #         for needed_import in needed_imports:
-    #             needed_version = None
-    #             if versions:
-    #                 needed_version = versions.get(needed_import, None)
-    #
-    #             version_path = self.namespace_path(needed_import, needed_version, allow_repo=False) / 'namespace.yaml'
-    #             import_paths[ns_name].append(str(relative_path(version_path, our_path)))
-    #
-    #         if populate:
-    #             pdb.set_trace()
-    #             for sch in ns.schemas:
-    #                 sch.imports.extend(import_paths[ns_name])
-    #
-    #     return import_paths
-
-
 class PydanticProvider(Provider):
+    """
+    Provider for pydantic models built from linkml-style nwb schema (ie. as provided by :class:`.LinkMLProvider`)
+
+
+    """
     PROVIDES = 'pydantic'
 
     @property
@@ -335,14 +359,22 @@
 
         Args:
-            namespace:
-            version:
-            versions:
-            dump:
+            namespace (Union[str, :class:`pathlib.Path`]): If a string, use a
+                :class:`.LinkMLProvider` to get the converted schema. If a path,
+                assume we have been given an explicit ``namespace.yaml`` from a converted
+                NWB -> LinkML schema to load from.
+            version (Optional[str]): The version of the schema to build, if present.
+                Works similarly to ``version`` in :class:`.LinkMLProvider`
+            versions (Optional[dict]): An explicit mapping of namespaces and versions to use when
+                building the combined pydantic `namespace.py` file. Since NWB doesn't have an explicit
+                version dependency system between schema, there is intrinsic ambiguity between which version
+                of which schema should be used when imported from another. This mapping allows those ambiguities to be resolved.
+                See :class:`.NWBPydanticGenerator` 's ``versions`` argument for more information.
+            dump (bool): If ``True`` (default), dump the model to the cache, otherwise just return the serialized string of the built pydantic model
             **kwargs: Passed to :class:`.NWBPydanticGenerator`
 
         Returns:
-
+            str: The built model file as returned from :meth:`.NWBPydanticGenerator.serialize`
         """
         if isinstance(namespace, str) and not (namespace.endswith('.yaml') or namespace.endswith('.yml')):
@@ -386,6 +418,20 @@
             namespace: str,
             version: Optional[str] = None
     ) -> ModuleType:
+        """
+        Import a module within the temporary directory from its namespace and version.
+
+        In most cases, you're looking for :meth:`.PydanticProvider.get`; this method is
+        made available in case you don't want to accidentally build something
+        or invoke the rest of the provisioning system.
+
+        Args:
+            namespace (str): Name of namespace
+            version (Optional[str]): Version to import; if ``None``, try to get the most recently built version.
+
+        Returns:
+            :class:`types.ModuleType`
+        """
         path = self.namespace_path(namespace, version) / 'namespace.py'
         if not path.exists():
             raise ImportError(f'Module has not been built yet {path}')
@@ -397,6 +443,36 @@
         return module
 
     def get(self, namespace: str, version: Optional[str] = None) -> ModuleType:
+        """
+        Get the imported module for a given namespace and version.
+
+        A given namespace will be stored in :data:`sys.modules` as ``nwb_linkml.models.{namespace}``,
+        so first check if there is any already-imported module, and return that if so.
+
+        Then we check in the temporary directory for an already-built ``namespace.py`` file.
+
+        Otherwise we pass arguments to :meth:`.PydanticProvider.build` and attempt to build them
+        before returning.
+
+        Notes:
+            The imported modules shadow the "actual"
+            ``nwb_linkml.models`` module as would be imported from the usual location within the package directory.
+            This is intentional, as models can then be used as if they were integrated parts of the package,
+            and also so the active version of a namespace can be cleanly accessed
+            (ie. without ``from nwb_linkml.models.core import v2_2_0 as core`` ).
+            Accordingly, we assume that people will only be using a single version of NWB in a given
+            Python session.
+
+
+        Args:
+            namespace (str): Name of namespace to import. Must have either been previously built with :meth:`.PydanticProvider.build` or
+                a matching namespace/version combo must be available to the :class:`.LinkMLProvider`
+            version (Optional[str]): Version to import. If ``None``, get the most recently built module
+
+        Returns:
+            The imported :class:`types.ModuleType` object that has all the built classes at the root level.
+
+        """
         module_name = self.module_name(namespace, version)
         if module_name in sys.modules:
             return sys.modules[module_name]
@@ -411,6 +487,23 @@
             module = self.import_module(namespace, version)
         return module
 
+    def get_class(self, namespace: str, class_: str, version: Optional[str] = None) -> BaseModel:
+        """
+        Get a class from a given namespace and version!
+
+        Args:
+            namespace (str): Name of a namespace that has been previously built and cached, otherwise
+                we will attempt to build it from the :data:`.providers.git.DEFAULT_REPOS`
+            class_ (str): Name of class to retrieve
+            version (Optional[str]): Optional version of the schema to retrieve from
+
+        Returns:
+            :class:`pydantic.BaseModel`
+        """
+        mod = self.get(namespace, version)
+        return getattr(mod, class_)
+
+
 
@@ -433,11 +526,11 @@
     - linkml
         - nwb_core
            - v0_2_0
-                - nwb.core.namespace.yaml
-                - nwb.fore.file.yaml
+                - namespace.yaml
+                - nwb.core.file.yaml
                - ...
            - v0_2_1
-                - nwb.core.namespace.yaml
+                - namespace.yaml
                - ...
        - my_schema
            - v0_1_0
                - ...
    - pydantic
        - nwb_core
            - v0_2_0
-                - core.py
+                - namespace.py
                - ...
            - v0_2_1
-                - core.py
+                - namespace.py
                - ...
    """
diff --git a/nwb_linkml/tests/test_providers/test_provider_schema.py b/nwb_linkml/tests/test_providers/test_provider_schema.py
index 8895539..383eb98 100644
--- a/nwb_linkml/tests/test_providers/test_provider_schema.py
+++ b/nwb_linkml/tests/test_providers/test_provider_schema.py
@@ -1,17 +1,80 @@
 import pdb
+import shutil
+
+from typing import Optional, Union, List
+from ..fixtures import tmp_output_dir
 
 import pytest
 
 from nwb_linkml.providers.schema import LinkMLProvider, PydanticProvider
 
-def test_linkml_provider():
-    provider = LinkMLProvider()
-    core = provider.get('core')
+CORE_MODULES = (
+"core.nwb.base",
+"core.nwb.device",
+"core.nwb.epoch",
+"core.nwb.image",
+"core.nwb.file",
+"core.nwb.misc",
+"core.nwb.behavior",
+"core.nwb.ecephys",
+"core.nwb.icephys",
+"core.nwb.ogen",
+"core.nwb.ophys",
+"core.nwb.retinotopy",
+"core.nwb.language"
+)
+@pytest.mark.parametrize(
+    ["repo_version", "schema_version", "schema_dir"],
+    [
+        ('2.6.0', '2.6.0-alpha', 'v2_6_0_alpha')
+    ]
+)
+def test_linkml_provider(tmp_output_dir, repo_version, schema_version, schema_dir):
+
+    provider = LinkMLProvider(path=tmp_output_dir)
+    # clear any prior output
+    shutil.rmtree(provider.path, ignore_errors=True)
+    assert not provider.path.exists()
+
+    # end to end, check that we can get the 'core' repo at the latest version
+    # in the gitrepo
+    core = provider.get('core', version=repo_version)
+
+    assert core.schema.version == schema_version
+    assert all([mod in core.schema.imports for mod in CORE_MODULES])
+    assert schema_dir in [path.name for path in (provider.path / 'core').iterdir()]
+
+
 @pytest.mark.depends(on=['test_linkml_provider'])
-def test_pydantic_provider():
-    provider = PydanticProvider()
+@pytest.mark.parametrize(
+    ['class_name', 'test_fields'],
+    [
+        ('TimeSeries', {
+            'name':str,
+            'description': Optional[str],
+            'comments': Optional[str],
+            'data': 'TimeSeriesData',
+            'timestamps': Optional[List[float]],
+            'control': Optional[List[int]],
+        })
+    ]
+)
+def test_pydantic_provider(tmp_output_dir, class_name, test_fields):
+    provider = PydanticProvider(path=tmp_output_dir)
     core = provider.get('core')
+    test_class = getattr(core, class_name)
+    assert test_class == provider.get_class('core', class_name)
+
+    for k, v in test_fields.items():
+        if isinstance(v, str):
+            assert test_class.model_fields[k].annotation.__name__ == v
+        else:
+            assert test_class.model_fields[k].annotation == v
+
+
+    pdb.set_trace()
+
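Usage sketch (illustrative only; assembled from the docstrings and tests in this changeset, not from any shipped example): the LinkMLProvider, PydanticProvider, and get_class calls are the ones added above, and the 'core' namespace, the '2.6.0' version, and the TimeSeries class name come from DEFAULT_REPOS and the new test parametrization.

from nwb_linkml.providers.schema import LinkMLProvider, PydanticProvider

# Convert the NWB core schema to linkml, or load it from the cache if it has
# already been built. The version argument is optional; '2.6.0' is one of the
# versions listed for NWB_CORE_REPO in providers/git.py.
linkml_provider = LinkMLProvider()
core_view = linkml_provider.get('core', version='2.6.0')

# Build and import the generated pydantic models for the same namespace ...
pydantic_provider = PydanticProvider()
core_models = pydantic_provider.get('core')

# ... or pull out a single model class directly.
TimeSeries = pydantic_provider.get_class('core', 'TimeSeries')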