Docstrings and basic tests for providers.

bugfix - remove redundant language elements that cause a recursion error (eg. subtyping "float" with "float")
This commit is contained in:
sneakers-the-rat 2023-09-11 19:38:29 -07:00
parent 2e87fa0556
commit c69cbdfb38
7 changed files with 314 additions and 70 deletions

View file

@ -1,7 +1,7 @@
# Git
```{eval-rst}
.. automodule:: nwb_linkml.io.git
.. automodule:: nwb_linkml.providers.git
:members:
:undoc-members:
```

View file

@ -0,0 +1,7 @@
# Schema
```{eval-rst}
.. automodule:: nwb_linkml.providers.schema
:members:
:undoc-members:
```

View file

@ -39,6 +39,7 @@ intersphinx_mapping = {
'matplotlib': ('https://matplotlib.org/stable/', None),
'numpy': ('https://numpy.org/doc/stable/', None),
'pandas': ('https://pandas.pydata.org/docs/', None),
'pydantic': ('https://docs.pydantic.dev/latest/', None)
}

View file

@ -22,6 +22,11 @@ FlatDType = EnumDefinition(
DTypeTypes = []
for nwbtype, linkmltype in flat_to_linkml.items():
# skip the dtypes that are the same as the builtin linkml types (which should already exist)
# to avoid a recursion error
if linkmltype == nwbtype:
continue
amin = None
if nwbtype.startswith('uint'):
amin = 0

View file

@ -2,7 +2,7 @@
Define and manage NWB namespaces in external repositories
"""
import pdb
from typing import Optional, Dict
from typing import Optional, Dict, List
import warnings
from pathlib import Path
import tempfile
@ -18,6 +18,10 @@ class NamespaceRepo(BaseModel):
name: str = Field(description="Short name used to refer to this namespace (usually equivalent to the name field within a namespaces NWB list)")
repository: HttpUrl | DirectoryPath = Field(description="URL or local absolute path to the root repository")
path: Path = Field(description="Relative path from the repository root to the namespace file")
versions: List[str] = Field(
description="Known versions for this namespace repository, correspond to commit hashes or git tags that can be checked out by :class:`.GitRepo`",
default_factory=list
)
def provide_from_git(self, commit:str|None=None) -> Path:
git = GitRepo(self, commit)
@ -28,13 +32,15 @@ class NamespaceRepo(BaseModel):
NWB_CORE_REPO = NamespaceRepo(
name="core",
repository="https://github.com/NeurodataWithoutBorders/nwb-schema",
path=Path("core/nwb.namespace.yaml")
path=Path("core/nwb.namespace.yaml"),
versions=["2.0.1", "2.1.0", "2.2.0", "2.2.1", "2.2.2", "2.2.3", "2.2.4", "2.2.5", "2.3.0", "2.4.0", "2.5.0", "2.6.0"]
)
HDMF_COMMON_REPO = NamespaceRepo(
name="hdmf-common",
repository="https://github.com/hdmf-dev/hdmf-common-schema",
path=Path("common/namespace.yaml")
path=Path("common/namespace.yaml"),
versions=["1.1.0", "1.1.1", "1.1.2", "1.1.3", "1.2.0", "1.2.1", "1.3.0", "1.4.0", "1.5.0", "1.5.1", "1.6.0", "1.7.0", "1.8.0"]
)
DEFAULT_REPOS = {
@ -104,7 +110,7 @@ class GitRepo:
"""
The intended commit to check out.
If ``None``, use ``HEAD``
If ``None``: if :attr:`NamespaceRepo.versions`, use the last version. Otherwise use ``HEAD``
Should match :attr:`.active_commit`, differs semantically in that it is used to
set the active_commit, while :attr:`.active_commit` reads what commit is actually checked out
@ -113,12 +119,80 @@ class GitRepo:
@commit.setter
def commit(self, commit: str | None):
    """
    Check out ``commit`` and remember it as the intended commit.

    Args:
        commit (str, None): Commit hash or tag to check out. If ``None``, check out
            the last entry of the namespace's ``versions`` when there is one,
            otherwise ``HEAD``.
    """
    # first get out of a potential detached head state
    # that would cause a call to "HEAD" to fail in unexpected ways
    if self.detached_head:
        self._git_call('checkout', self.default_branch)

    # resolve what to check out exactly once, then make a single checkout call
    if commit is not None:
        target = commit
    elif self.namespace.versions:
        target = self.namespace.versions[-1]
    else:
        target = "HEAD"
    self._git_call('checkout', target)

    # store what was requested (possibly ``None``), not the resolved target
    self._commit = commit
@property
def tag(self) -> str:
    """
    Get/set the currently checked out repo tag.

    Setting the tag first fetches all remotes and tags, then checks out
    ``tags/{tag}``.

    Returns:
        str: the result of ``git describe --tags``, which is
        equal to the tag if it is checked out, otherwise it is the tag
        plus some number of revisions and the short hash (prefixed with ``g``).

    Examples:

        >>> repo = GitRepo(NWB_CORE_REPO)
        >>> repo.clone()
        >>> # Check out a tag specifically
        >>> repo.tag = "2.6.0"
        >>> repo.tag
        '2.6.0'
        >>> # Now check out a commit some number after the tag.
        >>> # (the "g" in the describe output below is a prefix added by git,
        >>> # it is not part of the commit hash)
        >>> repo.commit = "ec0a879"
        >>> repo.tag
        '2.6.0-5-gec0a879'

    """
    res = self._git_call('describe', '--tags')
    return res.stdout.decode('utf-8').strip()


@tag.setter
def tag(self, tag: str):
    # first check that we have the most recent tags
    self._git_call('fetch', '--all', '--tags')
    # error will be raised by _git_call if tag not found
    self._git_call('checkout', f'tags/{tag}')
@property
def default_branch(self) -> str:
    """
    Default branch as configured for this repository

    Gotten from ``git symbolic-ref refs/remotes/origin/HEAD``.

    Returns:
        str: The branch name with the ``refs/remotes/origin/`` prefix removed, so
        branch names that themselves contain ``/`` (eg. ``release/1.0``) are kept whole.
    """
    res = self._git_call('symbolic-ref', 'refs/remotes/origin/HEAD')
    ref = res.stdout.decode('utf-8').strip()

    prefix = 'refs/remotes/origin/'
    if ref.startswith(prefix):
        return ref[len(prefix):]
    # unexpected ref format: keep the previous behavior of taking the last component
    return ref.split('/')[-1]
@property
def detached_head(self) -> bool:
    """
    Detect if repo is in detached HEAD state that might need to be undone before
    checking out eg. a HEAD commit.

    Returns:
        bool: ``True`` if in detached head mode, ``False`` otherwise
    """
    res = self._git_call('branch', '--show-current')
    # an empty branch name is treated as a detached HEAD
    return not res.stdout.decode('utf-8').strip()
def check(self) -> bool:
"""
Check if the repository is already cloned and checked out
@ -177,15 +251,16 @@ class GitRepo:
warnings.warn('Destination directory is not empty and does not pass checks for correctness! cleaning up')
self.cleanup()
else:
# already have it
# already have it, just ensure commit and return
self.commit = self.commit
return
elif self.temp_directory.exists():
# exists but empty
self.cleanup()
res = subprocess.run(['git', 'clone', str(self.namespace.repository), str(self.temp_directory)])
if self.commit:
self.commit = self.commit
self.commit = self.commit
if res.returncode != 0:
raise GitError(f'Could not clone repository:\n{res.stderr}')

View file

@ -22,6 +22,8 @@ import warnings
import importlib
import sys
from pydantic import BaseModel
from linkml_runtime.linkml_model import SchemaDefinition, SchemaDefinitionName
from linkml_runtime.dumpers import yaml_dumper
from linkml_runtime import SchemaView
@ -42,6 +44,18 @@ P = TypeVar('P')
class Provider(ABC):
"""
Abstract base class for the different kinds of providers!
Args:
path (:class:`pathlib.Path`): Override the temporary directory configured by
the environment-wide :class:`.Config` object as the base directory that the
subclasses provide to.
verbose (bool): If ``True``, print things like progress bars to stdout :)
Attributes:
config (:class:`.Config`): Configuration for the directories used by this
provider, unless overridden by ``path``
cache_dir (:class:`pathlib.Path`): The main cache directory under which the other
providers will store the things they provide
"""
PROVIDES: str
PROVIDES_CLASS: P = None
@ -125,12 +139,54 @@ class Provider(ABC):
class LinkMLSchemaBuild(TypedDict):
"""Build result from :meth:`.LinkMLProvider.build`"""
result: BuildResult
version: str
namespace: Path
class LinkMLProvider(Provider):
"""
Provider for conversions from nwb schema language to linkML.
By default, generate and manage a nest of temporary cache directories
(as configured by :class:`.Config`) for each version of a given namespace.
Like other :class:`.Provider` classes, this model is not a singleton but
behaves a bit like one in that when instantiated without arguments
it is stateless (except for configuration by environment-level variables).
So we don't use ``@classmethod``s here, but instantiating the class should remain
cheap.
Namespaces can be built from:
* namespace .yaml files: :meth:`.build_from_yaml`
* dictionaries, as are usually packaged in nwb files: :meth:`.build_from_dicts`
All of which feed into...
* :class:`~.adapters.NamespacesAdapter` used throughout the rest of ``nwb_linkml`` - :meth:`.build`
After a namespace is built, it can be accessed using :meth:`.LinkMLProvider.get`, which
can also be consumed by other providers, so a given namespace and version should only need
to be built once.
Note:
At the moment there is no checking (eg. by comparing hashes) of different sources that
purport to be a given version of a namespace. When ambiguous, the class prefers to
build sets of namespaces together and use the most recently built ones since there is no
formal system for linking versions of namespaced schemas in nwb schema language.
Examples:
>>> provider = LinkMLProvider()
>>> # Simplest case, get the core nwb schema from the default NWB core repo
>>> core = provider.get('core')
>>> # Get a specific version of the core schema
>>> core_other_version = provider.get('core', '2.2.0')
>>> # Build a custom schema and then get it
>>> # provider.build_from_yaml('myschema.yaml')
>>> # my_schema = provider.get('myschema')
"""
PROVIDES = 'linkml'
PROVIDES_CLASS = SchemaDefinition
@ -203,13 +259,8 @@ class LinkMLProvider(Provider):
built = ns_adapter.build(progress=progress)
else:
progress = None
built = ns_adapter.build()
# if progress is not None:
# progress.stop()
# write schemas to yaml files
build_result = {}
@ -257,7 +308,15 @@ class LinkMLProvider(Provider):
return sch
def get(self, namespace: str, version: Optional[str] = None) -> SchemaView:
"""
Get a schema view over the namespace
Get a schema view over the namespace.
If a matching path for the namespace and version exists in the :attr:`.path`,
then return the SchemaView over that namespace.
Otherwise, try and find a source using our :data:`.providers.git.DEFAULT_REPOS`.
If none is found, then you need to build and cache the (probably custom) schema first with
:meth:`.build`
"""
path = self.namespace_path(namespace, version) / 'namespace.yaml'
if not path.exists():
@ -276,47 +335,12 @@ class LinkMLProvider(Provider):
return res[namespace]['namespace']
#
# def _find_imports(self,
# ns: adapters.NamespacesAdapter,
# versions: Optional[dict] = None,
# populate: bool=True) -> Dict[str, List[str]]:
# """
# Find relative paths to other linkml schema that need to be
# imported, but lack an explicit source
#
# Arguments:
# ns (:class:`.NamespacesAdapter`): Namespaces to find imports to
# versions (dict): Specific versions to import
# populate (bool): If ``True`` (default), modify the namespace adapter to include the imports,
# otherwise just return
#
# Returns:
# dict of lists for relative paths to other schema namespaces
# """
# import_paths = {}
# for ns_name, needed_imports in ns.needed_imports.items():
# our_path = self.namespace_path(ns_name, ns.versions[ns_name], allow_repo=False) / 'namespace.yaml'
# import_paths[ns_name] = []
# for needed_import in needed_imports:
# needed_version = None
# if versions:
# needed_version = versions.get(needed_import, None)
#
# version_path = self.namespace_path(needed_import, needed_version, allow_repo=False) / 'namespace.yaml'
# import_paths[ns_name].append(str(relative_path(version_path, our_path)))
#
# if populate:
# pdb.set_trace()
# for sch in ns.schemas:
# sch.imports.extend(import_paths[ns_name])
#
# return import_paths
class PydanticProvider(Provider):
"""
Provider for pydantic models built from linkml-style nwb schema (ie. as provided by :class:`.LinkMLProvider`)
"""
PROVIDES = 'pydantic'
@property
@ -335,14 +359,22 @@ class PydanticProvider(Provider):
Args:
namespace:
version:
versions:
dump:
namespace (Union[str, :class:`pathlib.Path`]): If a string, use a
:class:`.LinkMLProvider` to get the converted schema. If a path,
assume we have been given an explicit ``namespace.yaml`` from a converted
NWB -> LinkML schema to load from.
version (Optional[str]): The version of the schema to build, if present.
Works similarly to ``version`` in :class:`.LinkMLProvider`
versions (Optional[dict]): An explicit mapping of namespaces and versions to use when
building the combined pydantic `namespace.py` file. Since NWB doesn't have an explicit
version dependency system between schema, there is intrinsic ambiguity between which version
of which schema should be used when imported from another. This mapping allows those ambiguities to be resolved.
See :class:`.NWBPydanticGenerator` 's ``versions`` argument for more information.
dump (bool): If ``True`` (default), dump the model to the cache, otherwise just return the serialized string of built pydantic model
**kwargs: Passed to :class:`.NWBPydanticGenerator`
Returns:
str: The built model file as returned from :meth:`.NWBPydanticGenerator.serialize`
"""
if isinstance(namespace, str) and not (namespace.endswith('.yaml') or namespace.endswith('.yml')):
@ -386,6 +418,20 @@ class PydanticProvider(Provider):
namespace: str,
version: Optional[str] = None
) -> ModuleType:
"""
Import a module within the temporary directory from its namespace and version
In most cases, you're looking for :meth:`.PydanticProvider.get`, this method is
made available in case you don't want to accidentally build something
or invoke the rest of the provisioning system.
Args:
namespace (str): Name of namespace
version (Optional[str]): Version to import, if None, try and get the most recently built version.
Returns:
:class:`types.ModuleType`
"""
path = self.namespace_path(namespace, version) / 'namespace.py'
if not path.exists():
raise ImportError(f'Module has not been built yet {path}')
@ -397,6 +443,36 @@ class PydanticProvider(Provider):
return module
def get(self, namespace: str, version: Optional[str] = None) -> ModuleType:
"""
Get the imported module for a given namespace and version.
A given namespace will be stored in :data:`sys.modules` as ``nwb_linkml.models.{namespace}``,
so first check if there is any already-imported module, and return that if so.
Then we check in the temporary directory for an already-built ``namespace.py`` file
Otherwise we pass arguments to :meth:`.PydanticProvider.build` and attempt to build them
before returning.
Notes:
The imported modules shadow the "actual"
``nwb_linkml.models`` module as would be imported from the usual location within the package directory.
This is intentional, as models can then be used as if they were integrated parts of the package,
and also so the active version of a namespace can be cleanly accessed
(ie. without ``from nwb_linkml.models.core import v2_2_0 as core`` ).
Accordingly, we assume that people will only be using a single version of NWB in a given
Python session.
Args:
namespace (str): Name of namespace to import. Must have either been previously built with :meth:`.PydanticProvider.build` or
a matching namespace/version combo must be available to the :class:`.LinkMLProvider`
version (Optional[str]): Version to import. If ``None``, get the most recently built module
Returns:
The imported :class:`types.ModuleType` object that has all the built classes at the root level.
"""
module_name = self.module_name(namespace, version)
if module_name in sys.modules:
return sys.modules[module_name]
@ -411,6 +487,23 @@ class PydanticProvider(Provider):
module = self.import_module(namespace, version)
return module
def get_class(self, namespace: str, class_: str, version: Optional[str] = None) -> "type[BaseModel]":
    """
    Get a class from a given namespace and version!

    Args:
        namespace (str): Name of a namespace that has been previously built and cached, otherwise
            we will attempt to build it from the :data:`.providers.git.DEFAULT_REPOS`
        class_ (str): Name of class to retrieve
        version (Optional[str]): Optional version of the schema to retrieve from

    Returns:
        The class object itself (not an instance), looked up by name on the
        module returned from :meth:`.get`

    Raises:
        AttributeError: if the module has no attribute named ``class_``
    """
    mod = self.get(namespace, version)
    return getattr(mod, class_)
@ -433,11 +526,11 @@ class SchemaProvider:
- linkml
- nwb_core
- v0_2_0
- nwb.core.namespace.yaml
- nwb.fore.file.yaml
- namespace.yaml
- nwb.core.file.yaml
- ...
- v0_2_1
- nwb.core.namespace.yaml
- namespace.yaml
- ...
- my_schema
- v0_1_0
@ -445,10 +538,10 @@ class SchemaProvider:
- pydantic
- nwb_core
- v0_2_0
- core.py
- namespace.py
- ...
- v0_2_1
- core.py
- namespace.py
- ...
"""

View file

@ -1,17 +1,80 @@
import pdb
import shutil
from typing import Optional, Union, List
from ..fixtures import tmp_output_dir
import pytest
from nwb_linkml.providers.schema import LinkMLProvider, PydanticProvider
def test_linkml_provider():
provider = LinkMLProvider()
core = provider.get('core')
CORE_MODULES = (
"core.nwb.base",
"core.nwb.device",
"core.nwb.epoch",
"core.nwb.image",
"core.nwb.file",
"core.nwb.misc",
"core.nwb.behavior",
"core.nwb.ecephys",
"core.nwb.icephys",
"core.nwb.ogen",
"core.nwb.ophys",
"core.nwb.retinotopy",
"core.nwb.language"
)
@pytest.mark.parametrize(
    ["repo_version", "schema_version", "schema_dir"],
    [
        ('2.6.0', '2.6.0-alpha', 'v2_6_0_alpha')
    ]
)
def test_linkml_provider(tmp_output_dir, repo_version, schema_version, schema_dir):
    """
    End to end check that the ``core`` namespace can be provided from the git repo
    at a given version and lands in the expected versioned directory.
    """
    provider = LinkMLProvider(path=tmp_output_dir)
    # clear any prior output
    shutil.rmtree(provider.path, ignore_errors=True)
    assert not provider.path.exists()

    # end to end, check that we can get the 'core' repo at the requested version
    # in the gitrepo
    core = provider.get('core', version=repo_version)

    assert core.schema.version == schema_version

    # report exactly which modules are absent rather than a bare ``False``
    missing = [mod for mod in CORE_MODULES if mod not in core.schema.imports]
    assert not missing, f"core schema is missing imports: {missing}"

    built_dirs = [path.name for path in (provider.path / 'core').iterdir()]
    assert schema_dir in built_dirs
@pytest.mark.depends(on=['test_linkml_provider'])
def test_pydantic_provider():
provider = PydanticProvider()
@pytest.mark.parametrize(
    ['class_name', 'test_fields'],
    [
        ('TimeSeries', {
            'name': str,
            'description': Optional[str],
            'comments': Optional[str],
            'data': 'TimeSeriesData',
            'timestamps': Optional[List[float]],
            'control': Optional[List[int]],
        })
    ]
)
def test_pydantic_provider(tmp_output_dir, class_name, test_fields):
    """
    Check that a class from the built ``core`` module has the expected field annotations.

    ``test_fields`` maps field names to either the expected annotation itself, or,
    when given as a string, the expected ``__name__`` of the annotation.
    """
    provider = PydanticProvider(path=tmp_output_dir)
    core = provider.get('core')

    test_class = getattr(core, class_name)
    assert test_class == provider.get_class('core', class_name)

    for k, v in test_fields.items():
        annotation = test_class.model_fields[k].annotation
        if isinstance(v, str):
            assert annotation.__name__ == v
        else:
            assert annotation == v
    # NOTE: the trailing ``pdb.set_trace()`` debugging breakpoint was removed so the
    # test can run unattended