Docstrings and basic tests for providers.

bugfix - remove redundant language elements that cause a recursion error (e.g. subtyping "float" with "float")
This commit is contained in:
sneakers-the-rat 2023-09-11 19:38:29 -07:00
parent 2e87fa0556
commit c69cbdfb38
7 changed files with 314 additions and 70 deletions

View file

@ -1,7 +1,7 @@
# Git # Git
```{eval-rst} ```{eval-rst}
.. automodule:: nwb_linkml.io.git .. automodule:: nwb_linkml.providers.git
:members: :members:
:undoc-members: :undoc-members:
``` ```

View file

@ -0,0 +1,7 @@
# Schema
```{eval-rst}
.. automodule:: nwb_linkml.providers.schema
:members:
:undoc-members:
```

View file

@ -39,6 +39,7 @@ intersphinx_mapping = {
'matplotlib': ('https://matplotlib.org/stable/', None), 'matplotlib': ('https://matplotlib.org/stable/', None),
'numpy': ('https://numpy.org/doc/stable/', None), 'numpy': ('https://numpy.org/doc/stable/', None),
'pandas': ('https://pandas.pydata.org/docs/', None), 'pandas': ('https://pandas.pydata.org/docs/', None),
'pydantic': ('https://docs.pydantic.dev/latest/', None)
} }

View file

@ -22,6 +22,11 @@ FlatDType = EnumDefinition(
DTypeTypes = [] DTypeTypes = []
for nwbtype, linkmltype in flat_to_linkml.items(): for nwbtype, linkmltype in flat_to_linkml.items():
# skip the dtypes that are the same as the builtin linkml types (which should already exist)
# to avoid a recursion error
if linkmltype == nwbtype:
continue
amin = None amin = None
if nwbtype.startswith('uint'): if nwbtype.startswith('uint'):
amin = 0 amin = 0

View file

@ -2,7 +2,7 @@
Define and manage NWB namespaces in external repositories Define and manage NWB namespaces in external repositories
""" """
import pdb import pdb
from typing import Optional, Dict from typing import Optional, Dict, List
import warnings import warnings
from pathlib import Path from pathlib import Path
import tempfile import tempfile
@ -18,6 +18,10 @@ class NamespaceRepo(BaseModel):
name: str = Field(description="Short name used to refer to this namespace (usually equivalent to the name field within a namespaces NWB list)") name: str = Field(description="Short name used to refer to this namespace (usually equivalent to the name field within a namespaces NWB list)")
repository: HttpUrl | DirectoryPath = Field(description="URL or local absolute path to the root repository") repository: HttpUrl | DirectoryPath = Field(description="URL or local absolute path to the root repository")
path: Path = Field(description="Relative path from the repository root to the namespace file") path: Path = Field(description="Relative path from the repository root to the namespace file")
versions: List[str] = Field(
description="Known versions for this namespace repository, correspond to commit hashes or git tags that can be checked out by :class:`.GitRepo`",
default_factory=list
)
def provide_from_git(self, commit:str|None=None) -> Path: def provide_from_git(self, commit:str|None=None) -> Path:
git = GitRepo(self, commit) git = GitRepo(self, commit)
@ -28,13 +32,15 @@ class NamespaceRepo(BaseModel):
NWB_CORE_REPO = NamespaceRepo( NWB_CORE_REPO = NamespaceRepo(
name="core", name="core",
repository="https://github.com/NeurodataWithoutBorders/nwb-schema", repository="https://github.com/NeurodataWithoutBorders/nwb-schema",
path=Path("core/nwb.namespace.yaml") path=Path("core/nwb.namespace.yaml"),
versions=["2.0.1", "2.1.0", "2.2.0", "2.2.1", "2.2.2", "2.2.3", "2.2.4", "2.2.5", "2.3.0", "2.4.0", "2.5.0", "2.6.0"]
) )
HDMF_COMMON_REPO = NamespaceRepo( HDMF_COMMON_REPO = NamespaceRepo(
name="hdmf-common", name="hdmf-common",
repository="https://github.com/hdmf-dev/hdmf-common-schema", repository="https://github.com/hdmf-dev/hdmf-common-schema",
path=Path("common/namespace.yaml") path=Path("common/namespace.yaml"),
versions=["1.1.0", "1.1.1", "1.1.2", "1.1.3", "1.2.0", "1.2.1", "1.3.0", "1.4.0", "1.5.0", "1.5.1", "1.6.0", "1.7.0", "1.8.0"]
) )
DEFAULT_REPOS = { DEFAULT_REPOS = {
@ -104,7 +110,7 @@ class GitRepo:
""" """
The intended commit to check out. The intended commit to check out.
If ``None``, use ``HEAD`` If ``None``: if :attr:`NamespaceRepo.versions`, use the last version. Otherwise use ``HEAD``
Should match :prop:`.active_commit`, differs semantically in that it is used to Should match :prop:`.active_commit`, differs semantically in that it is used to
set the active_commit, while :prop:`.active_commit` reads what commit is actually checked out set the active_commit, while :prop:`.active_commit` reads what commit is actually checked out
@ -113,12 +119,80 @@ class GitRepo:
@commit.setter @commit.setter
def commit(self, commit:str|None): def commit(self, commit:str|None):
# first get out of a potential detached head state
# that would cause a call to "HEAD" to fail in unexpected ways
if self.detached_head:
self._git_call('checkout', self.default_branch)
if commit is None: if commit is None:
if len(self.namespace.versions) > 0:
self._git_call('checkout', self.namespace.versions[-1])
else:
self._git_call('checkout', "HEAD") self._git_call('checkout', "HEAD")
else: else:
self._git_call('checkout', commit) self._git_call('checkout', commit)
self._commit = commit self._commit = commit
@property
def tag(self) -> str:
"""
Get/set the currently checked out repo tag.
Returns:
str: the result of ``git describe --tags``, which is
equal to the tag if it is checked out, otherwise it is the tag
plus some number of revisions and the short hash.
Examples:
>>> repo = GitRepo(NWB_CORE_REPO)
>>> repo.clone()
>>> # Check out a tag specifically
>>> repo.tag = "2.6.0"
>>> repo.tag
"2.6.0"
>>> # Now check out a commit some number after the tag.
>>> repo.commit = "gec0a879"
>>> repo.tag
"2.6.0-5-gec0a879"
"""
res = self._git_call('describe', '--tags')
return res.stdout.decode('utf-8').strip()
@tag.setter
def tag(self, tag:str):
# first check that we have the most recent tags
self._git_call('fetch', '--all', '--tags')
self._git_call('checkout', f'tags/{tag}')
# error will be raised by _git_call if tag not found
@property
def default_branch(self) -> str:
"""
Default branch as configured for this repository
Gotten from ``git symbolic-ref``
"""
res = self._git_call('symbolic-ref', 'refs/remotes/origin/HEAD')
return res.stdout.decode('utf-8').strip().split('/')[-1]
@property
def detached_head(self) -> bool:
"""
Detect if repo is in detached HEAD state that might need to be undone before
checking out eg. a HEAD commit.
Returns:
bool: ``True`` if in detached head mode, ``False`` otherwise
"""
res = self._git_call('branch', '--show-current')
branch = res.stdout.decode('utf-8').strip()
if not branch:
return True
else:
return False
def check(self) -> bool: def check(self) -> bool:
""" """
Check if the repository is already cloned and checked out Check if the repository is already cloned and checked out
@ -177,14 +251,15 @@ class GitRepo:
warnings.warn('Destination directory is not empty and does not pass checks for correctness! cleaning up') warnings.warn('Destination directory is not empty and does not pass checks for correctness! cleaning up')
self.cleanup() self.cleanup()
else: else:
# already have it # already have it, just ensure commit and return
self.commit = self.commit
return return
elif self.temp_directory.exists(): elif self.temp_directory.exists():
# exists but empty # exists but empty
self.cleanup() self.cleanup()
res = subprocess.run(['git', 'clone', str(self.namespace.repository), str(self.temp_directory)]) res = subprocess.run(['git', 'clone', str(self.namespace.repository), str(self.temp_directory)])
if self.commit:
self.commit = self.commit self.commit = self.commit
if res.returncode != 0: if res.returncode != 0:
raise GitError(f'Could not clone repository:\n{res.stderr}') raise GitError(f'Could not clone repository:\n{res.stderr}')

View file

@ -22,6 +22,8 @@ import warnings
import importlib import importlib
import sys import sys
from pydantic import BaseModel
from linkml_runtime.linkml_model import SchemaDefinition, SchemaDefinitionName from linkml_runtime.linkml_model import SchemaDefinition, SchemaDefinitionName
from linkml_runtime.dumpers import yaml_dumper from linkml_runtime.dumpers import yaml_dumper
from linkml_runtime import SchemaView from linkml_runtime import SchemaView
@ -42,6 +44,18 @@ P = TypeVar('P')
class Provider(ABC): class Provider(ABC):
""" """
Metaclass for different kind of providers! Metaclass for different kind of providers!
Args:
path (:class:`pathlib.Path`): Override the temporary directory configured by
the environment-wide :class:`.Config` object as the base directory that the
subclasses provide to.
verbose (bool): If ``True``, print things like progress bars to stdout :)
Attributes:
config (:class:`.Config`): Configuration for the directories used by this
provider, unless overridden by ``path``
cache_dir (:class:`pathlib.Path`): The main cache directory under which the other
providers will store the things they provide
""" """
PROVIDES: str PROVIDES: str
PROVIDES_CLASS: P = None PROVIDES_CLASS: P = None
@ -125,12 +139,54 @@ class Provider(ABC):
class LinkMLSchemaBuild(TypedDict): class LinkMLSchemaBuild(TypedDict):
"""Build result from :meth:`.LinkMLProvider.build`"""
result: BuildResult result: BuildResult
version: str version: str
namespace: Path namespace: Path
class LinkMLProvider(Provider): class LinkMLProvider(Provider):
"""
Provider for conversions from nwb schema language to linkML.
By default, generate and manage a nest of temporary cache directories
(as configured by :class:`.Config`) for each version of a given namespace.
Like other :class:`.Provider` classes, this model is not a singleton but
behaves a bit like one in that when instantiated without arguments
it is stateless (except for configuration by environment-level variables).
So we don't use ``@classmethod``s here, but instantiating the class should remain
cheap.
Namespaces can be built from:
* namespace .yaml files: :meth:`.build_from_yaml`
* dictionaries, as are usually packaged in nwb files: :meth:`.build_from_dicts`
All of which feed into...
* :class:`~.adapters.NamespacesAdapter` used throughout the rest of ``nwb_linkml`` - :meth:`.build`
After a namespace is built, it can be accessed using :meth:`.LinkMLProvider.get`, which
can also be consumed by other providers, so a given namespace and version should only need
to be built once.
Note:
At the moment there is no checking (eg. by comparing hashes) of different sources that
purport to be a given version of a namespace. When ambiguous, the class prefers to
build sets of namespaces together and use the most recently built ones since there is no
formal system for linking versions of namespaced schemas in nwb schema language.
Examples:
>>> provider = LinkMLProvider()
>>> # Simplest case, get the core nwb schema from the default NWB core repo
>>> core = provider.get('core')
>>> # Get a specific version of the core schema
>>> core_other_version = provider.get('core', '2.2.0')
>>> # Build a custom schema and then get it
>>> # provider.build_from_yaml('myschema.yaml')
>>> # my_schema = provider.get('myschema')
"""
PROVIDES = 'linkml' PROVIDES = 'linkml'
PROVIDES_CLASS = SchemaDefinition PROVIDES_CLASS = SchemaDefinition
@ -203,13 +259,8 @@ class LinkMLProvider(Provider):
built = ns_adapter.build(progress=progress) built = ns_adapter.build(progress=progress)
else: else:
progress = None progress = None
built = ns_adapter.build() built = ns_adapter.build()
# if progress is not None:
# progress.stop()
# write schemas to yaml files # write schemas to yaml files
build_result = {} build_result = {}
@ -257,7 +308,15 @@ class LinkMLProvider(Provider):
return sch return sch
def get(self, namespace: str, version: Optional[str] = None) -> SchemaView: def get(self, namespace: str, version: Optional[str] = None) -> SchemaView:
""" """
Get a schema view over the namespace Get a schema view over the namespace.
If a matching path for the namespace and version exists in the :attr:`.path`,
then return the SchemaView over that namespace.
Otherwise, try and find a source using our :data:`.providers.git.DEFAULT_REPOS`.
If none is found, then you need to build and cache the (probably custom) schema first with
:meth:`.build`
""" """
path = self.namespace_path(namespace, version) / 'namespace.yaml' path = self.namespace_path(namespace, version) / 'namespace.yaml'
if not path.exists(): if not path.exists():
@ -276,47 +335,12 @@ class LinkMLProvider(Provider):
return res[namespace]['namespace'] return res[namespace]['namespace']
#
# def _find_imports(self,
# ns: adapters.NamespacesAdapter,
# versions: Optional[dict] = None,
# populate: bool=True) -> Dict[str, List[str]]:
# """
# Find relative paths to other linkml schema that need to be
# imported, but lack an explicit source
#
# Arguments:
# ns (:class:`.NamespacesAdapter`): Namespaces to find imports to
# versions (dict): Specific versions to import
# populate (bool): If ``True`` (default), modify the namespace adapter to include the imports,
# otherwise just return
#
# Returns:
# dict of lists for relative paths to other schema namespaces
# """
# import_paths = {}
# for ns_name, needed_imports in ns.needed_imports.items():
# our_path = self.namespace_path(ns_name, ns.versions[ns_name], allow_repo=False) / 'namespace.yaml'
# import_paths[ns_name] = []
# for needed_import in needed_imports:
# needed_version = None
# if versions:
# needed_version = versions.get(needed_import, None)
#
# version_path = self.namespace_path(needed_import, needed_version, allow_repo=False) / 'namespace.yaml'
# import_paths[ns_name].append(str(relative_path(version_path, our_path)))
#
# if populate:
# pdb.set_trace()
# for sch in ns.schemas:
# sch.imports.extend(import_paths[ns_name])
#
# return import_paths
class PydanticProvider(Provider): class PydanticProvider(Provider):
"""
Provider for pydantic models built from linkml-style nwb schema (ie. as provided by :class:`.LinkMLProvider`)
"""
PROVIDES = 'pydantic' PROVIDES = 'pydantic'
@property @property
@ -335,14 +359,22 @@ class PydanticProvider(Provider):
Args: Args:
namespace: namespace (Union[str, :class:`pathlib.Path`]): If a string, use a
version: :class:`.LinkMLProvider` to get the converted schema. If a path,
versions: assume we have been given an explicit ``namespace.yaml`` from a converted
dump: NWB -> LinkML schema to load from.
version (Optional[str]): The version of the schema to build, if present.
Works similarly to ``version`` in :class:`.LinkMLProvider`
versions (Optional[dict]): An explicit mapping of namespaces and versions to use when
building the combined pydantic `namespace.py` file. Since NWB doesn't have an explicit
version dependency system between schema, there is intrinsic ambiguity between which version
of which schema should be used when imported from another. This mapping allows those ambiguities to be resolved.
See :class:`.NWBPydanticGenerator` 's ``versions`` argument for more information.
dump (bool): If ``True`` (default), dump the model to the cache, otherwise just return the serialized string of built pydantic model
**kwargs: Passed to :class:`.NWBPydanticGenerator` **kwargs: Passed to :class:`.NWBPydanticGenerator`
Returns: Returns:
str: The built model file as returned from :meth:`.NWBPydanticGenerator.serialize`
""" """
if isinstance(namespace, str) and not (namespace.endswith('.yaml') or namespace.endswith('.yml')): if isinstance(namespace, str) and not (namespace.endswith('.yaml') or namespace.endswith('.yml')):
@ -386,6 +418,20 @@ class PydanticProvider(Provider):
namespace: str, namespace: str,
version: Optional[str] = None version: Optional[str] = None
) -> ModuleType: ) -> ModuleType:
"""
Import a module within the temporary directory from its namespace and version
In most cases, you're looking for :meth:`.PydanticProvider.get`, this method is
made available in case you don't want to accidentally build something
or invoke the rest of the provisioning system.
Args:
namespace (str): Name of namespace
version (Optional[str]): Version to import, if None, try and get the most recently built version.
Returns:
:class:`types.ModuleType`
"""
path = self.namespace_path(namespace, version) / 'namespace.py' path = self.namespace_path(namespace, version) / 'namespace.py'
if not path.exists(): if not path.exists():
raise ImportError(f'Module has not been built yet {path}') raise ImportError(f'Module has not been built yet {path}')
@ -397,6 +443,36 @@ class PydanticProvider(Provider):
return module return module
def get(self, namespace: str, version: Optional[str] = None) -> ModuleType: def get(self, namespace: str, version: Optional[str] = None) -> ModuleType:
"""
Get the imported module for a given namespace and version.
A given namespace will be stored in :data:`sys.modules` as ``nwb_linkml.models.{namespace}``,
so first check if there is any already-imported module, and return that if so.
Then we check in the temporary directory for an already-built ``namespace.py`` file
Otherwise we pass arguments to :meth:`.PydanticProvider.build` and attempt to build them
before returning.
Notes:
The imported modules shadow the "actual"
``nwb_linkml.models`` module as would be imported from the usual location within the package directory.
This is intentional, as models can then be used as if they were integrated parts of the package,
and also so the active version of a namespace can be cleanly accessed
(ie. without ``from nwb_linkml.models.core import v2_2_0 as core`` ).
Accordingly, we assume that people will only be using a single version of NWB in a given
Python session.
Args:
namespace (str): Name of namespace to import. Must have either been previously built with :meth:`.PydanticProvider.build` or
a matching namespace/version combo must be available to the :class:`.LinkMLProvider`
version (Optional[str]): Version to import. If ``None``, get the most recently built module
Returns:
The imported :class:`types.ModuleType` object that has all the built classes at the root level.
"""
module_name = self.module_name(namespace, version) module_name = self.module_name(namespace, version)
if module_name in sys.modules: if module_name in sys.modules:
return sys.modules[module_name] return sys.modules[module_name]
@ -411,6 +487,23 @@ class PydanticProvider(Provider):
module = self.import_module(namespace, version) module = self.import_module(namespace, version)
return module return module
def get_class(self, namespace: str, class_: str, version: Optional[str] = None) -> BaseModel:
"""
Get a class from a given namespace and version!
Args:
namespace (str): Name of a namespace that has been previously built and cached, otherwise
we will attempt to build it from the :data:`.providers.git.DEFAULT_REPOS`
class_ (str): Name of class to retrieve
version (Optional[str]): Optional version of the schema to retrieve from
Returns:
:class:`pydantic.BaseModel`
"""
mod = self.get(namespace, version)
return getattr(mod, class_)
@ -433,11 +526,11 @@ class SchemaProvider:
- linkml - linkml
- nwb_core - nwb_core
- v0_2_0 - v0_2_0
- nwb.core.namespace.yaml - namespace.yaml
- nwb.fore.file.yaml - nwb.core.file.yaml
- ... - ...
- v0_2_1 - v0_2_1
- nwb.core.namespace.yaml - namespace.yaml
- ... - ...
- my_schema - my_schema
- v0_1_0 - v0_1_0
@ -445,10 +538,10 @@ class SchemaProvider:
- pydantic - pydantic
- nwb_core - nwb_core
- v0_2_0 - v0_2_0
- core.py - namespace.py
- ... - ...
- v0_2_1 - v0_2_1
- core.py - namespace.py
- ... - ...
""" """

View file

@ -1,17 +1,80 @@
import pdb import pdb
import shutil
from typing import Optional, Union, List
from ..fixtures import tmp_output_dir
import pytest import pytest
from nwb_linkml.providers.schema import LinkMLProvider, PydanticProvider from nwb_linkml.providers.schema import LinkMLProvider, PydanticProvider
def test_linkml_provider():
provider = LinkMLProvider() CORE_MODULES = (
core = provider.get('core') "core.nwb.base",
"core.nwb.device",
"core.nwb.epoch",
"core.nwb.image",
"core.nwb.file",
"core.nwb.misc",
"core.nwb.behavior",
"core.nwb.ecephys",
"core.nwb.icephys",
"core.nwb.ogen",
"core.nwb.ophys",
"core.nwb.retinotopy",
"core.nwb.language"
)
@pytest.mark.parametrize(
["repo_version", "schema_version", "schema_dir"],
[
('2.6.0', '2.6.0-alpha', 'v2_6_0_alpha')
]
)
def test_linkml_provider(tmp_output_dir, repo_version, schema_version, schema_dir):
provider = LinkMLProvider(path=tmp_output_dir)
# clear any prior output
shutil.rmtree(provider.path, ignore_errors=True)
assert not provider.path.exists()
# end to end, check that we can get the 'core' repo at the latest version
# in the gitrepo
core = provider.get('core', version=repo_version)
assert core.schema.version == schema_version
assert all([mod in core.schema.imports for mod in CORE_MODULES])
assert schema_dir in [path.name for path in (provider.path / 'core').iterdir()]
@pytest.mark.depends(on=['test_linkml_provider']) @pytest.mark.depends(on=['test_linkml_provider'])
def test_pydantic_provider(): @pytest.mark.parametrize(
provider = PydanticProvider() ['class_name', 'test_fields'],
[
('TimeSeries', {
'name':str,
'description': Optional[str],
'comments': Optional[str],
'data': 'TimeSeriesData',
'timestamps': Optional[List[float]],
'control': Optional[List[int]],
})
]
)
def test_pydantic_provider(tmp_output_dir, class_name, test_fields):
provider = PydanticProvider(path=tmp_output_dir)
core = provider.get('core') core = provider.get('core')
test_class = getattr(core, class_name)
assert test_class == provider.get_class('core', class_name)
for k, v in test_fields.items():
if isinstance(v, str):
assert test_class.model_fields[k].annotation.__name__ == v
else:
assert test_class.model_fields[k].annotation == v
pdb.set_trace()