cute progress bar and also linkml provider works

This commit is contained in:
sneakers-the-rat 2023-09-08 16:12:19 -07:00
parent d15ba8e2e1
commit 0b0fb6c67a
11 changed files with 259 additions and 60 deletions

View file

@ -1,12 +1,8 @@
.. nwb-linkml documentation master file, created by
sphinx-quickstart on Thu Sep 7 18:57:17 2023.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
# nwb-linkml # nwb-linkml
```{toctree} ```{toctree}
:caption: Contents: :caption: Contents:
:maxdepth: 3
api/index api/index
changelog changelog

View file

@ -71,7 +71,6 @@ addopts = [
"--cov-append", "--cov-append",
"--cov-config=.coveragerc", "--cov-config=.coveragerc",
"--emoji", "--emoji",
"--profile"
] ]
testpaths = [ testpaths = [
"tests", "tests",

View file

@ -12,6 +12,7 @@ from pydantic import BaseModel, Field, validator, PrivateAttr
from pprint import pformat from pprint import pformat
from linkml_runtime.linkml_model import SchemaDefinition from linkml_runtime.linkml_model import SchemaDefinition
from linkml_runtime.dumpers import yaml_dumper from linkml_runtime.dumpers import yaml_dumper
from time import sleep
from nwb_schema_language import Namespaces from nwb_schema_language import Namespaces
@ -19,6 +20,9 @@ from nwb_schema_language import Namespaces
from nwb_linkml.adapters.adapter import Adapter, BuildResult from nwb_linkml.adapters.adapter import Adapter, BuildResult
from nwb_linkml.adapters.schema import SchemaAdapter from nwb_linkml.adapters.schema import SchemaAdapter
from nwb_linkml.lang_elements import NwbLangSchema from nwb_linkml.lang_elements import NwbLangSchema
from nwb_linkml.providers.git import DEFAULT_REPOS
from nwb_linkml.ui import AdapterProgress
class NamespacesAdapter(Adapter): class NamespacesAdapter(Adapter):
namespaces: Namespaces namespaces: Namespaces
@ -33,18 +37,51 @@ class NamespacesAdapter(Adapter):
self._populate_schema_namespaces() self._populate_schema_namespaces()
self.split = self._split self.split = self._split
def build(self, skip_imports:bool=False) -> BuildResult: @classmethod
def from_yaml(cls, path:Path) -> 'NamespacesAdapter':
    """
    Create a NamespacesAdapter from a nwb schema language namespaces yaml file.

    Also attempts to provide implicitly imported schema (those referenced using the
    namespace key, rather than source, eg. with hdmf-common) from the repositories
    registered in ``DEFAULT_REPOS``.

    Arguments:
        path (:class:`pathlib.Path`): Path to the namespaces yaml file

    Returns:
        The adapter loaded from ``path``, with an adapter for every resolvable
        implicit import appended to its ``imported`` list.
    """
    # imported locally, as in the original implementation
    from nwb_linkml.io import schema as schema_io
    ns_adapter = schema_io.load_namespaces(path)
    ns_adapter = schema_io.load_namespace_schema(ns_adapter, path)

    # Collect every needed namespace that is not itself one of the keys of
    # ``needed_imports``. Several namespaces can need the same import, so
    # de-duplicate (keeping first-seen order) to avoid fetching, parsing and
    # appending the same imported namespace more than once.
    need_imports = list(dict.fromkeys(
        needed_name
        for needed in ns_adapter.needed_imports.values()
        for needed_name in needed
        if needed_name not in ns_adapter.needed_imports.keys()
    ))

    for needed in need_imports:
        # imports without a configured repository are left unresolved
        if needed not in DEFAULT_REPOS:
            continue
        needed_source_ns = DEFAULT_REPOS[needed].provide_from_git()
        needed_adapter = NamespacesAdapter.from_yaml(needed_source_ns)
        ns_adapter.imported.append(needed_adapter)

    return ns_adapter
def build(self, skip_imports:bool=False, progress:Optional[AdapterProgress] = None) -> BuildResult:
if not self._imports_populated and not skip_imports: if not self._imports_populated and not skip_imports:
self.populate_imports() self.populate_imports()
sch_result = BuildResult() sch_result = BuildResult()
for sch in self.schemas: for sch in self.schemas:
if progress is not None:
progress.update(sch.namespace, action=sch.name)
sch_result += sch.build() sch_result += sch.build()
if progress is not None:
progress.update(sch.namespace, advance=1)
sleep(1)
# recursive step # recursive step
if not skip_imports: if not skip_imports:
for imported in self.imported: for imported in self.imported:
imported_build = imported.build() imported_build = imported.build(progress=progress)
sch_result += imported_build sch_result += imported_build
# add in monkeypatch nwb types # add in monkeypatch nwb types
@ -189,18 +226,43 @@ class NamespacesAdapter(Adapter):
""" """
versions for each namespace versions for each namespace
""" """
return {ns['name']:ns['version'] for ns in self.namespaces.namespaces} versions = {ns.name:ns.version for ns in self.namespaces.namespaces}
for imported in self.imported:
versions.update(imported.versions)
return versions
def namespace_schemas(self, name:str) -> List[str]:
    """
    Get the schemas that are defined in a given namespace

    The adapter's own namespaces are searched first, then the namespaces of each
    directly imported adapter, in order.

    Raises:
        NameError: if the namespace was not found in this adapter and the loop
            over imported adapters finished without a match
    """
    def _matching(adapter):
        # all namespaces in ``adapter`` whose name equals the requested one
        return [candidate for candidate in adapter.namespaces.namespaces if candidate.name == name]

    found = _matching(self)
    if found:
        namespace = found[0]
    else:
        for imported in self.imported:
            found = _matching(imported)
            if found:
                namespace = found[0]
                break
        else:
            raise NameError(f"Couldnt find namespace {name}")

    # schema entries without a source are skipped
    return [sch.source for sch in namespace.schema_ if sch.source is not None]
def schema_namespace(self, name:str) -> Optional[str]:
    """
    Inverse of :meth:`.namespace_schemas` - given a schema name, get the namespace it's in

    Only this adapter's own namespaces are searched. Returns the name of the first
    namespace that lists ``name`` as a schema source, or ``None`` if there is none.
    """
    return next(
        (
            ns.name
            for ns in self.namespaces.namespaces
            # schema entries without a source never match
            if any(sch.source == name for sch in ns.schema_ if sch.source is not None)
        ),
        None
    )

View file

@ -3,7 +3,7 @@ Manage the operation of nwb_linkml from environmental variables
""" """
import tempfile import tempfile
from pathlib import Path from pathlib import Path
from pydantic import Field, DirectoryPath, computed_field from pydantic import Field, DirectoryPath, computed_field, field_validator, FieldValidationInfo
from pydantic_settings import BaseSettings, SettingsConfigDict from pydantic_settings import BaseSettings, SettingsConfigDict
class Config(BaseSettings): class Config(BaseSettings):
@ -33,6 +33,14 @@ class Config(BaseSettings):
"""Directory to store generated pydantic models""" """Directory to store generated pydantic models"""
return self.cache_dir / 'pydantic' return self.cache_dir / 'pydantic'
@field_validator('cache_dir', mode='before')
@classmethod
def folder_exists(cls, v: Path, info: FieldValidationInfo) -> Path:
    """
    Ensure the cache directory exists, creating it if needed.

    This runs in ``before`` mode, so the value has not been coerced yet and may be
    a plain string (eg. when supplied through an environment variable) - it is
    converted to a :class:`pathlib.Path` before use.

    Raises:
        ValueError: if the directory does not exist after attempting to create it
    """
    v = Path(v)
    # parents=True so a nested cache location can be created in one step
    v.mkdir(exist_ok=True, parents=True)
    # explicit raise rather than ``assert``, which is stripped under ``python -O``
    if not v.exists():
        raise ValueError(f"Could not create cache directory {v}")
    return v
def __post_init__(self): def __post_init__(self):
self.cache_dir.mkdir(exist_ok=True) self.cache_dir.mkdir(exist_ok=True)
self.linkml_dir.mkdir(exist_ok=True) self.linkml_dir.mkdir(exist_ok=True)

View file

@ -9,7 +9,7 @@ from linkml_runtime.loaders import yaml_loader
import yaml import yaml
from nwb_schema_language import Namespaces, Group, Dataset from nwb_schema_language import Namespaces, Group, Dataset
from nwb_linkml.io.git import NamespaceRepo, NWB_CORE_REPO, HDMF_COMMON_REPO from nwb_linkml.providers.git import NamespaceRepo, NWB_CORE_REPO, HDMF_COMMON_REPO
from nwb_linkml.map import PHASES, Map from nwb_linkml.map import PHASES, Map
from nwb_linkml.adapters.namespaces import NamespacesAdapter from nwb_linkml.adapters.namespaces import NamespacesAdapter
from nwb_linkml.adapters.schema import SchemaAdapter from nwb_linkml.adapters.schema import SchemaAdapter

View file

@ -33,7 +33,7 @@ def version_module_case(name:str) -> str:
""" """
name = module_case(name) name = module_case(name)
if not name.startswith('v'): if not name.startswith('v'):
name = v + name name = 'v' + name
return name return name
def relative_path(target: Path, origin: Path): def relative_path(target: Path, origin: Path):

View file

@ -2,7 +2,7 @@
Define and manage NWB namespaces in external repositories Define and manage NWB namespaces in external repositories
""" """
import pdb import pdb
from typing import Optional from typing import Optional, Dict
import warnings import warnings
from pathlib import Path from pathlib import Path
import tempfile import tempfile
@ -39,7 +39,7 @@ HDMF_COMMON_REPO = NamespaceRepo(
DEFAULT_REPOS = { DEFAULT_REPOS = {
repo.name: repo for repo in [NWB_CORE_REPO, HDMF_COMMON_REPO] repo.name: repo for repo in [NWB_CORE_REPO, HDMF_COMMON_REPO]
} } # type: Dict[str, NamespaceRepo]
class GitError(OSError): class GitError(OSError):
@ -104,7 +104,7 @@ class GitRepo:
""" """
The intended commit to check out. The intended commit to check out.
If ``None``, should be the latest commit when the repo was checked out If ``None``, use ``HEAD``
Should match :prop:`.active_commit`, differs semantically in that it is used to Should match :prop:`.active_commit`, differs semantically in that it is used to
set the active_commit, while :prop:`.active_commit` reads what commit is actually checked out set the active_commit, while :prop:`.active_commit` reads what commit is actually checked out
@ -112,8 +112,11 @@ class GitRepo:
return self._commit return self._commit
@commit.setter
def commit(self, commit:str|None):
    """
    Check out ``commit`` (or ``HEAD`` when ``commit`` is ``None``) and store the
    requested value, unmodified, as the intended commit.
    """
    checkout_target = "HEAD" if commit is None else commit
    self._git_call('checkout', checkout_target)
    self._commit = commit
def check(self) -> bool: def check(self) -> bool:

View file

@ -12,12 +12,15 @@ Relationship to other modules:
- :mod:`.providers` then use ``adapters`` and ``generators`` to provide models - :mod:`.providers` then use ``adapters`` and ``generators`` to provide models
from generated schema! from generated schema!
""" """
import pdb
from typing import Dict, TypedDict, List, Optional, Literal, TypeVar, Any, Dict from typing import Dict, TypedDict, List, Optional, Literal, TypeVar, Any, Dict
from pathlib import Path from pathlib import Path
import os import os
from abc import abstractmethod from abc import abstractmethod, ABC
import warnings
import importlib
from linkml_runtime.linkml_model import SchemaDefinition from linkml_runtime.linkml_model import SchemaDefinition, SchemaDefinitionName
from linkml_runtime.dumpers import yaml_dumper from linkml_runtime.dumpers import yaml_dumper
from linkml_runtime import SchemaView from linkml_runtime import SchemaView
@ -28,6 +31,8 @@ from nwb_linkml.adapters.adapter import BuildResult
from nwb_linkml.maps.naming import module_case, version_module_case, relative_path from nwb_linkml.maps.naming import module_case, version_module_case, relative_path
from nwb_schema_language import Namespaces from nwb_schema_language import Namespaces
from nwb_linkml.generators.pydantic import NWBPydanticGenerator from nwb_linkml.generators.pydantic import NWBPydanticGenerator
from nwb_linkml.providers.git import DEFAULT_REPOS
from nwb_linkml.ui import AdapterProgress
class NamespaceVersion(TypedDict): class NamespaceVersion(TypedDict):
namespace: str namespace: str
@ -35,7 +40,7 @@ class NamespaceVersion(TypedDict):
P = TypeVar('P') P = TypeVar('P')
class Provider: class Provider(ABC):
""" """
Metaclass for different kind of providers! Metaclass for different kind of providers!
""" """
@ -51,9 +56,10 @@ class Provider:
config = Config() config = Config()
self.config = config self.config = config
self.cache_dir = config.cache_dir self.cache_dir = config.cache_dir
self.verbose = verbose
@abstractmethod
@property @property
@abstractmethod
def path(self) -> Path: def path(self) -> Path:
""" """
Base path for this kind of provider Base path for this kind of provider
@ -69,7 +75,9 @@ class Provider:
def namespace_path( def namespace_path(
self, self,
namespace: str, namespace: str,
version: Optional[str] = None) -> Path: version: Optional[str] = None,
allow_repo: bool = True
) -> Path:
""" """
Get the location for a given namespace of this type. Get the location for a given namespace of this type.
@ -85,20 +93,26 @@ class Provider:
recent *version*, but the most recently *generated* version recent *version*, but the most recently *generated* version
because it's assumed that's the one you want if you're just because it's assumed that's the one you want if you're just
gesturally reaching for one. gesturally reaching for one.
allow_repo (bool): Allow the pathfinder to return the installed repository/package,
useful to enforce building into temporary directories, decoupling finding a path
during loading vs. building. Building into the repo is still possible if both
namespace and version are provided (ie. the path is fully qualified) and
:attr:`.config`'s path is the repository path.
""" """
namespace_module = module_case(namespace) namespace_module = module_case(namespace)
namespace_path = self.path / namespace_module namespace_path = self.path / namespace_module
if not namespace_path.exists() and namespace in ('core', 'hdmf-common'): if not namespace_path.exists() and namespace in ('core', 'hdmf-common') and allow_repo:
# return builtins # return builtins
module_path = Path(importlib.util.find_spec('nwb_linkml').origin).parent
if self.PROVIDES == 'linkml': if self.PROVIDES == 'linkml':
from nwb_linkml import schema namespace_path = module_path / 'schema'
namespace_path = Path(schema.__file__)
elif self.PROVIDES == 'pydantic': elif self.PROVIDES == 'pydantic':
from nwb_linkml import models namespace_path = module_path / 'models'
namespace_path = Path(models.__file__)
if version is not None: if version is not None:
version_path = namespace_path / version_module_case(version) version_path = namespace_path / version_module_case(version)
version_path.mkdir(exist_ok=True, parents=True)
else: else:
# or find the most recently built one # or find the most recently built one
versions = sorted(namespace_path.iterdir(), key=os.path.getmtime) versions = sorted(namespace_path.iterdir(), key=os.path.getmtime)
@ -109,6 +123,10 @@ class Provider:
return version_path return version_path
class LinkMLSchemaBuild(TypedDict):
    """
    Result of building a single namespace with :meth:`.LinkMLProvider.build`
    """
    # build result holding the namespace schema and the schemas written with it
    result: BuildResult
    # version of the namespace that was built
    version: str
    # path to the ``namespace.yaml`` file written for this namespace
    namespace: Path
class LinkMLProvider(Provider): class LinkMLProvider(Provider):
@ -127,76 +145,115 @@ class LinkMLProvider(Provider):
path (:class:`pathlib.Path`): Path to the namespace .yaml path (:class:`pathlib.Path`): Path to the namespace .yaml
kwargs: passed to :meth:`.build` kwargs: passed to :meth:`.build`
""" """
sch = {} ns_adapter = adapters.NamespacesAdapter.from_yaml(path)
ns_dict = io.schema.load_yaml(path) return self.build(ns_adapter, **kwargs)
sch['namespace'] = ns_dict
namespace = Namespaces(**ns_dict)
def build_from_dicts(
        self,
        schemas:Dict[str, dict],
        **kwargs
    ) -> Dict[str | SchemaDefinitionName, LinkMLSchemaBuild]:
    """
    Build from schema dictionaries, eg. as come from nwb files

    Arguments:
        schemas (dict): A dictionary of ``{'schema_name': {:schema_definition}}``.
            The "namespace" schema should have the key ``namespace``, which is used
            to infer version and schema name. Post-load maps should have already
            been applied
        kwargs: passed to :meth:`.build`
    """
    namespaces = Namespaces(**schemas['namespace'])

    # every entry other than the namespace declaration is a schema file
    loaded_schemas = []
    for schema_name, schema_dict in schemas.items():
        if schema_name == 'namespace':
            continue
        loaded_schemas.append(
            io.schema.load_schema_file(
                path=Path(schema_name + ".yaml"),
                yaml=schema_dict
            )
        )

    adapter = adapters.NamespacesAdapter(
        namespaces=namespaces,
        schemas=loaded_schemas
    )
    return self.build(adapter, **kwargs)
def build(
        self,
        ns_adapter: adapters.NamespacesAdapter,
        versions: Optional[List[NamespaceVersion]] = None,
        dump: bool = True,
    ) -> Dict[str | SchemaDefinitionName, LinkMLSchemaBuild]:
    """
    Build the namespaces in an adapter and write them out as LinkML yaml files.

    Arguments:
        ns_adapter (:class:`.NamespacesAdapter`): Adapter (populated with any necessary imported namespaces)
            to build
        versions (List[NamespaceVersion]): List of specific versions to use
            for cross-namespace imports. If none is provided, use the most recent version
            available.
        dump (bool): If ``True`` (default), dump generated schema to YAML. otherwise just return.
            NOTE(review): this flag is not consulted anywhere in this method - schemas
            are always written. Confirm the intended behavior before relying on it.

    Returns:
        A dictionary keyed by namespace name. Each value holds the path of the
        written ``namespace.yaml``, the namespace version, and a
        :class:`.BuildResult` with just that namespace's schemas.
    """
    self._find_imports(ns_adapter, versions, populate=True)

    if self.verbose:
        progress = AdapterProgress(ns_adapter)
        with progress:
            built = ns_adapter.build(progress=progress)
    else:
        built = ns_adapter.build()

    # write schemas to yaml files
    build_result = {}

    namespace_sch = [sch for sch in built.schemas if 'namespace' in sch.annotations.keys()]
    for ns_linkml in namespace_sch:
        version = ns_adapter.versions[ns_linkml.name]
        # allow_repo=False: always build into the provider's own path
        version_path = self.namespace_path(ns_linkml.name, version, allow_repo=False)
        ns_file = version_path / 'namespace.yaml'
        yaml_dumper.dump(ns_linkml, ns_file)

        # write the schemas for this namespace
        # removesuffix, not strip: ``strip('.yaml')`` removes any of the characters
        # ``.``, ``y``, ``a``, ``m``, ``l`` from both ends, which mangles names
        # like ``nwb.retinotopy``
        ns_schema_names = {
            name.removesuffix('.yaml')
            for name in ns_adapter.namespace_schemas(ns_linkml.name)
        }
        other_schema = [sch for sch in built.schemas if sch.name in ns_schema_names]
        for sch in other_schema:
            output_file = version_path / (sch.name + '.yaml')
            yaml_dumper.dump(sch, output_file)

        # make return result for just this namespace
        build_result[ns_linkml.name] = LinkMLSchemaBuild(
            namespace=ns_file,
            result=BuildResult(schemas=[ns_linkml, *other_schema]),
            version=version
        )

    return build_result
def get(self, namespace: str, version: Optional[str] = None) -> SchemaView:
    """
    Get a schema view over the namespace

    If the expected ``namespace.yaml`` does not exist, fall back to
    :meth:`._find_source` to locate it.
    """
    ns_file = self.namespace_path(namespace, version) / 'namespace.yaml'
    if ns_file.exists():
        return SchemaView(ns_file)
    return SchemaView(self._find_source(namespace, version))
def _find_source(self, namespace:str, version: Optional[str] = None) -> Path:
    """
    Try and find the namespace if it exists in our default repository and build it!

    Returns:
        The ``namespace`` entry of the build result for ``namespace``

    Raises:
        KeyError: if no repository is registered for ``namespace`` in ``DEFAULT_REPOS``
    """
    repo = DEFAULT_REPOS.get(namespace, None)
    if repo is None:
        raise KeyError(f"Namespace {namespace} could not be found, and no git repository source has been configured!")

    # the requested version is handed to the repo as the commit to provide
    source_file = repo.provide_from_git(commit=version)
    built = self.build_from_yaml(source_file)
    return built[namespace]['namespace']
def _find_imports(self, def _find_imports(self,
ns: adapters.NamespacesAdapter, ns: adapters.NamespacesAdapter,

View file

@ -0,0 +1,67 @@
"""
UI Elements :)
"""
from typing import TYPE_CHECKING
from rich.live import Live
from rich.panel import Panel
from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, Column
if TYPE_CHECKING:
from nwb_linkml.adapters.namespaces import NamespacesAdapter
class AdapterProgress:
    """
    Progress display for building the namespaces of a :class:`.NamespacesAdapter`

    Holds one progress task per namespace, keyed by namespace name in
    :attr:`.task_ids`, rendered inside a panel. Use as a context manager to show
    the live display while building.
    """

    def __init__(self, ns:'NamespacesAdapter'):
        self.ns = ns
        self.task_ids = {}

        self.progress = Progress(
            SpinnerColumn(),
            TextColumn("[bold blue]{task.fields[name]} - [bold red]{task.fields[action]}",
                       table_column=Column(ratio=1)),
            BarColumn(table_column=Column(ratio=1), bar_width=None),
            TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
            expand=True
        )

        # add tasks for each namespace, following imports at every depth
        self._add_tasks(self.ns, set())

        self.panel = Panel(
            self.progress,
            title="Building Namespaces",
            border_style="green",
            padding=(2,2)
        )

    def _add_tasks(self, adapter:'NamespacesAdapter', seen:set):
        """Register a task for each namespace in ``adapter``, then recurse into its imports."""
        if id(adapter) in seen:
            # already visited, eg. a shared or circular import
            return
        seen.add(id(adapter))

        for an_ns in adapter.namespaces.namespaces:
            if an_ns.name in self.task_ids:
                # keep the first task for a name rather than orphaning it
                continue
            ns_schemas = adapter.namespace_schemas(an_ns.name)
            self.task_ids[an_ns.name] = self.progress.add_task(
                '', name=an_ns.name, action='',
                total=len(ns_schemas)
            )

        for imported_ns in adapter.imported:
            self._add_tasks(imported_ns, seen)

    def update(self, namespace:str, **kwargs):
        """
        Update the task for ``namespace``; ``kwargs`` are passed to the progress update.

        A namespace that was not known when the display was created gets a task
        with no total, so a display problem never interrupts a build.
        """
        task_id = self.task_ids.get(namespace)
        # compare against None explicitly: the first task id is 0
        if task_id is None:
            task_id = self.progress.add_task(
                '', name=str(namespace), action='',
                total=None
            )
            self.task_ids[namespace] = task_id
        self.progress.update(task_id, **kwargs)

    def start(self):
        """Start the progress display"""
        self.progress.start()

    def stop(self):
        """Stop the progress display"""
        self.progress.stop()

    def __enter__(self) -> Live:
        """Show the panel in a live display and return that display"""
        self._live = Live(self.panel)
        return self._live.__enter__()

    def __exit__(self, *args):
        """Close the live display opened in ``__enter__``"""
        return self._live.__exit__(*args)

View file

@ -1,9 +1,8 @@
import pytest import pytest
import tempfile
import shutil import shutil
import yaml import yaml
from nwb_linkml.io.git import GitRepo, GitError, NamespaceRepo, NWB_CORE_REPO, HDMF_COMMON_REPO from nwb_linkml.providers.git import GitRepo, NWB_CORE_REPO, HDMF_COMMON_REPO
from nwb_schema_language import Namespaces from nwb_schema_language import Namespaces
@pytest.mark.parametrize( @pytest.mark.parametrize(

View file

@ -0,0 +1,8 @@
import pytest
from nwb_linkml.providers.schema import LinkMLProvider
def test_linkml_provider():
    """Smoke test: a default provider can get the ``core`` namespace without raising"""
    linkml_provider = LinkMLProvider()
    core_view = linkml_provider.get('core')