cute progress bar and also linkml provider works

This commit is contained in:
sneakers-the-rat 2023-09-08 16:12:19 -07:00
parent d15ba8e2e1
commit 0b0fb6c67a
11 changed files with 259 additions and 60 deletions

View file

@ -1,12 +1,8 @@
.. nwb-linkml documentation master file, created by
sphinx-quickstart on Thu Sep 7 18:57:17 2023.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
# nwb-linkml
```{toctree}
:caption: Contents:
:maxdepth: 3
api/index
changelog

View file

@ -71,7 +71,6 @@ addopts = [
"--cov-append",
"--cov-config=.coveragerc",
"--emoji",
"--profile"
]
testpaths = [
"tests",

View file

@ -12,6 +12,7 @@ from pydantic import BaseModel, Field, validator, PrivateAttr
from pprint import pformat
from linkml_runtime.linkml_model import SchemaDefinition
from linkml_runtime.dumpers import yaml_dumper
from time import sleep
from nwb_schema_language import Namespaces
@ -19,6 +20,9 @@ from nwb_schema_language import Namespaces
from nwb_linkml.adapters.adapter import Adapter, BuildResult
from nwb_linkml.adapters.schema import SchemaAdapter
from nwb_linkml.lang_elements import NwbLangSchema
from nwb_linkml.providers.git import DEFAULT_REPOS
from nwb_linkml.ui import AdapterProgress
class NamespacesAdapter(Adapter):
namespaces: Namespaces
@ -33,18 +37,51 @@ class NamespacesAdapter(Adapter):
self._populate_schema_namespaces()
self.split = self._split
def build(self, skip_imports:bool=False) -> BuildResult:
@classmethod
def from_yaml(cls, path: Path) -> 'NamespacesAdapter':
    """
    Create a NamespacesAdapter from a nwb schema language namespaces yaml file.

    Also attempts to provide implicitly imported schema (those referenced by
    namespace key rather than by source, eg. hdmf-common) for every needed
    namespace that has an entry in ``DEFAULT_REPOS``.

    Arguments:
        path (:class:`pathlib.Path`): Path to the namespaces yaml file

    Returns:
        :class:`.NamespacesAdapter` with an adapter for each resolvable needed
        namespace appended to its ``imported`` list
    """
    # NOTE(review): imported at call time, presumably to avoid a circular import
    # between the io and adapters modules - confirm before hoisting
    from nwb_linkml.io import schema as schema_io

    ns_adapter = schema_io.load_namespaces(path)
    ns_adapter = schema_io.load_namespace_schema(ns_adapter, path)

    # Collect namespaces that are needed but not defined by this file.
    # ``dict.fromkeys`` de-duplicates while keeping first-seen order, so a namespace
    # needed by several local namespaces is only fetched and imported once.
    needed_imports = ns_adapter.needed_imports
    need_imports = list(dict.fromkeys(
        needed
        for needed_by_namespace in needed_imports.values()
        for needed in needed_by_namespace
        if needed not in needed_imports
    ))

    for needed in need_imports:
        # namespaces without a known repository are silently left unresolved
        if needed in DEFAULT_REPOS:
            needed_source_ns = DEFAULT_REPOS[needed].provide_from_git()
            needed_adapter = NamespacesAdapter.from_yaml(needed_source_ns)
            ns_adapter.imported.append(needed_adapter)

    return ns_adapter
def build(self, skip_imports:bool=False, progress:Optional[AdapterProgress] = None) -> BuildResult:
if not self._imports_populated and not skip_imports:
self.populate_imports()
sch_result = BuildResult()
for sch in self.schemas:
if progress is not None:
progress.update(sch.namespace, action=sch.name)
sch_result += sch.build()
if progress is not None:
progress.update(sch.namespace, advance=1)
sleep(1)
# recursive step
if not skip_imports:
for imported in self.imported:
imported_build = imported.build()
imported_build = imported.build(progress=progress)
sch_result += imported_build
# add in monkeypatch nwb types
@ -189,18 +226,43 @@ class NamespacesAdapter(Adapter):
"""
versions for each namespace
"""
return {ns['name']:ns['version'] for ns in self.namespaces.namespaces}
versions = {ns.name:ns.version for ns in self.namespaces.namespaces}
for imported in self.imported:
versions.update(imported.versions)
return versions
def namespace_schemas(self, name:str) -> List[str]:
"""
Get the schemas that are defined in a given namespace
"""
ns = [ns for ns in self.namespaces.namespaces if ns.name == name][0]
ns = [ns for ns in self.namespaces.namespaces if ns.name == name]
if len(ns) == 0:
for imported in self.imported:
ns = [ns for ns in imported.namespaces.namespaces if ns.name == name]
if len(ns) > 0:
ns = ns[0]
break
else:
raise NameError(f"Couldnt find namespace {name}")
else:
ns = ns[0]
schema_names = []
for sch in ns.schema_:
if sch.source is not None:
schema_names.append(sch.source)
return schema_names
def schema_namespace(self, name: str) -> Optional[str]:
    """
    Inverse of :meth:`.namespace_schemas` - look up which namespace lists
    ``name`` as the ``source`` of one of its schemas.

    Only this adapter's own namespaces are searched. Schema entries without a
    ``source`` are ignored.

    Returns:
        The name of the first matching namespace, or ``None`` if there is none
    """
    matching = (
        candidate.name
        for candidate in self.namespaces.namespaces
        if any(
            entry.source == name
            for entry in candidate.schema_
            if entry.source is not None
        )
    )
    return next(matching, None)

View file

@ -3,7 +3,7 @@ Manage the operation of nwb_linkml from environmental variables
"""
import tempfile
from pathlib import Path
from pydantic import Field, DirectoryPath, computed_field
from pydantic import Field, DirectoryPath, computed_field, field_validator, FieldValidationInfo
from pydantic_settings import BaseSettings, SettingsConfigDict
class Config(BaseSettings):
@ -33,6 +33,14 @@ class Config(BaseSettings):
"""Directory to store generated pydantic models"""
return self.cache_dir / 'pydantic'
@field_validator('cache_dir', mode='before')
@classmethod
def folder_exists(cls, v: Path, info: FieldValidationInfo) -> Path:
    """
    Make sure the cache directory exists, creating it (and any missing parents) if needed.

    This validator runs in ``before`` mode, so it receives the raw input value
    rather than an already-coerced path - eg. a plain string when the value is
    supplied as text - and therefore has to do the :class:`pathlib.Path`
    conversion itself.

    Raises:
        ValueError: if the path still is not a directory after attempting to create it
    """
    v = Path(v)
    v.mkdir(exist_ok=True, parents=True)
    # explicit check rather than ``assert``, which is stripped when running with ``-O``
    if not v.is_dir():
        raise ValueError(f"Could not create cache directory {v}")
    return v
def __post_init__(self):
self.cache_dir.mkdir(exist_ok=True)
self.linkml_dir.mkdir(exist_ok=True)

View file

@ -9,7 +9,7 @@ from linkml_runtime.loaders import yaml_loader
import yaml
from nwb_schema_language import Namespaces, Group, Dataset
from nwb_linkml.io.git import NamespaceRepo, NWB_CORE_REPO, HDMF_COMMON_REPO
from nwb_linkml.providers.git import NamespaceRepo, NWB_CORE_REPO, HDMF_COMMON_REPO
from nwb_linkml.map import PHASES, Map
from nwb_linkml.adapters.namespaces import NamespacesAdapter
from nwb_linkml.adapters.schema import SchemaAdapter

View file

@ -33,7 +33,7 @@ def version_module_case(name:str) -> str:
"""
name = module_case(name)
if not name.startswith('v'):
name = v + name
name = 'v' + name
return name
def relative_path(target: Path, origin: Path):

View file

@ -2,7 +2,7 @@
Define and manage NWB namespaces in external repositories
"""
import pdb
from typing import Optional
from typing import Optional, Dict
import warnings
from pathlib import Path
import tempfile
@ -39,7 +39,7 @@ HDMF_COMMON_REPO = NamespaceRepo(
DEFAULT_REPOS = {
repo.name: repo for repo in [NWB_CORE_REPO, HDMF_COMMON_REPO]
}
} # type: Dict[str, NamespaceRepo]
class GitError(OSError):
@ -104,7 +104,7 @@ class GitRepo:
"""
The intended commit to check out.
If ``None``, should be the latest commit when the repo was checked out
If ``None``, use ``HEAD``
Should match :prop:`.active_commit`, differs semantically in that it is used to
set the active_commit, while :prop:`.active_commit` reads what commit is actually checked out
@ -112,7 +112,10 @@ class GitRepo:
return self._commit
@commit.setter
def commit(self, commit:str):
def commit(self, commit:str|None):
if commit is None:
self._git_call('checkout', "HEAD")
else:
self._git_call('checkout', commit)
self._commit = commit

View file

@ -12,12 +12,15 @@ Relationship to other modules:
- :mod:`.providers` then use ``adapters`` and ``generators`` to provide models
from generated schema!
"""
import pdb
from typing import Dict, TypedDict, List, Optional, Literal, TypeVar, Any, Dict
from pathlib import Path
import os
from abc import abstractmethod
from abc import abstractmethod, ABC
import warnings
import importlib
from linkml_runtime.linkml_model import SchemaDefinition
from linkml_runtime.linkml_model import SchemaDefinition, SchemaDefinitionName
from linkml_runtime.dumpers import yaml_dumper
from linkml_runtime import SchemaView
@ -28,6 +31,8 @@ from nwb_linkml.adapters.adapter import BuildResult
from nwb_linkml.maps.naming import module_case, version_module_case, relative_path
from nwb_schema_language import Namespaces
from nwb_linkml.generators.pydantic import NWBPydanticGenerator
from nwb_linkml.providers.git import DEFAULT_REPOS
from nwb_linkml.ui import AdapterProgress
class NamespaceVersion(TypedDict):
namespace: str
@ -35,7 +40,7 @@ class NamespaceVersion(TypedDict):
P = TypeVar('P')
class Provider:
class Provider(ABC):
"""
Metaclass for different kind of providers!
"""
@ -51,9 +56,10 @@ class Provider:
config = Config()
self.config = config
self.cache_dir = config.cache_dir
self.verbose = verbose
@abstractmethod
@property
@abstractmethod
def path(self) -> Path:
"""
Base path for this kind of provider
@ -69,7 +75,9 @@ class Provider:
def namespace_path(
self,
namespace: str,
version: Optional[str] = None) -> Path:
version: Optional[str] = None,
allow_repo: bool = True
) -> Path:
"""
Get the location for a given namespace of this type.
@ -85,20 +93,26 @@ class Provider:
recent *version*, but the most recently *generated* version
because it's assumed that's the one you want if you're just
gesturally reaching for one.
allow_repo (bool): Allow the pathfinder to return the installed repository/package,
useful to enforce building into temporary directories, decoupling finding a path
during loading vs. building. Building into the repo is still possible if both
namespace and version are provided (ie. the path is fully qualified) and
:attr:`.config`'s path is the repository path.
"""
namespace_module = module_case(namespace)
namespace_path = self.path / namespace_module
if not namespace_path.exists() and namespace in ('core', 'hdmf-common'):
if not namespace_path.exists() and namespace in ('core', 'hdmf-common') and allow_repo:
# return builtins
module_path = Path(importlib.util.find_spec('nwb_linkml').origin).parent
if self.PROVIDES == 'linkml':
from nwb_linkml import schema
namespace_path = Path(schema.__file__)
namespace_path = module_path / 'schema'
elif self.PROVIDES == 'pydantic':
from nwb_linkml import models
namespace_path = Path(models.__file__)
namespace_path = module_path / 'models'
if version is not None:
version_path = namespace_path / version_module_case(version)
version_path.mkdir(exist_ok=True, parents=True)
else:
# or find the most recently built one
versions = sorted(namespace_path.iterdir(), key=os.path.getmtime)
@ -109,6 +123,10 @@ class Provider:
return version_path
class LinkMLSchemaBuild(TypedDict):
    """Per-namespace build result, as assembled by :meth:`.LinkMLProvider.build`"""
    # build result holding the namespace schema followed by the namespace's other schemas
    result: BuildResult
    # version of the namespace that was built
    version: str
    # path of the ``namespace.yaml`` file for this namespace and version
    namespace: Path
class LinkMLProvider(Provider):
@ -127,37 +145,22 @@ class LinkMLProvider(Provider):
path (:class:`pathlib.Path`): Path to the namespace .yaml
kwargs: passed to :meth:`.build`
"""
sch = {}
ns_dict = io.schema.load_yaml(path)
sch['namespace'] = ns_dict
namespace = Namespaces(**ns_dict)
ns_adapter = adapters.NamespacesAdapter.from_yaml(path)
return self.build(ns_adapter, **kwargs)
for ns in namespace.namespaces:
for schema in ns.schema_:
if schema.source is None:
# this is normal, we'll resolve later
continue
yml_file = path.parent / schema.source
sch[yml_file.stem] = (io.schema.load_yaml(yml_file))
return self.build(schemas=sch, **kwargs)
def build(
def build_from_dicts(
self,
schemas:Dict[str, dict],
versions: Optional[List[NamespaceVersion]] = None,
dump: bool = True,
) -> BuildResult:
**kwargs
) -> Dict[str | SchemaDefinitionName, LinkMLSchemaBuild]:
"""
Build from schema dictionaries, eg. as come from nwb files
Arguments:
schemas (dict): A dictionary of ``{'schema_name': {:schema_definition}}``.
The "namespace" schema should have the key ``namespace``, which is used
to infer version and schema name. Post-load maps should have already
been applied
versions (List[NamespaceVersion]): List of specific versions to use
for cross-namespace imports. If none is provided, use the most recent version
available.
dump (bool): If ``True`` (default), dump generated schema to YAML. otherwise just return
"""
ns = Namespaces(**schemas['namespace'])
typed_schemas = [
@ -171,32 +174,86 @@ class LinkMLProvider(Provider):
namespaces=ns,
schemas=typed_schemas
)
return self.build(ns_adapter, **kwargs)
def build(
        self,
        ns_adapter: adapters.NamespacesAdapter,
        versions: Optional[List[NamespaceVersion]] = None,
        dump: bool = True,
) -> Dict[str | SchemaDefinitionName, LinkMLSchemaBuild]:
    """
    Build LinkML schema from a namespaces adapter, optionally writing them to yaml.

    Arguments:
        ns_adapter (:class:`.NamespacesAdapter`): Adapter (populated with any necessary
            imported namespaces) to build
        versions (List[NamespaceVersion]): List of specific versions to use
            for cross-namespace imports. If none is provided, use the most recent version
            available.
        dump (bool): If ``True`` (default), dump generated schema to YAML. otherwise just return

    Returns:
        dict mapping each built namespace's name to a :class:`.LinkMLSchemaBuild` with
        the path of its ``namespace.yaml``, its version, and a
        :class:`.BuildResult` containing the namespace schema and its other schemas
    """
    self._find_imports(ns_adapter, versions, populate=True)

    if self.verbose:
        progress = AdapterProgress(ns_adapter)
        # entering the progress object renders it for the duration of the build
        with progress:
            built = ns_adapter.build(progress=progress)
    else:
        built = ns_adapter.build()

    build_result = {}

    # namespace-level schemas are the ones carrying a 'namespace' annotation
    namespace_sch = [sch for sch in built.schemas if 'namespace' in sch.annotations.keys()]
    for ns_linkml in namespace_sch:
        version = ns_adapter.versions[ns_linkml.name]
        # allow_repo=False: never resolve to the installed package directory when building
        version_path = self.namespace_path(ns_linkml.name, version, allow_repo=False)
        ns_file = version_path / 'namespace.yaml'
        if dump:
            yaml_dumper.dump(ns_linkml, ns_file)

        # Schema sources may carry a '.yaml' extension while built schemas are matched
        # by bare name. ``removesuffix`` drops only that exact suffix; ``strip('.yaml')``
        # would strip any of the characters '.', 'y', 'a', 'm', 'l' from both ends,
        # eg. turning 'nwb.retinotopy.yaml' into 'nwb.retinotop'.
        ns_schema_names = {
            name.removesuffix('.yaml')
            for name in ns_adapter.namespace_schemas(ns_linkml.name)
        }
        other_schema = [sch for sch in built.schemas if sch.name in ns_schema_names]

        # write the schemas for this namespace
        if dump:
            for sch in other_schema:
                output_file = version_path / (sch.name + '.yaml')
                yaml_dumper.dump(sch, output_file)

        # make return result for just this namespace
        build_result[ns_linkml.name] = LinkMLSchemaBuild(
            namespace=ns_file,
            result=BuildResult(schemas=[ns_linkml, *other_schema]),
            version=version
        )

    return build_result
def get(self, namespace: str, version: Optional[str] = None) -> SchemaView:
    """
    Get a schema view over the namespace.

    Uses the ``namespace.yaml`` under :meth:`.namespace_path` when that file
    exists, and otherwise falls back to :meth:`._find_source` to locate it.
    """
    ns_file = self.namespace_path(namespace, version) / 'namespace.yaml'
    if ns_file.exists():
        return SchemaView(ns_file)
    return SchemaView(self._find_source(namespace, version))
def _find_source(self, namespace: str, version: Optional[str] = None) -> Path:
    """
    Try and find the namespace if it exists in our default repository and build it!

    Arguments:
        namespace (str): Name of the namespace, used as the key into ``DEFAULT_REPOS``
        version (str): Passed to the repository as the commit to provide

    Returns:
        The ``'namespace'`` entry of the build result for ``namespace``

    Raises:
        KeyError: if no repository is configured for ``namespace``
    """
    repo = DEFAULT_REPOS.get(namespace)
    if repo is None:
        raise KeyError(f"Namespace {namespace} could not be found, and no git repository source has been configured!")

    source_file = repo.provide_from_git(commit=version)
    built = self.build_from_yaml(source_file)
    return built[namespace]['namespace']
def _find_imports(self,
ns: adapters.NamespacesAdapter,

View file

@ -0,0 +1,67 @@
"""
UI Elements :)
"""
from typing import TYPE_CHECKING
from rich.live import Live
from rich.panel import Panel
from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, Column
if TYPE_CHECKING:
from nwb_linkml.adapters.namespaces import NamespacesAdapter
class AdapterProgress:
    """
    Progress display for building a :class:`.NamespacesAdapter`.

    Registers one progress task per namespace - those of the adapter itself plus
    those of its directly imported adapters - and wraps the bars in a panel.
    Use as a context manager to render the panel live while building.
    """

    def __init__(self, ns: 'NamespacesAdapter'):
        self.ns = ns
        # namespace name -> id of its task in ``self.progress``
        self.task_ids = {}

        self.progress = Progress(
            SpinnerColumn(),
            TextColumn("[bold blue]{task.fields[name]} - [bold red]{task.fields[action]}",
                       table_column=Column(ratio=1)),
            BarColumn(table_column=Column(ratio=1), bar_width=None),
            TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
            expand=True
        )

        # the adapter's own namespaces first, then those of each direct import
        for adapter in (self.ns, *self.ns.imported):
            self._add_namespace_tasks(adapter)

        self.panel = Panel(
            self.progress,
            title="Building Namespaces",
            border_style="green",
            padding=(2, 2)
        )

    def _add_namespace_tasks(self, adapter: 'NamespacesAdapter'):
        """Add one task per namespace in ``adapter``, sized by its number of schemas."""
        for an_ns in adapter.namespaces.namespaces:
            ns_schemas = adapter.namespace_schemas(an_ns.name)
            self.task_ids[an_ns.name] = self.progress.add_task(
                '', name=an_ns.name, action='',
                total=len(ns_schemas)
            )

    def update(self, namespace: str, **kwargs):
        """Update the task for ``namespace``; ``kwargs`` are passed to ``Progress.update``."""
        self.progress.update(self.task_ids[namespace], **kwargs)

    def start(self):
        """Start the underlying progress object directly."""
        self.progress.start()

    def stop(self):
        """Stop the underlying progress object."""
        self.progress.stop()

    def __enter__(self) -> Live:
        """Create a live display of the panel and enter it."""
        self._live = Live(self.panel)
        return self._live.__enter__()

    def __exit__(self, *args):
        """Exit the live display created in ``__enter__``."""
        return self._live.__exit__(*args)

View file

@ -1,9 +1,8 @@
import pytest
import tempfile
import shutil
import yaml
from nwb_linkml.io.git import GitRepo, GitError, NamespaceRepo, NWB_CORE_REPO, HDMF_COMMON_REPO
from nwb_linkml.providers.git import GitRepo, NWB_CORE_REPO, HDMF_COMMON_REPO
from nwb_schema_language import Namespaces
@pytest.mark.parametrize(

View file

@ -0,0 +1,8 @@
import pytest
from nwb_linkml.providers.schema import LinkMLProvider
def test_linkml_provider():
    """Smoke test: a default provider can get the ``core`` namespace without raising."""
    LinkMLProvider().get('core')