working on providers to cache generated models

sneakers-the-rat 2023-09-07 18:50:50 -07:00
parent 0ec09a035a
commit a01bb49b1e
12 changed files with 560 additions and 29 deletions

nwb_linkml/poetry.lock generated
View file

@@ -1385,6 +1385,21 @@ files = [
[package.dependencies]
typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
[[package]]
name = "pydantic-settings"
version = "2.0.3"
description = "Settings management using Pydantic"
optional = false
python-versions = ">=3.7"
files = [
{file = "pydantic_settings-2.0.3-py3-none-any.whl", hash = "sha256:ddd907b066622bd67603b75e2ff791875540dc485b7307c4fffc015719da8625"},
{file = "pydantic_settings-2.0.3.tar.gz", hash = "sha256:962dc3672495aad6ae96a4390fac7e593591e144625e5112d359f8f67fb75945"},
]
[package.dependencies]
pydantic = ">=2.0.1"
python-dotenv = ">=0.21.0"
[[package]]
name = "pygments"
version = "2.16.1"
@@ -1598,6 +1613,20 @@ files = [
[package.dependencies]
six = ">=1.5"
[[package]]
name = "python-dotenv"
version = "1.0.0"
description = "Read key-value pairs from a .env file and set them as environment variables"
optional = false
python-versions = ">=3.8"
files = [
{file = "python-dotenv-1.0.0.tar.gz", hash = "sha256:a8df96034aae6d2d50a4ebe8216326c61c3eb64836776504fcca410e5937a3ba"},
{file = "python_dotenv-1.0.0-py3-none-any.whl", hash = "sha256:f5971a9226b701070a4bf2c38c89e5a3f0d64de8debda981d1db98583009122a"},
]
[package.extras]
cli = ["click (>=5.0)"]
[[package]]
name = "pytrie"
version = "0.4.0"
@@ -2367,9 +2396,9 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[extras]
plot = ["dash", "dash-cytoscape"]
tests = ["coverage", "coveralls", "pytest", "pytest-cov", "pytest-depends", "pytest-emoji", "pytest-md"]
tests = ["coverage", "coveralls", "pytest", "pytest-cov", "pytest-depends", "pytest-emoji", "pytest-md", "pytest-profiling"]
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "7ae9160a401b3bfa2f4535696ecf15e33815e356f7757ee611893c701485d24f"
content-hash = "9ef89b731746d07d428c6cff4a8c8b4771fbfcfcc8f17120ce3c6089e5161eb6"

View file

@@ -30,6 +30,7 @@ pytest-emoji = {version="^0.2.0", optional = true}
pytest-cov = {version = "^4.1.0", optional = true}
coveralls = {version = "^3.3.1", optional = true}
pytest-profiling = {version = "^1.7.0", optional = true}
pydantic-settings = "^2.0.3"
[tool.poetry.extras]
tests = [

View file

@@ -1,28 +1,16 @@
"""
Adapters to linkML classes
"""
import re
from abc import abstractmethod
from typing import List, Optional
from nwb_schema_language import Dataset, Group, ReferenceDtype, CompoundDtype, DTypeType
from nwb_linkml.adapters.adapter import Adapter, BuildResult
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
from nwb_linkml.maps import QUANTITY_MAP
from nwb_linkml.maps.naming import camel_to_snake
CAMEL_TO_SNAKE = re.compile('((?<=[a-z0-9])[A-Z]|(?!^)[A-Z](?=[a-z]))')
"""
Convert camel case to snake case
courtesy of: https://stackoverflow.com/a/12867228
"""
def camel_to_snake(name:str) -> str:
"""
Convert camel case to snake case
courtesy of: https://stackoverflow.com/a/12867228
"""
return CAMEL_TO_SNAKE.sub(r'_\1', name).lower()
class ClassAdapter(Adapter):
"""

View file

@@ -9,7 +9,8 @@ from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
from pydantic import PrivateAttr
from nwb_schema_language import Dataset, ReferenceDtype, CompoundDtype, DTypeType
from nwb_linkml.adapters.classes import ClassAdapter, camel_to_snake
from nwb_linkml.adapters.classes import ClassAdapter
from nwb_linkml.maps.naming import camel_to_snake
from nwb_linkml.adapters.adapter import BuildResult
from nwb_linkml.maps import QUANTITY_MAP

View file

@@ -6,7 +6,8 @@ from typing import List
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
from nwb_schema_language import Dataset, Group, ReferenceDtype, CompoundDtype, DTypeType
from nwb_linkml.adapters.classes import ClassAdapter, camel_to_snake
from nwb_linkml.adapters.classes import ClassAdapter
from nwb_linkml.maps.naming import camel_to_snake
from nwb_linkml.adapters.dataset import DatasetAdapter
from nwb_linkml.adapters.adapter import BuildResult
from nwb_linkml.maps import QUANTITY_MAP

View file

@@ -6,7 +6,7 @@ for extracting information and generating translated schema
"""
import pdb
from typing import List, Optional
from typing import List, Optional, Dict
from pathlib import Path
from pydantic import BaseModel, Field, validator, PrivateAttr
from pprint import pformat
@@ -33,8 +33,8 @@ class NamespacesAdapter(Adapter):
self._populate_schema_namespaces()
self.split = self._split
def build(self) -> BuildResult:
if not self._imports_populated:
def build(self, skip_imports:bool=False) -> BuildResult:
if not self._imports_populated and not skip_imports:
self.populate_imports()
@@ -42,9 +42,10 @@ class NamespacesAdapter(Adapter):
for sch in self.schemas:
sch_result += sch.build()
# recursive step
for imported in self.imported:
imported_build = imported.build()
sch_result += imported_build
if not skip_imports:
for imported in self.imported:
imported_build = imported.build()
sch_result += imported_build
# add in monkeypatch nwb types
sch_result.schemas.append(NwbLangSchema)
@@ -53,7 +54,8 @@ class NamespacesAdapter(Adapter):
for ns in self.namespaces.namespaces:
ns_schemas = [sch.name for sch in self.schemas if sch.namespace == ns.name]
# also add imports bc, well, we need them
ns_schemas.extend([ns.name for imported in self.imported for ns in imported.namespaces.namespaces])
if not skip_imports:
ns_schemas.extend([ns.name for imported in self.imported for ns in imported.namespaces.namespaces])
ns_schema = SchemaDefinition(
name = ns.name,
id = ns.name,
@@ -164,4 +166,41 @@ class NamespacesAdapter(Adapter):
output_file = base_dir / (schema.name + '.yaml')
yaml_dumper.dump(schema, output_file)
@property
def needed_imports(self) -> Dict[str, List[str]]:
"""
List of other, external namespaces that we need to import.
Usually provided as schema with a namespace but not a source
Returns:
{'namespace_name': ['needed_import_0', ...]}
"""
needed_imports = {}
for a_ns in self.namespaces.namespaces:
needed_imports[a_ns.name] = []
for potential_import in a_ns.schema_:
if potential_import.namespace and not potential_import.source:
needed_imports[a_ns.name].append(potential_import.namespace)
return needed_imports
@property
def versions(self) -> Dict[str, str]:
"""
versions for each namespace
"""
return {ns.name: ns.version for ns in self.namespaces.namespaces}
def namespace_schemas(self, name:str) -> List[str]:
"""
Get the schemas that are defined in a given namespace
"""
ns = [ns for ns in self.namespaces.namespaces if ns.name == name][0]
schema_names = []
for sch in ns.schema_:
if sch.source is not None:
schema_names.append(sch.source)
return schema_names
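
A rough sketch of what these new helpers return, assuming a hypothetical namespace "my-lab" that imports hdmf-common by namespace (no source) and lists its own schema files by source; the values are illustrative, not part of this diff:

# ns_adapter: a NamespacesAdapter built elsewhere (e.g. by LinkMLProvider.build, below)
ns_adapter.needed_imports               # {'my-lab': ['hdmf-common']}
ns_adapter.versions                     # {'my-lab': '0.1.0'}
ns_adapter.namespace_schemas('my-lab')  # ['my-lab.base.yaml', 'my-lab.ephys.yaml']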

View file

@@ -29,7 +29,7 @@ class SchemaAdapter(Adapter):
path: Path
groups: List[Group] = Field(default_factory=list)
datasets: List[Dataset] = Field(default_factory=list)
imports: List['SchemaAdapter'] = Field(default_factory=list)
imports: List[Union['SchemaAdapter', str]] = Field(default_factory=list)
namespace: Optional[str] = Field(
None,
description="""String of containing namespace. Populated by NamespacesAdapter""")
@@ -48,7 +48,7 @@
out_str += '-'*len(self.name) + '\n'
if len(self.imports) > 0:
out_str += "Imports:\n"
out_str += " " + ', '.join([i.name for i in self.imports]) + '\n'
out_str += " " + ', '.join([i.name if isinstance(i, SchemaAdapter) else i for i in self.imports ]) + '\n'
out_str += 'Groups:\n'
out_str += ' ' + ', '.join([g.neurodata_type_def for g in self.groups])
@@ -83,10 +83,11 @@
return sch_split
else:
sch = SchemaDefinition(
name = self.name,
id = self.name,
imports = [i.name for i in self.imports],
imports = [i.name if isinstance(i, SchemaAdapter) else i for i in self.imports ],
classes=res.classes,
slots=res.slots,
types=res.types
@@ -113,7 +114,7 @@
split_sch_name = '.'.join([self.name, 'include'])
imports = [i.name for i in self.imports]
imports = [i.name if isinstance(i, SchemaAdapter) else i for i in self.imports ]
imports.append('nwb.language')
# need to mutually import the two schemas because the subclasses
# could refer to the main classes

View file

@@ -0,0 +1,41 @@
"""
Manage the operation of nwb_linkml from environmental variables
"""
import tempfile
from pathlib import Path
from pydantic import Field, DirectoryPath, computed_field
from pydantic_settings import BaseSettings, SettingsConfigDict
class Config(BaseSettings):
"""
Configuration for nwb_linkml, populated by default but can be overridden
by environment variables.
Examples:
export NWB_LINKML_CACHE_DIR="/home/mycache/dir"
"""
model_config = SettingsConfigDict(env_prefix="nwb_linkml_")
cache_dir: DirectoryPath = Field(
default_factory= lambda: Path(tempfile.gettempdir()) / 'nwb_linkml__cache',
description="Location to cache generated schema and models")
@computed_field
@property
def linkml_dir(self) -> Path:
"""Directory to store generated linkml models"""
return self.cache_dir / 'linkml'
@computed_field
@property
def pydantic_dir(self) -> Path:
"""Directory to store generated pydantic models"""
return self.cache_dir / 'pydantic'
# note: pydantic doesn't call __post_init__; model_post_init is the pydantic v2 hook
def model_post_init(self, __context) -> None:
self.cache_dir.mkdir(exist_ok=True)
self.linkml_dir.mkdir(exist_ok=True)
self.pydantic_dir.mkdir(exist_ok=True)
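
For context, a minimal sketch of how this pydantic-settings class resolves values; the paths are hypothetical (DirectoryPath expects them to exist):

import os
from nwb_linkml.config import Config

config = Config()                              # cache_dir defaults to <tmpdir>/nwb_linkml__cache
os.environ['NWB_LINKML_CACHE_DIR'] = '/home/mycache/dir'  # env_prefix maps this onto cache_dir
config = Config()                              # cache_dir == Path('/home/mycache/dir')
config = Config(cache_dir='/somewhere/else')   # explicit kwargs take precedence over the environment
print(config.linkml_dir, config.pydantic_dir)  # computed subdirectories of cache_dir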

View file

@@ -45,6 +45,7 @@ from linkml_runtime.utils.schemaview import SchemaView
from linkml_runtime.utils.compile_python import file_text
from linkml.utils.ifabsent_functions import ifabsent_value_declaration
from nwb_linkml.maps.naming import module_case, version_module_case
from jinja2 import Template
@@ -193,6 +194,8 @@ class NWBPydanticGenerator(PydanticGenerator):
# SKIP_CLASSES=('VectorData','VectorIndex')
split:bool=True
schema_map:Dict[str, SchemaDefinition]=None
versions:List[dict] = None
"""See :meth:`.LinkMLProvider.build` for usage - a list of specific versions to import from"""
def _locate_imports(
@@ -217,7 +220,12 @@ class NWBPydanticGenerator(PydanticGenerator):
if module_name == self.schema.name:
continue
local_mod_name = '.' + module_name.replace('.', '_').replace('-', '_')
if self.versions and module_name in [v['name'] for v in self.versions]:
version = version_module_case([v['version'] for v in self.versions if v['name'] == module_name][0])
local_mod_name = '....' + module_case(module_name) + '.' + version + '.' + 'namespace'
else:
local_mod_name = '.' + module_case(module_name)
if local_mod_name not in imports:
imports[local_mod_name] = [camelcase(cls)]
else:

View file

@@ -0,0 +1,52 @@
import re
from pathlib import Path
CAMEL_TO_SNAKE = re.compile('((?<=[a-z0-9])[A-Z]|(?!^)[A-Z](?=[a-z]))')
"""
Convert camel case to snake case
courtesy of: https://stackoverflow.com/a/12867228
"""
def camel_to_snake(name:str) -> str:
"""
Convert camel case to snake case
courtesy of: https://stackoverflow.com/a/12867228
"""
return CAMEL_TO_SNAKE.sub(r'_\1', name).lower()
def module_case(name:str) -> str:
"""
Returns name that can be used as a python module, used for
referring to generated pydantic and linkml models.
Replaces with underscores:
- -
- .
"""
return name.replace('-', '_').replace('.', '_').lower()
def version_module_case(name:str) -> str:
"""
:func:`.module_case` except ensure that it starts with "v"
"""
name = module_case(name)
if not name.startswith('v'):
name = 'v' + name
return name
def relative_path(target: Path, origin: Path):
"""
return path of target relative to origin, even if they're
not in the same subpath
References:
- https://stackoverflow.com/a/71874881
"""
try:
return Path(target).resolve().relative_to(Path(origin).resolve())
except ValueError as e: # target does not start with origin
# recursion with origin (eventually origin is root so try will succeed)
return Path('..').joinpath(relative_path(target, Path(origin).parent))
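
A few worked examples of these helpers (inputs are hypothetical):

from pathlib import Path
from nwb_linkml.maps.naming import camel_to_snake, module_case, version_module_case, relative_path

camel_to_snake('DynamicTableRegion')   # 'dynamic_table_region'
module_case('hdmf-common')             # 'hdmf_common'
module_case('nwb.core')                # 'nwb_core'
version_module_case('2.6.0')           # 'v2_6_0'
# relative_path works even when target is not beneath origin:
relative_path(Path('/cache/linkml/core/v2_6_0'), Path('/cache/linkml/hdmf_common/v1_5_0'))  # Path('../../core/v2_6_0')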

View file

@@ -0,0 +1,370 @@
"""
Class for managing, building, and caching built schemas.
The nwb.core and hdmf-common schema are statically built and stored in this repository,
but to make it feasible to use arbitrary schema, e.g. those stored inside of
an NWB file, we need a bit of infrastructure for generating and caching
pydantic models on the fly.
Relationship to other modules:
- :mod:`.adapters` manage the conversion from NWB schema language to linkML.
- :mod:`.generators` create models like pydantic models from the linkML schema
- :mod:`.providers` then use ``adapters`` and ``generators`` to provide models
from generated schema!
"""
from typing import Dict, TypedDict, List, Optional, Literal, TypeVar, Any
from pathlib import Path
import os
from abc import abstractmethod
from linkml_runtime.linkml_model import SchemaDefinition
from linkml_runtime.dumpers import yaml_dumper
from linkml_runtime import SchemaView
from nwb_linkml.config import Config
from nwb_linkml import io
from nwb_linkml import adapters
from nwb_linkml.adapters.adapter import BuildResult
from nwb_linkml.maps.naming import module_case, version_module_case, relative_path
from nwb_schema_language import Namespaces
from nwb_linkml.generators.pydantic import NWBPydanticGenerator
class NamespaceVersion(TypedDict):
namespace: str
version: str
P = TypeVar('P')
class Provider:
"""
Base class for the different kinds of providers!
"""
PROVIDES: str
PROVIDES_CLASS: P = None
def __init__(self,
path: Optional[Path] = None,
verbose: bool = True):
if path is not None:
config = Config(cache_dir=path)
else:
config = Config()
self.config = config
self.cache_dir = config.cache_dir
@property
@abstractmethod
def path(self) -> Path:
"""
Base path for this kind of provider
"""
@abstractmethod
def build(self, *args: Any):
"""
Whatever needs to be done to build this thing, if applicable
"""
def namespace_path(
self,
namespace: str,
version: Optional[str] = None) -> Path:
"""
Get the location for a given namespace of this type.
Note that we don't check for existence, because this method should
also be used when generating schema --- this is the canonical location
Arguments:
namespace (str): Namespace to get!
version (str): Optional, version of namespace. If ``None``,
either get the most recent version built, or if
``namespace`` is ``core`` or ``hdmf-common``, use the
modules provided with the package. We do not use the most
recent *version*, but the most recently *generated* version
because it's assumed that's the one you want if you're just
gesturally reaching for one.
"""
namespace_module = module_case(namespace)
namespace_path = self.path / namespace_module
if not namespace_path.exists() and namespace in ('core', 'hdmf-common'):
# return builtins
if self.PROVIDES == 'linkml':
from nwb_linkml import schema
namespace_path = Path(schema.__file__).parent
elif self.PROVIDES == 'pydantic':
from nwb_linkml import models
namespace_path = Path(models.__file__).parent
if version is not None:
version_path = namespace_path / version_module_case(version)
else:
# or find the most recently built one
versions = sorted(namespace_path.iterdir(), key=os.path.getmtime)
if len(versions) == 0:
raise FileNotFoundError('No version provided, and no existing schema found')
version_path = versions[-1]
return version_path
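
To make the resolution rules concrete, a minimal sketch of namespace_path on a LinkMLProvider (namespace names and versions are hypothetical):

provider = LinkMLProvider()
provider.namespace_path('hdmf-common', '1.5.0')
# -> <cache_dir>/linkml/hdmf_common/v1_5_0, assuming that version was previously built
provider.namespace_path('hdmf-common')
# -> the most recently *generated* version directory (sorted by mtime)
provider.namespace_path('core')
# -> falls back to the schema bundled with nwb_linkml when nothing is cached yet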
class LinkMLProvider(Provider):
PROVIDES = 'linkml'
PROVIDES_CLASS = SchemaDefinition
@property
def path(self) -> Path:
return self.config.linkml_dir
def build_from_yaml(self, path: Path, **kwargs):
"""
Build a namespace's schema
Arguments:
path (:class:`pathlib.Path`): Path to the namespace .yaml
kwargs: passed to :meth:`.build`
"""
sch = {}
ns_dict = io.schema.load_yaml(path)
sch['namespace'] = ns_dict
namespace = Namespaces(**ns_dict)
for ns in namespace.namespaces:
for schema in ns.schema_:
if schema.source is None:
# this is normal, we'll resolve later
continue
yml_file = path.parent / schema.source
sch[yml_file.stem] = (io.schema.load_yaml(yml_file))
return self.build(schemas=sch, **kwargs)
def build(
self,
schemas:Dict[str, dict],
versions: Optional[List[NamespaceVersion]] = None,
dump: bool = True,
) -> BuildResult:
"""
Arguments:
schemas (dict): A dictionary of ``{'schema_name': {:schema_definition}}``.
The "namespace" schema should have the key ``namespace``, which is used
to infer version and schema name. Post-load maps should have already
been applied
versions (List[NamespaceVersion]): List of specific versions to use
for cross-namespace imports. If none is provided, use the most recent version
available.
dump (bool): If ``True`` (default), dump generated schema to YAML. Otherwise just return
"""
ns = Namespaces(**schemas['namespace'])
typed_schemas = [
io.schema.load_schema_file(
path=Path(key + ".yaml"),
yaml=val)
for key,val in schemas.items()
if key != 'namespace'
]
ns_adapter = adapters.NamespacesAdapter(
namespaces=ns,
schemas=typed_schemas
)
self._find_imports(ns_adapter, versions, populate=True)
built = ns_adapter.build()
# write schemas to yaml files
namespace_sch = [sch for sch in built.schemas if 'namespace' in sch.annotations.keys()]
for ns_linkml in namespace_sch:
version = ns_adapter.versions[ns_linkml.name]
version_path = self.namespace_path(ns_linkml.name, version)
version_path.mkdir(exist_ok=True, parents=True)
yaml_dumper.dump(ns_linkml, version_path / 'namespace.yaml')
# write the schemas for this namespace
ns_schema_names = ns_adapter.namespace_schemas(ns_linkml.name)
other_schema = [sch for sch in built.schemas if sch.name in ns_schema_names]
for sch in other_schema:
output_file = version_path / (sch.name + '.yaml')
yaml_dumper.dump(sch, output_file)
return built
def get(self, namespace: str, version: Optional[str] = None) -> SchemaView:
"""
Get a schema view over the namespace
"""
path = self.namespace_path(namespace, version) / 'namespace.yaml'
return SchemaView(path)
def _find_imports(self,
ns: adapters.NamespacesAdapter,
versions: Optional[List[NamespaceVersion]] = None,
populate: bool=True) -> Dict[str, List[str]]:
"""
Find relative paths to other linkml schema that need to be
imported, but lack an explicit source
Arguments:
ns (:class:`.NamespacesAdapter`): Namespaces to find imports to
versions (List[:class:`.NamespaceVersion`]): Specific versions to import
populate (bool): If ``True`` (default), modify the namespace adapter to include the imports,
otherwise just return
Returns:
dict of lists for relative paths to other schema namespaces
"""
import_paths = {}
for ns_name, needed_imports in ns.needed_imports.items():
our_path = self.namespace_path(ns_name, ns.versions[ns_name]) / 'namespace.yaml'
import_paths[ns_name] = []
for needed_import in needed_imports:
needed_version = None
if versions:
needed_versions = [v['version'] for v in versions if v['namespace'] == needed_import]
if len(needed_versions) > 0:
needed_version = needed_versions[0]
version_path = self.namespace_path(needed_import, needed_version) / 'namespace.yaml'
import_paths[ns_name].append(str(relative_path(version_path, our_path)))
if populate:
for sch in ns.schemas:
sch.imports.extend(import_paths.get(sch.namespace, []))  # paths are keyed by namespace
return import_paths
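
Taken together, a hedged usage sketch for LinkMLProvider (the yaml path and pinned version are hypothetical):

provider = LinkMLProvider()
provider.build_from_yaml(
    Path('nwb.namespace.yaml'),   # hypothetical namespace file
    versions=[NamespaceVersion(namespace='hdmf-common', version='1.5.0')],
)
core_view = provider.get('core')  # SchemaView over the cached namespace.yaml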
class PydanticProvider(Provider):
PROVIDES = 'pydantic'
@property
def path(self) -> Path:
return self.config.pydantic_dir
def build(
self,
namespace: str | Path,
version: Optional[str] = None,
versions: Optional[List[NamespaceVersion]] = None,
dump: bool = True
) -> str:
if isinstance(namespace, str) and not (namespace.endswith('.yaml') or namespace.endswith('.yml')):
# we're given a name of a namespace to build
path = LinkMLProvider(path=self.config.cache_dir).namespace_path(namespace, version) / 'namespace.yaml'
else:
# given a path to a namespace linkml yaml file
path = Path(namespace)
generator = NWBPydanticGenerator(
str(path),
split=False,
versions=versions,
emit_metadata=True,
gen_slots=True,
pydantic_version='2'
)
serialized = generator.serialize()
if dump:
out_file = self.path / path.parts[-3] / path.parts[-2] / 'namespace.py'
out_file.parent.mkdir(parents=True, exist_ok=True)
with open(out_file, 'w') as ofile:
ofile.write(serialized)
return serialized
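
And a corresponding sketch for PydanticProvider (namespace and versions are hypothetical; it assumes the linkml schema was already built and cached):

pydantic_provider = PydanticProvider()
module_source = pydantic_provider.build(
    'core',   # namespace name, resolved via LinkMLProvider.namespace_path
    versions=[NamespaceVersion(namespace='hdmf-common', version='1.5.0')],
)
# dump=True (the default) also writes <cache_dir>/pydantic/<namespace>/<version>/namespace.py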
class SchemaProvider:
"""
Class to manage building and caching linkml and pydantic models generated
from nwb schema language
Behaves like a singleton without needing to be one - since we're working off
caches on disk that are indexed by hash, in most "normal" conditions you should
be able to use this anywhere, though no file-level locks are present to ensure
consistency.
Store each generated schema in a directory structure indexed by
schema namespace name and a truncated hash of the loaded schema dictionaries
(not the hash of the .yaml file, since we are also provided schema in nwbfiles)
eg:
cache_dir
- linkml
- nwb_core
- hash_532gn90f
- nwb.core.namespace.yaml
- nwb.core.file.yaml
- ...
- hash_fuia082f
- nwb.core.namespace.yaml
- ...
- my_schema
- hash_t3tn908h
- ...
- pydantic
- nwb_core
- hash_532gn90f
- core.py
- ...
- hash_fuia082f
- core.py
- ...
"""
def __init__(self,
path: Optional[Path] = None,
verbose: bool = True):
"""
Arguments:
path (:class:`pathlib.Path`): If provided, output to an explicit base directory.
Otherwise use that provided in ``NWB_LINKML_CACHE_DIR``
verbose (bool): If ``True`` (default), show progress bars and other messages
useful for interactive use
"""
if path is not None:
config = Config(cache_dir=path)
else:
config = Config()
self.cache_dir = config.cache_dir
self.pydantic_dir = config.pydantic_dir
self.linkml_dir = config.linkml_dir
self.verbose = verbose
def generate_linkml(
self,
schemas:Dict[str, dict],
versions: Optional[List[NamespaceVersion]] = None
):
"""
Generate linkml from loaded nwb schemas, either from yaml or from an
nwb file's ``/specifications`` group.
Arguments:
schemas (dict): A dictionary of ``{'schema_name': {:schema_definition}}``.
The "namespace" schema should have the key ``namespace``, which is used
to infer version and schema name. Post-load maps should have already
been applied
versions (List[NamespaceVersion]): List of specific versions to use
for cross-namespace imports. If none is provided, use the most recent version
available.
"""