more docs work

move conftest to root so we find doctests? maybe we don't want to keep that tho
sneakers-the-rat 2023-10-19 21:20:50 -07:00
parent 23a5412854
commit eac5ef4c80
20 changed files with 422 additions and 159 deletions


@@ -16,7 +16,7 @@ help:
 serve:
 	# env variable that makes it so we don't build all the models while in dev mode
-	SPHINX_MINIMAL="True" sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \
+	SPHINX_MINIMAL="True" sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) $(O) \
 		--watch ../nwb_linkml/src/nwb_linkml \
 		--watch ../nwb_schema_language/src/nwb_schema_language \
 		--re-ignore ".*jupyter_execute.*"


@@ -54,7 +54,7 @@ dtypes can be
 eg:
-```yml
+```yaml
 - neurodata_type_def: Image
   neurodata_type_inc: NWBData
   dtype: numeric
@@ -122,7 +122,7 @@ we need to map:
 - dims, shape, and dtypes: these should have been just attributes rather than put in the spec
   language, so we'll just make an Array class and use that.
 - dims and shape should probably be a dictionary so you don't need a zillion nulls, eg rather than
-```yml
+```yaml
 dims:
 - - x
   - y
@@ -137,7 +137,7 @@ we need to map:
   - 3
 ```
 do
-```yml
+```yaml
 dims:
 - - name: x
   - name: y
@@ -147,7 +147,7 @@ we need to map:
   shape: 3
 ```
 or even
-```yml
+```yaml
 dims:
 - - x
   - y
@@ -160,7 +160,7 @@ we need to map:
 And also is there any case that would break where there is some odd dependency between dims where it wouldn't work to just use an `optional` param
-```yml
+```yaml
 dims:
 - name: x
   shape: null


@@ -18,5 +18,22 @@ pre {
 }
 .hide.below-input span {
-    color: var(--color-background-secondary);
+    color: var(--color-highlighted-text);
+}
+div.cell details.below-input > summary {
+    background-color: var(--color-background-primary);
+}
+.cell_output pre {
+    border: 1px solid var(--color-code-foreground);
+    padding: 1em;
+    border-radius: 5px;
+}
+div.cell div.cell_input, div.cell details.above-input > summary {
+    border-left-color: unset;
+}
+div.cell details.below-input > summary {
+    border-left-color: unset;
 }


@@ -1,6 +1,17 @@
 # Adapters
+
+Adapters translate NWB Schema Language to LinkML Schema.
+
+- [**Adapter**](adapter.md) - Base Adapter Classes
+- [**Namespaces**](namespaces.md) - Top-level container of NWB namespace indices and schema
+- [**Schema**](schema.md) - Individual NWB Schema files within a namespace
+- [**Classes**](classes.md) - Root methods shared between classes and groups
+- [**Dataset**](dataset.md) - ... Datasets!
+- [**Group**](group.md) - Groups!
+
 ```{toctree}
+:hidden:
 adapter
 classes
 dataset


@@ -29,7 +29,8 @@ extensions = [
     "sphinx_design",
     #'myst_parser',
     "myst_nb",
-    'sphinx_togglebutton'
+    'sphinx_togglebutton',
+    'sphinx.ext.todo'
 ]
@@ -102,10 +103,23 @@ autodoc_default_options = {
 nb_render_markdown_format = 'myst'
 nb_append_css = False

+# --------------------------------------------------
 # doctest
 doctest_global_setup = """
 from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition, SchemaDefinition
+from nwb_schema_language import Namespaces, Namespace, Dataset, Group, Schema
+from linkml_runtime.dumpers import yaml_dumper
+import yaml
 from pydantic import BaseModel, Field
 import numpy as np
+from nwb_linkml.adapters import BuildResult
 """
+
+# --------------------------------------------------
+# Etc one-off settings
+
+# todo
+todo_include_todos = True
+todo_link_only = True
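
For orientation (not part of this commit): `doctest_global_setup` is injected before every Sphinx doctest, so examples in the docs can use names like `SlotDefinition` or `BuildResult` without importing them. A minimal sketch of the kind of doctest this enables, with hypothetical values:

```python
# Hypothetical docstring doctest; under doctest_global_setup, SlotDefinition
# is already in scope, so the example needs no import line of its own.
def example():
    """
    >>> slot = SlotDefinition(name='value', range='int32', required=False)
    >>> slot.name
    'value'
    """
```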


@@ -101,7 +101,16 @@ and `ClassDefinition` classes:
 print(core_linkml.schemas[0])
 ```

+## Generating Pydantic Models
+
+```{todo}
+Document Pydantic model generation
+```
+
+## Caching Output with Providers
+
+```{todo}
+Document provider usage
+```


@@ -11,6 +11,7 @@ to [LinkML](https://linkml.io/).
 * [Purpose](intro/purpose) - Why this package exists
 * [Overview](guide/overview) - Overview of how it works
 * [API Docs](api/nwb_linkml/index) - Ok *really* how it works
+* [TODO](meta/todo) - The work that remains to be done
 ```

 `nwb-linkml` is an independent implementation of the standard capable of:
@@ -25,9 +26,6 @@ to [LinkML](https://linkml.io/).
 ## Example Translation
-
-## TimeSeries
-
 (Abbreviated for clarity)

 `````{tab-set}
@@ -265,17 +263,6 @@ api/nwb_linkml/index
 api/nwb_schema_language/index
 api/nwb_linkml/schema/index
 ```
-```{toctree}
-:caption: Notes
-:maxdepth: 3
-:hidden:
-
-_notes/linkml
-_notes/pynwb
-_notes/schema
-_notes/translation
-```
 ````

 ````{only} full


@@ -1,10 +0,0 @@
-# Introduction
-
-```{toctree}
-:caption: Introduction:
-:maxdepth: 3
-
-purpose
-nwb
-translation
-```


@@ -5,4 +5,9 @@ Important things that are not implemented yet!
 - {meth}`nwb_linkml.adapters.classes.ClassAdapter.handle_dtype` does not yet handle compound dtypes,
   leaving them as `AnyType` instead. This is fine for a first draft since they are used rarely within
   NWB, but we will need to handle them by making slots for each of the dtypes since they typically
   represent table-like data.
+
+## Docs TODOs
+
+```{todolist}
+```

nwb_linkml/conftest.py (new file)
@@ -0,0 +1,20 @@
+import os
+import pytest
+from doctest import ELLIPSIS, NORMALIZE_WHITESPACE
+
+from tests.fixtures import tmp_output_dir
+
+from sybil import Sybil
+from sybil.parsers.rest import DocTestParser, PythonCodeBlockParser
+
+pytest_collect_file = Sybil(
+    parsers=[
+        DocTestParser(optionflags=ELLIPSIS + NORMALIZE_WHITESPACE),
+        PythonCodeBlockParser(),
+    ],
+    patterns=['*.py'],
+).pytest()
+
+@pytest.fixture(autouse=True, scope='session')
+def set_config_vars(tmp_output_dir):
+    os.environ['NWB_LINKML_CACHE_DIR'] = str(tmp_output_dir)
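
For context (not from the commit): with this conftest at the package root, Sybil collects doctests from docstrings in any `*.py` file it finds. A toy sketch of the kind of example it would pick up — the module and function here are hypothetical:

```python
# Hypothetical module collected by the Sybil config above: the doctest in this
# docstring runs as a pytest test, with ELLIPSIS and NORMALIZE_WHITESPACE enabled.
def add_one(x: int) -> int:
    """
    >>> add_one(1)
    2
    """
    return x + 1
```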

nwb_linkml/poetry.lock (generated)
@@ -2411,6 +2411,21 @@ postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"]
 pymysql = ["pymysql"]
 sqlcipher = ["sqlcipher3-binary"]

+[[package]]
+name = "sybil"
+version = "5.0.3"
+description = "Automated testing for the examples in your code and documentation."
+optional = true
+python-versions = ">=3.7"
+files = [
+    {file = "sybil-5.0.3-py3-none-any.whl", hash = "sha256:6f3c30822169895c4fb34c8366bdb132cf62bb68fb1d03d2ebb05282eab08c95"},
+    {file = "sybil-5.0.3.tar.gz", hash = "sha256:20dfe3a35a8d1ffcb4311434d1abf38c030c91064d75ff6b56ddd1060e08e758"},
+]
+
+[package.extras]
+build = ["furo", "sphinx", "twine", "urllib3 (<2)", "wheel"]
+test = ["myst-parser", "pytest (>=7.1.0)", "pytest-cov", "seedir", "testfixtures"]
+
 [[package]]
 name = "tenacity"
 version = "8.2.3"
@@ -2686,4 +2701,4 @@ tests = ["coverage", "coveralls", "pytest", "pytest-cov", "pytest-depends", "pyt
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.13"
-content-hash = "a3ce11b1eb6ecff528a0a7bce0695368a24218766d6eb0385002fb4e8d3ea8b4"
+content-hash = "673c81c4d38d7d0014804459ac152837e6f19bee122a5ea632c3499e86eaed9f"


@@ -27,6 +27,10 @@ linkml = "^1.6.1"
 nptyping = "^2.5.0"
 pydantic = "^2.3.0"
 h5py = "^3.9.0"
+pydantic-settings = "^2.0.3"
+dask = "^2023.9.2"
+blosc2 = "^2.2.7"
+tqdm = "^4.66.1"
 dash = {version="^2.12.1", optional=true}
 dash-cytoscape = {version="^0.3.0", optional=true}
 pytest = { version="^7.4.0", optional=true}
@@ -36,16 +40,13 @@ pytest-md = {version = "^0.2.0", optional = true}
 pytest-cov = {version = "^4.1.0", optional = true}
 coveralls = {version = "^3.3.1", optional = true}
 pytest-profiling = {version = "^1.7.0", optional = true}
-pydantic-settings = "^2.0.3"
-dask = "^2023.9.2"
-blosc2 = "^2.2.7"
-tqdm = "^4.66.1"
+sybil = {version = "^5.0.3", optional = true}

 [tool.poetry.extras]
 tests = [
     "pytest", "pytest-depends", "coverage", "pytest-md",
-    "pytest-cov", "coveralls", "pytest-profiling"
+    "pytest-cov", "coveralls", "pytest-profiling", "sybil"
 ]
 plot = ["dash", "dash-cytoscape"]
@@ -61,6 +62,7 @@ pytest-md = "^0.2.0"
 pytest-cov = "^4.1.0"
 coveralls = "^3.3.1"
 pytest-profiling = "^1.7.0"
+sybil = "^5.0.3"

 [tool.poetry.group.plot]
 optional = true
@@ -79,12 +81,13 @@ addopts = [
     "--cov=nwb_linkml",
     "--cov-append",
     "--cov-config=.coveragerc",
-    "--doctest-modules",
+    "-p no:doctest",
     "--ignore=tests/__tmp__"
 ]
 testpaths = [
     "tests",
-    'nwb_linkml/tests'
+    'nwb_linkml/tests',
+    'src/nwb_linkml'
 ]
 doctest_optionflags = "NORMALIZE_WHITESPACE"
 filterwarnings = [


@ -25,18 +25,73 @@ class DatasetMap(Map):
@classmethod @classmethod
@abstractmethod @abstractmethod
def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult: def apply(c, cls: Dataset, res: Optional[BuildResult] = None, name:Optional[str] = None) -> BuildResult:
pass # pragma: no cover pass # pragma: no cover
class MapScalar(DatasetMap): class MapScalar(DatasetMap):
""" """
Datasets that are just a single value should just be a scalar value, not an array with size 1 Datasets that are just a single value should just be a scalar value, not an array with size 1
Replace the built class with Replaces the built class with a slot.
Examples:
.. grid:: 2
:gutter: 1
:margin: 0
:padding: 0
.. grid-item-card::
:margin: 0
NWB Schema
^^^
.. code-block:: yaml
datasets:
- name: MyScalar
doc: A scalar
dtype: int32
quantity: '?'
.. grid-item-card::
:margin: 0
LinkML
^^^
.. code-block:: yaml
attributes:
- name: MyScalar
description: A scalar
multivalued: false
range: int32
required: false
""" """
@classmethod @classmethod
def check(c, cls:Dataset) -> bool: def check(c, cls:Dataset) -> bool:
"""
.. list-table::
:header-rows: 1
:align: left
* - Attr
- Value
* - ``neurodata_type_inc``
- ``None``
* - ``attributes``
- ``None``
* - ``dims``
- ``None``
* - ``shape``
- ``None``
* - ``name``
- ``str``
"""
if cls.neurodata_type_inc != 'VectorData' and \ if cls.neurodata_type_inc != 'VectorData' and \
not cls.neurodata_type_inc and \ not cls.neurodata_type_inc and \
not cls.attributes and \ not cls.attributes and \
@@ -48,7 +103,7 @@ class MapScalar(DatasetMap):
             return False

     @classmethod
-    def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult:
+    def apply(c, cls: Dataset, res: Optional[BuildResult] = None, name:Optional[str] = None) -> BuildResult:
         this_slot = SlotDefinition(
             name=cls.name,
             description=cls.doc,
@@ -65,6 +120,25 @@ class MapScalarAttributes(DatasetMap):
     """
     @classmethod
     def check(c, cls:Dataset) -> bool:
+        """
+        .. list-table::
+            :header-rows: 1
+            :align: left
+
+            * - Attr
+              - Value
+            * - ``neurodata_type_inc``
+              - ``None``
+            * - ``attributes``
+              - Truthy
+            * - ``dims``
+              - ``None``
+            * - ``shape``
+              - ``None``
+            * - ``name``
+              - ``str``
+        """
         if cls.neurodata_type_inc != 'VectorData' and \
             not cls.neurodata_type_inc and \
             cls.attributes and \
@@ -76,7 +150,7 @@ class MapScalarAttributes(DatasetMap):
             return False

     @classmethod
-    def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult:
+    def apply(c, cls: Dataset, res: Optional[BuildResult] = None, name:Optional[str] = None) -> BuildResult:
         value_slot = SlotDefinition(
             name='value',
             range=ClassAdapter.handle_dtype(cls.dtype),
@@ -98,7 +172,7 @@ class MapListlike(DatasetMap):
             return False

     @classmethod
-    def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult:
+    def apply(c, cls: Dataset, res: Optional[BuildResult] = None, name:Optional[str] = None) -> BuildResult:
         dtype = camel_to_snake(ClassAdapter.handle_dtype(cls.dtype))
         slot = SlotDefinition(
             name=dtype,
@@ -125,7 +199,7 @@ class MapArraylike(DatasetMap):
             return False

     @classmethod
-    def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult:
+    def apply(c, cls: Dataset, res: Optional[BuildResult] = None, name:Optional[str] = None) -> BuildResult:
         array_class = make_arraylike(cls, name)
         name = camel_to_snake(cls.name)
         res = BuildResult(
@@ -171,7 +245,7 @@ class MapArrayLikeAttributes(DatasetMap):
             return False

     @classmethod
-    def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult:
+    def apply(c, cls: Dataset, res: Optional[BuildResult] = None, name:Optional[str] = None) -> BuildResult:
         array_class = make_arraylike(cls, name)
         # make a slot for the arraylike class
         array_slot = SlotDefinition(
@@ -183,6 +257,90 @@ class MapArrayLikeAttributes(DatasetMap):
         res.classes[0].attributes.update({'array': array_slot})
         return res

+# --------------------------------------------------
+# DynamicTable special cases
+# --------------------------------------------------
+
+class Map1DVector(DatasetMap):
+    """
+    ``VectorData`` is subclassed with a name but without dims or attributes, treat this as a normal 1D array
+    slot that replaces any class that would be built for this
+    """
+    @classmethod
+    def check(c, cls:Dataset) -> bool:
+        if cls.neurodata_type_inc == 'VectorData' and \
+                not cls.dims and \
+                not cls.shape and \
+                not cls.attributes \
+                and cls.name:
+            return True
+        else:
+            return False
+
+    @classmethod
+    def apply(c, cls: Dataset, res: Optional[BuildResult] = None, name:Optional[str] = None) -> BuildResult:
+        this_slot = SlotDefinition(
+            name=cls.name,
+            description=cls.doc,
+            range=ClassAdapter.handle_dtype(cls.dtype),
+            multivalued=True
+        )
+        # No need to make a class for us, so we replace the existing build results
+        res = BuildResult(slots=[this_slot])
+        return res
+
+class MapNVectors(DatasetMap):
+    """
+    An unnamed container that indicates an arbitrary quantity of some other neurodata type.
+
+    Most commonly: ``VectorData`` is subclassed without a name and with a '*' quantity to indicate
+    arbitrary columns.
+    """
+    @classmethod
+    def check(c, cls:Dataset) -> bool:
+        if cls.name is None and \
+                cls.neurodata_type_def is None and \
+                cls.neurodata_type_inc and \
+                cls.quantity in ('*', '+'):
+            #cls.neurodata_type_inc in ('VectorIndex', 'VectorData') and \
+            return True
+        else:
+            return False
+
+    @classmethod
+    def apply(c, cls: Dataset, res: Optional[BuildResult] = None, name:Optional[str] = None) -> BuildResult:
+        this_slot = SlotDefinition(
+            name=camel_to_snake(cls.neurodata_type_inc),
+            description=cls.doc,
+            range=cls.neurodata_type_inc,
+            **QUANTITY_MAP[cls.quantity]
+        )
+        # No need to make a class for us, so we replace the existing build results
+        res = BuildResult(slots=[this_slot])
+        return res
+
+class DatasetAdapter(ClassAdapter):
+    cls: Dataset
+
+    def build(self) -> BuildResult:
+        res = self.build_base()
+
+        # find a map to use
+        matches = [m for m in DatasetMap.__subclasses__() if m.check(self.cls)]
+        if len(matches) > 1: # pragma: no cover
+            raise RuntimeError(f"Only one map should apply to a dataset, you need to refactor the maps! Got maps: {matches}")
+
+        # apply matching maps
+        for m in matches:
+            res = m.apply(self.cls, res, self._get_full_name())
+
+        return res
+
 def make_arraylike(cls:Dataset, name:Optional[str] = None) -> ClassDefinition:
     # The schema language doesn't have a way of specifying a dataset/group is "abstract"
     # and yet hdmf-common says you don't need a dtype if the dataset is "abstract"
@@ -252,7 +410,7 @@ def make_arraylike(cls:Dataset, name:Optional[str] = None) -> ClassDefinition:
         else:
             raise ValueError(f"Dataset has no name or type definition, what do call it?")
-        name = '__'.join([name, 'Array'])
+        name = '__'.join([name, 'Arraylike'])

     array_class = ClassDefinition(
         name=name,
@@ -280,87 +438,3 @@ def has_attrs(cls:Dataset) -> bool:
         return True
     else:
         return False
-
-# --------------------------------------------------
-# DynamicTable special cases
-# --------------------------------------------------
-
-class Map1DVector(DatasetMap):
-    """
-    ``VectorData`` is subclassed with a name but without dims or attributes, treat this as a normal 1D array
-    slot that replaces any class that would be built for this
-    """
-    @classmethod
-    def check(c, cls:Dataset) -> bool:
-        if cls.neurodata_type_inc == 'VectorData' and \
-                not cls.dims and \
-                not cls.shape and \
-                not cls.attributes \
-                and cls.name:
-            return True
-        else:
-            return False
-
-    @classmethod
-    def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult:
-        this_slot = SlotDefinition(
-            name=cls.name,
-            description=cls.doc,
-            range=ClassAdapter.handle_dtype(cls.dtype),
-            multivalued=True
-        )
-        # No need to make a class for us, so we replace the existing build results
-        res = BuildResult(slots=[this_slot])
-        return res
-
-class MapNVectors(DatasetMap):
-    """
-    An unnamed container that indicates an arbitrary quantity of some other neurodata type.
-
-    Most commonly: ``VectorData`` is subclassed without a name and with a '*' quantity to indicate
-    arbitrary columns.
-    """
-    @classmethod
-    def check(c, cls:Dataset) -> bool:
-        if cls.name is None and \
-                cls.neurodata_type_def is None and \
-                cls.neurodata_type_inc and \
-                cls.quantity in ('*', '+'):
-            #cls.neurodata_type_inc in ('VectorIndex', 'VectorData') and \
-            return True
-        else:
-            return False
-
-    @classmethod
-    def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult:
-        this_slot = SlotDefinition(
-            name=camel_to_snake(cls.neurodata_type_inc),
-            description=cls.doc,
-            range=cls.neurodata_type_inc,
-            **QUANTITY_MAP[cls.quantity]
-        )
-        # No need to make a class for us, so we replace the existing build results
-        res = BuildResult(slots=[this_slot])
-        return res
-
-class DatasetAdapter(ClassAdapter):
-    cls: Dataset
-
-    def build(self) -> BuildResult:
-        res = self.build_base()
-
-        # find a map to use
-        matches = [m for m in DatasetMap.__subclasses__() if m.check(self.cls)]
-        if len(matches) > 1: # pragma: no cover
-            raise RuntimeError(f"Only one map should apply to a dataset, you need to refactor the maps! Got maps: {matches}")
-
-        # apply matching maps
-        for m in matches:
-            res = m.apply(res, self.cls, self._get_full_name())
-
-        return res


@@ -25,7 +25,3 @@ def get_inner_types(annotation) -> List[Any]:
             types.extend(get_inner_types(arg))
     return types
-
-def take_outer_type(annotation):
-    if typing.get_origin(annotation) is list:
-        return list
-    return annotation


@@ -56,6 +56,38 @@ class HDF5IO():
     def read(self, path:str) -> BaseModel | Dict[str, BaseModel]: ...

     def read(self, path:Optional[str] = None) -> Union['NWBFile', BaseModel, Dict[str, BaseModel]]:
+        """
+        Read data into models from an NWB File.
+
+        The read process is in several stages:
+
+        * Use :meth:`.make_provider` to generate any needed LinkML Schema or Pydantic Classes using a :class:`.SchemaProvider`
+        * :func:`flatten_hdf` file into a :class:`.ReadQueue` of nodes.
+        * Apply the queue's :class:`ReadPhases` :
+
+            * ``plan`` - trim any blank nodes, sort nodes to read, etc.
+            * ``read`` - load the actual data into temporary holding objects
+            * ``construct`` - cast the read data into models.
+
+        Read is split into stages like this to handle references between objects, where the read result of one node
+        might depend on another having already been completed. It also allows us to parallelize the operations
+        since each mapping operation is independent of the results of all the others in that pass.
+
+        .. todo::
+
+            Implement reading, skipping arrays - they are fast to read with the ArrayProxy class
+            and dask, but there are times when we might want to leave them out of the read entirely.
+            This might be better implemented as a filter on ``model_dump`` , but to investigate further
+            how best to support reading just metadata, or even some specific field value, or if
+            we should leave that to other implementations like eg. after we do SQL export then
+            not rig up a whole query system ourselves.
+
+        Args:
+            path (Optional[str]): If ``None`` (default), read whole file. Otherwise, read from specific (hdf5) path and its children
+
+        Returns:
+            ``NWBFile`` if ``path`` is ``None``, otherwise whatever Model or dictionary of models applies to the requested ``path``
+        """
         provider = self.make_provider()
@@ -91,6 +123,31 @@ class HDF5IO():
         else:
             return queue.completed[path].result

+    def write(self, path: Path):
+        """
+        Write to NWB file
+
+        .. todo::
+
+            Implement HDF5 writing.
+
+            Need to create inverse mappings that can take pydantic models to
+            hdf5 groups and datasets. If more metadata about the generation process
+            needs to be preserved (eg. explicitly notating that something is an attribute,
+            dataset, group, then we can make use of the :class:`~nwb_linkml.generators.pydantic.LinkML_Meta`
+            model. If the model to edit has been loaded from an HDF5 file (rather than
+            freshly created), then the ``hdf5_path`` should be populated making
+            mapping straightforward, but we probably want to generalize that to deterministically
+            get hdf5_path from position in the NWBFile object -- I think that might
+            require us to explicitly annotate when something is supposed to be a reference
+            vs. the original in the model representation, or else it's ambiguous.
+
+            Otherwise, it should be a matter of detecting changes from file if it exists already,
+            and then write them.
+        """
+        raise NotImplementedError('Writing to HDF5 is not implemented yet!')
+
     def make_provider(self) -> SchemaProvider:
         """
         Create a :class:`~.providers.schema.SchemaProvider` by
@@ -122,13 +179,13 @@ class HDF5IO():
 def read_specs_as_dicts(group: h5py.Group) -> dict:
     """
     Utility function to iterate through the `/specifications` group and
-    load
+    load the schemas from it.

     Args:
-        group:
+        group ( :class:`h5py.Group` ): the ``/specifications`` group!

     Returns:
+        ``dict`` of schema.
     """
     spec_dict = {}
     def _read_spec(name, node):
@@ -158,6 +215,10 @@ def find_references(h5f: h5py.File, path: str) -> List[str]:
     This is extremely slow because we collect all references first,
     rather than checking them as we go and quitting early. PR if you want to make this faster!

+    .. todo::
+
+        Test :func:`.find_references` !
+
     Args:
         h5f (:class:`h5py.File`): Open hdf5 file
         path (str): Path to search for references to
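
A minimal usage sketch of the staged read flow described in the docstring above (not part of the commit; the import path, constructor argument, and file name are assumptions):

```python
from nwb_linkml.io.hdf5 import HDF5IO  # assumed import path

# Hypothetical file path; read() with no path returns the whole NWBFile model,
# while passing an hdf5 path returns just the model(s) under that node.
io = HDF5IO(path='example.nwb')  # constructor signature assumed
nwbfile = io.read()
acquisition = io.read('/acquisition')
```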


@@ -15,9 +15,12 @@ from nwb_linkml.adapters.namespaces import NamespacesAdapter
 from nwb_linkml.adapters.schema import SchemaAdapter

-def load_yaml(path:Path) -> dict:
-    with open(path, 'r') as file:
-        ns_dict = yaml.safe_load(file)
+def load_yaml(path:Path|str) -> dict:
+    if isinstance(path, str) and not Path(path).exists():
+        ns_dict = yaml.safe_load(path)
+    else:
+        with open(path, 'r') as file:
+            ns_dict = yaml.safe_load(file)
     ns_dict = apply_postload(ns_dict)
     return ns_dict
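
A small sketch of the two call styles the updated `load_yaml` supports (not part of the commit; the import path, file name, and inline namespace content are illustrative):

```python
from pathlib import Path
from nwb_linkml.io.schema import load_yaml  # assumed import path

# 1) From a file on disk (hypothetical filename)
ns_dict = load_yaml(Path('nwb.namespace.yaml'))

# 2) Directly from a YAML string: since the string isn't an existing path,
#    it is parsed as YAML rather than opened as a file.
inline = """
namespaces:
- name: example-lab
  version: 0.0.1
"""
ns_dict = load_yaml(inline)
```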


@@ -128,8 +128,8 @@ class GitRepo:
         If ``None``: if :attr:`NamespaceRepo.versions`, use the last version. Otherwise use ``HEAD``

-        Should match :prop:`.active_commit`, differs semantically in that it is used to
-        set the active_commit, while :prop:`.active_commit` reads what commit is actually checked out
+        Should match :attr:`.active_commit`, differs semantically in that it is used to
+        set the active_commit, while :attr:`.active_commit` reads what commit is actually checked out
         """
         return self._commit


@@ -14,6 +14,8 @@ Relationship to other modules:
 Providers create a set of directories with namespaces and versions,
 so eg. for the linkML and pydantic providers:

+.. code-block:: yaml
+
     cache_dir
       - linkml
         - nwb_core
@@ -280,6 +282,7 @@ class LinkMLProvider(Provider):
     >>> # Build a custom schema and then get it
     >>> # provider.build_from_yaml('myschema.yaml')
     >>> # my_schema = provider.get('myschema')
+
     """
     PROVIDES = 'linkml'
     PROVIDES_CLASS = SchemaDefinition
@@ -357,7 +360,7 @@ class LinkMLProvider(Provider):
                 to build
             versions (dict): Dict of specific versions to use
                 for cross-namespace imports. as ``{'namespace': 'version'}``
                 If none is provided, use the most recent version
                 available.
             dump (bool): If ``True`` (default), dump generated schema to YAML. otherwise just return
             force (bool): If ``False`` (default), don't build schema that already exist. If ``True`` , clear directory and rebuild


@@ -1,8 +0,0 @@
-import os
-import pytest
-
-from .fixtures import tmp_output_dir
-
-@pytest.fixture(autouse=True, scope='session')
-def set_config_vars(tmp_output_dir):
-    os.environ['NWB_LINKML_CACHE_DIR'] = str(tmp_output_dir)


@@ -4,7 +4,70 @@ import pytest

 from ..fixtures import nwb_core_fixture

+from nwb_schema_language import Namespaces, Namespace, Dataset, Group, Schema
+from linkml_runtime.dumpers import yaml_dumper
+import yaml
+
 from nwb_linkml.adapters import DatasetAdapter
+from nwb_linkml.adapters.dataset import (
+    MapScalar,
+    MapListlike,
+    MapArraylike,
+    MapNVectors,
+    Map1DVector,
+    MapScalarAttributes,
+    MapArrayLikeAttributes
+)

 def test_nothing(nwb_core_fixture):
     pass
+
+def _compare_dicts(dict1, dict2) -> bool:
+    """just in one direction - that all the entries in dict1 are in dict2"""
+    assert all([dict1[k] == dict2[k] for k in dict1.keys()])
+    #assert all([dict1[k] == dict2[k] for k in dict2.keys()])
+
+def test_map_scalar():
+    model = {
+        'name': 'MyScalar',
+        'doc': 'This should be a scalar',
+        'dtype': 'int32',
+        'quantity': '?'
+    }
+    test = {
+        'name': 'MyScalar',
+        'description': 'This should be a scalar',
+        'multivalued': False,
+        'range': 'int32',
+        'required': False
+    }
+    dataset = Dataset(**model)
+    assert MapScalar.check(dataset)
+    result = MapScalar.apply(dataset)
+    assert len(result.classes) == 0
+    _compare_dicts(test, result.slots[0])
+
+def test_map_scalar_attributes():
+    pass
+
+def test_map_listlike():
+    pass
+
+def test_map_arraylike():
+    pass
+
+def test_map_arraylike_attributes():
+    pass
+
+def test_map_1d_vector():
+    pass
+
+def test_map_n_vectors():
+    pass