From eac5ef4c80b2ba1ed751ddbe3348a34edaac8a68 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Thu, 19 Oct 2023 21:20:50 -0700 Subject: [PATCH] more docs work move conftest to root so we find doctests? maybe we don't want to keep that tho --- docs/Makefile | 2 +- docs/_notes/schema.md | 10 +- docs/_static/css/custom.css | 19 +- docs/api/nwb_linkml/adapters/index.md | 11 + docs/conf.py | 20 +- docs/guide/quickstart.md | 13 +- docs/index.md | 15 +- docs/intro/index.md | 10 - docs/meta/todo.md | 7 +- nwb_linkml/conftest.py | 20 ++ nwb_linkml/poetry.lock | 17 +- nwb_linkml/pyproject.toml | 19 +- nwb_linkml/src/nwb_linkml/adapters/dataset.py | 258 +++++++++++------- nwb_linkml/src/nwb_linkml/annotations.py | 4 - nwb_linkml/src/nwb_linkml/io/hdf5.py | 67 ++++- nwb_linkml/src/nwb_linkml/io/schema.py | 9 +- nwb_linkml/src/nwb_linkml/providers/git.py | 4 +- nwb_linkml/src/nwb_linkml/providers/schema.py | 5 +- nwb_linkml/tests/conftest.py | 8 - .../test_adapters/test_adapter_dataset.py | 63 +++++ 20 files changed, 422 insertions(+), 159 deletions(-) delete mode 100644 docs/intro/index.md create mode 100644 nwb_linkml/conftest.py delete mode 100644 nwb_linkml/tests/conftest.py diff --git a/docs/Makefile b/docs/Makefile index 887441f..556a005 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -16,7 +16,7 @@ help: serve: # env variable that makes it so we don't build all the models while in dev mode - SPHINX_MINIMAL="True" sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ + SPHINX_MINIMAL="True" sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) $(O) \ --watch ../nwb_linkml/src/nwb_linkml \ --watch ../nwb_schema_language/src/nwb_schema_language \ --re-ignore ".*jupyter_execute.*" diff --git a/docs/_notes/schema.md b/docs/_notes/schema.md index ab674a7..8f3b663 100644 --- a/docs/_notes/schema.md +++ b/docs/_notes/schema.md @@ -54,7 +54,7 @@ dtypes can be eg: -```yml +```yaml - neurodata_type_def: Image neurodata_type_inc: NWBData dtype: numeric @@ -122,7 +122,7 @@ we need to map: - dims, shape, and dtypes: these should have been just attributes rather than put in the spec language, so we'll just make an Array class and use that. 
- dims and shape should probably be a dictionary so you don't need a zillion nulls, eg rather than - ```yml + ```yaml dims: - - x - y @@ -137,7 +137,7 @@ we need to map: - 3 ``` do - ```yml + ```yaml dims: - - name: x - name: y @@ -147,7 +147,7 @@ we need to map: shape: 3 ``` or even - ```yml + ```yaml dims: - - x - y @@ -160,7 +160,7 @@ we need to map: And also is there any case that would break where there is some odd dependency between dims where it wouldn't work to just use an `optional` param - ```yml + ```yaml dims: - name: x shape: null diff --git a/docs/_static/css/custom.css b/docs/_static/css/custom.css index cb3f768..aa14e39 100644 --- a/docs/_static/css/custom.css +++ b/docs/_static/css/custom.css @@ -18,5 +18,22 @@ pre { } .hide.below-input span { - color: var(--color-background-secondary); + color: var(--color-highlighted-text); +} + +div.cell details.below-input > summary { + background-color: var(--color-background-primary); +} + +.cell_output pre { + border: 1px solid var(--color-code-foreground); + padding: 1em; + border-radius: 5px; +} +div.cell div.cell_input, div.cell details.above-input > summary { + border-left-color: unset; +} + +div.cell details.below-input > summary { + border-left-color: unset; } \ No newline at end of file diff --git a/docs/api/nwb_linkml/adapters/index.md b/docs/api/nwb_linkml/adapters/index.md index f81273d..eca33e1 100644 --- a/docs/api/nwb_linkml/adapters/index.md +++ b/docs/api/nwb_linkml/adapters/index.md @@ -1,6 +1,17 @@ # Adapters +Adapters translate NWB Schema Language to LinkML Schema. + +- [**Adapter**](adapter.md) - Base Adapter Classes +- [**Namespaces**](namespaces.md) - Top-level container of NWB namespace indices and schema +- [**Schema**](schema.md) - Individual NWB Schema files within a namespace +- [**Classes**](classes.md) - Root methods shared between classes and groups + - [**Dataset**](dataset.md) - ... Datasets! + - [**Group**](group.md) - Groups! + ```{toctree} +:hidden: + adapter classes dataset diff --git a/docs/conf.py b/docs/conf.py index d1e40a1..f46e24d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -29,7 +29,8 @@ extensions = [ "sphinx_design", #'myst_parser', "myst_nb", - 'sphinx_togglebutton' + 'sphinx_togglebutton', + 'sphinx.ext.todo' ] @@ -102,10 +103,23 @@ autodoc_default_options = { nb_render_markdown_format = 'myst' nb_append_css = False - +# -------------------------------------------------- # doctest doctest_global_setup = """ from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition, SchemaDefinition +from nwb_schema_language import Namespaces, Namespace, Dataset, Group, Schema +from linkml_runtime.dumpers import yaml_dumper +import yaml from pydantic import BaseModel, Field import numpy as np -""" \ No newline at end of file + +from nwb_linkml.adapters import BuildResult +""" + +# -------------------------------------------------- +# Etc one-off settings + +# todo +todo_include_todos = True +todo_link_only = True + diff --git a/docs/guide/quickstart.md b/docs/guide/quickstart.md index 8c82f8c..721f1a6 100644 --- a/docs/guide/quickstart.md +++ b/docs/guide/quickstart.md @@ -101,7 +101,16 @@ and `ClassDefinition` classes: print(core_linkml.schemas[0]) ``` - - +## Generating Pydantic Models + +```{todo} +Document Pydantic model generation +``` + +## Caching Output with Providers + +```{todo} +Document provider usage +``` diff --git a/docs/index.md b/docs/index.md index ee11f4f..9ffc483 100644 --- a/docs/index.md +++ b/docs/index.md @@ -11,6 +11,7 @@ to [LinkML](https://linkml.io/). 
* [Purpose](intro/purpose) - Why this package exists * [Overview](guide/overview) - Overview of how it works * [API Docs](api/nwb_linkml/index) - Ok *really* how it works +* [TODO](meta/todo) - The work that remains to be done ``` `nwb-linkml` is an independent implementation of the standard capable of: @@ -25,9 +26,6 @@ to [LinkML](https://linkml.io/). ## Example Translation - -## TimeSeries - (Abbreviated for clarity) `````{tab-set} @@ -265,17 +263,6 @@ api/nwb_linkml/index api/nwb_schema_language/index api/nwb_linkml/schema/index ``` - -```{toctree} -:caption: Notes -:maxdepth: 3 -:hidden: - -_notes/linkml -_notes/pynwb -_notes/schema -_notes/translation -``` ```` ````{only} full diff --git a/docs/intro/index.md b/docs/intro/index.md deleted file mode 100644 index aa43050..0000000 --- a/docs/intro/index.md +++ /dev/null @@ -1,10 +0,0 @@ -# Introduction - -```{toctree} -:caption: Introduction: -:maxdepth: 3 - -purpose -nwb -translation -``` \ No newline at end of file diff --git a/docs/meta/todo.md b/docs/meta/todo.md index 2bb3886..f6cdd6f 100644 --- a/docs/meta/todo.md +++ b/docs/meta/todo.md @@ -5,4 +5,9 @@ Important things that are not implemented yet! - {meth}`nwb_linkml.adapters.classes.ClassAdapter.handle_dtype` does not yet handle compound dtypes, leaving them as `AnyType` instead. This is fine for a first draft since they are used rarely within NWB, but we will need to handle them by making slots for each of the dtypes since they typically - represent table-like data. \ No newline at end of file + represent table-like data. + +## Docs TODOs + +```{todolist} +``` \ No newline at end of file diff --git a/nwb_linkml/conftest.py b/nwb_linkml/conftest.py new file mode 100644 index 0000000..33836f3 --- /dev/null +++ b/nwb_linkml/conftest.py @@ -0,0 +1,20 @@ +import os +import pytest +from doctest import ELLIPSIS, NORMALIZE_WHITESPACE + +from tests.fixtures import tmp_output_dir + +from sybil import Sybil +from sybil.parsers.rest import DocTestParser, PythonCodeBlockParser + +pytest_collect_file = Sybil( + parsers=[ + DocTestParser(optionflags=ELLIPSIS + NORMALIZE_WHITESPACE), + PythonCodeBlockParser(), + ], + patterns=['*.py'], +).pytest() + +@pytest.fixture(autouse=True, scope='session') +def set_config_vars(tmp_output_dir): + os.environ['NWB_LINKML_CACHE_DIR'] = str(tmp_output_dir) diff --git a/nwb_linkml/poetry.lock b/nwb_linkml/poetry.lock index 372617d..0dba77f 100644 --- a/nwb_linkml/poetry.lock +++ b/nwb_linkml/poetry.lock @@ -2411,6 +2411,21 @@ postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] pymysql = ["pymysql"] sqlcipher = ["sqlcipher3-binary"] +[[package]] +name = "sybil" +version = "5.0.3" +description = "Automated testing for the examples in your code and documentation." 
+optional = true +python-versions = ">=3.7" +files = [ + {file = "sybil-5.0.3-py3-none-any.whl", hash = "sha256:6f3c30822169895c4fb34c8366bdb132cf62bb68fb1d03d2ebb05282eab08c95"}, + {file = "sybil-5.0.3.tar.gz", hash = "sha256:20dfe3a35a8d1ffcb4311434d1abf38c030c91064d75ff6b56ddd1060e08e758"}, +] + +[package.extras] +build = ["furo", "sphinx", "twine", "urllib3 (<2)", "wheel"] +test = ["myst-parser", "pytest (>=7.1.0)", "pytest-cov", "seedir", "testfixtures"] + [[package]] name = "tenacity" version = "8.2.3" @@ -2686,4 +2701,4 @@ tests = ["coverage", "coveralls", "pytest", "pytest-cov", "pytest-depends", "pyt [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.13" -content-hash = "a3ce11b1eb6ecff528a0a7bce0695368a24218766d6eb0385002fb4e8d3ea8b4" +content-hash = "673c81c4d38d7d0014804459ac152837e6f19bee122a5ea632c3499e86eaed9f" diff --git a/nwb_linkml/pyproject.toml b/nwb_linkml/pyproject.toml index 770f05c..cf8c7fe 100644 --- a/nwb_linkml/pyproject.toml +++ b/nwb_linkml/pyproject.toml @@ -27,6 +27,10 @@ linkml = "^1.6.1" nptyping = "^2.5.0" pydantic = "^2.3.0" h5py = "^3.9.0" +pydantic-settings = "^2.0.3" +dask = "^2023.9.2" +blosc2 = "^2.2.7" +tqdm = "^4.66.1" dash = {version="^2.12.1", optional=true} dash-cytoscape = {version="^0.3.0", optional=true} pytest = { version="^7.4.0", optional=true} @@ -36,16 +40,13 @@ pytest-md = {version = "^0.2.0", optional = true} pytest-cov = {version = "^4.1.0", optional = true} coveralls = {version = "^3.3.1", optional = true} pytest-profiling = {version = "^1.7.0", optional = true} -pydantic-settings = "^2.0.3" -dask = "^2023.9.2" -blosc2 = "^2.2.7" -tqdm = "^4.66.1" +sybil = {version = "^5.0.3", optional = true} [tool.poetry.extras] tests = [ - "pytest", "pytest-depends", "coverage", "pytest-md", - "pytest-cov", "coveralls", "pytest-profiling" + "pytest", "pytest-depends", "coverage", "pytest-md", + "pytest-cov", "coveralls", "pytest-profiling", "sybil" ] plot = ["dash", "dash-cytoscape"] @@ -61,6 +62,7 @@ pytest-md = "^0.2.0" pytest-cov = "^4.1.0" coveralls = "^3.3.1" pytest-profiling = "^1.7.0" +sybil = "^5.0.3" [tool.poetry.group.plot] optional = true @@ -79,12 +81,13 @@ addopts = [ "--cov=nwb_linkml", "--cov-append", "--cov-config=.coveragerc", - "--doctest-modules", + "-p no:doctest", "--ignore=tests/__tmp__" ] testpaths = [ "tests", - 'nwb_linkml/tests' + 'nwb_linkml/tests', + 'src/nwb_linkml' ] doctest_optionflags = "NORMALIZE_WHITESPACE" filterwarnings = [ diff --git a/nwb_linkml/src/nwb_linkml/adapters/dataset.py b/nwb_linkml/src/nwb_linkml/adapters/dataset.py index 02b1cc6..d2bda77 100644 --- a/nwb_linkml/src/nwb_linkml/adapters/dataset.py +++ b/nwb_linkml/src/nwb_linkml/adapters/dataset.py @@ -25,18 +25,73 @@ class DatasetMap(Map): @classmethod @abstractmethod - def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult: + def apply(c, cls: Dataset, res: Optional[BuildResult] = None, name:Optional[str] = None) -> BuildResult: pass # pragma: no cover class MapScalar(DatasetMap): """ Datasets that are just a single value should just be a scalar value, not an array with size 1 - Replace the built class with + Replaces the built class with a slot. + + Examples: + + .. grid:: 2 + :gutter: 1 + :margin: 0 + :padding: 0 + + .. grid-item-card:: + :margin: 0 + + NWB Schema + ^^^ + .. code-block:: yaml + + datasets: + - name: MyScalar + doc: A scalar + dtype: int32 + quantity: '?' + + .. grid-item-card:: + :margin: 0 + + LinkML + ^^^ + .. 
code-block:: yaml + + attributes: + - name: MyScalar + description: A scalar + multivalued: false + range: int32 + required: false + + """ @classmethod def check(c, cls:Dataset) -> bool: + """ + .. list-table:: + :header-rows: 1 + :align: left + + * - Attr + - Value + * - ``neurodata_type_inc`` + - ``None`` + * - ``attributes`` + - ``None`` + * - ``dims`` + - ``None`` + * - ``shape`` + - ``None`` + * - ``name`` + - ``str`` + + """ if cls.neurodata_type_inc != 'VectorData' and \ not cls.neurodata_type_inc and \ not cls.attributes and \ @@ -48,7 +103,7 @@ class MapScalar(DatasetMap): return False @classmethod - def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult: + def apply(c, cls: Dataset, res: Optional[BuildResult] = None, name:Optional[str] = None) -> BuildResult: this_slot = SlotDefinition( name=cls.name, description=cls.doc, @@ -65,6 +120,25 @@ class MapScalarAttributes(DatasetMap): """ @classmethod def check(c, cls:Dataset) -> bool: + """ + .. list-table:: + :header-rows: 1 + :align: left + + * - Attr + - Value + * - ``neurodata_type_inc`` + - ``None`` + * - ``attributes`` + - Truthy + * - ``dims`` + - ``None`` + * - ``shape`` + - ``None`` + * - ``name`` + - ``str`` + + """ if cls.neurodata_type_inc != 'VectorData' and \ not cls.neurodata_type_inc and \ cls.attributes and \ @@ -76,7 +150,7 @@ class MapScalarAttributes(DatasetMap): return False @classmethod - def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult: + def apply(c, cls: Dataset, res: Optional[BuildResult] = None, name:Optional[str] = None) -> BuildResult: value_slot = SlotDefinition( name='value', range=ClassAdapter.handle_dtype(cls.dtype), @@ -98,7 +172,7 @@ class MapListlike(DatasetMap): return False @classmethod - def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult: + def apply(c, cls: Dataset, res: Optional[BuildResult] = None, name:Optional[str] = None) -> BuildResult: dtype = camel_to_snake(ClassAdapter.handle_dtype(cls.dtype)) slot = SlotDefinition( name=dtype, @@ -125,7 +199,7 @@ class MapArraylike(DatasetMap): return False @classmethod - def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult: + def apply(c, cls: Dataset, res: Optional[BuildResult] = None, name:Optional[str] = None) -> BuildResult: array_class = make_arraylike(cls, name) name = camel_to_snake(cls.name) res = BuildResult( @@ -171,7 +245,7 @@ class MapArrayLikeAttributes(DatasetMap): return False @classmethod - def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult: + def apply(c, cls: Dataset, res: Optional[BuildResult] = None, name:Optional[str] = None) -> BuildResult: array_class = make_arraylike(cls, name) # make a slot for the arraylike class array_slot = SlotDefinition( @@ -183,6 +257,90 @@ class MapArrayLikeAttributes(DatasetMap): res.classes[0].attributes.update({'array': array_slot}) return res +# -------------------------------------------------- +# DynamicTable special cases +# -------------------------------------------------- + +class Map1DVector(DatasetMap): + """ + ``VectorData`` is subclassed with a name but without dims or attributes, treat this as a normal 1D array + slot that replaces any class that would be built for this + """ + @classmethod + def check(c, cls:Dataset) -> bool: + if cls.neurodata_type_inc == 'VectorData' and \ + not cls.dims and \ + not cls.shape and \ + not cls.attributes \ + and cls.name: + return True + else: + return False + + @classmethod + def 
apply(c, cls: Dataset, res: Optional[BuildResult] = None, name:Optional[str] = None) -> BuildResult: + this_slot = SlotDefinition( + name=cls.name, + description=cls.doc, + range=ClassAdapter.handle_dtype(cls.dtype), + multivalued=True + ) + # No need to make a class for us, so we replace the existing build results + res = BuildResult(slots=[this_slot]) + return res + +class MapNVectors(DatasetMap): + """ + An unnamed container that indicates an arbitrary quantity of some other neurodata type. + + Most commonly: ``VectorData`` is subclassed without a name and with a '*' quantity to indicate + arbitrary columns. + """ + @classmethod + def check(c, cls:Dataset) -> bool: + if cls.name is None and \ + cls.neurodata_type_def is None and \ + cls.neurodata_type_inc and \ + cls.quantity in ('*', '+'): + #cls.neurodata_type_inc in ('VectorIndex', 'VectorData') and \ + return True + else: + return False + + @classmethod + def apply(c, cls: Dataset, res: Optional[BuildResult] = None, name:Optional[str] = None) -> BuildResult: + this_slot = SlotDefinition( + name=camel_to_snake(cls.neurodata_type_inc), + description=cls.doc, + range=cls.neurodata_type_inc, + **QUANTITY_MAP[cls.quantity] + ) + # No need to make a class for us, so we replace the existing build results + res = BuildResult(slots=[this_slot]) + return res + + + + +class DatasetAdapter(ClassAdapter): + cls: Dataset + + def build(self) -> BuildResult: + res = self.build_base() + + # find a map to use + matches = [m for m in DatasetMap.__subclasses__() if m.check(self.cls)] + + if len(matches) > 1: # pragma: no cover + raise RuntimeError(f"Only one map should apply to a dataset, you need to refactor the maps! Got maps: {matches}") + + # apply matching maps + for m in matches: + res = m.apply(self.cls, res, self._get_full_name()) + + return res + + def make_arraylike(cls:Dataset, name:Optional[str] = None) -> ClassDefinition: # The schema language doesn't have a way of specifying a dataset/group is "abstract" # and yet hdmf-common says you don't need a dtype if the dataset is "abstract" @@ -252,7 +410,7 @@ def make_arraylike(cls:Dataset, name:Optional[str] = None) -> ClassDefinition: else: raise ValueError(f"Dataset has no name or type definition, what do call it?") - name = '__'.join([name, 'Array']) + name = '__'.join([name, 'Arraylike']) array_class = ClassDefinition( name=name, @@ -280,87 +438,3 @@ def has_attrs(cls:Dataset) -> bool: return True else: return False - -# -------------------------------------------------- -# DynamicTable special cases -# -------------------------------------------------- - -class Map1DVector(DatasetMap): - """ - ``VectorData`` is subclassed with a name but without dims or attributes, treat this as a normal 1D array - slot that replaces any class that would be built for this - """ - @classmethod - def check(c, cls:Dataset) -> bool: - if cls.neurodata_type_inc == 'VectorData' and \ - not cls.dims and \ - not cls.shape and \ - not cls.attributes \ - and cls.name: - return True - else: - return False - - @classmethod - def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult: - this_slot = SlotDefinition( - name=cls.name, - description=cls.doc, - range=ClassAdapter.handle_dtype(cls.dtype), - multivalued=True - ) - # No need to make a class for us, so we replace the existing build results - res = BuildResult(slots=[this_slot]) - return res - -class MapNVectors(DatasetMap): - """ - An unnamed container that indicates an arbitrary quantity of some other neurodata type. 
- - Most commonly: ``VectorData`` is subclassed without a name and with a '*' quantity to indicate - arbitrary columns. - """ - @classmethod - def check(c, cls:Dataset) -> bool: - if cls.name is None and \ - cls.neurodata_type_def is None and \ - cls.neurodata_type_inc and \ - cls.quantity in ('*', '+'): - #cls.neurodata_type_inc in ('VectorIndex', 'VectorData') and \ - return True - else: - return False - - @classmethod - def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult: - this_slot = SlotDefinition( - name=camel_to_snake(cls.neurodata_type_inc), - description=cls.doc, - range=cls.neurodata_type_inc, - **QUANTITY_MAP[cls.quantity] - ) - # No need to make a class for us, so we replace the existing build results - res = BuildResult(slots=[this_slot]) - return res - - - - -class DatasetAdapter(ClassAdapter): - cls: Dataset - - def build(self) -> BuildResult: - res = self.build_base() - - # find a map to use - matches = [m for m in DatasetMap.__subclasses__() if m.check(self.cls)] - - if len(matches) > 1: # pragma: no cover - raise RuntimeError(f"Only one map should apply to a dataset, you need to refactor the maps! Got maps: {matches}") - - # apply matching maps - for m in matches: - res = m.apply(res, self.cls, self._get_full_name()) - - return res - diff --git a/nwb_linkml/src/nwb_linkml/annotations.py b/nwb_linkml/src/nwb_linkml/annotations.py index 27d45ef..20c9c19 100644 --- a/nwb_linkml/src/nwb_linkml/annotations.py +++ b/nwb_linkml/src/nwb_linkml/annotations.py @@ -25,7 +25,3 @@ def get_inner_types(annotation) -> List[Any]: types.extend(get_inner_types(arg)) return types -def take_outer_type(annotation): - if typing.get_origin(annotation) is list: - return list - return annotation diff --git a/nwb_linkml/src/nwb_linkml/io/hdf5.py b/nwb_linkml/src/nwb_linkml/io/hdf5.py index 485f2ee..20caad2 100644 --- a/nwb_linkml/src/nwb_linkml/io/hdf5.py +++ b/nwb_linkml/src/nwb_linkml/io/hdf5.py @@ -56,6 +56,38 @@ class HDF5IO(): def read(self, path:str) -> BaseModel | Dict[str, BaseModel]: ... def read(self, path:Optional[str] = None) -> Union['NWBFile', BaseModel, Dict[str, BaseModel]]: + """ + Read data into models from an NWB File. + + The read process is in several stages: + + * Use :meth:`.make_provider` to generate any needed LinkML Schema or Pydantic Classes using a :class:`.SchemaProvider` + * :func:`flatten_hdf` file into a :class:`.ReadQueue` of nodes. + * Apply the queue's :class:`ReadPhases` : + + * ``plan`` - trim any blank nodes, sort nodes to read, etc. + * ``read`` - load the actual data into temporary holding objects + * ``construct`` - cast the read data into models. + + Read is split into stages like this to handle references between objects, where the read result of one node + might depend on another having already been completed. It also allows us to parallelize the operations + since each mapping operation is independent of the results of all the others in that pass. + + .. todo:: + + Implement reading, skipping arrays - they are fast to read with the ArrayProxy class + and dask, but there are times when we might want to leave them out of the read entirely. + This might be better implemented as a filter on ``model_dump`` , but to investigate further + how best to support reading just metadata, or even some specific field value, or if + we should leave that to other implementations like eg. after we do SQL export then + not rig up a whole query system ourselves. + + Args: + path (Optional[str]): If ``None`` (default), read whole file. 
Otherwise, read from specific (hdf5) path and its children + + Returns: + ``NWBFile`` if ``path`` is ``None``, otherwise whatever Model or dictionary of models applies to the requested ``path`` + """ provider = self.make_provider() @@ -91,6 +123,31 @@ class HDF5IO(): else: return queue.completed[path].result + def write(self, path: Path): + """ + Write to NWB file + + .. todo:: + + Implement HDF5 writing. + + Need to create inverse mappings that can take pydantic models to + hdf5 groups and datasets. If more metadata about the generation process + needs to be preserved (eg. explicitly notating that something is an attribute, + dataset, group, then we can make use of the :class:`~nwb_linkml.generators.pydantic.LinkML_Meta` + model. If the model to edit has been loaded from an HDF5 file (rather than + freshly created), then the ``hdf5_path`` should be populated making + mapping straightforward, but we probably want to generalize that to deterministically + get hdf5_path from position in the NWBFile object -- I think that might + require us to explicitly annotate when something is supposed to be a reference + vs. the original in the model representation, or else it's ambiguous. + + Otherwise, it should be a matter of detecting changes from file if it exists already, + and then write them. + + """ + raise NotImplementedError('Writing to HDF5 is not implemented yet!') + def make_provider(self) -> SchemaProvider: """ Create a :class:`~.providers.schema.SchemaProvider` by @@ -122,13 +179,13 @@ class HDF5IO(): def read_specs_as_dicts(group: h5py.Group) -> dict: """ Utility function to iterate through the `/specifications` group and - load + load the schemas from it. Args: - group: + group ( :class:`h5py.Group` ): the ``/specifications`` group! Returns: - + ``dict`` of schema. """ spec_dict = {} def _read_spec(name, node): @@ -158,6 +215,10 @@ def find_references(h5f: h5py.File, path: str) -> List[str]: This is extremely slow because we collect all references first, rather than checking them as we go and quitting early. PR if you want to make this faster! + .. todo:: + + Test :func:`.find_references` ! + Args: h5f (:class:`h5py.File`): Open hdf5 file path (str): Path to search for references to diff --git a/nwb_linkml/src/nwb_linkml/io/schema.py b/nwb_linkml/src/nwb_linkml/io/schema.py index 26c491d..370975e 100644 --- a/nwb_linkml/src/nwb_linkml/io/schema.py +++ b/nwb_linkml/src/nwb_linkml/io/schema.py @@ -15,9 +15,12 @@ from nwb_linkml.adapters.namespaces import NamespacesAdapter from nwb_linkml.adapters.schema import SchemaAdapter -def load_yaml(path:Path) -> dict: - with open(path, 'r') as file: - ns_dict = yaml.safe_load(file) +def load_yaml(path:Path|str) -> dict: + if isinstance(path, str) and not Path(path).exists(): + ns_dict = yaml.safe_load(path) + else: + with open(path, 'r') as file: + ns_dict = yaml.safe_load(file) ns_dict = apply_postload(ns_dict) return ns_dict diff --git a/nwb_linkml/src/nwb_linkml/providers/git.py b/nwb_linkml/src/nwb_linkml/providers/git.py index 3422db7..f7022b1 100644 --- a/nwb_linkml/src/nwb_linkml/providers/git.py +++ b/nwb_linkml/src/nwb_linkml/providers/git.py @@ -128,8 +128,8 @@ class GitRepo: If ``None``: if :attr:`NamespaceRepo.versions`, use the last version. 
Otherwise use ``HEAD`` - Should match :prop:`.active_commit`, differs semantically in that it is used to - set the active_commit, while :prop:`.active_commit` reads what commit is actually checked out + Should match :attr:`.active_commit`, differs semantically in that it is used to + set the active_commit, while :attr:`.active_commit` reads what commit is actually checked out """ return self._commit diff --git a/nwb_linkml/src/nwb_linkml/providers/schema.py b/nwb_linkml/src/nwb_linkml/providers/schema.py index f33084e..031b509 100644 --- a/nwb_linkml/src/nwb_linkml/providers/schema.py +++ b/nwb_linkml/src/nwb_linkml/providers/schema.py @@ -14,6 +14,8 @@ Relationship to other modules: Providers create a set of directories with namespaces and versions, so eg. for the linkML and pydantic providers: +.. code-block:: yaml + cache_dir - linkml - nwb_core @@ -280,6 +282,7 @@ class LinkMLProvider(Provider): >>> # Build a custom schema and then get it >>> # provider.build_from_yaml('myschema.yaml') >>> # my_schema = provider.get('myschema') + """ PROVIDES = 'linkml' PROVIDES_CLASS = SchemaDefinition @@ -357,7 +360,7 @@ class LinkMLProvider(Provider): to build versions (dict): Dict of specific versions to use for cross-namespace imports. as ``{'namespace': 'version'}`` - If none is provided, use the most recent version + If none is provided, use the most recent version available. dump (bool): If ``True`` (default), dump generated schema to YAML. otherwise just return force (bool): If ``False`` (default), don't build schema that already exist. If ``True`` , clear directory and rebuild diff --git a/nwb_linkml/tests/conftest.py b/nwb_linkml/tests/conftest.py deleted file mode 100644 index 7e2d2e1..0000000 --- a/nwb_linkml/tests/conftest.py +++ /dev/null @@ -1,8 +0,0 @@ -import os -import pytest - -from .fixtures import tmp_output_dir - -@pytest.fixture(autouse=True, scope='session') -def set_config_vars(tmp_output_dir): - os.environ['NWB_LINKML_CACHE_DIR'] = str(tmp_output_dir) diff --git a/nwb_linkml/tests/test_adapters/test_adapter_dataset.py b/nwb_linkml/tests/test_adapters/test_adapter_dataset.py index be35b6c..198610b 100644 --- a/nwb_linkml/tests/test_adapters/test_adapter_dataset.py +++ b/nwb_linkml/tests/test_adapters/test_adapter_dataset.py @@ -4,7 +4,70 @@ import pytest from ..fixtures import nwb_core_fixture +from nwb_schema_language import Namespaces, Namespace, Dataset, Group, Schema +from linkml_runtime.dumpers import yaml_dumper +import yaml from nwb_linkml.adapters import DatasetAdapter +from nwb_linkml.adapters.dataset import ( + MapScalar, + MapListlike, + MapArraylike, + MapNVectors, + Map1DVector, + MapScalarAttributes, + MapArrayLikeAttributes +) + def test_nothing(nwb_core_fixture): + pass + + +def _compare_dicts(dict1, dict2) -> bool: + """just in one direction - that all the entries in dict1 are in dict2""" + assert all([dict1[k] == dict2[k] for k in dict1.keys()]) + #assert all([dict1[k] == dict2[k] for k in dict2.keys()]) + +def test_map_scalar(): + + model = { + 'name': 'MyScalar', + 'doc': 'This should be a scalar', + 'dtype': 'int32', + 'quantity': '?' 
+ } + test = { + 'name': 'MyScalar', + 'description': 'This should be a scalar', + 'multivalued': False, + 'range': 'int32', + 'required': False + } + + dataset = Dataset(**model) + assert MapScalar.check(dataset) + result = MapScalar.apply(dataset) + assert len(result.classes) == 0 + _compare_dicts(test, result.slots[0]) + + +def test_map_scalar_attributes(): + pass + + +def test_map_listlike(): + pass + + +def test_map_arraylike(): + pass + +def test_map_arraylike_attributes(): + pass + +def test_map_1d_vector(): + pass + + +def test_map_n_vectors(): pass \ No newline at end of file