From f4d397cde1c39e060f7b300a8413eb456efd7a4e Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Tue, 9 Jul 2024 03:26:45 -0700 Subject: [PATCH] more docs for dataset adapters. also: - fix search for initial - in dataset doctest - don't clear git repos on repeated test runs - fix string handling in load_yaml --- nwb_linkml/conftest.py | 2 +- nwb_linkml/src/nwb_linkml/adapters/array.py | 2 +- nwb_linkml/src/nwb_linkml/adapters/dataset.py | 265 +++++++++++++++++- nwb_linkml/src/nwb_linkml/io/schema.py | 11 +- nwb_linkml/tests/fixtures.py | 13 +- 5 files changed, 283 insertions(+), 10 deletions(-) diff --git a/nwb_linkml/conftest.py b/nwb_linkml/conftest.py index 79e8c25..1bd1ae3 100644 --- a/nwb_linkml/conftest.py +++ b/nwb_linkml/conftest.py @@ -19,7 +19,7 @@ NWB_KEYS = re.compile(r"(^\s*datasets:\s*\n)|^groups:") def _strip_nwb(nwb: str) -> str: # strip 'datasets:' keys and decoration left in for readability/context nwb = re.sub(NWB_KEYS, "", nwb) - nwb = re.sub(r"-", " ", nwb) + nwb = re.sub(r"^-", " ", nwb) nwb = textwrap.dedent(nwb) return nwb diff --git a/nwb_linkml/src/nwb_linkml/adapters/array.py b/nwb_linkml/src/nwb_linkml/adapters/array.py index 0a380f1..6d77bb1 100644 --- a/nwb_linkml/src/nwb_linkml/adapters/array.py +++ b/nwb_linkml/src/nwb_linkml/adapters/array.py @@ -107,7 +107,7 @@ class ArrayAdapter: Dict[Literal["any_of"], Dict[Literal["array"], List[ArrayExpression]]], ]: """ - Make the array expressions in a dict form that can be **kwarg'd into a SlotDefinition, + Make the array expressions in a dict form that can be ``**kwarg``'d into a SlotDefinition, taking into account needing to use ``any_of`` for multiple array range specifications. 
""" expressions = self.make() diff --git a/nwb_linkml/src/nwb_linkml/adapters/dataset.py b/nwb_linkml/src/nwb_linkml/adapters/dataset.py index ffe4285..3918235 100644 --- a/nwb_linkml/src/nwb_linkml/adapters/dataset.py +++ b/nwb_linkml/src/nwb_linkml/adapters/dataset.py @@ -1,7 +1,6 @@ """ Adapter for NWB datasets to linkml Classes """ - from abc import abstractmethod from typing import ClassVar, Optional, Type @@ -119,6 +118,54 @@ class MapScalarAttributes(DatasetMap): """ A scalar with attributes gets an additional slot "value" that contains the actual scalar value of this field + + Examples: + + .. adapter:: DatasetAdapter + :nwb: + datasets: + - name: starting_time + dtype: float64 + doc: Timestamp of the first sample in seconds. When timestamps are uniformly + spaced, the timestamp of the first sample can be specified and all subsequent + ones calculated from the sampling rate attribute. + quantity: '?' + attributes: + - name: rate + dtype: float32 + doc: Sampling rate, in Hz. + - name: unit + dtype: text + value: seconds + doc: Unit of measurement for time, which is fixed to 'seconds'. + :linkml: + classes: + - name: starting_time + description: Timestamp of the first sample in seconds. When timestamps are uniformly + spaced, the timestamp of the first sample can be specified and all subsequent + ones calculated from the sampling rate attribute. + attributes: + name: + name: name + ifabsent: string(starting_time) + identifier: true + range: string + required: true + equals_string: starting_time + rate: + name: rate + description: Sampling rate, in Hz. + range: float32 + unit: + name: unit + description: Unit of measurement for time, which is fixed to 'seconds'. 
+ range: text + value: + name: value + range: float64 + required: true + tree_root: true + """ @classmethod @@ -133,7 +180,7 @@ class MapScalarAttributes(DatasetMap): * - ``neurodata_type_inc`` - ``None`` * - ``attributes`` - - Truthy + - ``True`` * - ``dims`` - ``None`` * - ``shape`` @@ -167,13 +214,65 @@ class MapScalarAttributes(DatasetMap): class MapListlike(DatasetMap): """ - Datasets that refer to other datasets (that handle their own arrays) + Datasets that refer to a list of other datasets. + + Used exactly once in the core schema, in ``ImageReferences`` - + an array of references to other ``Image`` datasets. We ignore the + usual array structure and unnest the implicit array into a slot named after the + target type rather than the oddly-named ``num_images`` dimension so that + ultimately in the pydantic model we get a nicely behaved single-level list. + + Examples: + + .. adapter:: DatasetAdapter + :nwb: + datasets: + - neurodata_type_def: ImageReferences + neurodata_type_inc: NWBData + dtype: + target_type: Image + reftype: object + dims: + - num_images + shape: + - null + doc: Ordered dataset of references to Image objects. + :linkml: + classes: + - name: ImageReferences + description: Ordered dataset of references to Image objects. + is_a: NWBData + attributes: + name: + name: name + identifier: true + range: string + required: true + image: + name: image + description: Ordered dataset of references to Image objects. + multivalued: true + range: Image + required: true + tree_root: true + """ @classmethod def check(c, cls: Dataset) -> bool: """ Check if we are a 1D dataset that isn't a normal datatype + + .. 
list-table:: + :header-rows: 1 + :align: left + + * - Attr + - Value + * - :func:`.is_1d` + - ``True`` + * - ``dtype`` + - ``Class`` """ dtype = ClassAdapter.handle_dtype(cls.dtype) return is_1d(cls) and dtype != "AnyType" and dtype not in flat_to_linkml @@ -202,13 +301,83 @@ class MapArraylike(DatasetMap): Datasets without any additional attributes don't create their own subclass, they're just an array :). - Replace the base class with the array class, and make a slot that refers to it. + Replace the base class with a slot that defines the array. + + Examples: + + eg. from ``image.ImageSeries`` : + + .. adapter:: DatasetAdapter + :nwb: + datasets: + - name: data + dtype: numeric + dims: + - - frame + - x + - y + - - frame + - x + - y + - z + shape: + - - null + - null + - null + - - null + - null + - null + - null + doc: Binary data representing images across frames. If data are stored in an external + file, this should be an empty 3D array. + :linkml: + slots: + - name: data + description: Binary data representing images across frames. If data are stored in + an external file, this should be an empty 3D array. + multivalued: false + range: numeric + required: true + any_of: + - array: + dimensions: + - alias: frame + - alias: x + - alias: y + - array: + dimensions: + - alias: frame + - alias: x + - alias: y + - alias: z + + + + """ @classmethod def check(c, cls: Dataset) -> bool: """ Check if we're a plain array + + .. list-table:: + :header-rows: 1 + :align: left + + * - Attr + - Value + * - ``name`` + - ``True`` + * - ``dims`` + - ``True`` + * - ``shape`` + - ``True`` + * - :func:`.has_attrs` + - ``False`` + * - :func:`.is_compound` + - ``False`` + """ return ( cls.name and all([cls.dims, cls.shape]) and not has_attrs(cls) and not is_compound(cls) @@ -243,6 +412,88 @@ class MapArrayLikeAttributes(DatasetMap): """ The most general case - treat everything that isn't handled by one of the special cases as an array! + + Examples: + + .. 
adapter:: DatasetAdapter + :nwb: + datasets: + - neurodata_type_def: Image + neurodata_type_inc: NWBData + dtype: numeric + dims: + - - x + - y + - - x + - y + - r, g, b + - - x + - y + - r, g, b, a + shape: + - - null + - null + - - null + - null + - 3 + - - null + - null + - 4 + doc: An abstract data type for an image. Shape can be 2-D (x, y), or 3-D where the + third dimension can have three or four elements, e.g. (x, y, (r, g, b)) or + (x, y, (r, g, b, a)). + attributes: + - name: resolution + dtype: float32 + doc: Pixel resolution of the image, in pixels per centimeter. + required: false + - name: description + dtype: text + doc: Description of the image. + required: false + :linkml: + classes: + - name: Image + description: An abstract data type for an image. Shape can be 2-D (x, y), or 3-D + where the third dimension can have three or four elements, e.g. (x, y, (r, g, + b)) or (x, y, (r, g, b, a)). + is_a: NWBData + attributes: + name: + name: name + identifier: true + range: string + required: true + resolution: + name: resolution + description: Pixel resolution of the image, in pixels per centimeter. + range: float32 + description: + name: description + description: Description of the image. + range: text + array: + name: array + range: numeric + any_of: + - array: + dimensions: + - alias: x + - alias: y + - array: + dimensions: + - alias: x + - alias: y + - alias: r_g_b + exact_cardinality: 3 + - array: + dimensions: + - alias: x + - alias: y + - alias: r_g_b_a + exact_cardinality: 4 + tree_root: true + """ NEEDS_NAME = True @@ -487,7 +738,11 @@ class DatasetAdapter(ClassAdapter): def is_1d(cls: Dataset) -> bool: """ - Check if the values of a dataset are 1-dimensional + Check if the values of a dataset are 1-dimensional. 
+ + Specifically: + * a single-layer dim/shape list of length 1, or + * a nested dim/shape list where every nested spec is of length 1 """ return ( not any([isinstance(dim, list) for dim in cls.dims]) and len(cls.dims) == 1 diff --git a/nwb_linkml/src/nwb_linkml/io/schema.py b/nwb_linkml/src/nwb_linkml/io/schema.py index c76e037..f787eab 100644 --- a/nwb_linkml/src/nwb_linkml/io/schema.py +++ b/nwb_linkml/src/nwb_linkml/io/schema.py @@ -20,7 +20,16 @@ def load_yaml(path: Path | str) -> dict: """ Load yaml file from file, applying postload modifications """ - if isinstance(path, str) and not Path(path).exists(): + is_file = False + try: + a_path = Path(path) + if a_path.exists(): + is_file = True + except OSError: + # long strings can't be made into paths! + pass + + if not is_file: ns_dict = yaml.safe_load(path) else: with open(path) as file: diff --git a/nwb_linkml/tests/fixtures.py b/nwb_linkml/tests/fixtures.py index a232122..e13d91f 100644 --- a/nwb_linkml/tests/fixtures.py +++ b/nwb_linkml/tests/fixtures.py @@ -35,8 +35,17 @@ __all__ = [ def tmp_output_dir() -> Path: path = Path(__file__).parent.resolve() / "__tmp__" if path.exists(): - shutil.rmtree(str(path)) - path.mkdir() + for subdir in path.iterdir(): + if subdir.name == 'git': + # don't wipe out git repos every time, they don't rly change + continue + elif subdir.is_file() and subdir.parent != path: + continue + elif subdir.is_file(): + subdir.unlink(missing_ok=True) + else: + shutil.rmtree(str(subdir)) + path.mkdir(exist_ok=True) return path