more docs for dataset adapters.

also:
- fix search for initial - in dataset doctest
- don't clear git repos on repeated test runs
- fix string handling in load_yaml
sneakers-the-rat 2024-07-09 03:26:45 -07:00
parent ce902476d1
commit f4d397cde1
5 changed files with 283 additions and 10 deletions

@@ -19,7 +19,7 @@ NWB_KEYS = re.compile(r"(^\s*datasets:\s*\n)|^groups:")
def _strip_nwb(nwb: str) -> str:
# strip 'datasets:' keys and decoration left in for readability/context
nwb = re.sub(NWB_KEYS, "", nwb)
- nwb = re.sub(r"-", " ", nwb)
+ nwb = re.sub(r"^-", " ", nwb)
nwb = textwrap.dedent(nwb)
return nwb
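The regex fix in this hunk is easy to miss: the unanchored pattern blanked every hyphen in the doctest source, including hyphens inside doc text, while `^-` matches only a dash at the very start of the string (no `re.MULTILINE` is passed, so `^` anchors at position 0). A standalone sketch of the difference:

```python
import re

snippet = "- name: starting_time\n  doc: a non-uniform case\n"

# unanchored: every hyphen goes, including the one inside "non-uniform"
re.sub(r"-", " ", snippet)
# '  name: starting_time\n  doc: a non uniform case\n'

# anchored: only the initial list dash is blanked, hyphens in prose survive
re.sub(r"^-", " ", snippet)
# '  name: starting_time\n  doc: a non-uniform case\n'
```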

@@ -107,7 +107,7 @@ class ArrayAdapter:
Dict[Literal["any_of"], Dict[Literal["array"], List[ArrayExpression]]],
]:
"""
- Make the array expressions in a dict form that can be **kwarg'd into a SlotDefinition,
+ Make the array expressions in a dict form that can be ``**kwarg``'d into a SlotDefinition,
taking into account needing to use ``any_of`` for multiple array range specifications.
"""
expressions = self.make()
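To make the docstring concrete: the returned dict is shaped so it can be splatted straight into a ``SlotDefinition`` constructor. A hedged sketch of that call pattern, with a hand-written expression dict standing in for the adapter's real output (assumes a linkml-runtime recent enough to support array expressions):

```python
from linkml_runtime.linkml_model import SlotDefinition

# Hand-written stand-in for the adapter's output; the real dict comes from
# the ArrayAdapter's make() call shown above.
expressions = {
    "any_of": [
        {"array": {"dimensions": [{"alias": "x"}, {"alias": "y"}]}},
        {"array": {"dimensions": [{"alias": "x"}, {"alias": "y"}, {"alias": "z"}]}},
    ]
}

# The point of the dict form: it kwargs cleanly into a SlotDefinition.
slot = SlotDefinition(name="data", range="numeric", **expressions)
```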

@@ -1,7 +1,6 @@
"""
Adapter for NWB datasets to linkml Classes
"""
from abc import abstractmethod
from typing import ClassVar, Optional, Type
@@ -119,6 +118,54 @@ class MapScalarAttributes(DatasetMap):
"""
A scalar with attributes gets an additional slot "value" that contains the actual scalar
value of this field.

Examples:
.. adapter:: DatasetAdapter
:nwb:
datasets:
- name: starting_time
dtype: float64
doc: Timestamp of the first sample in seconds. When timestamps are uniformly
spaced, the timestamp of the first sample can be specified and all subsequent
ones calculated from the sampling rate attribute.
quantity: '?'
attributes:
- name: rate
dtype: float32
doc: Sampling rate, in Hz.
- name: unit
dtype: text
value: seconds
doc: Unit of measurement for time, which is fixed to 'seconds'.
:linkml:
classes:
- name: starting_time
description: Timestamp of the first sample in seconds. When timestamps are uniformly
spaced, the timestamp of the first sample can be specified and all subsequent
ones calculated from the sampling rate attribute.
attributes:
name:
name: name
ifabsent: string(starting_time)
identifier: true
range: string
required: true
equals_string: starting_time
rate:
name: rate
description: Sampling rate, in Hz.
range: float32
unit:
name: unit
description: Unit of measurement for time, which is fixed to 'seconds'.
range: text
value:
name: value
range: float64
required: true
tree_root: true
"""
@classmethod
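As a rough guide to where this lands downstream, the ``starting_time`` example above eventually becomes a pydantic model along these lines (an illustrative sketch only; the generated module's actual naming and layout may differ):

```python
from typing import Literal, Optional
from pydantic import BaseModel

class StartingTime(BaseModel):
    # identifier slot, pinned by equals_string/ifabsent above
    name: Literal["starting_time"] = "starting_time"
    rate: Optional[float] = None  # Sampling rate, in Hz
    unit: Optional[str] = None    # fixed to 'seconds' in the schema
    value: float                  # the scalar itself lands in `value`
```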
@@ -133,7 +180,7 @@ class MapScalarAttributes(DatasetMap):
* - ``neurodata_type_inc``
- ``None``
* - ``attributes``
- - Truthy
+ - ``True``
* - ``dims``
- ``None``
* - ``shape``
@@ -167,13 +214,65 @@
class MapListlike(DatasetMap):
"""
- Datasets that refer to other datasets (that handle their own arrays)
+ Datasets that refer to a list of other datasets.
Used exactly once in the core schema, in ``ImageReferences`` -
an array of references to other ``Image`` datasets. We ignore the
usual array structure and unnest the implicit array into a slot named after the
target type rather than the oddly-named ``num_images`` dimension so that
ultimately in the pydantic model we get a nicely behaved single-level list.

Examples:
.. adapter:: DatasetAdapter
:nwb:
datasets:
- neurodata_type_def: ImageReferences
neurodata_type_inc: NWBData
dtype:
target_type: Image
reftype: object
dims:
- num_images
shape:
- null
doc: Ordered dataset of references to Image objects.
:linkml:
classes:
- name: ImageReferences
description: Ordered dataset of references to Image objects.
is_a: NWBData
attributes:
name:
name: name
identifier: true
range: string
required: true
image:
name: image
description: Ordered dataset of references to Image objects.
multivalued: true
range: Image
required: true
tree_root: true
"""
@classmethod
def check(c, cls: Dataset) -> bool:
"""
Check if we are a 1D dataset whose dtype is a reference to another class rather than a plain datatype.

.. list-table::
:header-rows: 1
:align: left
* - Attr
- Value
* - :func:`.is_1d`
- ``True``
* - ``dtype``
- ``Class``
"""
dtype = ClassAdapter.handle_dtype(cls.dtype)
return is_1d(cls) and dtype != "AnyType" and dtype not in flat_to_linkml
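Paraphrased as standalone code (a readable stand-in, not the module's implementation; ``flat_scalars`` abridges the real ``flat_to_linkml`` mapping of NWB dtypes to linkml ranges):

```python
flat_scalars = {"float32", "float64", "int32", "text", "bool"}  # abridged stand-in

def looks_listlike(is_one_d: bool, dtype: str) -> bool:
    # keep 1-D datasets whose dtype names another class, not a scalar
    return is_one_d and dtype != "AnyType" and dtype not in flat_scalars

assert looks_listlike(True, "Image")        # reference range: handled by this map
assert not looks_listlike(True, "float32")  # scalar array: another map's job
```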
@@ -202,13 +301,83 @@
Datasets without any additional attributes don't create their own subclass,
they're just an array :).
- Replace the base class with the array class, and make a slot that refers to it.
+ Replace the base class with a slot that defines the array.

Examples:
e.g. from ``image.ImageSeries``:

.. adapter:: DatasetAdapter
:nwb:
datasets:
- name: data
dtype: numeric
dims:
- - frame
- x
- y
- - frame
- x
- y
- z
shape:
- - null
- null
- null
- - null
- null
- null
- null
doc: Binary data representing images across frames. If data are stored in an external
file, this should be an empty 3D array.
:linkml:
slots:
- name: data
description: Binary data representing images across frames. If data are stored in
an external file, this should be an empty 3D array.
multivalued: false
range: numeric
required: true
any_of:
- array:
dimensions:
- alias: frame
- alias: x
- alias: y
- array:
dimensions:
- alias: frame
- alias: x
- alias: y
- alias: z
"""
@classmethod
def check(c, cls: Dataset) -> bool:
"""
Check if we're a plain array.

.. list-table::
:header-rows: 1
:align: left
* - Attr
- Value
* - ``name``
- ``True``
* - ``dims``
- ``True``
* - ``shape``
- ``True``
* - :func:`.has_attrs`
- ``False``
* - :func:`.is_compound`
- ``False``
"""
return (
cls.name and all([cls.dims, cls.shape]) and not has_attrs(cls) and not is_compound(cls)
@@ -243,6 +412,88 @@ class MapArrayLikeAttributes(DatasetMap):
"""
The most general case - treat everything that isn't handled by one of the special cases
as an array!

Examples:
.. adapter:: DatasetAdapter
:nwb:
datasets:
- neurodata_type_def: Image
neurodata_type_inc: NWBData
dtype: numeric
dims:
- - x
- y
- - x
- y
- r, g, b
- - x
- y
- r, g, b, a
shape:
- - null
- null
- - null
- null
- 3
- - null
- null
- 4
doc: An abstract data type for an image. Shape can be 2-D (x, y), or 3-D where the
third dimension can have three or four elements, e.g. (x, y, (r, g, b)) or
(x, y, (r, g, b, a)).
attributes:
- name: resolution
dtype: float32
doc: Pixel resolution of the image, in pixels per centimeter.
required: false
- name: description
dtype: text
doc: Description of the image.
required: false
:linkml:
classes:
- name: Image
description: An abstract data type for an image. Shape can be 2-D (x, y), or 3-D
where the third dimension can have three or four elements, e.g. (x, y, (r, g,
b)) or (x, y, (r, g, b, a)).
is_a: NWBData
attributes:
name:
name: name
identifier: true
range: string
required: true
resolution:
name: resolution
description: Pixel resolution of the image, in pixels per centimeter.
range: float32
description:
name: description
description: Description of the image.
range: text
array:
name: array
range: numeric
any_of:
- array:
dimensions:
- alias: x
- alias: y
- array:
dimensions:
- alias: x
- alias: y
- alias: r_g_b
exact_cardinality: 3
- array:
dimensions:
- alias: x
- alias: y
- alias: r_g_b_a
exact_cardinality: 4
tree_root: true
"""
NEEDS_NAME = True
@@ -487,7 +738,11 @@ class DatasetAdapter(ClassAdapter):
def is_1d(cls: Dataset) -> bool:
"""
- Check if the values of a dataset are 1-dimensional
+ Check if the values of a dataset are 1-dimensional.
+
+ Specifically:
+
+ * a single-layer dim/shape list of length 1, or
+ * a nested dim/shape list where every nested spec is of length 1
"""
return (
not any([isinstance(dim, list) for dim in cls.dims]) and len(cls.dims) == 1
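The hunk cuts the expression off mid-way; as a self-contained sketch (a paraphrase, not the module's exact code), the two cases from the docstring look like this, with ``dims`` mirroring ``Dataset.dims``:

```python
from typing import List, Union

def is_1d_sketch(dims: List[Union[str, List[str]]]) -> bool:
    if not any(isinstance(dim, list) for dim in dims):
        # single-layer dim list: 1-D iff it has exactly one entry
        return len(dims) == 1
    # nested dim list: 1-D iff every alternative spec is itself length 1
    return all(isinstance(dim, list) and len(dim) == 1 for dim in dims)

assert is_1d_sketch(["num_times"])
assert is_1d_sketch([["num_times"]])
assert not is_1d_sketch([["x", "y"]])
```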

@@ -20,7 +20,16 @@ def load_yaml(path: Path | str) -> dict:
    """
    Load yaml file from file, applying postload modifications
    """
-   if isinstance(path, str) and not Path(path).exists():
+   is_file = False
+   try:
+       a_path = Path(path)
+       if a_path.exists():
+           is_file = True
+   except OSError:
+       # long strings can't be made into paths!
+       pass
+   if not is_file:
        ns_dict = yaml.safe_load(path)
    else:
        with open(path) as file:
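In practice the change means ``load_yaml`` now accepts either a filesystem path or a raw YAML string, and strings too long to be a valid path no longer crash in ``Path()``. A usage sketch (the file name here is hypothetical):

```python
from pathlib import Path

ns = load_yaml(Path("nwb.namespace.yaml"))  # hypothetical file: read from disk
ns = load_yaml("datasets:\n- name: data\n  dtype: float32\n")  # parsed as YAML directly
```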

@@ -35,8 +35,17 @@ __all__ = [
def tmp_output_dir() -> Path:
    path = Path(__file__).parent.resolve() / "__tmp__"
    if path.exists():
-       shutil.rmtree(str(path))
-   path.mkdir()
+       for subdir in path.iterdir():
+           if subdir.name == 'git':
+               # don't wipe out git repos every time, they don't rly change
+               continue
+           elif subdir.is_file() and subdir.parent != path:
+               continue
+           elif subdir.is_file():
+               subdir.unlink(missing_ok=True)
+           else:
+               shutil.rmtree(str(subdir))
+   path.mkdir(exist_ok=True)
    return path