catchup with prior work

sneakers-the-rat 2024-07-01 15:44:26 -07:00
parent 4ee97263ed
commit 4296b27538
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
8 changed files with 99 additions and 52 deletions


@@ -87,6 +87,7 @@ napoleon_attr_annotations = True
 # graphviz
 graphviz_output_format = "svg"
 # autodoc
+autodoc_pydantic_model_show_json_error_strategy = 'coerce'
 autodoc_pydantic_model_show_json = False
 autodoc_mock_imports = []


@@ -35,7 +35,7 @@ from copy import deepcopy, copy
 import warnings
 import inspect
-from nwb_linkml.maps import flat_to_npytyping
+from nwb_linkml.maps import flat_to_nptyping
 from linkml.generators import PydanticGenerator
 from linkml_runtime.linkml_model.meta import (
     Annotation,
@@ -160,6 +160,7 @@ class ConfiguredBaseModel(BaseModel):
 template += """{{ '\n\n' }}"""
 for cls in extra_classes:
     template += inspect.getsource(cls) + '\n\n'
+### ENUMS ###
 template += """
 {% for e in enums.values() %}
@@ -445,7 +446,7 @@ class NWBPydanticGenerator(PydanticGenerator):
         # all dimensions should be the same dtype
         try:
-            dtype = flat_to_npytyping[list(attrs.values())[0].range]
+            dtype = flat_to_nptyping[list(attrs.values())[0].range]
         except KeyError as e:  # pragma: no cover
             warnings.warn(str(e))
             range = list(attrs.values())[0].range


@@ -2,4 +2,4 @@
 from nwb_linkml.maps.map import Map
 from nwb_linkml.maps.postload import MAP_HDMF_DATATYPE_DEF, MAP_HDMF_DATATYPE_INC
 from nwb_linkml.maps.quantity import QUANTITY_MAP
-from nwb_linkml.maps.dtype import flat_to_linkml, flat_to_npytyping
+from nwb_linkml.maps.dtype import flat_to_linkml, flat_to_nptyping


@@ -1,6 +1,7 @@
 import numpy as np
-from typing import Any
+from typing import Any, Type
 from datetime import datetime
+import nptyping

 flat_to_linkml = {
     "float" : "float",
@@ -32,7 +33,7 @@ flat_to_linkml = {
 Map between the flat data types and the simpler linkml base types
 """

-flat_to_npytyping = {
+flat_to_nptyping = {
     "float": "Float",
     "float32": "Float32",
     "double": "Double",
@@ -54,10 +55,13 @@ flat_to_npytyping = {
     "utf": "Unicode",
     "utf8": "Unicode",
     "utf_8": "Unicode",
+    "string": "Unicode",
     "str": "Unicode",
     "ascii": "String",
     "bool": "Bool",
     "isodatetime": "Datetime64",
-    'AnyType': 'Any'
+    'AnyType': 'Any',
+    'object': 'Object'
 }

 np_to_python = {
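
For reference, the renamed flat_to_nptyping map stores nptyping type names as plain strings, so the entries added in this hunk can be checked directly; a minimal sketch:

    from nwb_linkml.maps.dtype import flat_to_nptyping

    # entries added in this commit
    assert flat_to_nptyping["string"] == "Unicode"
    assert flat_to_nptyping["object"] == "Object"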
@@ -93,3 +97,17 @@ Following HDMF, it turns out that specifying precision actually specifies minimum
 https://github.com/hdmf-dev/hdmf/blob/ddc842b5c81d96e0b957b96e88533b16c137e206/src/hdmf/validate/validator.py#L22
 https://github.com/hdmf-dev/hdmf/blob/ddc842b5c81d96e0b957b96e88533b16c137e206/src/hdmf/spec/spec.py#L694-L714
 """
+
+def struct_from_dtype(dtype: np.dtype) -> Type[nptyping.Structure]:
+    """
+    Create a nptyping Structure from a compound numpy dtype
+
+    nptyping structures have the form::
+
+        Structure["name: Str, age: Int"]
+
+    """
+    struct_pieces = [f'{k}: {flat_to_nptyping[v[0].name]}' for k, v in dtype.fields.items()]
+    struct_dtype = ', '.join(struct_pieces)
+    return nptyping.Structure[struct_dtype]
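
A minimal usage sketch for the new struct_from_dtype, using a hypothetical two-field compound dtype; the per-field lookup goes through flat_to_nptyping, as exercised by the test added at the end of this commit:

    import numpy as np
    from nwb_linkml.maps.dtype import struct_from_dtype

    # compound dtypes carry named fields; each field's dtype name is
    # looked up in flat_to_nptyping to build the Structure expression
    compound = np.dtype([("id", "int32"), ("label", "object")])
    struct = struct_from_dtype(compound)
    # struct == nptyping.Structure['id: Int32, label: Object']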


@@ -1,17 +1,19 @@
 """
 Mapping functions for handling HDMF classes like DynamicTables
 """
+import pdb
 from typing import List, Type, Optional, Any
-import warnings
 import h5py
+import nptyping
 from pydantic import create_model, BaseModel
 import numpy as np
-from nwb_linkml.types.hdf5 import HDF5_Path
-from nwb_linkml.types.ndarray import NDArray, NDArrayProxy
 import dask.array as da
+from nwb_linkml.types.hdf5 import HDF5_Path
+from nwb_linkml.types.ndarray import NDArray, NDArrayProxy
+from nwb_linkml.maps.dtype import flat_to_nptyping, struct_from_dtype

 def model_from_dynamictable(group:h5py.Group, base:Optional[BaseModel] = None) -> Type[BaseModel]:
     """
@@ -21,10 +23,13 @@ def model_from_dynamictable(group:h5py.Group, base:Optional[BaseModel] = None) -> Type[BaseModel]:
     types = {}
     for col in colnames:
-        nptype = group[col].dtype.type
-        if nptype == np.void:
-            warnings.warn(f"Cant handle numpy void type for column {col} in {group.name}")
-            continue
+        nptype = group[col].dtype
+        if nptype.type == np.void:
+            #pdb.set_trace()
+            nptype = struct_from_dtype(nptype)
+        else:
+            nptype = nptype.type

         type_ = Optional[NDArray[Any, nptype]]
         # FIXME: handling nested column types that appear only in some versions?
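
To illustrate the changed branch: compound HDF5 column dtypes have type np.void, and instead of being skipped with a warning they are now converted to an nptyping Structure usable inside NDArray. A sketch with a hypothetical two-field column dtype:

    import numpy as np
    from nwb_linkml.maps.dtype import struct_from_dtype

    dtype = np.dtype([("start", "int32"), ("stop", "int32")])
    assert dtype.type is np.void       # compound dtypes are void-typed
    nptype = struct_from_dtype(dtype)  # Structure['start: Int32, stop: Int32']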


@@ -13,7 +13,10 @@ from typing import (
 )
 import sys
 from copy import copy
+from functools import reduce
+from operator import or_
+import nptyping.structure
 from pydantic_core import core_schema
 from pydantic import (
     BaseModel,
@@ -36,6 +39,46 @@ from nptyping.shape_expression import check_shape
 from nwb_linkml.maps.dtype import np_to_python, allowed_precisions

+def _list_of_lists_schema(shape, array_type_handler):
+    """
+    Make a pydantic JSON schema for an array as a list of lists
+    """
+    shape_parts = shape.__args__[0].split(',')
+    split_parts = [p.split(' ')[1] if len(p.split(' ')) == 2 else None for p in shape_parts]
+
+    # Construct a list of list schema
+    # go in reverse order - construct list schemas such that
+    # the final schema is the one that checks the first dimension
+    shape_labels = reversed(split_parts)
+    shape_args = reversed(shape.prepared_args)
+    list_schema = None
+    for arg, label in zip(shape_args, shape_labels):
+        # which handler to use? for the first we use the actual type
+        # handler, everywhere else we use the prior list handler
+        if list_schema is None:
+            inner_schema = array_type_handler
+        else:
+            inner_schema = list_schema
+
+        # make a label annotation, if we have one
+        if label is not None:
+            metadata = {'name': label}
+        else:
+            metadata = None
+
+        # make the current level list schema, accounting for shape
+        if arg == '*':
+            list_schema = core_schema.list_schema(inner_schema,
+                                                  metadata=metadata)
+        else:
+            arg = int(arg)
+            list_schema = core_schema.list_schema(
+                inner_schema,
+                min_length=arg,
+                max_length=arg,
+                metadata=metadata
+            )
+    return list_schema

 class NDArrayMeta(_NDArrayMeta, implementation="NDArray"):
     """
@@ -59,8 +102,12 @@ class NDArray(NPTypingType, metaclass=NDArrayMeta):
         shape, dtype = _source_type.__args__

         # get pydantic core schema for the given specified type
-        array_type_handler = _handler.generate_schema(
-            np_to_python[dtype])
+        if isinstance(dtype, nptyping.structure.StructureMeta):
+            raise NotImplementedError('Jonny finish this')
+            # functools.reduce(operator.or_, [int, float, str])
+        else:
+            array_type_handler = _handler.generate_schema(
+                np_to_python[dtype])

         def validate_dtype(value: np.ndarray) -> np.ndarray:
             if dtype is Any:
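
The commented hint suggests the eventual approach for structured dtypes: collapse the structure's field types into a single Union for the schema handler. A hypothetical sketch of that idea (not what this commit implements; the structured branch currently raises NotImplementedError):

    from functools import reduce
    from operator import or_

    # hypothetical: python types gathered from the Structure's fields
    field_types = [int, float, str]
    union = reduce(or_, field_types)  # int | float | str
    # array_type_handler = _handler.generate_schema(union)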
@@ -82,43 +129,9 @@ class NDArray(NPTypingType, metaclass=NDArrayMeta):
         if shape is Any:
             list_schema = core_schema.list_schema(core_schema.any_schema())
         else:
-            shape_parts = shape.__args__[0].split(',')
-            split_parts = [p.split(' ')[1] if len(p.split(' ')) == 2 else None for p in shape_parts]
+            list_schema = _list_of_lists_schema(shape, array_type_handler)
-            # Construct a list of list schema
-            # go in reverse order - construct list schemas such that
-            # the final schema is the one that checks the first dimension
-            shape_labels = reversed(split_parts)
-            shape_args = reversed(shape.prepared_args)
-            list_schema = None
-            for arg, label in zip(shape_args, shape_labels):
-                # which handler to use? for the first we use the actual type
-                # handler, everywhere else we use the prior list handler
-                if list_schema is None:
-                    inner_schema = array_type_handler
-                else:
-                    inner_schema = list_schema
-
-                # make a label annotation, if we have one
-                if label is not None:
-                    metadata = {'name': label}
-                else:
-                    metadata = None
-
-                # make the current level list schema, accounting for shape
-                if arg == '*':
-                    list_schema = core_schema.list_schema(inner_schema,
-                                                          metadata=metadata)
-                else:
-                    arg = int(arg)
-                    list_schema = core_schema.list_schema(
-                        inner_schema,
-                        min_length=arg,
-                        max_length=arg,
-                        metadata=metadata
-                    )

         def array_to_list(instance: np.ndarray | DaskArray) -> list|dict:
             if isinstance(instance, DaskArray):
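
For context, a minimal sketch of how these NDArray annotations are written on a model, assuming np.uint8 is present in the np_to_python map; the labeled dimensions become the {'name': ...} metadata on each nested list schema:

    import numpy as np
    from nptyping import Shape
    from pydantic import BaseModel
    from nwb_linkml.types.ndarray import NDArray

    class Image(BaseModel):
        # two wildcard dimensions, labeled 'x' and 'y'
        array: NDArray[Shape["* x, * y"], np.uint8]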


@@ -11,7 +11,7 @@ from nwb_linkml.io.hdf5 import HDF5IO
 from nwb_linkml.io.hdf5 import truncate_file

-@pytest.mark.parametrize('dset', ['aibs.nwb'])
+@pytest.mark.parametrize('dset', ['aibs.nwb', 'aibs_ecephys.nwb'])
 def test_hdf_read(data_dir, dset):
     NWBFILE = data_dir / dset
     io = HDF5IO(path=NWBFILE)


@@ -0,0 +1,9 @@
+import numpy as np
+import nptyping
+from nwb_linkml.maps.dtype import struct_from_dtype
+
+def test_struct_from_dtype():
+    # Super weak test with fixed values, will expand with parameterize if needed
+    np_dtype = np.dtype([('name1', 'int32'), ('name2', 'object'), ('name3', 'str')])
+    struct = struct_from_dtype(np_dtype)
+    assert struct == nptyping.Structure['name1: Int32, name2: Object, name3: Unicode']