catchup with prior work

sneakers-the-rat 2024-07-01 15:44:26 -07:00
parent 4ee97263ed
commit 4296b27538
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
8 changed files with 99 additions and 52 deletions

View file

@@ -87,6 +87,7 @@ napoleon_attr_annotations = True
 # graphviz
 graphviz_output_format = "svg"
 
+# autodoc
 autodoc_pydantic_model_show_json_error_strategy = 'coerce'
 autodoc_pydantic_model_show_json = False
 autodoc_mock_imports = []

View file

@@ -35,7 +35,7 @@ from copy import deepcopy, copy
 import warnings
 import inspect
 
-from nwb_linkml.maps import flat_to_npytyping
+from nwb_linkml.maps import flat_to_nptyping
 from linkml.generators import PydanticGenerator
 from linkml_runtime.linkml_model.meta import (
     Annotation,
@@ -160,6 +160,7 @@ class ConfiguredBaseModel(BaseModel):
     template += """{{ '\n\n' }}"""
     for cls in extra_classes:
         template += inspect.getsource(cls) + '\n\n'
+
     ### ENUMS ###
     template += """
     {% for e in enums.values() %}
@@ -445,7 +446,7 @@ class NWBPydanticGenerator(PydanticGenerator):
 
         # all dimensions should be the same dtype
         try:
-            dtype = flat_to_npytyping[list(attrs.values())[0].range]
+            dtype = flat_to_nptyping[list(attrs.values())[0].range]
         except KeyError as e:  # pragma: no cover
             warnings.warn(str(e))
             range = list(attrs.values())[0].range
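For reference, flat_to_nptyping (the corrected spelling of the old flat_to_npytyping) maps NWB's flat dtype names to nptyping type names as strings. A minimal sketch of the lookup the generator performs, using entries taken verbatim from the dtype-map hunk later in this commit:

from nwb_linkml.maps import flat_to_nptyping

# entries visible in the flat_to_nptyping hunk below
assert flat_to_nptyping['float32'] == 'Float32'
assert flat_to_nptyping['utf8'] == 'Unicode'
assert flat_to_nptyping['isodatetime'] == 'Datetime64'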

View file

@@ -2,4 +2,4 @@
 from nwb_linkml.maps.map import Map
 from nwb_linkml.maps.postload import MAP_HDMF_DATATYPE_DEF, MAP_HDMF_DATATYPE_INC
 from nwb_linkml.maps.quantity import QUANTITY_MAP
-from nwb_linkml.maps.dtype import flat_to_linkml, flat_to_npytyping
+from nwb_linkml.maps.dtype import flat_to_linkml, flat_to_nptyping

View file

@@ -1,6 +1,7 @@
 import numpy as np
-from typing import Any
+from typing import Any, Type
 from datetime import datetime
+import nptyping
 
 flat_to_linkml = {
     "float" : "float",
@@ -32,7 +33,7 @@ flat_to_linkml = {
 Map between the flat data types and the simpler linkml base types
 """
 
-flat_to_npytyping = {
+flat_to_nptyping = {
     "float": "Float",
     "float32": "Float32",
     "double": "Double",
@@ -54,10 +55,13 @@ flat_to_npytyping = {
     "utf": "Unicode",
     "utf8": "Unicode",
     "utf_8": "Unicode",
+    "string": "Unicode",
+    "str": "Unicode",
     "ascii": "String",
     "bool": "Bool",
     "isodatetime": "Datetime64",
-    'AnyType': 'Any'
+    'AnyType': 'Any',
+    'object': 'Object'
 }
 
 np_to_python = {
@@ -92,4 +96,18 @@ allowed_precisions = {
 Following HDMF, it turns out that specifying precision actually specifies minimum precision
 https://github.com/hdmf-dev/hdmf/blob/ddc842b5c81d96e0b957b96e88533b16c137e206/src/hdmf/validate/validator.py#L22
 https://github.com/hdmf-dev/hdmf/blob/ddc842b5c81d96e0b957b96e88533b16c137e206/src/hdmf/spec/spec.py#L694-L714
 """
+
+def struct_from_dtype(dtype: np.dtype) -> Type[nptyping.Structure]:
+    """
+    Create a nptyping Structure from a compound numpy dtype
+
+    nptyping structures have the form::
+
+        Structure["name: Str, age: Int"]
+
+    """
+    struct_pieces = [f'{k}: {flat_to_nptyping[v[0].name]}' for k, v in dtype.fields.items()]
+    struct_dtype = ', '.join(struct_pieces)
+    return nptyping.Structure[struct_dtype]
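A quick usage sketch of struct_from_dtype (the field names here are hypothetical; the int32 -> Int32 and object -> Object mappings are confirmed by the new test at the end of this commit):

import numpy as np
from nwb_linkml.maps.dtype import struct_from_dtype

# compound dtypes expose .fields, mapping each name to a (dtype, offset) pair
compound = np.dtype([('id', 'int32'), ('meta', 'object')])
struct = struct_from_dtype(compound)
# struct == nptyping.Structure['id: Int32, meta: Object']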

View file

@@ -1,17 +1,19 @@
 """
 Mapping functions for handling HDMF classes like DynamicTables
 """
+import pdb
 from typing import List, Type, Optional, Any
 import warnings
 
 import h5py
+import nptyping
 from pydantic import create_model, BaseModel
 import numpy as np
-from nwb_linkml.types.hdf5 import HDF5_Path
-from nwb_linkml.types.ndarray import NDArray, NDArrayProxy
 import dask.array as da
+
+from nwb_linkml.types.hdf5 import HDF5_Path
+from nwb_linkml.types.ndarray import NDArray, NDArrayProxy
+from nwb_linkml.maps.dtype import flat_to_nptyping, struct_from_dtype
 
 def model_from_dynamictable(group:h5py.Group, base:Optional[BaseModel] = None) -> Type[BaseModel]:
     """
@@ -21,10 +23,13 @@ def model_from_dynamictable(group:h5py.Group, base:Optional[BaseModel] = None) -> Type[BaseModel]:
     types = {}
     for col in colnames:
-        nptype = group[col].dtype.type
-        if nptype == np.void:
-            warnings.warn(f"Cant handle numpy void type for column {col} in {group.name}")
-            continue
+        nptype = group[col].dtype
+        if nptype.type == np.void:
+            #pdb.set_trace()
+            nptype = struct_from_dtype(nptype)
+        else:
+            nptype = nptype.type
 
         type_ = Optional[NDArray[Any, nptype]]
 
         # FIXME: handling nested column types that appear only in some versions?
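The new branch above replaces the old warn-and-skip behavior: compound (struct) columns, whose dtype .type is np.void, are now mapped to an nptyping Structure rather than dropped. A standalone sketch of that dispatch, with an illustrative compound dtype standing in for an h5py column:

import numpy as np
from nwb_linkml.maps.dtype import struct_from_dtype

dtype = np.dtype([('start', 'int32'), ('tag', 'object')])  # stands in for group[col].dtype
if dtype.type == np.void:
    # compound column: build an nptyping Structure for its fields
    nptype = struct_from_dtype(dtype)
else:
    # scalar column: keep the plain numpy scalar type
    nptype = dtype.type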

View file

@@ -13,7 +13,10 @@ from typing import (
 )
 import sys
 from copy import copy
+from functools import reduce
+from operator import or_
 
+import nptyping.structure
 from pydantic_core import core_schema
 from pydantic import (
     BaseModel,
@@ -36,6 +39,46 @@ from nptyping.shape_expression import check_shape
 from nwb_linkml.maps.dtype import np_to_python, allowed_precisions
 
+
+def _list_of_lists_schema(shape, array_type_handler):
+    """
+    Make a pydantic JSON schema for an array as a list of lists
+    """
+    shape_parts = shape.__args__[0].split(',')
+    split_parts = [p.split(' ')[1] if len(p.split(' ')) == 2 else None for p in shape_parts]
+
+    # Construct a list of list schema
+    # go in reverse order - construct list schemas such that
+    # the final schema is the one that checks the first dimension
+    shape_labels = reversed(split_parts)
+    shape_args = reversed(shape.prepared_args)
+    list_schema = None
+    for arg, label in zip(shape_args, shape_labels):
+        # which handler to use? for the first we use the actual type
+        # handler, everywhere else we use the prior list handler
+        if list_schema is None:
+            inner_schema = array_type_handler
+        else:
+            inner_schema = list_schema
+
+        # make a label annotation, if we have one
+        if label is not None:
+            metadata = {'name': label}
+        else:
+            metadata = None
+
+        # make the current level list schema, accounting for shape
+        if arg == '*':
+            list_schema = core_schema.list_schema(inner_schema,
+                                                  metadata=metadata)
+        else:
+            arg = int(arg)
+            list_schema = core_schema.list_schema(
+                inner_schema,
+                min_length=arg,
+                max_length=arg,
+                metadata=metadata
+            )
+    return list_schema
+
 
 class NDArrayMeta(_NDArrayMeta, implementation="NDArray"):
     """
@@ -59,8 +102,12 @@ class NDArray(NPTypingType, metaclass=NDArrayMeta):
         shape, dtype = _source_type.__args__
 
         # get pydantic core schema for the given specified type
-        array_type_handler = _handler.generate_schema(
-            np_to_python[dtype])
+        if isinstance(dtype, nptyping.structure.StructureMeta):
+            raise NotImplementedError('Jonny finish this')
+            # functools.reduce(operator.or_, [int, float, str])
+        else:
+            array_type_handler = _handler.generate_schema(
+                np_to_python[dtype])
 
         def validate_dtype(value: np.ndarray) -> np.ndarray:
             if dtype is Any:
@@ -82,43 +129,9 @@ class NDArray(NPTypingType, metaclass=NDArrayMeta):
 
         if shape is Any:
             list_schema = core_schema.list_schema(core_schema.any_schema())
         else:
-            shape_parts = shape.__args__[0].split(',')
-            split_parts = [p.split(' ')[1] if len(p.split(' ')) == 2 else None for p in shape_parts]
-
-            # Construct a list of list schema
-            # go in reverse order - construct list schemas such that
-            # the final schema is the one that checks the first dimension
-            shape_labels = reversed(split_parts)
-            shape_args = reversed(shape.prepared_args)
-            list_schema = None
-            for arg, label in zip(shape_args, shape_labels):
-                # which handler to use? for the first we use the actual type
-                # handler, everywhere else we use the prior list handler
-                if list_schema is None:
-                    inner_schema = array_type_handler
-                else:
-                    inner_schema = list_schema
-
-                # make a label annotation, if we have one
-                if label is not None:
-                    metadata = {'name': label}
-                else:
-                    metadata = None
-
-                # make the current level list schema, accounting for shape
-                if arg == '*':
-                    list_schema = core_schema.list_schema(inner_schema,
-                                                          metadata=metadata)
-                else:
-                    arg = int(arg)
-                    list_schema = core_schema.list_schema(
-                        inner_schema,
-                        min_length=arg,
-                        max_length=arg,
-                        metadata=metadata
-                    )
+            list_schema = _list_of_lists_schema(shape, array_type_handler)
 
         def array_to_list(instance: np.ndarray | DaskArray) -> list|dict:
             if isinstance(instance, DaskArray):
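On the NotImplementedError branch above: the commented hint functools.reduce(operator.or_, [int, float, str]) suggests folding a structured dtype's field types into a single union for pydantic to handle. A minimal sketch of that folding idiom (the type list here is illustrative, not taken from the source):

from functools import reduce
from operator import or_

# fold a list of types left to right with |, yielding int | float | str
union_type = reduce(or_, [int, float, str])
assert union_type == int | float | str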

View file

@@ -11,7 +11,7 @@ from nwb_linkml.io.hdf5 import HDF5IO
 from nwb_linkml.io.hdf5 import truncate_file
 
-@pytest.mark.parametrize('dset', ['aibs.nwb'])
+@pytest.mark.parametrize('dset', ['aibs.nwb', 'aibs_ecephys.nwb'])
 def test_hdf_read(data_dir, dset):
     NWBFILE = data_dir / dset
     io = HDF5IO(path=NWBFILE)

View file

@@ -0,0 +1,9 @@
+import numpy as np
+import nptyping
+from nwb_linkml.maps.dtype import struct_from_dtype
+
+def test_struct_from_dtype():
+    # Super weak test with fixed values, will expand with parameterize if needed
+    np_dtype = np.dtype([('name1', 'int32'), ('name2', 'object'), ('name3', 'str')])
+    struct = struct_from_dtype(np_dtype)
+    assert struct == nptyping.Structure['name1: Int32, name2: Object, name3: Unicode']