diff --git a/docs/conf.py b/docs/conf.py
index f46e24d..a236926 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -87,6 +87,7 @@ napoleon_attr_annotations = True
 # graphviz
 graphviz_output_format = "svg"
 
+# autodoc
 autodoc_pydantic_model_show_json_error_strategy = 'coerce'
 autodoc_pydantic_model_show_json = False
 autodoc_mock_imports = []
diff --git a/nwb_linkml/src/nwb_linkml/generators/pydantic.py b/nwb_linkml/src/nwb_linkml/generators/pydantic.py
index 2fd1d8e..30d8bef 100644
--- a/nwb_linkml/src/nwb_linkml/generators/pydantic.py
+++ b/nwb_linkml/src/nwb_linkml/generators/pydantic.py
@@ -35,7 +35,7 @@ from copy import deepcopy, copy
 import warnings
 import inspect
 
-from nwb_linkml.maps import flat_to_npytyping
+from nwb_linkml.maps import flat_to_nptyping
 from linkml.generators import PydanticGenerator
 from linkml_runtime.linkml_model.meta import (
     Annotation,
@@ -160,6 +160,7 @@ class ConfiguredBaseModel(BaseModel):
 template += """{{ '\n\n' }}"""
 for cls in extra_classes:
     template += inspect.getsource(cls) + '\n\n'
+
 ### ENUMS ###
 template += """
 {% for e in enums.values() %}
@@ -445,7 +446,7 @@ class NWBPydanticGenerator(PydanticGenerator):
 
         # all dimensions should be the same dtype
         try:
-            dtype = flat_to_npytyping[list(attrs.values())[0].range]
+            dtype = flat_to_nptyping[list(attrs.values())[0].range]
         except KeyError as e: # pragma: no cover
             warnings.warn(str(e))
             range = list(attrs.values())[0].range
diff --git a/nwb_linkml/src/nwb_linkml/maps/__init__.py b/nwb_linkml/src/nwb_linkml/maps/__init__.py
index cf09679..059ea2a 100644
--- a/nwb_linkml/src/nwb_linkml/maps/__init__.py
+++ b/nwb_linkml/src/nwb_linkml/maps/__init__.py
@@ -2,4 +2,4 @@ from nwb_linkml.maps.map import Map
 from nwb_linkml.maps.postload import MAP_HDMF_DATATYPE_DEF, MAP_HDMF_DATATYPE_INC
 from nwb_linkml.maps.quantity import QUANTITY_MAP
 
-from nwb_linkml.maps.dtype import flat_to_linkml, flat_to_npytyping
\ No newline at end of file
+from nwb_linkml.maps.dtype import flat_to_linkml, flat_to_nptyping
\ No newline at end of file
diff --git a/nwb_linkml/src/nwb_linkml/maps/dtype.py b/nwb_linkml/src/nwb_linkml/maps/dtype.py
index 64abf14..c8e50b6 100644
--- a/nwb_linkml/src/nwb_linkml/maps/dtype.py
+++ b/nwb_linkml/src/nwb_linkml/maps/dtype.py
@@ -1,6 +1,7 @@
 import numpy as np
-from typing import Any
+from typing import Any, Type
 from datetime import datetime
+import nptyping
 
 flat_to_linkml = {
     "float" : "float",
@@ -32,7 +33,7 @@
 Map between the flat data types and the simpler linkml base types
 """
 
-flat_to_npytyping = {
+flat_to_nptyping = {
     "float": "Float",
     "float32": "Float32",
     "double": "Double",
@@ -54,10 +55,13 @@
     "utf": "Unicode",
     "utf8": "Unicode",
     "utf_8": "Unicode",
+    "string": "Unicode",
+    "str": "Unicode",
     "ascii": "String",
     "bool": "Bool",
     "isodatetime": "Datetime64",
-    'AnyType': 'Any'
+    'AnyType': 'Any',
+    'object': 'Object'
 }
 
 np_to_python = {
@@ -92,4 +96,18 @@ allowed_precisions = {
 Following HDMF, it turns out that specifying precision actually specifies minimum precision
 https://github.com/hdmf-dev/hdmf/blob/ddc842b5c81d96e0b957b96e88533b16c137e206/src/hdmf/validate/validator.py#L22
 https://github.com/hdmf-dev/hdmf/blob/ddc842b5c81d96e0b957b96e88533b16c137e206/src/hdmf/spec/spec.py#L694-L714
-"""
\ No newline at end of file
+"""
+
+
+def struct_from_dtype(dtype: np.dtype) -> Type[nptyping.Structure]:
+    """
+    Create an nptyping Structure from a compound numpy dtype.
+
+    nptyping structures have the form::
+
+        Structure["name: Str, age: Int"]
+
+    """
+    struct_pieces = [f'{k}: {flat_to_nptyping[v[0].name]}' for k, v in dtype.fields.items()]
+    struct_dtype = ', '.join(struct_pieces)
+    return nptyping.Structure[struct_dtype]
\ No newline at end of file
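
The new `struct_from_dtype` can be checked interactively; a minimal example mirroring the test added at the bottom of this diff::

    import numpy as np
    from nwb_linkml.maps.dtype import struct_from_dtype

    dtype = np.dtype([('name1', 'int32'), ('name2', 'object')])
    struct_from_dtype(dtype)
    # Structure['name1: Int32, name2: Object']
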
diff --git a/nwb_linkml/src/nwb_linkml/maps/hdmf.py b/nwb_linkml/src/nwb_linkml/maps/hdmf.py
index 56e29a8..eb62062 100644
--- a/nwb_linkml/src/nwb_linkml/maps/hdmf.py
+++ b/nwb_linkml/src/nwb_linkml/maps/hdmf.py
@@ -1,17 +1,18 @@
 """
 Mapping functions for handling HDMF classes like DynamicTables
 """
 from typing import List, Type, Optional, Any
 import warnings
-
 import h5py
+import nptyping
 from pydantic import create_model, BaseModel
 import numpy as np
-from nwb_linkml.types.hdf5 import HDF5_Path
-from nwb_linkml.types.ndarray import NDArray, NDArrayProxy
 import dask.array as da
+
+from nwb_linkml.types.hdf5 import HDF5_Path
+from nwb_linkml.types.ndarray import NDArray, NDArrayProxy
+from nwb_linkml.maps.dtype import flat_to_nptyping, struct_from_dtype
 
 def model_from_dynamictable(group:h5py.Group, base:Optional[BaseModel] = None) -> Type[BaseModel]:
     """
@@ -21,10 +22,12 @@ def model_from_dynamictable(group:h5py.Group, base:Optional[BaseModel] = None) -> Type[BaseModel]:
     types = {}
 
     for col in colnames:
-        nptype = group[col].dtype.type
-        if nptype == np.void:
-            warnings.warn(f"Cant handle numpy void type for column {col} in {group.name}")
-            continue
+        nptype = group[col].dtype
+        if nptype.type == np.void:
+            nptype = struct_from_dtype(nptype)
+        else:
+            nptype = nptype.type
+
         type_ = Optional[NDArray[Any, nptype]]
 
         # FIXME: handling nested column types that appear only in some versions?
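
The void branch above is what lets compound columns through instead of skipping them; a small sketch of the same discrimination in isolation (variable names here are illustrative)::

    import numpy as np
    from nwb_linkml.maps.dtype import struct_from_dtype

    compound = np.dtype([('start', 'int32'), ('label', 'object')])
    assert compound.type == np.void       # compound dtypes report their type as np.void
    nptype = struct_from_dtype(compound)  # Structure['start: Int32, label: Object']

    simple = np.dtype('float64')
    nptype = simple.type                  # np.float64, the scalar path as before
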
diff --git a/nwb_linkml/src/nwb_linkml/types/ndarray.py b/nwb_linkml/src/nwb_linkml/types/ndarray.py
index 612b5d3..9c3e0cc 100644
--- a/nwb_linkml/src/nwb_linkml/types/ndarray.py
+++ b/nwb_linkml/src/nwb_linkml/types/ndarray.py
@@ -13,7 +13,10 @@ from typing import (
 )
 import sys
 from copy import copy
+from functools import reduce
+from operator import or_
 
+import nptyping.structure
 from pydantic_core import core_schema
 from pydantic import (
     BaseModel,
@@ -36,6 +39,46 @@
 from nptyping.shape_expression import check_shape
 
 from nwb_linkml.maps.dtype import np_to_python, allowed_precisions
+
+
+def _list_of_lists_schema(shape, array_type_handler):
+    """
+    Make a pydantic core schema that validates an array as a list of lists
+    """
+    shape_parts = shape.__args__[0].split(',')
+    split_parts = [p.split(' ')[1] if len(p.split(' ')) == 2 else None for p in shape_parts]
+
+    # Construct a list of list schema
+    # go in reverse order - construct list schemas such that
+    # the final schema is the one that checks the first dimension
+    shape_labels = reversed(split_parts)
+    shape_args = reversed(shape.prepared_args)
+    list_schema = None
+    for arg, label in zip(shape_args, shape_labels):
+        # which handler to use? for the first we use the actual type
+        # handler, everywhere else we use the prior list handler
+        if list_schema is None:
+            inner_schema = array_type_handler
+        else:
+            inner_schema = list_schema
+
+        # make a label annotation, if we have one
+        if label is not None:
+            metadata = {'name': label}
+        else:
+            metadata = None
+
+        # make the current level list schema, accounting for shape
+        if arg == '*':
+            list_schema = core_schema.list_schema(inner_schema,
+                                                  metadata=metadata)
+        else:
+            arg = int(arg)
+            list_schema = core_schema.list_schema(
+                inner_schema,
+                min_length=arg,
+                max_length=arg,
+                metadata=metadata
+            )
+    return list_schema
 
 class NDArrayMeta(_NDArrayMeta, implementation="NDArray"):
     """
@@ -59,8 +102,12 @@ class NDArray(NPTypingType, metaclass=NDArrayMeta):
         shape, dtype = _source_type.__args__
 
         # get pydantic core schema for the given specified type
-        array_type_handler = _handler.generate_schema(
-            np_to_python[dtype])
+        if isinstance(dtype, nptyping.structure.StructureMeta):
+            # TODO: e.g. functools.reduce(operator.or_, [int, float, str])
+            raise NotImplementedError('Structured dtypes are not yet supported here')
+        else:
+            array_type_handler = _handler.generate_schema(
+                np_to_python[dtype])
 
         def validate_dtype(value: np.ndarray) -> np.ndarray:
             if dtype is Any:
@@ -82,43 +129,9 @@ class NDArray(NPTypingType, metaclass=NDArrayMeta):
         if shape is Any:
             list_schema = core_schema.list_schema(core_schema.any_schema())
         else:
-            shape_parts = shape.__args__[0].split(',')
-            split_parts = [p.split(' ')[1] if len(p.split(' ')) == 2 else None for p in shape_parts]
+            list_schema = _list_of_lists_schema(shape, array_type_handler)
 
-            # Construct a list of list schema
-            # go in reverse order - construct list schemas such that
-            # the final schema is the one that checks the first dimension
-            shape_labels = reversed(split_parts)
-            shape_args = reversed(shape.prepared_args)
-            list_schema = None
-            for arg, label in zip(shape_args, shape_labels):
-                # which handler to use? for the first we use the actual type
-                # handler, everywhere else we use the prior list handler
-                if list_schema is None:
-                    inner_schema = array_type_handler
-                else:
-                    inner_schema = list_schema
-
-                # make a label annotation, if we have one
-                if label is not None:
-                    metadata = {'name': label}
-                else:
-                    metadata = None
-
-                # make the current level list schema, accounting for shape
-                if arg == '*':
-                    list_schema = core_schema.list_schema(inner_schema,
-                                                          metadata=metadata)
-                else:
-                    arg = int(arg)
-                    list_schema = core_schema.list_schema(
-                        inner_schema,
-                        min_length=arg,
-                        max_length=arg,
-                        metadata=metadata
-                    )
-
         def array_to_list(instance: np.ndarray | DaskArray) -> list|dict:
             if isinstance(instance, DaskArray):
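
What `_list_of_lists_schema` builds is ordinary nested-list validation; a rough plain-pydantic equivalent for a `'* x, 3 y'` shape, as a sketch for intuition rather than the generator's actual output::

    from pydantic import BaseModel, conlist

    class M(BaseModel):
        # '*' -> unconstrained outer list; '3' -> min_length == max_length == 3
        array: list[conlist(float, min_length=3, max_length=3)]

    M(array=[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])  # validates
    # M(array=[[1.0, 2.0]]) would raise a ValidationError
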
diff --git a/nwb_linkml/tests/test_io/test_io_hdf5.py b/nwb_linkml/tests/test_io/test_io_hdf5.py
index 993b93a..c201aeb 100644
--- a/nwb_linkml/tests/test_io/test_io_hdf5.py
+++ b/nwb_linkml/tests/test_io/test_io_hdf5.py
@@ -11,7 +11,7 @@ from nwb_linkml.io.hdf5 import HDF5IO
 from nwb_linkml.io.hdf5 import truncate_file
 
-@pytest.mark.parametrize('dset', ['aibs.nwb'])
+@pytest.mark.parametrize('dset', ['aibs.nwb', 'aibs_ecephys.nwb'])
 def test_hdf_read(data_dir, dset):
     NWBFILE = data_dir / dset
     io = HDF5IO(path=NWBFILE)
diff --git a/nwb_linkml/tests/test_maps/test_dtype.py b/nwb_linkml/tests/test_maps/test_dtype.py
new file mode 100644
index 0000000..8d501e3
--- /dev/null
+++ b/nwb_linkml/tests/test_maps/test_dtype.py
@@ -0,0 +1,9 @@
+import numpy as np
+import nptyping
+from nwb_linkml.maps.dtype import struct_from_dtype
+
+def test_struct_from_dtype():
+    # Weak test with fixed values; expand with pytest.mark.parametrize if needed
+    np_dtype = np.dtype([('name1', 'int32'), ('name2', 'object'), ('name3', 'str')])
+    struct = struct_from_dtype(np_dtype)
+    assert struct == nptyping.Structure['name1: Int32, name2: Object, name3: Unicode']
\ No newline at end of file
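
A closing note on the `StructureMeta` branch left as `NotImplementedError` in ndarray.py: the inline `functools.reduce(operator.or_, ...)` hint suggests unioning the structure's field types into a single type for the handler. A hedged sketch of that idea, hypothetical and not what this patch implements::

    from functools import reduce
    from operator import or_

    # hypothetical: collapse a structure's field types into one union type
    # that _handler.generate_schema() could accept
    field_types = [int, float, str]
    union = reduce(or_, field_types)  # int | float | str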