catchup with prior work

sneakers-the-rat 2024-07-01 15:44:26 -07:00
parent 4ee97263ed
commit 4296b27538
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
8 changed files with 99 additions and 52 deletions

View file

@@ -87,6 +87,7 @@ napoleon_attr_annotations = True
 # graphviz
 graphviz_output_format = "svg"
 
+# autodoc
 autodoc_pydantic_model_show_json_error_strategy = 'coerce'
 autodoc_pydantic_model_show_json = False
 autodoc_mock_imports = []

View file

@@ -35,7 +35,7 @@ from copy import deepcopy, copy
 import warnings
 import inspect
 
-from nwb_linkml.maps import flat_to_npytyping
+from nwb_linkml.maps import flat_to_nptyping
 from linkml.generators import PydanticGenerator
 from linkml_runtime.linkml_model.meta import (
     Annotation,
@@ -160,6 +160,7 @@ class ConfiguredBaseModel(BaseModel):
     template += """{{ '\n\n' }}"""
     for cls in extra_classes:
         template += inspect.getsource(cls) + '\n\n'
+
     ### ENUMS ###
     template += """
     {% for e in enums.values() %}
@@ -445,7 +446,7 @@ class NWBPydanticGenerator(PydanticGenerator):
 
         # all dimensions should be the same dtype
         try:
-            dtype = flat_to_npytyping[list(attrs.values())[0].range]
+            dtype = flat_to_nptyping[list(attrs.values())[0].range]
         except KeyError as e:  # pragma: no cover
             warnings.warn(str(e))
             range = list(attrs.values())[0].range
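For reference, flat_to_nptyping (the corrected spelling of the old flat_to_npytyping) maps NWB's flat dtype names to nptyping type names as strings. A minimal sketch of the lookup the generator performs, using entries taken verbatim from the dtype-map hunk later in this commit:

from nwb_linkml.maps import flat_to_nptyping

# entries visible in the flat_to_nptyping hunk below
assert flat_to_nptyping['float32'] == 'Float32'
assert flat_to_nptyping['utf8'] == 'Unicode'
assert flat_to_nptyping['isodatetime'] == 'Datetime64'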

View file

@@ -2,4 +2,4 @@
 from nwb_linkml.maps.map import Map
 from nwb_linkml.maps.postload import MAP_HDMF_DATATYPE_DEF, MAP_HDMF_DATATYPE_INC
 from nwb_linkml.maps.quantity import QUANTITY_MAP
-from nwb_linkml.maps.dtype import flat_to_linkml, flat_to_npytyping
+from nwb_linkml.maps.dtype import flat_to_linkml, flat_to_nptyping

View file

@@ -1,6 +1,7 @@
 import numpy as np
-from typing import Any
+from typing import Any, Type
 from datetime import datetime
+import nptyping
 
 flat_to_linkml = {
     "float" : "float",
@@ -32,7 +33,7 @@ flat_to_linkml = {
 Map between the flat data types and the simpler linkml base types
 """
 
-flat_to_npytyping = {
+flat_to_nptyping = {
     "float": "Float",
     "float32": "Float32",
     "double": "Double",
@@ -54,10 +55,13 @@ flat_to_npytyping = {
     "utf": "Unicode",
     "utf8": "Unicode",
     "utf_8": "Unicode",
+    "string": "Unicode",
+    "str": "Unicode",
     "ascii": "String",
     "bool": "Bool",
     "isodatetime": "Datetime64",
-    'AnyType': 'Any'
+    'AnyType': 'Any',
+    'object': 'Object'
 }
 
 np_to_python = {
@@ -92,4 +96,18 @@ allowed_precisions = {
 Following HDMF, it turns out that specifying precision actually specifies minimum precision
 https://github.com/hdmf-dev/hdmf/blob/ddc842b5c81d96e0b957b96e88533b16c137e206/src/hdmf/validate/validator.py#L22
 https://github.com/hdmf-dev/hdmf/blob/ddc842b5c81d96e0b957b96e88533b16c137e206/src/hdmf/spec/spec.py#L694-L714
 """
+
+def struct_from_dtype(dtype: np.dtype) -> Type[nptyping.Structure]:
+    """
+    Create a nptyping Structure from a compound numpy dtype
+
+    nptyping structures have the form::
+
+        Structure["name: Str, age: Int"]
+
+    """
+    struct_pieces = [f'{k}: {flat_to_nptyping[v[0].name]}' for k, v in dtype.fields.items()]
+    struct_dtype = ', '.join(struct_pieces)
+    return nptyping.Structure[struct_dtype]
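A quick usage sketch of struct_from_dtype (the field names here are hypothetical; the int32 -> Int32 and object -> Object mappings are confirmed by the new test at the end of this commit):

import numpy as np
from nwb_linkml.maps.dtype import struct_from_dtype

# compound dtypes expose .fields, mapping each name to a (dtype, offset) pair
compound = np.dtype([('id', 'int32'), ('meta', 'object')])
struct = struct_from_dtype(compound)
# struct == nptyping.Structure['id: Int32, meta: Object']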

View file

@@ -1,17 +1,19 @@
 """
 Mapping functions for handling HDMF classes like DynamicTables
 """
+import pdb
 from typing import List, Type, Optional, Any
 import warnings
 
 import h5py
+import nptyping
 from pydantic import create_model, BaseModel
 import numpy as np
-from nwb_linkml.types.hdf5 import HDF5_Path
-from nwb_linkml.types.ndarray import NDArray, NDArrayProxy
 import dask.array as da
+
+from nwb_linkml.types.hdf5 import HDF5_Path
+from nwb_linkml.types.ndarray import NDArray, NDArrayProxy
+from nwb_linkml.maps.dtype import flat_to_nptyping, struct_from_dtype
 
 def model_from_dynamictable(group:h5py.Group, base:Optional[BaseModel] = None) -> Type[BaseModel]:
     """
@@ -21,10 +23,13 @@ def model_from_dynamictable(group:h5py.Group, base:Optional[BaseModel] = None) -> Type[BaseModel]:
     types = {}
     for col in colnames:
-        nptype = group[col].dtype.type
-        if nptype == np.void:
-            warnings.warn(f"Cant handle numpy void type for column {col} in {group.name}")
-            continue
+        nptype = group[col].dtype
+        if nptype.type == np.void:
+            #pdb.set_trace()
+            nptype = struct_from_dtype(nptype)
+        else:
+            nptype = nptype.type
 
         type_ = Optional[NDArray[Any, nptype]]
 
         # FIXME: handling nested column types that appear only in some versions?
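The new branch above replaces the old warn-and-skip behavior: compound (struct) columns, whose dtype .type is np.void, are now mapped to an nptyping Structure rather than dropped. A standalone sketch of that dispatch, with an illustrative compound dtype standing in for an h5py column:

import numpy as np
from nwb_linkml.maps.dtype import struct_from_dtype

dtype = np.dtype([('start', 'int32'), ('tag', 'object')])  # stands in for group[col].dtype
if dtype.type == np.void:
    # compound column: build an nptyping Structure for its fields
    nptype = struct_from_dtype(dtype)
else:
    # scalar column: keep the plain numpy scalar type
    nptype = dtype.type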

View file

@@ -13,7 +13,10 @@ from typing import (
 )
 import sys
 from copy import copy
+from functools import reduce
+from operator import or_
 
+import nptyping.structure
 from pydantic_core import core_schema
 from pydantic import (
     BaseModel,
@@ -36,6 +39,46 @@ from nptyping.shape_expression import check_shape
 from nwb_linkml.maps.dtype import np_to_python, allowed_precisions
 
+
+def _list_of_lists_schema(shape, array_type_handler):
+    """
+    Make a pydantic JSON schema for an array as a list of lists
+    """
+    shape_parts = shape.__args__[0].split(',')
+    split_parts = [p.split(' ')[1] if len(p.split(' ')) == 2 else None for p in shape_parts]
+
+    # Construct a list of list schema
+    # go in reverse order - construct list schemas such that
+    # the final schema is the one that checks the first dimension
+    shape_labels = reversed(split_parts)
+    shape_args = reversed(shape.prepared_args)
+    list_schema = None
+    for arg, label in zip(shape_args, shape_labels):
+        # which handler to use? for the first we use the actual type
+        # handler, everywhere else we use the prior list handler
+        if list_schema is None:
+            inner_schema = array_type_handler
+        else:
+            inner_schema = list_schema
+
+        # make a label annotation, if we have one
+        if label is not None:
+            metadata = {'name': label}
+        else:
+            metadata = None
+
+        # make the current level list schema, accounting for shape
+        if arg == '*':
+            list_schema = core_schema.list_schema(inner_schema,
+                                                  metadata=metadata)
+        else:
+            arg = int(arg)
+            list_schema = core_schema.list_schema(
+                inner_schema,
+                min_length=arg,
+                max_length=arg,
+                metadata=metadata
+            )
+    return list_schema
+
 
 class NDArrayMeta(_NDArrayMeta, implementation="NDArray"):
     """
@@ -59,8 +102,12 @@ class NDArray(NPTypingType, metaclass=NDArrayMeta):
         shape, dtype = _source_type.__args__
 
         # get pydantic core schema for the given specified type
-        array_type_handler = _handler.generate_schema(
-            np_to_python[dtype])
+        if isinstance(dtype, nptyping.structure.StructureMeta):
+            raise NotImplementedError('Jonny finish this')
+            # functools.reduce(operator.or_, [int, float, str])
+        else:
+            array_type_handler = _handler.generate_schema(
+                np_to_python[dtype])
 
         def validate_dtype(value: np.ndarray) -> np.ndarray:
             if dtype is Any:
@@ -82,43 +129,9 @@ class NDArray(NPTypingType, metaclass=NDArrayMeta):
 
         if shape is Any:
             list_schema = core_schema.list_schema(core_schema.any_schema())
         else:
-            shape_parts = shape.__args__[0].split(',')
-            split_parts = [p.split(' ')[1] if len(p.split(' ')) == 2 else None for p in shape_parts]
-
-            # Construct a list of list schema
-            # go in reverse order - construct list schemas such that
-            # the final schema is the one that checks the first dimension
-            shape_labels = reversed(split_parts)
-            shape_args = reversed(shape.prepared_args)
-            list_schema = None
-            for arg, label in zip(shape_args, shape_labels):
-                # which handler to use? for the first we use the actual type
-                # handler, everywhere else we use the prior list handler
-                if list_schema is None:
-                    inner_schema = array_type_handler
-                else:
-                    inner_schema = list_schema
-
-                # make a label annotation, if we have one
-                if label is not None:
-                    metadata = {'name': label}
-                else:
-                    metadata = None
-
-                # make the current level list schema, accounting for shape
-                if arg == '*':
-                    list_schema = core_schema.list_schema(inner_schema,
-                                                          metadata=metadata)
-                else:
-                    arg = int(arg)
-                    list_schema = core_schema.list_schema(
-                        inner_schema,
-                        min_length=arg,
-                        max_length=arg,
-                        metadata=metadata
-                    )
+            list_schema = _list_of_lists_schema(shape, array_type_handler)
 
         def array_to_list(instance: np.ndarray | DaskArray) -> list|dict:
             if isinstance(instance, DaskArray):
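On the NotImplementedError branch above: the commented hint functools.reduce(operator.or_, [int, float, str]) suggests folding a structured dtype's field types into a single union for pydantic to handle. A minimal sketch of that folding idiom (the type list here is illustrative, not taken from the source):

from functools import reduce
from operator import or_

# fold a list of types left to right with |, yielding int | float | str
union_type = reduce(or_, [int, float, str])
assert union_type == int | float | str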

View file

@@ -11,7 +11,7 @@ from nwb_linkml.io.hdf5 import HDF5IO
 from nwb_linkml.io.hdf5 import truncate_file
 
-@pytest.mark.parametrize('dset', ['aibs.nwb'])
+@pytest.mark.parametrize('dset', ['aibs.nwb', 'aibs_ecephys.nwb'])
 def test_hdf_read(data_dir, dset):
     NWBFILE = data_dir / dset
     io = HDF5IO(path=NWBFILE)

View file

@@ -0,0 +1,9 @@
+import numpy as np
+import nptyping
+from nwb_linkml.maps.dtype import struct_from_dtype
+
+def test_struct_from_dtype():
+    # Super weak test with fixed values, will expand with parameterize if needed
+    np_dtype = np.dtype([('name1', 'int32'), ('name2', 'object'), ('name3', 'str')])
+    struct = struct_from_dtype(np_dtype)
+    assert struct == nptyping.Structure['name1: Int32, name2: Object, name3: Unicode']