catchup with prior work

This commit is contained in:
sneakers-the-rat 2024-07-01 15:44:26 -07:00
parent 4ee97263ed
commit 4296b27538
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
8 changed files with 99 additions and 52 deletions

View file

@@ -87,6 +87,7 @@ napoleon_attr_annotations = True
# graphviz
graphviz_output_format = "svg"
# autodoc
autodoc_pydantic_model_show_json_error_strategy = 'coerce'
autodoc_pydantic_model_show_json = False
autodoc_mock_imports = []

View file

@@ -35,7 +35,7 @@ from copy import deepcopy, copy
import warnings
import inspect
from nwb_linkml.maps import flat_to_npytyping
from nwb_linkml.maps import flat_to_nptyping
from linkml.generators import PydanticGenerator
from linkml_runtime.linkml_model.meta import (
    Annotation,
@@ -160,6 +160,7 @@ class ConfiguredBaseModel(BaseModel):
    template += """{{ '\n\n' }}"""
    for cls in extra_classes:
        template += inspect.getsource(cls) + '\n\n'
    ### ENUMS ###
    template += """
{% for e in enums.values() %}
@@ -445,7 +446,7 @@ class NWBPydanticGenerator(PydanticGenerator):
        # all dimensions should be the same dtype
        try:
            dtype = flat_to_npytyping[list(attrs.values())[0].range]
            dtype = flat_to_nptyping[list(attrs.values())[0].range]
        except KeyError as e: # pragma: no cover
            warnings.warn(str(e))
            range = list(attrs.values())[0].range

View file

@@ -2,4 +2,4 @@
from nwb_linkml.maps.map import Map
from nwb_linkml.maps.postload import MAP_HDMF_DATATYPE_DEF, MAP_HDMF_DATATYPE_INC
from nwb_linkml.maps.quantity import QUANTITY_MAP
from nwb_linkml.maps.dtype import flat_to_linkml, flat_to_npytyping
from nwb_linkml.maps.dtype import flat_to_linkml, flat_to_nptyping

View file

@@ -1,6 +1,7 @@
import numpy as np
from typing import Any
from typing import Any, Type
from datetime import datetime
import nptyping
flat_to_linkml = {
"float" : "float",
@@ -32,7 +33,7 @@ flat_to_linkml = {
Map between the flat data types and the simpler linkml base types
"""
flat_to_npytyping = {
flat_to_nptyping = {
"float": "Float",
"float32": "Float32",
"double": "Double",
@ -54,10 +55,13 @@ flat_to_npytyping = {
"utf": "Unicode",
"utf8": "Unicode",
"utf_8": "Unicode",
"string": "Unicode",
"str": "Unicode",
"ascii": "String",
"bool": "Bool",
"isodatetime": "Datetime64",
'AnyType': 'Any'
'AnyType': 'Any',
'object': 'Object'
}
np_to_python = {
@@ -93,3 +97,17 @@ Following HDMF, it turns out that specifying precision actually specifies minimum precision
https://github.com/hdmf-dev/hdmf/blob/ddc842b5c81d96e0b957b96e88533b16c137e206/src/hdmf/validate/validator.py#L22
https://github.com/hdmf-dev/hdmf/blob/ddc842b5c81d96e0b957b96e88533b16c137e206/src/hdmf/spec/spec.py#L694-L714
"""
def struct_from_dtype(dtype: np.dtype) -> Type[nptyping.Structure]:
    """
    Create an nptyping Structure from a compound numpy dtype

    nptyping structures have the form::

        Structure["name: Str, age: Int"]
    """
    struct_pieces = [f'{k}: {flat_to_nptyping[v[0].name]}' for k, v in dtype.fields.items()]
    struct_dtype = ', '.join(struct_pieces)
    return nptyping.Structure[struct_dtype]
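
A quick usage sketch of the new helper (the dtype here is illustrative, not from this commit):

import numpy as np
from nwb_linkml.maps.dtype import struct_from_dtype

dt = np.dtype([('idx_start', 'int32'), ('count', 'int32')])
struct_from_dtype(dt)  # -> Structure['idx_start: Int32, count: Int32']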

View file

@@ -1,17 +1,19 @@
"""
Mapping functions for handling HDMF classes like DynamicTables
"""
import pdb
from typing import List, Type, Optional, Any
import warnings
import h5py
import nptyping
from pydantic import create_model, BaseModel
import numpy as np
from nwb_linkml.types.hdf5 import HDF5_Path
from nwb_linkml.types.ndarray import NDArray, NDArrayProxy
import dask.array as da
from nwb_linkml.types.hdf5 import HDF5_Path
from nwb_linkml.types.ndarray import NDArray, NDArrayProxy
from nwb_linkml.maps.dtype import flat_to_nptyping, struct_from_dtype
def model_from_dynamictable(group:h5py.Group, base:Optional[BaseModel] = None) -> Type[BaseModel]:
"""
@@ -21,10 +23,13 @@ def model_from_dynamictable(group:h5py.Group, base:Optional[BaseModel] = None) -> Type[BaseModel]:
    types = {}
    for col in colnames:
        nptype = group[col].dtype.type
        if nptype == np.void:
            warnings.warn(f"Cant handle numpy void type for column {col} in {group.name}")
            continue
        nptype = group[col].dtype
        if nptype.type == np.void:
            #pdb.set_trace()
            nptype = struct_from_dtype(nptype)
        else:
            nptype = nptype.type
        type_ = Optional[NDArray[Any, nptype]]
        # FIXME: handling nested column types that appear only in some versions?
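
The net effect: compound columns that previously hit the np.void warning and were skipped now map to a structured NDArray annotation. A minimal sketch, with a hypothetical column dtype:

import numpy as np
from typing import Any, Optional
from nwb_linkml.maps.dtype import struct_from_dtype
from nwb_linkml.types.ndarray import NDArray

nptype = np.dtype([('region', 'int32'), ('count', 'int32')])  # hypothetical compound column
if nptype.type == np.void:
    nptype = struct_from_dtype(nptype)  # -> Structure['region: Int32, count: Int32']
else:
    nptype = nptype.type
type_ = Optional[NDArray[Any, nptype]]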

View file

@@ -13,7 +13,10 @@ from typing import (
)
import sys
from copy import copy
from functools import reduce
from operator import or_
import nptyping.structure
from pydantic_core import core_schema
from pydantic import (
    BaseModel,
@@ -36,56 +39,13 @@ from nptyping.shape_expression import check_shape
from nwb_linkml.maps.dtype import np_to_python, allowed_precisions
class NDArrayMeta(_NDArrayMeta, implementation="NDArray"):
def _list_of_lists_schema(shape, array_type_handler):
    """
    Kept here to allow for hooking into metaclass, which has
    been necessary on and off as we work this class into a stable
    state"""
class NDArray(NPTypingType, metaclass=NDArrayMeta):
    Make a pydantic JSON schema for an array as a list of lists
    """
    Following the example here: https://docs.pydantic.dev/latest/usage/types/custom/#handling-third-party-types
    """
    __args__ = (Any, Any)

    @classmethod
    def __get_pydantic_core_schema__(
        cls,
        _source_type: 'NDArray',
        _handler: Callable[[Any], core_schema.CoreSchema],
    ) -> core_schema.CoreSchema:
        shape, dtype = _source_type.__args__
        # get pydantic core schema for the given specified type
        array_type_handler = _handler.generate_schema(
            np_to_python[dtype])

        def validate_dtype(value: np.ndarray) -> np.ndarray:
            if dtype is Any:
                return value
            assert value.dtype == dtype or value.dtype.name in allowed_precisions[dtype.__name__], f"Invalid dtype! expected {dtype}, got {value.dtype}"
            return value

        def validate_shape(value: Any) -> np.ndarray:
            assert shape is Any or check_shape(value.shape, shape), f'Invalid shape! expected shape {shape.prepared_args}, got shape {value.shape}'
            return value

        def coerce_list(value: Any) -> np.ndarray:
            if isinstance(value, list):
                value = np.array(value)
            return value

        # get the names of the shape constraints, if any
        if shape is Any:
            list_schema = core_schema.list_schema(core_schema.any_schema())
        else:
    shape_parts = shape.__args__[0].split(',')
    split_parts = [p.split(' ')[1] if len(p.split(' ')) == 2 else None for p in shape_parts]

    # Construct a list of list schema
    # go in reverse order - construct list schemas such that
    # the final schema is the one that checks the first dimension
@@ -118,6 +78,59 @@ class NDArray(NPTypingType, metaclass=NDArrayMeta):
            max_length=arg,
            metadata=metadata
        )
    return list_schema
class NDArrayMeta(_NDArrayMeta, implementation="NDArray"):
    """
    Kept here to allow for hooking into metaclass, which has
    been necessary on and off as we work this class into a stable
    state"""

class NDArray(NPTypingType, metaclass=NDArrayMeta):
    """
    Following the example here: https://docs.pydantic.dev/latest/usage/types/custom/#handling-third-party-types
    """
    __args__ = (Any, Any)

    @classmethod
    def __get_pydantic_core_schema__(
        cls,
        _source_type: 'NDArray',
        _handler: Callable[[Any], core_schema.CoreSchema],
    ) -> core_schema.CoreSchema:
        shape, dtype = _source_type.__args__
        # get pydantic core schema for the given specified type
        if isinstance(dtype, nptyping.structure.StructureMeta):
            raise NotImplementedError('Jonny finish this')
            # functools.reduce(operator.or_, [int, float, str])
        else:
            array_type_handler = _handler.generate_schema(
                np_to_python[dtype])

        def validate_dtype(value: np.ndarray) -> np.ndarray:
            if dtype is Any:
                return value
            assert value.dtype == dtype or value.dtype.name in allowed_precisions[dtype.__name__], f"Invalid dtype! expected {dtype}, got {value.dtype}"
            return value

        def validate_shape(value: Any) -> np.ndarray:
            assert shape is Any or check_shape(value.shape, shape), f'Invalid shape! expected shape {shape.prepared_args}, got shape {value.shape}'
            return value

        def coerce_list(value: Any) -> np.ndarray:
            if isinstance(value, list):
                value = np.array(value)
            return value

        # get the names of the shape constraints, if any
        if shape is Any:
            list_schema = core_schema.list_schema(core_schema.any_schema())
        else:
            list_schema = _list_of_lists_schema(shape, array_type_handler)

        def array_to_list(instance: np.ndarray | DaskArray) -> list|dict:
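
The commented functools.reduce(operator.or_, ...) line hints at how the unfinished StructureMeta branch could build a schema; a hypothetical sketch, with the field types assumed since the commit doesn't show how to introspect the Structure (_handler as in __get_pydantic_core_schema__ above):

from functools import reduce
from operator import or_

# assumed field types for illustration; a real implementation would
# recover them from the nptyping Structure expression
field_types = [int, float, str]
union_type = reduce(or_, field_types)  # builds int | float | str
array_type_handler = _handler.generate_schema(union_type)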

View file

@@ -11,7 +11,7 @@ from nwb_linkml.io.hdf5 import HDF5IO
from nwb_linkml.io.hdf5 import truncate_file
@pytest.mark.parametrize('dset', ['aibs.nwb'])
@pytest.mark.parametrize('dset', ['aibs.nwb', 'aibs_ecephys.nwb'])
def test_hdf_read(data_dir, dset):
    NWBFILE = data_dir / dset
    io = HDF5IO(path=NWBFILE)

View file

@@ -0,0 +1,9 @@
import numpy as np
import nptyping
from nwb_linkml.maps.dtype import struct_from_dtype
def test_struct_from_dtype():
    # Super weak test with fixed values, will expand with parametrize if needed
    np_dtype = np.dtype([('name1', 'int32'), ('name2', 'object'), ('name3', 'str')])
    struct = struct_from_dtype(np_dtype)
    assert struct == nptyping.Structure['name1: Int32, name2: Object, name3: Unicode']