catchup with prior work

sneakers-the-rat 2024-07-01 15:44:26 -07:00
parent 4ee97263ed
commit 4296b27538
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
8 changed files with 99 additions and 52 deletions


@@ -87,6 +87,7 @@ napoleon_attr_annotations = True
 # graphviz
 graphviz_output_format = "svg"
 # autodoc
+autodoc_pydantic_model_show_json_error_strategy = 'coerce'
 autodoc_pydantic_model_show_json = False
 autodoc_mock_imports = []


@@ -35,7 +35,7 @@ from copy import deepcopy, copy
 import warnings
 import inspect
-from nwb_linkml.maps import flat_to_npytyping
+from nwb_linkml.maps import flat_to_nptyping
 from linkml.generators import PydanticGenerator
 from linkml_runtime.linkml_model.meta import (
     Annotation,
@@ -160,6 +160,7 @@ class ConfiguredBaseModel(BaseModel):
 template += """{{ '\n\n' }}"""
 for cls in extra_classes:
     template += inspect.getsource(cls) + '\n\n'
+### ENUMS ###
 template += """
 {% for e in enums.values() %}
@@ -445,7 +446,7 @@ class NWBPydanticGenerator(PydanticGenerator):
         # all dimensions should be the same dtype
         try:
-            dtype = flat_to_npytyping[list(attrs.values())[0].range]
+            dtype = flat_to_nptyping[list(attrs.values())[0].range]
         except KeyError as e:  # pragma: no cover
             warnings.warn(str(e))
             range = list(attrs.values())[0].range


@@ -2,4 +2,4 @@
 from nwb_linkml.maps.map import Map
 from nwb_linkml.maps.postload import MAP_HDMF_DATATYPE_DEF, MAP_HDMF_DATATYPE_INC
 from nwb_linkml.maps.quantity import QUANTITY_MAP
-from nwb_linkml.maps.dtype import flat_to_linkml, flat_to_npytyping
+from nwb_linkml.maps.dtype import flat_to_linkml, flat_to_nptyping


@@ -1,6 +1,7 @@
 import numpy as np
-from typing import Any
+from typing import Any, Type
 from datetime import datetime
+import nptyping

 flat_to_linkml = {
     "float" : "float",
@@ -32,7 +33,7 @@ flat_to_linkml = {
 Map between the flat data types and the simpler linkml base types
 """

-flat_to_npytyping = {
+flat_to_nptyping = {
     "float": "Float",
     "float32": "Float32",
     "double": "Double",
@@ -54,10 +55,13 @@ flat_to_npytyping = {
     "utf": "Unicode",
     "utf8": "Unicode",
     "utf_8": "Unicode",
+    "string": "Unicode",
     "str": "Unicode",
     "ascii": "String",
     "bool": "Bool",
     "isodatetime": "Datetime64",
-    'AnyType': 'Any'
+    'AnyType': 'Any',
+    'object': 'Object'
 }

 np_to_python = {
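
For reference, the renamed flat_to_nptyping map stores nptyping type names as plain strings, so the entries added in this hunk can be checked directly; a minimal sketch:

    from nwb_linkml.maps.dtype import flat_to_nptyping

    # entries added in this commit
    assert flat_to_nptyping["string"] == "Unicode"
    assert flat_to_nptyping["object"] == "Object"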
@@ -93,3 +97,17 @@ Following HDMF, it turns out that specifying precision actually specifies minimum
 https://github.com/hdmf-dev/hdmf/blob/ddc842b5c81d96e0b957b96e88533b16c137e206/src/hdmf/validate/validator.py#L22
 https://github.com/hdmf-dev/hdmf/blob/ddc842b5c81d96e0b957b96e88533b16c137e206/src/hdmf/spec/spec.py#L694-L714
 """
+
+def struct_from_dtype(dtype: np.dtype) -> Type[nptyping.Structure]:
+    """
+    Create a nptyping Structure from a compound numpy dtype
+
+    nptyping structures have the form::
+
+        Structure["name: Str, age: Int"]
+
+    """
+    struct_pieces = [f'{k}: {flat_to_nptyping[v[0].name]}' for k, v in dtype.fields.items()]
+    struct_dtype = ', '.join(struct_pieces)
+    return nptyping.Structure[struct_dtype]
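
A minimal usage sketch for the new struct_from_dtype, using a hypothetical two-field compound dtype; the per-field lookup goes through flat_to_nptyping, as exercised by the test added at the end of this commit:

    import numpy as np
    from nwb_linkml.maps.dtype import struct_from_dtype

    # compound dtypes carry named fields; each field's dtype name is
    # looked up in flat_to_nptyping to build the Structure expression
    compound = np.dtype([("id", "int32"), ("label", "object")])
    struct = struct_from_dtype(compound)
    # struct == nptyping.Structure['id: Int32, label: Object']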


@@ -1,17 +1,19 @@
 """
 Mapping functions for handling HDMF classes like DynamicTables
 """
+import pdb
 from typing import List, Type, Optional, Any
-import warnings
 import h5py
+import nptyping
 from pydantic import create_model, BaseModel
 import numpy as np
-from nwb_linkml.types.hdf5 import HDF5_Path
-from nwb_linkml.types.ndarray import NDArray, NDArrayProxy
 import dask.array as da
+from nwb_linkml.types.hdf5 import HDF5_Path
+from nwb_linkml.types.ndarray import NDArray, NDArrayProxy
+from nwb_linkml.maps.dtype import flat_to_nptyping, struct_from_dtype

 def model_from_dynamictable(group:h5py.Group, base:Optional[BaseModel] = None) -> Type[BaseModel]:
     """
@@ -21,10 +23,13 @@ def model_from_dynamictable(group:h5py.Group, base:Optional[BaseModel] = None) -> Type[BaseModel]:
     types = {}
     for col in colnames:
-        nptype = group[col].dtype.type
-        if nptype == np.void:
-            warnings.warn(f"Cant handle numpy void type for column {col} in {group.name}")
-            continue
+        nptype = group[col].dtype
+        if nptype.type == np.void:
+            #pdb.set_trace()
+            nptype = struct_from_dtype(nptype)
+        else:
+            nptype = nptype.type

         type_ = Optional[NDArray[Any, nptype]]
         # FIXME: handling nested column types that appear only in some versions?
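
To illustrate the changed branch: compound HDF5 column dtypes have type np.void, and instead of being skipped with a warning they are now converted to an nptyping Structure usable inside NDArray. A sketch with a hypothetical two-field column dtype:

    import numpy as np
    from nwb_linkml.maps.dtype import struct_from_dtype

    dtype = np.dtype([("start", "int32"), ("stop", "int32")])
    assert dtype.type is np.void       # compound dtypes are void-typed
    nptype = struct_from_dtype(dtype)  # Structure['start: Int32, stop: Int32']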


@@ -13,7 +13,10 @@ from typing import (
 )
 import sys
 from copy import copy
+from functools import reduce
+from operator import or_
+import nptyping.structure
 from pydantic_core import core_schema
 from pydantic import (
     BaseModel,
@@ -36,6 +39,46 @@ from nptyping.shape_expression import check_shape
 from nwb_linkml.maps.dtype import np_to_python, allowed_precisions

+def _list_of_lists_schema(shape, array_type_handler):
+    """
+    Make a pydantic JSON schema for an array as a list of lists
+    """
+    shape_parts = shape.__args__[0].split(',')
+    split_parts = [p.split(' ')[1] if len(p.split(' ')) == 2 else None for p in shape_parts]
+
+    # Construct a list of list schema
+    # go in reverse order - construct list schemas such that
+    # the final schema is the one that checks the first dimension
+    shape_labels = reversed(split_parts)
+    shape_args = reversed(shape.prepared_args)
+    list_schema = None
+    for arg, label in zip(shape_args, shape_labels):
+        # which handler to use? for the first we use the actual type
+        # handler, everywhere else we use the prior list handler
+        if list_schema is None:
+            inner_schema = array_type_handler
+        else:
+            inner_schema = list_schema
+
+        # make a label annotation, if we have one
+        if label is not None:
+            metadata = {'name': label}
+        else:
+            metadata = None
+
+        # make the current level list schema, accounting for shape
+        if arg == '*':
+            list_schema = core_schema.list_schema(inner_schema,
+                                                  metadata=metadata)
+        else:
+            arg = int(arg)
+            list_schema = core_schema.list_schema(
+                inner_schema,
+                min_length=arg,
+                max_length=arg,
+                metadata=metadata
+            )
+    return list_schema

 class NDArrayMeta(_NDArrayMeta, implementation="NDArray"):
     """
@@ -59,8 +102,12 @@ class NDArray(NPTypingType, metaclass=NDArrayMeta):
         shape, dtype = _source_type.__args__

         # get pydantic core schema for the given specified type
-        array_type_handler = _handler.generate_schema(
-            np_to_python[dtype])
+        if isinstance(dtype, nptyping.structure.StructureMeta):
+            raise NotImplementedError('Jonny finish this')
+            # functools.reduce(operator.or_, [int, float, str])
+        else:
+            array_type_handler = _handler.generate_schema(
+                np_to_python[dtype])

         def validate_dtype(value: np.ndarray) -> np.ndarray:
             if dtype is Any:
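
The commented hint suggests the eventual approach for structured dtypes: collapse the structure's field types into a single Union for the schema handler. A hypothetical sketch of that idea (not what this commit implements; the structured branch currently raises NotImplementedError):

    from functools import reduce
    from operator import or_

    # hypothetical: python types gathered from the Structure's fields
    field_types = [int, float, str]
    union = reduce(or_, field_types)  # int | float | str
    # array_type_handler = _handler.generate_schema(union)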
@@ -82,43 +129,9 @@ class NDArray(NPTypingType, metaclass=NDArrayMeta):
         if shape is Any:
             list_schema = core_schema.list_schema(core_schema.any_schema())
         else:
-            shape_parts = shape.__args__[0].split(',')
-            split_parts = [p.split(' ')[1] if len(p.split(' ')) == 2 else None for p in shape_parts]
+            list_schema = _list_of_lists_schema(shape, array_type_handler)
-            # Construct a list of list schema
-            # go in reverse order - construct list schemas such that
-            # the final schema is the one that checks the first dimension
-            shape_labels = reversed(split_parts)
-            shape_args = reversed(shape.prepared_args)
-            list_schema = None
-            for arg, label in zip(shape_args, shape_labels):
-                # which handler to use? for the first we use the actual type
-                # handler, everywhere else we use the prior list handler
-                if list_schema is None:
-                    inner_schema = array_type_handler
-                else:
-                    inner_schema = list_schema
-
-                # make a label annotation, if we have one
-                if label is not None:
-                    metadata = {'name': label}
-                else:
-                    metadata = None
-
-                # make the current level list schema, accounting for shape
-                if arg == '*':
-                    list_schema = core_schema.list_schema(inner_schema,
-                                                          metadata=metadata)
-                else:
-                    arg = int(arg)
-                    list_schema = core_schema.list_schema(
-                        inner_schema,
-                        min_length=arg,
-                        max_length=arg,
-                        metadata=metadata
-                    )

         def array_to_list(instance: np.ndarray | DaskArray) -> list|dict:
             if isinstance(instance, DaskArray):
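
For context, a minimal sketch of how these NDArray annotations are written on a model, assuming np.uint8 is present in the np_to_python map; the labeled dimensions become the {'name': ...} metadata on each nested list schema:

    import numpy as np
    from nptyping import Shape
    from pydantic import BaseModel
    from nwb_linkml.types.ndarray import NDArray

    class Image(BaseModel):
        # two wildcard dimensions, labeled 'x' and 'y'
        array: NDArray[Shape["* x, * y"], np.uint8]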


@@ -11,7 +11,7 @@ from nwb_linkml.io.hdf5 import HDF5IO
 from nwb_linkml.io.hdf5 import truncate_file

-@pytest.mark.parametrize('dset', ['aibs.nwb'])
+@pytest.mark.parametrize('dset', ['aibs.nwb', 'aibs_ecephys.nwb'])
 def test_hdf_read(data_dir, dset):
     NWBFILE = data_dir / dset
     io = HDF5IO(path=NWBFILE)


@@ -0,0 +1,9 @@
+import numpy as np
+import nptyping
+from nwb_linkml.maps.dtype import struct_from_dtype
+
+def test_struct_from_dtype():
+    # Super weak test with fixed values, will expand with parameterize if needed
+    np_dtype = np.dtype([('name1', 'int32'), ('name2', 'object'), ('name3', 'str')])
+    struct = struct_from_dtype(np_dtype)
+    assert struct == nptyping.Structure['name1: Int32, name2: Object, name3: Unicode']