Fix array casting for dtypes that have a `shape` attribute but an empty shape (e.g. NumPy scalars, where `shape == ()` and `len()` fails)

This commit is contained in:
sneakers-the-rat 2024-08-06 20:41:00 -07:00
parent edea802ff1
commit 3ee7c68e15
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
3 changed files with 61 additions and 90 deletions

View file

@ -23,7 +23,6 @@ from pydantic import BaseModel, ConfigDict, Field
from nwb_linkml.annotations import unwrap_optional from nwb_linkml.annotations import unwrap_optional
from nwb_linkml.maps import Map from nwb_linkml.maps import Map
from nwb_linkml.maps.hdmf import dynamictable_to_model
from nwb_linkml.types.hdf5 import HDF5_Path from nwb_linkml.types.hdf5 import HDF5_Path
if sys.version_info.minor >= 11: if sys.version_info.minor >= 11:
@ -234,63 +233,64 @@ class PruneEmpty(HDF5Map):
return H5ReadResult.model_construct(path=src.path, source=src, completed=True) return H5ReadResult.model_construct(path=src.path, source=src, completed=True)
class ResolveDynamicTable(HDF5Map):
    """
    Read map that loads a DynamicTable group.

    A DynamicTable model does not declare its columns as fields; it only lists
    them as strings in ``colnames``. A new model with one field per column is
    therefore built, and the datasets are attached as
    :class:`~numpydantic.interface.hdf5.H5ArrayPath` objects, which lazy load
    the arrays in a thread/process safe way.

    The child elements are resolved by this map as well, which is reported
    through the ``completes`` field of the :class:`.ReadResult`.
    """

    phase = ReadPhases.read
    priority = 1

    @classmethod
    def check(
        cls, src: H5SourceItem, provider: "SchemaProvider", completed: Dict[str, H5ReadResult]
    ) -> bool:
        """
        Return ``True`` when ``src`` is a non-dataset item whose
        ``neurodata_type`` is ``DynamicTable`` or a class derived from it.
        """
        # datasets and untyped items are never dynamic tables
        if src.h5_type == "dataset" or "neurodata_type" not in src.attrs:
            return False

        neurodata_type = src.attrs["neurodata_type"]
        if neurodata_type == "DynamicTable":
            return True

        # Otherwise look for a DynamicTable ancestor. The MRO is compared by
        # class name rather than with issubclass: DynamicTable may be replaced
        # in the future, and with multiple schema versions there is no single
        # stable DynamicTable class to inherit from.
        candidate = provider.get_class(src.attrs["namespace"], neurodata_type)
        return any(ancestor.__name__ == "DynamicTable" for ancestor in candidate.__mro__)

    @classmethod
    def apply(
        cls, src: H5SourceItem, provider: "SchemaProvider", completed: Dict[str, H5ReadResult]
    ) -> H5ReadResult:
        """
        Build the populated table model for ``src`` and mark all of its
        direct children as completed by this result.
        """
        with h5py.File(src.h5f_path, "r") as h5f:
            table_group = h5f.get(src.path)

            # populate a model derived from the schema-provided base class
            table_base = provider.get_class(src.namespace, src.neurodata_type)
            populated = dynamictable_to_model(table_group, base=table_base)

            child_paths = [HDF5_Path(member.name) for member in table_group.values()]

        return H5ReadResult(
            path=src.path,
            source=src,
            result=populated,
            completes=child_paths,
            completed=True,
            applied=["ResolveDynamicTable"],
        )
class ResolveModelGroup(HDF5Map): class ResolveModelGroup(HDF5Map):

View file

@ -1,12 +1,7 @@
from __future__ import annotations from __future__ import annotations
from datetime import datetime, date from ...hdmf_common.v1_8_0.hdmf_common_base import Data
from decimal import Decimal
from enum import Enum
import re
import sys
from ...hdmf_common.v1_8_0.hdmf_common_base import Data, Container
from pandas import DataFrame, Series from pandas import DataFrame, Series
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, overload, Tuple from typing import Any, ClassVar, List, Dict, Optional, Union, overload, Tuple
from pydantic import ( from pydantic import (
BaseModel, BaseModel,
ConfigDict, ConfigDict,
@ -282,7 +277,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model # special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict # into {n_fields} rows, rather than keeping it in a dict
val = Series([val]) val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1: elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and val.shape[0] > 1:
# special case where we are returning a row in a ragged array, # special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long # same as above - prevent pandas pivoting to long
val = Series([val]) val = Series([val])

View file

@ -1,24 +0,0 @@
import time
import h5py
import pytest
from nwb_linkml.maps.hdmf import dynamictable_to_model, model_from_dynamictable
NWBFILE = "/Users/jonny/Dropbox/lab/p2p_ld/data/nwb/sub-738651046_ses-760693773.nwb"
@pytest.mark.xfail()
@pytest.mark.parametrize("dataset", ["aibs.nwb"])
def test_make_dynamictable(data_dir, dataset):
    """
    Smoke test: build a model from the ``units`` group of an NWB file,
    populate it from the same group, and dump the result to JSON.

    The test makes no assertions about the output; it only checks that the
    whole round trip runs without raising. It is marked ``xfail``.

    Args:
        data_dir: fixture supplying the directory that holds the test files
            (joined with ``dataset`` using ``/``).
        dataset: file name of the NWB file to open.
    """
    nwbfile = data_dir / dataset
    # Use a context manager so the HDF5 handle is released even when one of
    # the steps below raises, which is the expected outcome under ``xfail``.
    with h5py.File(nwbfile, "r") as h5f:
        group = h5f["units"]
        model = model_from_dynamictable(group)
        data = dynamictable_to_model(group, model)
        # serialize while the file is still open; the result is discarded
        _ = data.model_dump_json()