autogenerating models for dynamictable

This commit is contained in:
sneakers-the-rat 2023-09-23 01:33:28 -07:00
parent 57fa3d34a2
commit ce75dacf93
5 changed files with 95 additions and 4 deletions

View file

@ -0,0 +1,69 @@
"""
Mapping functions for handling HDMF classes like DynamicTables
"""
import pdb
from typing import List, Type, Optional
import ast
from nwb_linkml.types import DataFrame
import h5py
from pydantic import create_model
from nwb_linkml.maps import dtype
import numpy as np
def model_from_dynamictable(group:h5py.Group) -> Type[DataFrame]:
    """
    Create a ``DataFrame`` model class from the columns of a DynamicTable group.

    The type of each column is inferred from its first entry only. Byte strings
    are decoded as UTF-8, and strings that parse as Python literals (eg. an
    encoded list) are typed as the parsed value rather than as ``str``.
    The inferred type is looked up in ``dtype.np_to_python`` and replaced when
    an entry exists. Columns whose resulting type is ``np.void`` are left out
    of the model.

    Args:
        group: HDF5 group with a ``colnames`` attribute naming its column datasets.

    Returns:
        A model class derived from ``DataFrame``, named after the last path
        component of ``group.name``, with one required ``List[<type> | None]``
        field per retained column.
    """
    types = {}
    for col in group.attrs['colnames']:
        # read the first entry to see what we got
        item = group.get(col)[0]
        if isinstance(item, bytes):
            item = item.decode('utf-8')
        if isinstance(item, str):
            # try to see if this is actually a list or smth encoded as a string
            try:
                item = ast.literal_eval(item)
            except (ValueError, SyntaxError, TypeError):
                # Ordinary text is not a Python literal: ``"word"`` raises
                # ValueError, ``"two words"`` raises SyntaxError. Keep it as str.
                pass
        type_ = type(item)
        type_ = dtype.np_to_python.get(type_, type_)
        if type_ is not np.void:
            # FIXME: handling nested column types that appear only in some versions?
            types[col] = (List[type_ | None], ...)
    return create_model(group.name.split('/')[-1], **types, __base__=DataFrame)
def dynamictable_to_df(group:h5py.Group, model:Optional[Type[DataFrame]]=None) -> DataFrame:
    """
    Load the columns of a DynamicTable group into an instance of ``model``.

    Each column named in ``model.model_fields`` is read in full from ``group``.
    Byte-string columns are converted to unicode. If the first entry of a
    string column parses as a non-string Python literal (eg. an encoded list),
    every entry of that column is parsed, and entries that cannot be parsed
    become ``None``. All other columns are passed through as plain lists.

    Args:
        group: HDF5 group holding one dataset per column.
        model: Model class to instantiate. If ``None``, one is generated from
            ``group`` with ``model_from_dynamictable``.

    Returns:
        ``model`` instantiated with one list of values per column.
    """
    if model is None:
        model = model_from_dynamictable(group)

    # ``literal_eval`` raises ValueError for a bare word and SyntaxError for
    # free text such as "two words"; both just mean "not a literal".
    not_a_literal = (ValueError, SyntaxError, TypeError)

    items = {}
    for col in model.model_fields:
        data = group.get(col)[:]
        # An empty column has no first entry to inspect; it falls through to tolist()
        has_rows = len(data) > 0

        if has_rows and isinstance(data[0], bytes):
            data = data.astype('unicode')

        if has_rows and isinstance(data[0], str):
            try:
                eval_type = type(ast.literal_eval(data[0]))
            except not_a_literal:
                eval_type = str

            if eval_type is not str:
                eval_list = []
                for item in data.tolist():
                    try:
                        eval_list.append(ast.literal_eval(item))
                    except not_a_literal:
                        eval_list.append(None)
                items[col] = eval_list
                continue

        items[col] = data.tolist()

    return model(**items)

View file

@ -44,6 +44,9 @@ class DataFrame(BaseModel, pd.DataFrame):
but when the model is dumped to a dictionary or serialized, the dataframe IS used,
so changes will be reflected then.
Fields that shadow pandas methods WILL prevent them from being usable, except
by directly accessing the dataframe like ``mymodel._df``
"""
_df: pd.DataFrame = None
@ -87,16 +90,20 @@ class DataFrame(BaseModel, pd.DataFrame):
return object.__getattribute__(self._df, item)
except AttributeError:
return object.__getattribute__(self, item)
@model_validator(mode='after')
def recreate_df(self):
    """
    Remake DF when validating (eg. when updating values on assignment)
    """
    self.update_df()
    # An 'after' model validator must hand the validated instance back;
    # without this, ``model_validate`` would receive ``None``.
    return self
@model_serializer(mode='wrap', when_used='always')
def serialize_model(self, nxt: SerializerFunctionWrapHandler) -> Dict[str, Any]:
"""
We don't handle values that are changed
We don't handle values that are changed on the dataframe by directly
updating the underlying model lists, but we implicitly handle them
by using the dataframe as the source when serializing
"""
if self._df is None:
return nxt(self)
@ -107,5 +114,4 @@ class DataFrame(BaseModel, pd.DataFrame):
k: [inner_v for inner_v in v if inner_v is not None]
for k, v in out.items()
}
return nxt(self.__class__(**out))

View file

View file

@ -0,0 +1,16 @@
import pytest
import h5py
from nwb_linkml.maps.hdmf import model_from_dynamictable, dynamictable_to_df
# Absolute path to an NWB file on the author's machine
NWBFILE = '/Users/jonny/Dropbox/lab/p2p_ld/data/nwb/sub-738651046_ses-760693773.nwb'


# Skipped unconditionally: the test depends on the machine-specific file at NWBFILE
@pytest.mark.skip()
def test_make_dynamictable():
    """
    Generate a model from a DynamicTable group in a real NWB file and load the
    group's data with it.
    """
    # Context manager so the HDF5 handle is closed even if loading fails
    with h5py.File(NWBFILE, 'r') as h5f:
        group = h5f['intervals']['drifting_gratings_presentations']
        model = model_from_dynamictable(group)
        data = dynamictable_to_df(group, model)
    assert isinstance(data, model)