mirror of
https://github.com/p2p-ld/nwb-linkml.git
synced 2024-11-12 17:54:29 +00:00
autogenerating models for dynamictable
This commit is contained in:
parent
57fa3d34a2
commit
ce75dacf93
5 changed files with 95 additions and 4 deletions
69
nwb_linkml/src/nwb_linkml/maps/hdmf.py
Normal file
69
nwb_linkml/src/nwb_linkml/maps/hdmf.py
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
"""
|
||||||
|
Mapping functions for handling HDMF classes like DynamicTables
|
||||||
|
"""
|
||||||
|
import pdb
|
||||||
|
from typing import List, Type, Optional
|
||||||
|
import ast
|
||||||
|
from nwb_linkml.types import DataFrame
|
||||||
|
import h5py
|
||||||
|
from pydantic import create_model
|
||||||
|
from nwb_linkml.maps import dtype
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def model_from_dynamictable(group:h5py.Group) -> Type[DataFrame]:
    """
    Create a pydantic DataFrame model from an HDMF DynamicTable group.

    Inspects the first entry of each column named in the group's
    ``colnames`` attribute to infer a python type for that column.

    Args:
        group: h5py group for a DynamicTable (must carry a ``colnames`` attr)

    Returns:
        A dynamically-created subclass of :class:`~nwb_linkml.types.DataFrame`
        with one ``List[type | None]`` field per (non-void) column.
    """
    colnames = group.attrs['colnames']
    types = {}
    for col in colnames:
        # read the first entry to see what we got
        dset = group.get(col)
        item = dset[0]
        if isinstance(item, bytes):
            item = item.decode('utf-8')
        if isinstance(item, str):
            # try to see if this is actually a list or smth encoded as a string
            # FIX: literal_eval raises SyntaxError as well as ValueError on
            # non-literal strings, so catch both and fall back to str
            try:
                item = ast.literal_eval(item)
            except (ValueError, SyntaxError):
                pass

        type_ = type(item)
        # map numpy scalar types to their python equivalents where known
        type_ = dtype.np_to_python.get(type_, type_)
        if type_ is not np.void:
            # FIXME: handling nested column types that appear only in some versions?
            types[col] = (List[type_ | None], ...)

    # model is named after the last path component of the group
    model = create_model(group.name.split('/')[-1], **types, __base__=DataFrame)
    return model
|
||||||
|
|
||||||
|
|
||||||
|
def dynamictable_to_df(group:h5py.Group, model:Optional[Type[DataFrame]]=None) -> DataFrame:
    """
    Read an HDMF DynamicTable group into an instance of its DataFrame model.

    Args:
        group: h5py group containing the DynamicTable
        model: optionally, a model already built by
            :func:`model_from_dynamictable`; if ``None`` one is created here

    Returns:
        Instance of ``model`` populated from the group's column datasets
    """
    if model is None:
        model = model_from_dynamictable(group)

    items = {}
    for col in model.model_fields.keys():
        data = group.get(col)[:]
        if isinstance(data[0], bytes):
            data = data.astype('unicode')
        if isinstance(data[0], str):
            # check whether the strings encode python literals (eg. lists)
            # FIX: catch SyntaxError too — literal_eval raises it for
            # strings that aren't valid python syntax at all
            try:
                eval_type = type(ast.literal_eval(data[0]))
            except (ValueError, SyntaxError):
                eval_type = str

            if eval_type is not str:
                eval_list = []
                for item in data.tolist():
                    try:
                        eval_list.append(ast.literal_eval(item))
                    except (ValueError, SyntaxError):
                        # entry that fails to parse becomes a null placeholder
                        eval_list.append(None)
                items[col] = eval_list
                continue

        items[col] = data.tolist()

    # FIX: removed leftover `pdb.set_trace()` debugging breakpoint that
    # halted every call to this function
    return model(**items)
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,9 @@ class DataFrame(BaseModel, pd.DataFrame):
|
||||||
but when the model is dumped to a dictionary or serialized, the dataframe IS used,
|
but when the model is dumped to a dictionary or serialized, the dataframe IS used,
|
||||||
so changes will be reflected then.
|
so changes will be reflected then.
|
||||||
|
|
||||||
|
Fields that shadow pandas methods WILL prevent them from being usable, except
|
||||||
|
by directly accessing the dataframe like ``mymodel._df``
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_df: pd.DataFrame = None
|
_df: pd.DataFrame = None
|
||||||
|
@ -87,16 +90,20 @@ class DataFrame(BaseModel, pd.DataFrame):
|
||||||
return object.__getattribute__(self._df, item)
|
return object.__getattribute__(self._df, item)
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
return object.__getattribute__(self, item)
|
return object.__getattribute__(self, item)
|
||||||
|
|
||||||
@model_validator(mode='after')
|
@model_validator(mode='after')
|
||||||
def recreate_df(self):
|
def recreate_df(self):
|
||||||
"""Remake DF when validating (eg. when updating values on assignment)"""
|
"""
|
||||||
|
Remake DF when validating (eg. when updating values on assignment)
|
||||||
|
"""
|
||||||
self.update_df()
|
self.update_df()
|
||||||
|
|
||||||
@model_serializer(mode='wrap', when_used='always')
|
@model_serializer(mode='wrap', when_used='always')
|
||||||
def serialize_model(self, nxt: SerializerFunctionWrapHandler) -> Dict[str, Any]:
|
def serialize_model(self, nxt: SerializerFunctionWrapHandler) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
We don't handle values that are changed
|
We don't handle values that are changed on the dataframe by directly
|
||||||
|
updating the underlying model lists, but we implicitly handle them
|
||||||
|
by using the dataframe as the source when serializing
|
||||||
"""
|
"""
|
||||||
if self._df is None:
|
if self._df is None:
|
||||||
return nxt(self)
|
return nxt(self)
|
||||||
|
@ -107,5 +114,4 @@ class DataFrame(BaseModel, pd.DataFrame):
|
||||||
k: [inner_v for inner_v in v if inner_v is not None]
|
k: [inner_v for inner_v in v if inner_v is not None]
|
||||||
for k, v in out.items()
|
for k, v in out.items()
|
||||||
}
|
}
|
||||||
|
|
||||||
return nxt(self.__class__(**out))
|
return nxt(self.__class__(**out))
|
||||||
|
|
0
nwb_linkml/tests/test_maps/__init__.py
Normal file
0
nwb_linkml/tests/test_maps/__init__.py
Normal file
16
nwb_linkml/tests/test_maps/test_hdmf.py
Normal file
16
nwb_linkml/tests/test_maps/test_hdmf.py
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
import pytest
|
||||||
|
import h5py
|
||||||
|
|
||||||
|
from nwb_linkml.maps.hdmf import model_from_dynamictable, dynamictable_to_df
|
||||||
|
|
||||||
|
NWBFILE = '/Users/jonny/Dropbox/lab/p2p_ld/data/nwb/sub-738651046_ses-760693773.nwb'
|
||||||
|
|
||||||
|
@pytest.mark.skip()
def test_make_dynamictable():
    """
    Smoke test: build a model from a DynamicTable group and load it into
    a dataframe. Skipped by default — depends on a local NWB file.
    """
    # FIX: use a context manager so the HDF5 file handle is closed even
    # if model construction raises, instead of leaking it
    with h5py.File(NWBFILE, 'r') as h5f:
        group = h5f['intervals']['drifting_gratings_presentations']

        model = model_from_dynamictable(group)
        data = dynamictable_to_df(group, model)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue