From ce75dacf93261fa2c4c15eff675a67985316fd7b Mon Sep 17 00:00:00 2001
From: sneakers-the-rat
Date: Sat, 23 Sep 2023 01:33:28 -0700
Subject: [PATCH] autogenerating models for dynamictable

---
Review notes (text in this section is not part of the commit message):

* This patch was rebuilt from a whitespace-collapsed copy. Line breaks and
  indentation, in particular on the context lines of the types/df.py hunks,
  are a best-effort reconstruction, and the blob ids on the "index" lines of
  maps/hdmf.py and test_hdmf.py still refer to the pre-review contents.
  Regenerate the patch with git format-patch before applying it.
* maps/hdmf.py: removed the leftover "import pdb" / "pdb.set_trace()" that
  stopped every dynamictable_to_df() call in the debugger.
* maps/hdmf.py: ast.literal_eval() raises SyntaxError (not only ValueError)
  for plain text such as "hello world", so both are caught now.
* maps/hdmf.py: added docstrings to both functions.
* test_hdmf.py: the h5py file is closed via a context manager and the skip
  marker states its reason.

 nwb_linkml/src/nwb_linkml/maps/hdmf.py        | 93 +++++++++++++++++++
 nwb_linkml/src/nwb_linkml/types/df.py         | 14 ++-
 nwb_linkml/tests/test_maps/__init__.py        |  0
 nwb_linkml/tests/test_maps/test_hdmf.py       | 14 +++
 nwb_linkml/tests/{ => test_maps}/test_maps.py |  0
 5 files changed, 117 insertions(+), 4 deletions(-)
 create mode 100644 nwb_linkml/src/nwb_linkml/maps/hdmf.py
 create mode 100644 nwb_linkml/tests/test_maps/__init__.py
 create mode 100644 nwb_linkml/tests/test_maps/test_hdmf.py
 rename nwb_linkml/tests/{ => test_maps}/test_maps.py (100%)

diff --git a/nwb_linkml/src/nwb_linkml/maps/hdmf.py b/nwb_linkml/src/nwb_linkml/maps/hdmf.py
new file mode 100644
index 0000000..fb93b49
--- /dev/null
+++ b/nwb_linkml/src/nwb_linkml/maps/hdmf.py
@@ -0,0 +1,93 @@
+"""
+Mapping functions for handling HDMF classes like DynamicTables
+"""
+from typing import List, Type, Optional
+import ast
+from nwb_linkml.types import DataFrame
+import h5py
+from pydantic import create_model
+from nwb_linkml.maps import dtype
+import numpy as np
+
+def model_from_dynamictable(group:h5py.Group) -> Type[DataFrame]:
+    """
+    Create a pydantic model from the columns of a DynamicTable group.
+
+    The type of each column is inferred from its first entry: bytes are
+    decoded as utf-8, and strings are passed through ``ast.literal_eval``
+    in case they encode a python literal (eg. a list).
+
+    Args:
+        group: h5py group with a ``colnames`` attribute naming its columns
+
+    Returns:
+        A :class:`.DataFrame` subclass named after the last part of the
+        group's path, with a ``List[type | None]`` field for each column
+        whose inferred type is not ``np.void``
+    """
+    colnames = group.attrs['colnames']
+    types = {}
+    for col in colnames:
+        # read the first entry to see what we got
+        dset = group.get(col)
+        item = dset[0]
+        if isinstance(item, bytes):
+            item = item.decode('utf-8')
+        if isinstance(item, str):
+            # try to see if this is actually a list or smth encoded as a string
+            try:
+                item = ast.literal_eval(item)
+            except (ValueError, SyntaxError):
+                # plain text, keep it as a string
+                pass
+
+        type_ = type(item)
+        type_ = dtype.np_to_python.get(type_, type_)
+        if type_ is not np.void:
+            # FIXME: handling nested column types that appear only in some versions?
+            types[col] = (List[type_ | None], ...)
+
+    model = create_model(group.name.split('/')[-1], **types, __base__=DataFrame)
+    return model
+
+
+def dynamictable_to_df(group:h5py.Group, model:Optional[Type[DataFrame]]=None) -> DataFrame:
+    """
+    Read the columns of a DynamicTable group into a DataFrame model.
+
+    Args:
+        group: h5py group to read the columns from
+        model: Model to instantiate. If ``None``, one is generated with
+            :func:`model_from_dynamictable`
+
+    Returns:
+        An instance of ``model`` populated with one list per column
+    """
+    if model is None:
+        model = model_from_dynamictable(group)
+
+    items = {}
+    for col in model.model_fields:
+        data = group.get(col)[:]
+        if isinstance(data[0], bytes):
+            data = data.astype('unicode')
+        if isinstance(data[0], str):
+            try:
+                eval_type = type(ast.literal_eval(data[0]))
+            except (ValueError, SyntaxError):
+                eval_type = str
+
+            if eval_type is not str:
+                eval_list = []
+                for item in data.tolist():
+                    try:
+                        eval_list.append(ast.literal_eval(item))
+                    except (ValueError, SyntaxError):
+                        # entries that don't parse become missing values
+                        eval_list.append(None)
+                items[col] = eval_list
+                continue
+
+        items[col] = data.tolist()
+
+    return model(**items)
diff --git a/nwb_linkml/src/nwb_linkml/types/df.py b/nwb_linkml/src/nwb_linkml/types/df.py
index 8a5f47a..54d4869 100644
--- a/nwb_linkml/src/nwb_linkml/types/df.py
+++ b/nwb_linkml/src/nwb_linkml/types/df.py
@@ -44,6 +44,9 @@ class DataFrame(BaseModel, pd.DataFrame):
         but when the model is dumped to a dictionary or serialized, the dataframe IS used,
         so changes will be reflected then.
 
+        Fields that shadow pandas methods WILL prevent them from being usable, except
+        by directly accessing the dataframe like ``mymodel._df``
+
     """
     _df: pd.DataFrame = None
 
@@ -87,16 +90,20 @@ class DataFrame(BaseModel, pd.DataFrame):
             return object.__getattribute__(self._df, item)
         except AttributeError:
             return object.__getattribute__(self, item)
+
     @model_validator(mode='after')
     def recreate_df(self):
-        """Remake DF when validating (eg. when updating values on assignment)"""
+        """
+        Remake DF when validating (eg. when updating values on assignment)
+        """
         self.update_df()
 
     @model_serializer(mode='wrap', when_used='always')
     def serialize_model(self, nxt: SerializerFunctionWrapHandler) -> Dict[str, Any]:
         """
-        We don't handle values that are changed
-
+        We don't handle values that are changed on the dataframe by directly
+        updating the underlying model lists, but we implicitly handle them
+        by using the dataframe as the source when serializing
         """
         if self._df is None:
             return nxt(self)
@@ -107,5 +114,4 @@ class DataFrame(BaseModel, pd.DataFrame):
                 k: [inner_v for inner_v in v if inner_v is not None]
                 for k, v in out.items()
             }
-
             return nxt(self.__class__(**out))
diff --git a/nwb_linkml/tests/test_maps/__init__.py b/nwb_linkml/tests/test_maps/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/nwb_linkml/tests/test_maps/test_hdmf.py b/nwb_linkml/tests/test_maps/test_hdmf.py
new file mode 100644
index 0000000..d81ec7a
--- /dev/null
+++ b/nwb_linkml/tests/test_maps/test_hdmf.py
@@ -0,0 +1,14 @@
+import pytest
+import h5py
+
+from nwb_linkml.maps.hdmf import model_from_dynamictable, dynamictable_to_df
+
+NWBFILE = '/Users/jonny/Dropbox/lab/p2p_ld/data/nwb/sub-738651046_ses-760693773.nwb'
+
+@pytest.mark.skip(reason='needs the local file at NWBFILE')
+def test_make_dynamictable():
+    with h5py.File(NWBFILE, 'r') as h5f:
+        group = h5f['intervals']['drifting_gratings_presentations']
+
+        model = model_from_dynamictable(group)
+        data = dynamictable_to_df(group, model)
diff --git a/nwb_linkml/tests/test_maps.py b/nwb_linkml/tests/test_maps/test_maps.py
similarity index 100%
rename from nwb_linkml/tests/test_maps.py
rename to nwb_linkml/tests/test_maps/test_maps.py