continue removing nptyping, actually fix indexing

This commit is contained in:
sneakers-the-rat 2024-08-06 21:40:23 -07:00
parent 3ee7c68e15
commit a993ee10f2
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
24 changed files with 863 additions and 813 deletions

View file

@ -23,7 +23,7 @@ Cleanup
- [ ] Make a minimal pydanticgen-only package to slim linkml deps? - [ ] Make a minimal pydanticgen-only package to slim linkml deps?
- [ ] Disambiguate "maps" terminology - split out simple maps from the eg. dataset mapping classes - [ ] Disambiguate "maps" terminology - split out simple maps from the eg. dataset mapping classes
- [ ] Remove unnecessary imports - [x] Remove unnecessary imports
- dask - dask
- nptyping - nptyping
- [ ] Adapt the split generation to the new split generator style - [ ] Adapt the split generation to the new split generator style

File diff suppressed because it is too large Load diff

View file

@ -14,7 +14,6 @@ dependencies = [
"furo>=2023.8.19", "furo>=2023.8.19",
"myst-parser>=2.0.0", "myst-parser>=2.0.0",
"autodoc-pydantic>=2.0.1", "autodoc-pydantic>=2.0.1",
"nptyping>=2.5.0",
"sphinx-autobuild>=2021.3.14", "sphinx-autobuild>=2021.3.14",
"sphinx-design>=0.5.0", "sphinx-design>=0.5.0",
"sphinx-togglebutton>=0.3.2", "sphinx-togglebutton>=0.3.2",

View file

@ -266,10 +266,7 @@ class NamespacesAdapter(Adapter):
else: else:
ns = ns[0] ns = ns[0]
schema_names = [] schema_names = [sch.source for sch in ns.schema_ if sch.source is not None]
for sch in ns.schema_:
if sch.source is not None:
schema_names.append(sch.source)
return schema_names return schema_names
def schema_namespace(self, name: str) -> Optional[str]: def schema_namespace(self, name: str) -> Optional[str]:

View file

@ -133,7 +133,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model # special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict # into {n_fields} rows, rather than keeping it in a dict
val = Series([val]) val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1: elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array, # special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long # same as above - prevent pandas pivoting to long
val = Series([val]) val = Series([val])
@ -382,6 +382,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value) return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin that lets a table-region reference be indexed like the table it points to.

    Item reads and item writes are forwarded unchanged to :attr:`table`.
    """

    # The table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever the referenced table returns for ``item``."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Assign ``value`` at ``key`` on the referenced table."""
        referenced = self.table
        referenced[key] = value
DYNAMIC_TABLE_IMPORTS = Imports( DYNAMIC_TABLE_IMPORTS = Imports(
imports=[ imports=[
Import( Import(
@ -417,7 +431,9 @@ Imports required for the dynamic table mixin
VectorData is purposefully excluded as an import or an inject so that it will be VectorData is purposefully excluded as an import or an inject so that it will be
resolved to the VectorData definition in the generated module resolved to the VectorData definition in the generated module
""" """
DYNAMIC_TABLE_INJECTS = [VectorDataMixin, VectorIndexMixin, DynamicTableMixin] DYNAMIC_TABLE_INJECTS = [
VectorDataMixin,
# class VectorDataMixin(BaseModel): VectorIndexMixin,
# index: Optional[BaseModel] = None DynamicTableRegionMixin,
DynamicTableMixin,
]

View file

@ -242,10 +242,7 @@ def find_references(h5f: h5py.File, path: str) -> List[str]:
def _find_references(name: str, obj: h5py.Group | h5py.Dataset) -> None: def _find_references(name: str, obj: h5py.Group | h5py.Dataset) -> None:
pbar.update() pbar.update()
refs = [] refs = [attr for attr in obj.attrs.values() if isinstance(attr, h5py.h5r.Reference)]
for attr in obj.attrs.values():
if isinstance(attr, h5py.h5r.Reference):
refs.append(attr)
if isinstance(obj, h5py.Dataset): if isinstance(obj, h5py.Dataset):
# dataset is all references # dataset is all references

View file

@ -100,10 +100,9 @@ np_to_python = {
np.float64, np.float64,
np.single, np.single,
np.double, np.double,
np.float_,
) )
}, },
**{n: str for n in (np.character, np.str_, np.string_, np.unicode_)}, **{n: str for n in (np.character, np.str_)},
} }
allowed_precisions = { allowed_precisions = {

View file

@ -171,6 +171,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value) return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin that lets a table-region reference be indexed like the table it points to.

    Item reads and item writes are forwarded unchanged to :attr:`table`.
    """

    # The table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever the referenced table returns for ``item``."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Assign ``value`` at ``key`` on the referenced table."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel): class DynamicTableMixin(BaseModel):
""" """
Mixin to make DynamicTable subclasses behave like tables/dataframes Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -281,7 +295,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model # special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict # into {n_fields} rows, rather than keeping it in a dict
val = Series([val]) val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1: elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array, # special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long # same as above - prevent pandas pivoting to long
val = Series([val]) val = Series([val])

View file

@ -171,6 +171,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value) return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin that lets a table-region reference be indexed like the table it points to.

    Item reads and item writes are forwarded unchanged to :attr:`table`.
    """

    # The table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever the referenced table returns for ``item``."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Assign ``value`` at ``key`` on the referenced table."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel): class DynamicTableMixin(BaseModel):
""" """
Mixin to make DynamicTable subclasses behave like tables/dataframes Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -281,7 +295,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model # special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict # into {n_fields} rows, rather than keeping it in a dict
val = Series([val]) val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1: elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array, # special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long # same as above - prevent pandas pivoting to long
val = Series([val]) val = Series([val])

View file

@ -171,6 +171,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value) return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin that lets a table-region reference be indexed like the table it points to.

    Item reads and item writes are forwarded unchanged to :attr:`table`.
    """

    # The table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever the referenced table returns for ``item``."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Assign ``value`` at ``key`` on the referenced table."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel): class DynamicTableMixin(BaseModel):
""" """
Mixin to make DynamicTable subclasses behave like tables/dataframes Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -281,7 +295,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model # special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict # into {n_fields} rows, rather than keeping it in a dict
val = Series([val]) val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1: elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array, # special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long # same as above - prevent pandas pivoting to long
val = Series([val]) val = Series([val])

View file

@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value) return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin that lets a table-region reference be indexed like the table it points to.

    Item reads and item writes are forwarded unchanged to :attr:`table`.
    """

    # The table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever the referenced table returns for ``item``."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Assign ``value`` at ``key`` on the referenced table."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel): class DynamicTableMixin(BaseModel):
""" """
Mixin to make DynamicTable subclasses behave like tables/dataframes Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model # special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict # into {n_fields} rows, rather than keeping it in a dict
val = Series([val]) val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1: elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array, # special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long # same as above - prevent pandas pivoting to long
val = Series([val]) val = Series([val])

View file

@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value) return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin that lets a table-region reference be indexed like the table it points to.

    Item reads and item writes are forwarded unchanged to :attr:`table`.
    """

    # The table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever the referenced table returns for ``item``."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Assign ``value`` at ``key`` on the referenced table."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel): class DynamicTableMixin(BaseModel):
""" """
Mixin to make DynamicTable subclasses behave like tables/dataframes Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model # special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict # into {n_fields} rows, rather than keeping it in a dict
val = Series([val]) val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1: elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array, # special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long # same as above - prevent pandas pivoting to long
val = Series([val]) val = Series([val])

View file

@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value) return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin that lets a table-region reference be indexed like the table it points to.

    Item reads and item writes are forwarded unchanged to :attr:`table`.
    """

    # The table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever the referenced table returns for ``item``."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Assign ``value`` at ``key`` on the referenced table."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel): class DynamicTableMixin(BaseModel):
""" """
Mixin to make DynamicTable subclasses behave like tables/dataframes Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model # special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict # into {n_fields} rows, rather than keeping it in a dict
val = Series([val]) val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1: elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array, # special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long # same as above - prevent pandas pivoting to long
val = Series([val]) val = Series([val])

View file

@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value) return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin that lets a table-region reference be indexed like the table it points to.

    Item reads and item writes are forwarded unchanged to :attr:`table`.
    """

    # The table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever the referenced table returns for ``item``."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Assign ``value`` at ``key`` on the referenced table."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel): class DynamicTableMixin(BaseModel):
""" """
Mixin to make DynamicTable subclasses behave like tables/dataframes Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model # special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict # into {n_fields} rows, rather than keeping it in a dict
val = Series([val]) val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1: elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array, # special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long # same as above - prevent pandas pivoting to long
val = Series([val]) val = Series([val])

View file

@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value) return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin that lets a table-region reference be indexed like the table it points to.

    Item reads and item writes are forwarded unchanged to :attr:`table`.
    """

    # The table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever the referenced table returns for ``item``."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Assign ``value`` at ``key`` on the referenced table."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel): class DynamicTableMixin(BaseModel):
""" """
Mixin to make DynamicTable subclasses behave like tables/dataframes Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model # special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict # into {n_fields} rows, rather than keeping it in a dict
val = Series([val]) val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1: elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array, # special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long # same as above - prevent pandas pivoting to long
val = Series([val]) val = Series([val])

View file

@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value) return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin that lets a table-region reference be indexed like the table it points to.

    Item reads and item writes are forwarded unchanged to :attr:`table`.
    """

    # The table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever the referenced table returns for ``item``."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Assign ``value`` at ``key`` on the referenced table."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel): class DynamicTableMixin(BaseModel):
""" """
Mixin to make DynamicTable subclasses behave like tables/dataframes Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model # special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict # into {n_fields} rows, rather than keeping it in a dict
val = Series([val]) val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1: elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array, # special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long # same as above - prevent pandas pivoting to long
val = Series([val]) val = Series([val])

View file

@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value) return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin that lets a table-region reference be indexed like the table it points to.

    Item reads and item writes are forwarded unchanged to :attr:`table`.
    """

    # The table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever the referenced table returns for ``item``."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Assign ``value`` at ``key`` on the referenced table."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel): class DynamicTableMixin(BaseModel):
""" """
Mixin to make DynamicTable subclasses behave like tables/dataframes Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model # special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict # into {n_fields} rows, rather than keeping it in a dict
val = Series([val]) val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1: elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array, # special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long # same as above - prevent pandas pivoting to long
val = Series([val]) val = Series([val])

View file

@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value) return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin that lets a table-region reference be indexed like the table it points to.

    Item reads and item writes are forwarded unchanged to :attr:`table`.
    """

    # The table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever the referenced table returns for ``item``."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Assign ``value`` at ``key`` on the referenced table."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel): class DynamicTableMixin(BaseModel):
""" """
Mixin to make DynamicTable subclasses behave like tables/dataframes Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model # special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict # into {n_fields} rows, rather than keeping it in a dict
val = Series([val]) val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1: elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array, # special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long # same as above - prevent pandas pivoting to long
val = Series([val]) val = Series([val])

View file

@ -1,7 +1,12 @@
from __future__ import annotations from __future__ import annotations
from ...hdmf_common.v1_8_0.hdmf_common_base import Data from datetime import datetime, date
from decimal import Decimal
from enum import Enum
import re
import sys
from ...hdmf_common.v1_8_0.hdmf_common_base import Data, Container
from pandas import DataFrame, Series from pandas import DataFrame, Series
from typing import Any, ClassVar, List, Dict, Optional, Union, overload, Tuple from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, overload, Tuple
from pydantic import ( from pydantic import (
BaseModel, BaseModel,
ConfigDict, ConfigDict,
@ -167,6 +172,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value) return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin that lets a table-region reference be indexed like the table it points to.

    Item reads and item writes are forwarded unchanged to :attr:`table`.
    """

    # The table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever the referenced table returns for ``item``."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Assign ``value`` at ``key`` on the referenced table."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel): class DynamicTableMixin(BaseModel):
""" """
Mixin to make DynamicTable subclasses behave like tables/dataframes Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -277,7 +296,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model # special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict # into {n_fields} rows, rather than keeping it in a dict
val = Series([val]) val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and val.shape[0] > 1: elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array, # special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long # same as above - prevent pandas pivoting to long
val = Series([val]) val = Series([val])

View file

@ -14,180 +14,163 @@ Pydantic models that behave like pandas dataframes
left in this module since it is necessary for it to make sense. left in this module since it is necessary for it to make sense.
""" """
import ast #
from typing import Any, Dict, Optional, Type # class DataFrame(BaseModel, pd.DataFrame):
# """
# Pydantic model root class that mimics a pandas dataframe.
#
# Notes:
#
# The synchronization between the underlying lists in the pydantic model
# and the derived dataframe is partial, and at the moment unidirectional.
# This class is primarily intended for reading from tables stored in
# NWB files rather than being able to manipulate them.
#
# The dataframe IS updated when new values are *assigned* to a field.
#
# eg.::
#
# MyModel.fieldval = [1,2,3]
#
# But the dataframe is NOT updated when existing values are updated.
#
# eg.::
#
# MyModel.fieldval.append(4)
#
# In that case you need to call :meth:`.update_df` manually.
#
# Additionally, if the dataframe is modified, the underlying lists are NOT updated,
# but when the model is dumped to a dictionary or serialized, the dataframe IS used,
# so changes will be reflected then.
#
# Fields that shadow pandas methods WILL prevent them from being usable, except
# by directly accessing the dataframe like ``mymodel._df``
#
# """
#
# _df: pd.DataFrame = None
# model_config = ConfigDict(validate_assignment=True)
#
# def __init__(self, **kwargs):
# # pdb.set_trace()
# super().__init__(**kwargs)
#
# self._df = self.__make_df()
#
# def __make_df(self) -> pd.DataFrame:
# # make dict that can handle ragged arrays and NoneTypes
# items = {k: v for k, v in self.__dict__.items() if k in self.model_fields}
#
# df_dict = {
# k: (pd.Series(v) if isinstance(v, list) else pd.Series([v])) for k, v in items.items()
# }
# df = pd.DataFrame(df_dict)
# # replace Nans with None
# df = df.fillna(np.nan).replace([np.nan], [None])
# return df
#
# def update_df(self) -> None:
# """
# Update the internal dataframe in the case that the model values are changed
# in a way that we can't detect, like appending to one of the lists.
#
# """
# self._df = self.__make_df()
#
# def __getattr__(self, item: str):
# """
# Mimic pandas dataframe and pydantic model behavior
# """
# if item in ("df", "_df"):
# return self.__pydantic_private__["_df"]
# elif item in self.model_fields:
# return self._df[item]
# else:
# try:
# return object.__getattribute__(self._df, item)
# except AttributeError:
# return object.__getattribute__(self, item)
#
# @model_validator(mode="after")
# def recreate_df(self) -> None:
# """
# Remake DF when validating (eg. when updating values on assignment)
# """
# self.update_df()
#
# @model_serializer(mode="wrap", when_used="always")
# def serialize_model(self, nxt: SerializerFunctionWrapHandler) -> Dict[str, Any]:
# """
# We don't handle values that are changed on the dataframe by directly
# updating the underlying model lists, but we implicitly handle them
# by using the dataframe as the source when serializing
# """
# if self._df is None:
# return nxt(self)
# else:
# out = self._df.to_dict("list")
# # remove Nones
# out = {k: [inner_v for inner_v in v if inner_v is not None] for k, v in out.items()}
# return nxt(self.__class__(**out))
import h5py #
import numpy as np # def dynamictable_to_df(
import pandas as pd # group: h5py.Group, model: Optional[Type[DataFrame]] = None, base: Optional[BaseModel] = None
from pydantic import ( # ) -> DataFrame:
BaseModel, # """Generate a dataframe from an NDB DynamicTable"""
ConfigDict, # if model is None:
SerializerFunctionWrapHandler, # model = model_from_dynamictable(group, base)
model_serializer, #
model_validator, # items = {}
) # for col, _col_type in model.model_fields.items():
# if col not in group:
from nwb_linkml.maps.hdmf import dereference_reference_vector, model_from_dynamictable # continue
from nwb_linkml.types.hdf5 import HDF5_Path # idxname = col + "_index"
# if idxname in group:
# idx = group.get(idxname)[:]
class DataFrame(BaseModel, pd.DataFrame): # data = group.get(col)[idx - 1]
""" # else:
Pydantic model root class that mimics a pandas dataframe. # data = group.get(col)[:]
#
Notes: # # Handle typing inside of list
# if isinstance(data[0], bytes):
The synchronization between the underlying lists in the pydantic model # data = data.astype("unicode")
and the derived dataframe is partial, and at the moment unidirectional. # if isinstance(data[0], str):
This class is primarily intended for reading from tables stored in # # lists and other compound data types can get flattened out to strings when stored
NWB files rather than being able to manipulate them. # # so we try and literal eval and recover them
# try:
The dataframe IS updated when new values are *assigned* to a field. # eval_type = type(ast.literal_eval(data[0]))
# except (ValueError, SyntaxError):
eg.:: # eval_type = str
#
MyModel.fieldval = [1,2,3] # # if we've found one of those, get the data type within it.
# if eval_type is not str:
But the dataframe is NOT updated when existing values are updated. # eval_list = []
# for item in data.tolist():
eg.:: # try:
# eval_list.append(ast.literal_eval(item))
MyModel.fieldval.append(4) # except ValueError:
# eval_list.append(None)
In that case you need to call :meth:`.update_df` manually. # data = eval_list
# elif isinstance(data[0], h5py.h5r.Reference):
Additionally, if the dataframe is modified, the underlying lists are NOT updated, # data = [HDF5_Path(group[d].name) for d in data]
but when the model is dumped to a dictionary or serialized, the dataframe IS used, # elif isinstance(data[0], tuple) and any(
so changes will be reflected then. # [isinstance(d, h5py.h5r.Reference) for d in data[0]]
# ):
Fields that shadow pandas methods WILL prevent them from being usable, except # # references stored inside a tuple, reference + location.
by directly accessing the dataframe like ``mymodel._df`` # # dereference them!?
# dset = group.get(col)
""" # names = dset.dtype.names
# if names is not None and names[0] == "idx_start" and names[1] == "count":
_df: pd.DataFrame = None # data = dereference_reference_vector(dset, data)
model_config = ConfigDict(validate_assignment=True) #
# else:
def __init__(self, **kwargs): # data = data.tolist()
# pdb.set_trace() #
super().__init__(**kwargs) # # After list, check if we need to put this thing inside of
# # another class, as indicated by the enclosing model
self._df = self.__make_df() #
# items[col] = data
def __make_df(self) -> pd.DataFrame: #
# make dict that can handle ragged arrays and NoneTypes # return model(hdf5_path=group.name, name=group.name.split("/")[-1], **items)
items = {k: v for k, v in self.__dict__.items() if k in self.model_fields}
df_dict = {
k: (pd.Series(v) if isinstance(v, list) else pd.Series([v])) for k, v in items.items()
}
df = pd.DataFrame(df_dict)
# replace Nans with None
df = df.fillna(np.nan).replace([np.nan], [None])
return df
def update_df(self) -> None:
"""
Update the internal dataframe in the case that the model values are changed
in a way that we can't detect, like appending to one of the lists.
"""
self._df = self.__make_df()
def __getattr__(self, item: str):
"""
Mimic pandas dataframe and pydantic model behavior
"""
if item in ("df", "_df"):
return self.__pydantic_private__["_df"]
elif item in self.model_fields:
return self._df[item]
else:
try:
return object.__getattribute__(self._df, item)
except AttributeError:
return object.__getattribute__(self, item)
@model_validator(mode="after")
def recreate_df(self) -> None:
"""
Remake DF when validating (eg. when updating values on assignment)
"""
self.update_df()
@model_serializer(mode="wrap", when_used="always")
def serialize_model(self, nxt: SerializerFunctionWrapHandler) -> Dict[str, Any]:
"""
We don't handle values that are changed on the dataframe by directly
updating the underlying model lists, but we implicitly handle them
by using the dataframe as the source when serializing
"""
if self._df is None:
return nxt(self)
else:
out = self._df.to_dict("list")
# remove Nones
out = {k: [inner_v for inner_v in v if inner_v is not None] for k, v in out.items()}
return nxt(self.__class__(**out))
def dynamictable_to_df(
group: h5py.Group, model: Optional[Type[DataFrame]] = None, base: Optional[BaseModel] = None
) -> DataFrame:
"""Generate a dataframe from an NDB DynamicTable"""
if model is None:
model = model_from_dynamictable(group, base)
items = {}
for col, _col_type in model.model_fields.items():
if col not in group:
continue
idxname = col + "_index"
if idxname in group:
idx = group.get(idxname)[:]
data = group.get(col)[idx - 1]
else:
data = group.get(col)[:]
# Handle typing inside of list
if isinstance(data[0], bytes):
data = data.astype("unicode")
if isinstance(data[0], str):
# lists and other compound data types can get flattened out to strings when stored
# so we try and literal eval and recover them
try:
eval_type = type(ast.literal_eval(data[0]))
except (ValueError, SyntaxError):
eval_type = str
# if we've found one of those, get the data type within it.
if eval_type is not str:
eval_list = []
for item in data.tolist():
try:
eval_list.append(ast.literal_eval(item))
except ValueError:
eval_list.append(None)
data = eval_list
elif isinstance(data[0], h5py.h5r.Reference):
data = [HDF5_Path(group[d].name) for d in data]
elif isinstance(data[0], tuple) and any(
[isinstance(d, h5py.h5r.Reference) for d in data[0]]
):
# references stored inside a tuple, reference + location.
# dereference them!?
dset = group.get(col)
names = dset.dtype.names
if names is not None and names[0] == "idx_start" and names[1] == "count":
data = dereference_reference_vector(dset, data)
else:
data = data.tolist()
# After list, check if we need to put this thing inside of
# another class, as indicated by the enclosing model
items[col] = data
return model(hdf5_path=group.name, name=group.name.split("/")[-1], **items)

View file

@ -67,8 +67,9 @@ def units(request) -> Tuple[Units, list[np.ndarray], np.ndarray]:
""" """
n_units = 24 n_units = 24
generator = np.random.default_rng()
spike_times = [ spike_times = [
np.full(shape=np.random.randint(10, 50), fill_value=i, dtype=float) for i in range(n_units) np.full(shape=generator.integers(10, 50), fill_value=i, dtype=float) for i in range(n_units)
] ]
spike_idx = [] spike_idx = []
for i in range(n_units): for i in range(n_units):
@ -141,6 +142,19 @@ def test_dynamictable_indexing(electrical_series):
assert subsection.dtypes.values.tolist() == dtypes[0:3] assert subsection.dtypes.values.tolist() == dtypes[0:3]
def test_dynamictable_region(electrical_series):
    """
    Placeholder test for DynamicTableRegion behaviour.

    Currently this only unpacks the ``electrical_series`` fixture into its two
    members; it makes no assertions yet.

    Args:
        electrical_series: fixture value that unpacks into ``(series, electrodes)``
    """
    # NOTE(review): presumably meant to check indexing through a table region
    # once written - confirm intended coverage and add assertions.
    series, electrodes = electrical_series
def test_dynamictable_ragged_arrays(units): def test_dynamictable_ragged_arrays(units):
""" """
Should be able to index ragged arrays using an implicit _index column Should be able to index ragged arrays using an implicit _index column

View file

@ -4,8 +4,9 @@ from pathlib import Path
from typing import Optional from typing import Optional
import pytest import pytest
from nptyping import Shape, UByte from numpydantic import NDArray, Shape
from numpydantic import NDArray import numpy as np
import nwb_linkml import nwb_linkml
from nwb_linkml.maps.naming import version_module_case from nwb_linkml.maps.naming import version_module_case
@ -77,7 +78,7 @@ def test_linkml_build_from_yaml(tmp_output_dir):
"comments": Optional[str], "comments": Optional[str],
"data": "TimeSeriesData", "data": "TimeSeriesData",
"timestamps": "Optional", # __name__ just gets the first part of Optional[TimeSeriesTimestamps] "timestamps": "Optional", # __name__ just gets the first part of Optional[TimeSeriesTimestamps]
"control": Optional[NDArray[Shape["* num_times"], UByte]], "control": Optional[NDArray[Shape["* num_times"], np.uint8]],
}, },
) )
], ],

View file

@ -58,6 +58,10 @@ select = [
"D210", "D211", "D210", "D211",
# emptiness # emptiness
"D419", "D419",
# perf
"PERF",
# numpy
"NPY",
] ]
ignore = [ ignore = [
# annotations for *args and **kwargs # annotations for *args and **kwargs

View file

@ -3,6 +3,7 @@ import os
import sys import sys
import traceback import traceback
from pdb import post_mortem from pdb import post_mortem
import subprocess
from argparse import ArgumentParser from argparse import ArgumentParser
from pathlib import Path from pathlib import Path
@ -179,6 +180,8 @@ def generate_versions(
with open(pydantic_path / "__init__.py", "w") as initfile: with open(pydantic_path / "__init__.py", "w") as initfile:
initfile.write(f"from .pydantic.core.{latest_version.name}.namespace import *") initfile.write(f"from .pydantic.core.{latest_version.name}.namespace import *")
subprocess.run(["black", "."])
finally: finally:
if len(failed_versions) > 0: if len(failed_versions) > 0:
print("Failed Building Versions:") print("Failed Building Versions:")