mirror of
https://github.com/p2p-ld/nwb-linkml.git
synced 2025-01-09 13:44:27 +00:00
continue removing nptyping, actually fix indexing
This commit is contained in:
parent
3ee7c68e15
commit
a993ee10f2
24 changed files with 863 additions and 813 deletions
|
@ -23,7 +23,7 @@ Cleanup
|
|||
|
||||
- [ ] Make a minimal pydanticgen-only package to slim linkml deps?
|
||||
- [ ] Disambiguate "maps" terminology - split out simple maps from the eg. dataset mapping classes
|
||||
- [ ] Remove unnecessary imports
|
||||
- [x] Remove unnecessary imports
|
||||
- dask
|
||||
- nptyping
|
||||
- [ ] Adapt the split generation to the new split generator style
|
||||
|
|
1068
docs/pdm.lock
1068
docs/pdm.lock
File diff suppressed because it is too large
Load diff
|
@ -14,7 +14,6 @@ dependencies = [
|
|||
"furo>=2023.8.19",
|
||||
"myst-parser>=2.0.0",
|
||||
"autodoc-pydantic>=2.0.1",
|
||||
"nptyping>=2.5.0",
|
||||
"sphinx-autobuild>=2021.3.14",
|
||||
"sphinx-design>=0.5.0",
|
||||
"sphinx-togglebutton>=0.3.2",
|
||||
|
|
|
@ -266,10 +266,7 @@ class NamespacesAdapter(Adapter):
|
|||
else:
|
||||
ns = ns[0]
|
||||
|
||||
schema_names = []
|
||||
for sch in ns.schema_:
|
||||
if sch.source is not None:
|
||||
schema_names.append(sch.source)
|
||||
schema_names = [sch.source for sch in ns.schema_ if sch.source is not None]
|
||||
return schema_names
|
||||
|
||||
def schema_namespace(self, name: str) -> Optional[str]:
|
||||
|
|
|
@ -133,7 +133,7 @@ class DynamicTableMixin(BaseModel):
|
|||
# special case where pandas will unpack a pydantic model
|
||||
# into {n_fields} rows, rather than keeping it in a dict
|
||||
val = Series([val])
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
|
||||
# special case where we are returning a row in a ragged array,
|
||||
# same as above - prevent pandas pivoting to long
|
||||
val = Series([val])
|
||||
|
@ -382,6 +382,20 @@ class VectorIndexMixin(BaseModel):
|
|||
return len(self.value)
|
||||
|
||||
|
||||
class DynamicTableRegionMixin(BaseModel):
|
||||
"""
|
||||
Mixin to allow indexing references to regions of dynamictables
|
||||
"""
|
||||
|
||||
table: "DynamicTableMixin"
|
||||
|
||||
def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
|
||||
return self.table[item]
|
||||
|
||||
def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
|
||||
self.table[key] = value
|
||||
|
||||
|
||||
DYNAMIC_TABLE_IMPORTS = Imports(
|
||||
imports=[
|
||||
Import(
|
||||
|
@ -417,7 +431,9 @@ Imports required for the dynamic table mixin
|
|||
VectorData is purposefully excluded as an import or an inject so that it will be
|
||||
resolved to the VectorData definition in the generated module
|
||||
"""
|
||||
DYNAMIC_TABLE_INJECTS = [VectorDataMixin, VectorIndexMixin, DynamicTableMixin]
|
||||
|
||||
# class VectorDataMixin(BaseModel):
|
||||
# index: Optional[BaseModel] = None
|
||||
DYNAMIC_TABLE_INJECTS = [
|
||||
VectorDataMixin,
|
||||
VectorIndexMixin,
|
||||
DynamicTableRegionMixin,
|
||||
DynamicTableMixin,
|
||||
]
|
||||
|
|
|
@ -242,10 +242,7 @@ def find_references(h5f: h5py.File, path: str) -> List[str]:
|
|||
|
||||
def _find_references(name: str, obj: h5py.Group | h5py.Dataset) -> None:
|
||||
pbar.update()
|
||||
refs = []
|
||||
for attr in obj.attrs.values():
|
||||
if isinstance(attr, h5py.h5r.Reference):
|
||||
refs.append(attr)
|
||||
refs = [attr for attr in obj.attrs.values() if isinstance(attr, h5py.h5r.Reference)]
|
||||
|
||||
if isinstance(obj, h5py.Dataset):
|
||||
# dataset is all references
|
||||
|
|
|
@ -100,10 +100,9 @@ np_to_python = {
|
|||
np.float64,
|
||||
np.single,
|
||||
np.double,
|
||||
np.float_,
|
||||
)
|
||||
},
|
||||
**{n: str for n in (np.character, np.str_, np.string_, np.unicode_)},
|
||||
**{n: str for n in (np.character, np.str_)},
|
||||
}
|
||||
|
||||
allowed_precisions = {
|
||||
|
|
|
@ -171,6 +171,20 @@ class VectorIndexMixin(BaseModel):
|
|||
return len(self.value)
|
||||
|
||||
|
||||
class DynamicTableRegionMixin(BaseModel):
|
||||
"""
|
||||
Mixin to allow indexing references to regions of dynamictables
|
||||
"""
|
||||
|
||||
table: "DynamicTableMixin"
|
||||
|
||||
def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
|
||||
return self.table[item]
|
||||
|
||||
def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
|
||||
self.table[key] = value
|
||||
|
||||
|
||||
class DynamicTableMixin(BaseModel):
|
||||
"""
|
||||
Mixin to make DynamicTable subclasses behave like tables/dataframes
|
||||
|
@ -281,7 +295,7 @@ class DynamicTableMixin(BaseModel):
|
|||
# special case where pandas will unpack a pydantic model
|
||||
# into {n_fields} rows, rather than keeping it in a dict
|
||||
val = Series([val])
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
|
||||
# special case where we are returning a row in a ragged array,
|
||||
# same as above - prevent pandas pivoting to long
|
||||
val = Series([val])
|
||||
|
|
|
@ -171,6 +171,20 @@ class VectorIndexMixin(BaseModel):
|
|||
return len(self.value)
|
||||
|
||||
|
||||
class DynamicTableRegionMixin(BaseModel):
|
||||
"""
|
||||
Mixin to allow indexing references to regions of dynamictables
|
||||
"""
|
||||
|
||||
table: "DynamicTableMixin"
|
||||
|
||||
def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
|
||||
return self.table[item]
|
||||
|
||||
def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
|
||||
self.table[key] = value
|
||||
|
||||
|
||||
class DynamicTableMixin(BaseModel):
|
||||
"""
|
||||
Mixin to make DynamicTable subclasses behave like tables/dataframes
|
||||
|
@ -281,7 +295,7 @@ class DynamicTableMixin(BaseModel):
|
|||
# special case where pandas will unpack a pydantic model
|
||||
# into {n_fields} rows, rather than keeping it in a dict
|
||||
val = Series([val])
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
|
||||
# special case where we are returning a row in a ragged array,
|
||||
# same as above - prevent pandas pivoting to long
|
||||
val = Series([val])
|
||||
|
|
|
@ -171,6 +171,20 @@ class VectorIndexMixin(BaseModel):
|
|||
return len(self.value)
|
||||
|
||||
|
||||
class DynamicTableRegionMixin(BaseModel):
|
||||
"""
|
||||
Mixin to allow indexing references to regions of dynamictables
|
||||
"""
|
||||
|
||||
table: "DynamicTableMixin"
|
||||
|
||||
def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
|
||||
return self.table[item]
|
||||
|
||||
def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
|
||||
self.table[key] = value
|
||||
|
||||
|
||||
class DynamicTableMixin(BaseModel):
|
||||
"""
|
||||
Mixin to make DynamicTable subclasses behave like tables/dataframes
|
||||
|
@ -281,7 +295,7 @@ class DynamicTableMixin(BaseModel):
|
|||
# special case where pandas will unpack a pydantic model
|
||||
# into {n_fields} rows, rather than keeping it in a dict
|
||||
val = Series([val])
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
|
||||
# special case where we are returning a row in a ragged array,
|
||||
# same as above - prevent pandas pivoting to long
|
||||
val = Series([val])
|
||||
|
|
|
@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
|
|||
return len(self.value)
|
||||
|
||||
|
||||
class DynamicTableRegionMixin(BaseModel):
|
||||
"""
|
||||
Mixin to allow indexing references to regions of dynamictables
|
||||
"""
|
||||
|
||||
table: "DynamicTableMixin"
|
||||
|
||||
def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
|
||||
return self.table[item]
|
||||
|
||||
def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
|
||||
self.table[key] = value
|
||||
|
||||
|
||||
class DynamicTableMixin(BaseModel):
|
||||
"""
|
||||
Mixin to make DynamicTable subclasses behave like tables/dataframes
|
||||
|
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
|
|||
# special case where pandas will unpack a pydantic model
|
||||
# into {n_fields} rows, rather than keeping it in a dict
|
||||
val = Series([val])
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
|
||||
# special case where we are returning a row in a ragged array,
|
||||
# same as above - prevent pandas pivoting to long
|
||||
val = Series([val])
|
||||
|
|
|
@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
|
|||
return len(self.value)
|
||||
|
||||
|
||||
class DynamicTableRegionMixin(BaseModel):
|
||||
"""
|
||||
Mixin to allow indexing references to regions of dynamictables
|
||||
"""
|
||||
|
||||
table: "DynamicTableMixin"
|
||||
|
||||
def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
|
||||
return self.table[item]
|
||||
|
||||
def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
|
||||
self.table[key] = value
|
||||
|
||||
|
||||
class DynamicTableMixin(BaseModel):
|
||||
"""
|
||||
Mixin to make DynamicTable subclasses behave like tables/dataframes
|
||||
|
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
|
|||
# special case where pandas will unpack a pydantic model
|
||||
# into {n_fields} rows, rather than keeping it in a dict
|
||||
val = Series([val])
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
|
||||
# special case where we are returning a row in a ragged array,
|
||||
# same as above - prevent pandas pivoting to long
|
||||
val = Series([val])
|
||||
|
|
|
@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
|
|||
return len(self.value)
|
||||
|
||||
|
||||
class DynamicTableRegionMixin(BaseModel):
|
||||
"""
|
||||
Mixin to allow indexing references to regions of dynamictables
|
||||
"""
|
||||
|
||||
table: "DynamicTableMixin"
|
||||
|
||||
def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
|
||||
return self.table[item]
|
||||
|
||||
def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
|
||||
self.table[key] = value
|
||||
|
||||
|
||||
class DynamicTableMixin(BaseModel):
|
||||
"""
|
||||
Mixin to make DynamicTable subclasses behave like tables/dataframes
|
||||
|
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
|
|||
# special case where pandas will unpack a pydantic model
|
||||
# into {n_fields} rows, rather than keeping it in a dict
|
||||
val = Series([val])
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
|
||||
# special case where we are returning a row in a ragged array,
|
||||
# same as above - prevent pandas pivoting to long
|
||||
val = Series([val])
|
||||
|
|
|
@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
|
|||
return len(self.value)
|
||||
|
||||
|
||||
class DynamicTableRegionMixin(BaseModel):
|
||||
"""
|
||||
Mixin to allow indexing references to regions of dynamictables
|
||||
"""
|
||||
|
||||
table: "DynamicTableMixin"
|
||||
|
||||
def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
|
||||
return self.table[item]
|
||||
|
||||
def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
|
||||
self.table[key] = value
|
||||
|
||||
|
||||
class DynamicTableMixin(BaseModel):
|
||||
"""
|
||||
Mixin to make DynamicTable subclasses behave like tables/dataframes
|
||||
|
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
|
|||
# special case where pandas will unpack a pydantic model
|
||||
# into {n_fields} rows, rather than keeping it in a dict
|
||||
val = Series([val])
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
|
||||
# special case where we are returning a row in a ragged array,
|
||||
# same as above - prevent pandas pivoting to long
|
||||
val = Series([val])
|
||||
|
|
|
@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
|
|||
return len(self.value)
|
||||
|
||||
|
||||
class DynamicTableRegionMixin(BaseModel):
|
||||
"""
|
||||
Mixin to allow indexing references to regions of dynamictables
|
||||
"""
|
||||
|
||||
table: "DynamicTableMixin"
|
||||
|
||||
def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
|
||||
return self.table[item]
|
||||
|
||||
def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
|
||||
self.table[key] = value
|
||||
|
||||
|
||||
class DynamicTableMixin(BaseModel):
|
||||
"""
|
||||
Mixin to make DynamicTable subclasses behave like tables/dataframes
|
||||
|
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
|
|||
# special case where pandas will unpack a pydantic model
|
||||
# into {n_fields} rows, rather than keeping it in a dict
|
||||
val = Series([val])
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
|
||||
# special case where we are returning a row in a ragged array,
|
||||
# same as above - prevent pandas pivoting to long
|
||||
val = Series([val])
|
||||
|
|
|
@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
|
|||
return len(self.value)
|
||||
|
||||
|
||||
class DynamicTableRegionMixin(BaseModel):
|
||||
"""
|
||||
Mixin to allow indexing references to regions of dynamictables
|
||||
"""
|
||||
|
||||
table: "DynamicTableMixin"
|
||||
|
||||
def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
|
||||
return self.table[item]
|
||||
|
||||
def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
|
||||
self.table[key] = value
|
||||
|
||||
|
||||
class DynamicTableMixin(BaseModel):
|
||||
"""
|
||||
Mixin to make DynamicTable subclasses behave like tables/dataframes
|
||||
|
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
|
|||
# special case where pandas will unpack a pydantic model
|
||||
# into {n_fields} rows, rather than keeping it in a dict
|
||||
val = Series([val])
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
|
||||
# special case where we are returning a row in a ragged array,
|
||||
# same as above - prevent pandas pivoting to long
|
||||
val = Series([val])
|
||||
|
|
|
@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
|
|||
return len(self.value)
|
||||
|
||||
|
||||
class DynamicTableRegionMixin(BaseModel):
|
||||
"""
|
||||
Mixin to allow indexing references to regions of dynamictables
|
||||
"""
|
||||
|
||||
table: "DynamicTableMixin"
|
||||
|
||||
def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
|
||||
return self.table[item]
|
||||
|
||||
def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
|
||||
self.table[key] = value
|
||||
|
||||
|
||||
class DynamicTableMixin(BaseModel):
|
||||
"""
|
||||
Mixin to make DynamicTable subclasses behave like tables/dataframes
|
||||
|
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
|
|||
# special case where pandas will unpack a pydantic model
|
||||
# into {n_fields} rows, rather than keeping it in a dict
|
||||
val = Series([val])
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
|
||||
# special case where we are returning a row in a ragged array,
|
||||
# same as above - prevent pandas pivoting to long
|
||||
val = Series([val])
|
||||
|
|
|
@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
|
|||
return len(self.value)
|
||||
|
||||
|
||||
class DynamicTableRegionMixin(BaseModel):
|
||||
"""
|
||||
Mixin to allow indexing references to regions of dynamictables
|
||||
"""
|
||||
|
||||
table: "DynamicTableMixin"
|
||||
|
||||
def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
|
||||
return self.table[item]
|
||||
|
||||
def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
|
||||
self.table[key] = value
|
||||
|
||||
|
||||
class DynamicTableMixin(BaseModel):
|
||||
"""
|
||||
Mixin to make DynamicTable subclasses behave like tables/dataframes
|
||||
|
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
|
|||
# special case where pandas will unpack a pydantic model
|
||||
# into {n_fields} rows, rather than keeping it in a dict
|
||||
val = Series([val])
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
|
||||
# special case where we are returning a row in a ragged array,
|
||||
# same as above - prevent pandas pivoting to long
|
||||
val = Series([val])
|
||||
|
|
|
@ -1,7 +1,12 @@
|
|||
from __future__ import annotations
|
||||
from ...hdmf_common.v1_8_0.hdmf_common_base import Data
|
||||
from datetime import datetime, date
|
||||
from decimal import Decimal
|
||||
from enum import Enum
|
||||
import re
|
||||
import sys
|
||||
from ...hdmf_common.v1_8_0.hdmf_common_base import Data, Container
|
||||
from pandas import DataFrame, Series
|
||||
from typing import Any, ClassVar, List, Dict, Optional, Union, overload, Tuple
|
||||
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, overload, Tuple
|
||||
from pydantic import (
|
||||
BaseModel,
|
||||
ConfigDict,
|
||||
|
@ -167,6 +172,20 @@ class VectorIndexMixin(BaseModel):
|
|||
return len(self.value)
|
||||
|
||||
|
||||
class DynamicTableRegionMixin(BaseModel):
|
||||
"""
|
||||
Mixin to allow indexing references to regions of dynamictables
|
||||
"""
|
||||
|
||||
table: "DynamicTableMixin"
|
||||
|
||||
def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
|
||||
return self.table[item]
|
||||
|
||||
def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
|
||||
self.table[key] = value
|
||||
|
||||
|
||||
class DynamicTableMixin(BaseModel):
|
||||
"""
|
||||
Mixin to make DynamicTable subclasses behave like tables/dataframes
|
||||
|
@ -277,7 +296,7 @@ class DynamicTableMixin(BaseModel):
|
|||
# special case where pandas will unpack a pydantic model
|
||||
# into {n_fields} rows, rather than keeping it in a dict
|
||||
val = Series([val])
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and val.shape[0] > 1:
|
||||
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
|
||||
# special case where we are returning a row in a ragged array,
|
||||
# same as above - prevent pandas pivoting to long
|
||||
val = Series([val])
|
||||
|
|
|
@ -14,180 +14,163 @@ Pydantic models that behave like pandas dataframes
|
|||
left in this module since it is necessary for it to make sense.
|
||||
"""
|
||||
|
||||
import ast
|
||||
from typing import Any, Dict, Optional, Type
|
||||
#
|
||||
# class DataFrame(BaseModel, pd.DataFrame):
|
||||
# """
|
||||
# Pydantic model root class that mimics a pandas dataframe.
|
||||
#
|
||||
# Notes:
|
||||
#
|
||||
# The synchronization between the underlying lists in the pydantic model
|
||||
# and the derived dataframe is partial, and at the moment unidirectional.
|
||||
# This class is primarily intended for reading from tables stored in
|
||||
# NWB files rather than being able to manipulate them.
|
||||
#
|
||||
# The dataframe IS updated when new values are *assigned* to a field.
|
||||
#
|
||||
# eg.::
|
||||
#
|
||||
# MyModel.fieldval = [1,2,3]
|
||||
#
|
||||
# But the dataframe is NOT updated when existing values are updated.
|
||||
#
|
||||
# eg.::
|
||||
#
|
||||
# MyModel.fieldval.append(4)
|
||||
#
|
||||
# In that case you need to call :meth:`.update_df` manually.
|
||||
#
|
||||
# Additionally, if the dataframe is modified, the underlying lists are NOT updated,
|
||||
# but when the model is dumped to a dictionary or serialized, the dataframe IS used,
|
||||
# so changes will be reflected then.
|
||||
#
|
||||
# Fields that shadow pandas methods WILL prevent them from being usable, except
|
||||
# by directly accessing the dataframe like ``mymodel._df``
|
||||
#
|
||||
# """
|
||||
#
|
||||
# _df: pd.DataFrame = None
|
||||
# model_config = ConfigDict(validate_assignment=True)
|
||||
#
|
||||
# def __init__(self, **kwargs):
|
||||
# # pdb.set_trace()
|
||||
# super().__init__(**kwargs)
|
||||
#
|
||||
# self._df = self.__make_df()
|
||||
#
|
||||
# def __make_df(self) -> pd.DataFrame:
|
||||
# # make dict that can handle ragged arrays and NoneTypes
|
||||
# items = {k: v for k, v in self.__dict__.items() if k in self.model_fields}
|
||||
#
|
||||
# df_dict = {
|
||||
# k: (pd.Series(v) if isinstance(v, list) else pd.Series([v])) for k, v in items.items()
|
||||
# }
|
||||
# df = pd.DataFrame(df_dict)
|
||||
# # replace Nans with None
|
||||
# df = df.fillna(np.nan).replace([np.nan], [None])
|
||||
# return df
|
||||
#
|
||||
# def update_df(self) -> None:
|
||||
# """
|
||||
# Update the internal dataframe in the case that the model values are changed
|
||||
# in a way that we can't detect, like appending to one of the lists.
|
||||
#
|
||||
# """
|
||||
# self._df = self.__make_df()
|
||||
#
|
||||
# def __getattr__(self, item: str):
|
||||
# """
|
||||
# Mimic pandas dataframe and pydantic model behavior
|
||||
# """
|
||||
# if item in ("df", "_df"):
|
||||
# return self.__pydantic_private__["_df"]
|
||||
# elif item in self.model_fields:
|
||||
# return self._df[item]
|
||||
# else:
|
||||
# try:
|
||||
# return object.__getattribute__(self._df, item)
|
||||
# except AttributeError:
|
||||
# return object.__getattribute__(self, item)
|
||||
#
|
||||
# @model_validator(mode="after")
|
||||
# def recreate_df(self) -> None:
|
||||
# """
|
||||
# Remake DF when validating (eg. when updating values on assignment)
|
||||
# """
|
||||
# self.update_df()
|
||||
#
|
||||
# @model_serializer(mode="wrap", when_used="always")
|
||||
# def serialize_model(self, nxt: SerializerFunctionWrapHandler) -> Dict[str, Any]:
|
||||
# """
|
||||
# We don't handle values that are changed on the dataframe by directly
|
||||
# updating the underlying model lists, but we implicitly handle them
|
||||
# by using the dataframe as the source when serializing
|
||||
# """
|
||||
# if self._df is None:
|
||||
# return nxt(self)
|
||||
# else:
|
||||
# out = self._df.to_dict("list")
|
||||
# # remove Nones
|
||||
# out = {k: [inner_v for inner_v in v if inner_v is not None] for k, v in out.items()}
|
||||
# return nxt(self.__class__(**out))
|
||||
|
||||
import h5py
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pydantic import (
|
||||
BaseModel,
|
||||
ConfigDict,
|
||||
SerializerFunctionWrapHandler,
|
||||
model_serializer,
|
||||
model_validator,
|
||||
)
|
||||
|
||||
from nwb_linkml.maps.hdmf import dereference_reference_vector, model_from_dynamictable
|
||||
from nwb_linkml.types.hdf5 import HDF5_Path
|
||||
|
||||
|
||||
class DataFrame(BaseModel, pd.DataFrame):
|
||||
"""
|
||||
Pydantic model root class that mimics a pandas dataframe.
|
||||
|
||||
Notes:
|
||||
|
||||
The synchronization between the underlying lists in the pydantic model
|
||||
and the derived dataframe is partial, and at the moment unidirectional.
|
||||
This class is primarily intended for reading from tables stored in
|
||||
NWB files rather than being able to manipulate them.
|
||||
|
||||
The dataframe IS updated when new values are *assigned* to a field.
|
||||
|
||||
eg.::
|
||||
|
||||
MyModel.fieldval = [1,2,3]
|
||||
|
||||
But the dataframe is NOT updated when existing values are updated.
|
||||
|
||||
eg.::
|
||||
|
||||
MyModel.fieldval.append(4)
|
||||
|
||||
In that case you need to call :meth:`.update_df` manually.
|
||||
|
||||
Additionally, if the dataframe is modified, the underlying lists are NOT updated,
|
||||
but when the model is dumped to a dictionary or serialized, the dataframe IS used,
|
||||
so changes will be reflected then.
|
||||
|
||||
Fields that shadow pandas methods WILL prevent them from being usable, except
|
||||
by directly accessing the dataframe like ``mymodel._df``
|
||||
|
||||
"""
|
||||
|
||||
_df: pd.DataFrame = None
|
||||
model_config = ConfigDict(validate_assignment=True)
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
# pdb.set_trace()
|
||||
super().__init__(**kwargs)
|
||||
|
||||
self._df = self.__make_df()
|
||||
|
||||
def __make_df(self) -> pd.DataFrame:
|
||||
# make dict that can handle ragged arrays and NoneTypes
|
||||
items = {k: v for k, v in self.__dict__.items() if k in self.model_fields}
|
||||
|
||||
df_dict = {
|
||||
k: (pd.Series(v) if isinstance(v, list) else pd.Series([v])) for k, v in items.items()
|
||||
}
|
||||
df = pd.DataFrame(df_dict)
|
||||
# replace Nans with None
|
||||
df = df.fillna(np.nan).replace([np.nan], [None])
|
||||
return df
|
||||
|
||||
def update_df(self) -> None:
|
||||
"""
|
||||
Update the internal dataframe in the case that the model values are changed
|
||||
in a way that we can't detect, like appending to one of the lists.
|
||||
|
||||
"""
|
||||
self._df = self.__make_df()
|
||||
|
||||
def __getattr__(self, item: str):
|
||||
"""
|
||||
Mimic pandas dataframe and pydantic model behavior
|
||||
"""
|
||||
if item in ("df", "_df"):
|
||||
return self.__pydantic_private__["_df"]
|
||||
elif item in self.model_fields:
|
||||
return self._df[item]
|
||||
else:
|
||||
try:
|
||||
return object.__getattribute__(self._df, item)
|
||||
except AttributeError:
|
||||
return object.__getattribute__(self, item)
|
||||
|
||||
@model_validator(mode="after")
|
||||
def recreate_df(self) -> None:
|
||||
"""
|
||||
Remake DF when validating (eg. when updating values on assignment)
|
||||
"""
|
||||
self.update_df()
|
||||
|
||||
@model_serializer(mode="wrap", when_used="always")
|
||||
def serialize_model(self, nxt: SerializerFunctionWrapHandler) -> Dict[str, Any]:
|
||||
"""
|
||||
We don't handle values that are changed on the dataframe by directly
|
||||
updating the underlying model lists, but we implicitly handle them
|
||||
by using the dataframe as the source when serializing
|
||||
"""
|
||||
if self._df is None:
|
||||
return nxt(self)
|
||||
else:
|
||||
out = self._df.to_dict("list")
|
||||
# remove Nones
|
||||
out = {k: [inner_v for inner_v in v if inner_v is not None] for k, v in out.items()}
|
||||
return nxt(self.__class__(**out))
|
||||
|
||||
|
||||
def dynamictable_to_df(
|
||||
group: h5py.Group, model: Optional[Type[DataFrame]] = None, base: Optional[BaseModel] = None
|
||||
) -> DataFrame:
|
||||
"""Generate a dataframe from an NDB DynamicTable"""
|
||||
if model is None:
|
||||
model = model_from_dynamictable(group, base)
|
||||
|
||||
items = {}
|
||||
for col, _col_type in model.model_fields.items():
|
||||
if col not in group:
|
||||
continue
|
||||
idxname = col + "_index"
|
||||
if idxname in group:
|
||||
idx = group.get(idxname)[:]
|
||||
data = group.get(col)[idx - 1]
|
||||
else:
|
||||
data = group.get(col)[:]
|
||||
|
||||
# Handle typing inside of list
|
||||
if isinstance(data[0], bytes):
|
||||
data = data.astype("unicode")
|
||||
if isinstance(data[0], str):
|
||||
# lists and other compound data types can get flattened out to strings when stored
|
||||
# so we try and literal eval and recover them
|
||||
try:
|
||||
eval_type = type(ast.literal_eval(data[0]))
|
||||
except (ValueError, SyntaxError):
|
||||
eval_type = str
|
||||
|
||||
# if we've found one of those, get the data type within it.
|
||||
if eval_type is not str:
|
||||
eval_list = []
|
||||
for item in data.tolist():
|
||||
try:
|
||||
eval_list.append(ast.literal_eval(item))
|
||||
except ValueError:
|
||||
eval_list.append(None)
|
||||
data = eval_list
|
||||
elif isinstance(data[0], h5py.h5r.Reference):
|
||||
data = [HDF5_Path(group[d].name) for d in data]
|
||||
elif isinstance(data[0], tuple) and any(
|
||||
[isinstance(d, h5py.h5r.Reference) for d in data[0]]
|
||||
):
|
||||
# references stored inside a tuple, reference + location.
|
||||
# dereference them!?
|
||||
dset = group.get(col)
|
||||
names = dset.dtype.names
|
||||
if names is not None and names[0] == "idx_start" and names[1] == "count":
|
||||
data = dereference_reference_vector(dset, data)
|
||||
|
||||
else:
|
||||
data = data.tolist()
|
||||
|
||||
# After list, check if we need to put this thing inside of
|
||||
# another class, as indicated by the enclosing model
|
||||
|
||||
items[col] = data
|
||||
|
||||
return model(hdf5_path=group.name, name=group.name.split("/")[-1], **items)
|
||||
#
|
||||
# def dynamictable_to_df(
|
||||
# group: h5py.Group, model: Optional[Type[DataFrame]] = None, base: Optional[BaseModel] = None
|
||||
# ) -> DataFrame:
|
||||
# """Generate a dataframe from an NDB DynamicTable"""
|
||||
# if model is None:
|
||||
# model = model_from_dynamictable(group, base)
|
||||
#
|
||||
# items = {}
|
||||
# for col, _col_type in model.model_fields.items():
|
||||
# if col not in group:
|
||||
# continue
|
||||
# idxname = col + "_index"
|
||||
# if idxname in group:
|
||||
# idx = group.get(idxname)[:]
|
||||
# data = group.get(col)[idx - 1]
|
||||
# else:
|
||||
# data = group.get(col)[:]
|
||||
#
|
||||
# # Handle typing inside of list
|
||||
# if isinstance(data[0], bytes):
|
||||
# data = data.astype("unicode")
|
||||
# if isinstance(data[0], str):
|
||||
# # lists and other compound data types can get flattened out to strings when stored
|
||||
# # so we try and literal eval and recover them
|
||||
# try:
|
||||
# eval_type = type(ast.literal_eval(data[0]))
|
||||
# except (ValueError, SyntaxError):
|
||||
# eval_type = str
|
||||
#
|
||||
# # if we've found one of those, get the data type within it.
|
||||
# if eval_type is not str:
|
||||
# eval_list = []
|
||||
# for item in data.tolist():
|
||||
# try:
|
||||
# eval_list.append(ast.literal_eval(item))
|
||||
# except ValueError:
|
||||
# eval_list.append(None)
|
||||
# data = eval_list
|
||||
# elif isinstance(data[0], h5py.h5r.Reference):
|
||||
# data = [HDF5_Path(group[d].name) for d in data]
|
||||
# elif isinstance(data[0], tuple) and any(
|
||||
# [isinstance(d, h5py.h5r.Reference) for d in data[0]]
|
||||
# ):
|
||||
# # references stored inside a tuple, reference + location.
|
||||
# # dereference them!?
|
||||
# dset = group.get(col)
|
||||
# names = dset.dtype.names
|
||||
# if names is not None and names[0] == "idx_start" and names[1] == "count":
|
||||
# data = dereference_reference_vector(dset, data)
|
||||
#
|
||||
# else:
|
||||
# data = data.tolist()
|
||||
#
|
||||
# # After list, check if we need to put this thing inside of
|
||||
# # another class, as indicated by the enclosing model
|
||||
#
|
||||
# items[col] = data
|
||||
#
|
||||
# return model(hdf5_path=group.name, name=group.name.split("/")[-1], **items)
|
||||
|
|
|
@ -67,8 +67,9 @@ def units(request) -> Tuple[Units, list[np.ndarray], np.ndarray]:
|
|||
"""
|
||||
|
||||
n_units = 24
|
||||
generator = np.random.default_rng()
|
||||
spike_times = [
|
||||
np.full(shape=np.random.randint(10, 50), fill_value=i, dtype=float) for i in range(n_units)
|
||||
np.full(shape=generator.integers(10, 50), fill_value=i, dtype=float) for i in range(n_units)
|
||||
]
|
||||
spike_idx = []
|
||||
for i in range(n_units):
|
||||
|
@ -141,6 +142,19 @@ def test_dynamictable_indexing(electrical_series):
|
|||
assert subsection.dtypes.values.tolist() == dtypes[0:3]
|
||||
|
||||
|
||||
def test_dynamictable_region(electrical_series):
|
||||
"""
|
||||
Dynamictableregion should
|
||||
Args:
|
||||
electrical_series:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
series, electrodes = electrical_series
|
||||
|
||||
|
||||
|
||||
def test_dynamictable_ragged_arrays(units):
|
||||
"""
|
||||
Should be able to index ragged arrays using an implicit _index column
|
||||
|
|
|
@ -4,8 +4,9 @@ from pathlib import Path
|
|||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
from nptyping import Shape, UByte
|
||||
from numpydantic import NDArray
|
||||
from numpydantic import NDArray, Shape
|
||||
import numpy as np
|
||||
|
||||
|
||||
import nwb_linkml
|
||||
from nwb_linkml.maps.naming import version_module_case
|
||||
|
@ -77,7 +78,7 @@ def test_linkml_build_from_yaml(tmp_output_dir):
|
|||
"comments": Optional[str],
|
||||
"data": "TimeSeriesData",
|
||||
"timestamps": "Optional", # __name__ just gets the first part of Optional[TimeSeriesTimestamps]
|
||||
"control": Optional[NDArray[Shape["* num_times"], UByte]],
|
||||
"control": Optional[NDArray[Shape["* num_times"], np.uint8]],
|
||||
},
|
||||
)
|
||||
],
|
||||
|
|
|
@ -58,6 +58,10 @@ select = [
|
|||
"D210", "D211",
|
||||
# emptiness
|
||||
"D419",
|
||||
# perf
|
||||
"PERF",
|
||||
# numpy
|
||||
"NPY",
|
||||
]
|
||||
ignore = [
|
||||
# annotations for *args and **kwargs
|
||||
|
|
|
@ -3,6 +3,7 @@ import os
|
|||
import sys
|
||||
import traceback
|
||||
from pdb import post_mortem
|
||||
import subprocess
|
||||
|
||||
from argparse import ArgumentParser
|
||||
from pathlib import Path
|
||||
|
@ -179,6 +180,8 @@ def generate_versions(
|
|||
with open(pydantic_path / "__init__.py", "w") as initfile:
|
||||
initfile.write(f"from .pydantic.core.{latest_version.name}.namespace import *")
|
||||
|
||||
subprocess.run(["black", "."])
|
||||
|
||||
finally:
|
||||
if len(failed_versions) > 0:
|
||||
print("Failed Building Versions:")
|
||||
|
|
Loading…
Reference in a new issue