From cebb21993d3e943d1c5a089a859becd75f2d2407 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Wed, 7 Aug 2024 19:22:29 -0700 Subject: [PATCH] actually fix indexing --- nwb_linkml/src/nwb_linkml/includes/hdmf.py | 16 ++++---- .../hdmf_common/v1_1_0/hdmf_common_table.py | 19 +++++----- .../hdmf_common/v1_1_2/hdmf_common_table.py | 19 +++++----- .../hdmf_common/v1_1_3/hdmf_common_table.py | 19 +++++----- .../hdmf_common/v1_2_0/hdmf_common_table.py | 19 +++++----- .../hdmf_common/v1_2_1/hdmf_common_table.py | 19 +++++----- .../hdmf_common/v1_3_0/hdmf_common_table.py | 19 +++++----- .../hdmf_common/v1_4_0/hdmf_common_table.py | 19 +++++----- .../hdmf_common/v1_5_0/hdmf_common_table.py | 19 +++++----- .../hdmf_common/v1_5_1/hdmf_common_table.py | 19 +++++----- .../hdmf_common/v1_6_0/hdmf_common_table.py | 19 +++++----- .../hdmf_common/v1_7_0/hdmf_common_table.py | 19 +++++----- .../hdmf_common/v1_8_0/hdmf_common_table.py | 19 +++++----- nwb_linkml/tests/test_includes/test_hdmf.py | 38 +++++++++---------- 14 files changed, 134 insertions(+), 148 deletions(-) diff --git a/nwb_linkml/src/nwb_linkml/includes/hdmf.py b/nwb_linkml/src/nwb_linkml/includes/hdmf.py index 506f098..1e0c3f7 100644 --- a/nwb_linkml/src/nwb_linkml/includes/hdmf.py +++ b/nwb_linkml/src/nwb_linkml/includes/hdmf.py @@ -109,7 +109,7 @@ class DynamicTableMixin(BaseModel): if isinstance(item, str): return self._columns[item] if isinstance(item, (int, slice, np.integer, np.ndarray)): - return DataFrame.from_dict(self._slice_range(item)) + data = self._slice_range(item) elif isinstance(item, tuple): if len(item) != 2: raise ValueError( @@ -127,10 +127,12 @@ class DynamicTableMixin(BaseModel): return self._columns[cols][rows] data = self._slice_range(rows, cols) - return DataFrame.from_dict(data) else: raise ValueError(f"Unsure how to get item with key {item}") + # cast to DF + return DataFrame(data) + def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None ) -> Dict[str, Union[list, "NDArray", "VectorData"]]: @@ -145,14 +147,10 @@ class DynamicTableMixin(BaseModel): else: val = self._columns[k][rows] - if isinstance(val, BaseModel): - # special case where pandas will unpack a pydantic model - # into {n_fields} rows, rather than keeping it in a dict - val = Series([val]) - elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1: - # special case where we are returning a row in a ragged array, - # same as above - prevent pandas pivoting to long + # scalars need to be wrapped in series for pandas + if not isinstance(rows, (Iterable, slice)): val = Series([val]) + data[k] = val return data diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_0/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_0/hdmf_common_table.py index d75a127..41ca9bd 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_0/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_0/hdmf_common_table.py @@ -258,7 +258,7 @@ class DynamicTableMixin(BaseModel): ]: ... @overload - def __getitem__(self, item: slice) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__( self, @@ -266,6 +266,7 @@ class DynamicTableMixin(BaseModel): str, int, slice, + "NDArray", Tuple[int, Union[int, str]], Tuple[Union[int, slice], ...], ], @@ -286,7 +287,7 @@ class DynamicTableMixin(BaseModel): if isinstance(item, str): return self._columns[item] if isinstance(item, (int, slice, np.integer, np.ndarray)): - return DataFrame.from_dict(self._slice_range(item)) + data = self._slice_range(item) elif isinstance(item, tuple): if len(item) != 2: raise ValueError( @@ -304,10 +305,12 @@ class DynamicTableMixin(BaseModel): return self._columns[cols][rows] data = self._slice_range(rows, cols) - return DataFrame.from_dict(data) else: raise ValueError(f"Unsure how to get item with key {item}") + # cast to DF + return DataFrame(data) + def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None ) -> Dict[str, Union[list, "NDArray", "VectorData"]]: @@ -322,14 +325,10 @@ class DynamicTableMixin(BaseModel): else: val = self._columns[k][rows] - if isinstance(val, BaseModel): - # special case where pandas will unpack a pydantic model - # into {n_fields} rows, rather than keeping it in a dict - val = Series([val]) - elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1: - # special case where we are returning a row in a ragged array, - # same as above - prevent pandas pivoting to long + # scalars need to be wrapped in series for pandas + if not isinstance(rows, (Iterable, slice)): val = Series([val]) + data[k] = val return data diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_2/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_2/hdmf_common_table.py index 3882294..927d9c0 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_2/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_2/hdmf_common_table.py @@ -258,7 +258,7 @@ class DynamicTableMixin(BaseModel): ]: ... @overload - def __getitem__(self, item: slice) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__( self, @@ -266,6 +266,7 @@ class DynamicTableMixin(BaseModel): str, int, slice, + "NDArray", Tuple[int, Union[int, str]], Tuple[Union[int, slice], ...], ], @@ -286,7 +287,7 @@ class DynamicTableMixin(BaseModel): if isinstance(item, str): return self._columns[item] if isinstance(item, (int, slice, np.integer, np.ndarray)): - return DataFrame.from_dict(self._slice_range(item)) + data = self._slice_range(item) elif isinstance(item, tuple): if len(item) != 2: raise ValueError( @@ -304,10 +305,12 @@ class DynamicTableMixin(BaseModel): return self._columns[cols][rows] data = self._slice_range(rows, cols) - return DataFrame.from_dict(data) else: raise ValueError(f"Unsure how to get item with key {item}") + # cast to DF + return DataFrame(data) + def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None ) -> Dict[str, Union[list, "NDArray", "VectorData"]]: @@ -322,14 +325,10 @@ class DynamicTableMixin(BaseModel): else: val = self._columns[k][rows] - if isinstance(val, BaseModel): - # special case where pandas will unpack a pydantic model - # into {n_fields} rows, rather than keeping it in a dict - val = Series([val]) - elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1: - # special case where we are returning a row in a ragged array, - # same as above - prevent pandas pivoting to long + # scalars need to be wrapped in series for pandas + if not isinstance(rows, (Iterable, slice)): val = Series([val]) + data[k] = val return data diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_3/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_3/hdmf_common_table.py index 8df75da..01324a9 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_3/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_3/hdmf_common_table.py @@ -258,7 +258,7 @@ class DynamicTableMixin(BaseModel): ]: ... @overload - def __getitem__(self, item: slice) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__( self, @@ -266,6 +266,7 @@ class DynamicTableMixin(BaseModel): str, int, slice, + "NDArray", Tuple[int, Union[int, str]], Tuple[Union[int, slice], ...], ], @@ -286,7 +287,7 @@ class DynamicTableMixin(BaseModel): if isinstance(item, str): return self._columns[item] if isinstance(item, (int, slice, np.integer, np.ndarray)): - return DataFrame.from_dict(self._slice_range(item)) + data = self._slice_range(item) elif isinstance(item, tuple): if len(item) != 2: raise ValueError( @@ -304,10 +305,12 @@ class DynamicTableMixin(BaseModel): return self._columns[cols][rows] data = self._slice_range(rows, cols) - return DataFrame.from_dict(data) else: raise ValueError(f"Unsure how to get item with key {item}") + # cast to DF + return DataFrame(data) + def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None ) -> Dict[str, Union[list, "NDArray", "VectorData"]]: @@ -322,14 +325,10 @@ class DynamicTableMixin(BaseModel): else: val = self._columns[k][rows] - if isinstance(val, BaseModel): - # special case where pandas will unpack a pydantic model - # into {n_fields} rows, rather than keeping it in a dict - val = Series([val]) - elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1: - # special case where we are returning a row in a ragged array, - # same as above - prevent pandas pivoting to long + # scalars need to be wrapped in series for pandas + if not isinstance(rows, (Iterable, slice)): val = Series([val]) + data[k] = val return data diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_2_0/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_2_0/hdmf_common_table.py index 0823281..f9f4450 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_2_0/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_2_0/hdmf_common_table.py @@ -259,7 +259,7 @@ class DynamicTableMixin(BaseModel): ]: ... @overload - def __getitem__(self, item: slice) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__( self, @@ -267,6 +267,7 @@ class DynamicTableMixin(BaseModel): str, int, slice, + "NDArray", Tuple[int, Union[int, str]], Tuple[Union[int, slice], ...], ], @@ -287,7 +288,7 @@ class DynamicTableMixin(BaseModel): if isinstance(item, str): return self._columns[item] if isinstance(item, (int, slice, np.integer, np.ndarray)): - return DataFrame.from_dict(self._slice_range(item)) + data = self._slice_range(item) elif isinstance(item, tuple): if len(item) != 2: raise ValueError( @@ -305,10 +306,12 @@ class DynamicTableMixin(BaseModel): return self._columns[cols][rows] data = self._slice_range(rows, cols) - return DataFrame.from_dict(data) else: raise ValueError(f"Unsure how to get item with key {item}") + # cast to DF + return DataFrame(data) + def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None ) -> Dict[str, Union[list, "NDArray", "VectorData"]]: @@ -323,14 +326,10 @@ class DynamicTableMixin(BaseModel): else: val = self._columns[k][rows] - if isinstance(val, BaseModel): - # special case where pandas will unpack a pydantic model - # into {n_fields} rows, rather than keeping it in a dict - val = Series([val]) - elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1: - # special case where we are returning a row in a ragged array, - # same as above - prevent pandas pivoting to long + # scalars need to be wrapped in series for pandas + if not isinstance(rows, (Iterable, slice)): val = Series([val]) + data[k] = val return data diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_2_1/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_2_1/hdmf_common_table.py index 88405cc..e297747 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_2_1/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_2_1/hdmf_common_table.py @@ -259,7 +259,7 @@ class DynamicTableMixin(BaseModel): ]: ... @overload - def __getitem__(self, item: slice) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__( self, @@ -267,6 +267,7 @@ class DynamicTableMixin(BaseModel): str, int, slice, + "NDArray", Tuple[int, Union[int, str]], Tuple[Union[int, slice], ...], ], @@ -287,7 +288,7 @@ class DynamicTableMixin(BaseModel): if isinstance(item, str): return self._columns[item] if isinstance(item, (int, slice, np.integer, np.ndarray)): - return DataFrame.from_dict(self._slice_range(item)) + data = self._slice_range(item) elif isinstance(item, tuple): if len(item) != 2: raise ValueError( @@ -305,10 +306,12 @@ class DynamicTableMixin(BaseModel): return self._columns[cols][rows] data = self._slice_range(rows, cols) - return DataFrame.from_dict(data) else: raise ValueError(f"Unsure how to get item with key {item}") + # cast to DF + return DataFrame(data) + def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None ) -> Dict[str, Union[list, "NDArray", "VectorData"]]: @@ -323,14 +326,10 @@ class DynamicTableMixin(BaseModel): else: val = self._columns[k][rows] - if isinstance(val, BaseModel): - # special case where pandas will unpack a pydantic model - # into {n_fields} rows, rather than keeping it in a dict - val = Series([val]) - elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1: - # special case where we are returning a row in a ragged array, - # same as above - prevent pandas pivoting to long + # scalars need to be wrapped in series for pandas + if not isinstance(rows, (Iterable, slice)): val = Series([val]) + data[k] = val return data diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_3_0/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_3_0/hdmf_common_table.py index 545f0e9..50eeb23 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_3_0/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_3_0/hdmf_common_table.py @@ -259,7 +259,7 @@ class DynamicTableMixin(BaseModel): ]: ... @overload - def __getitem__(self, item: slice) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__( self, @@ -267,6 +267,7 @@ class DynamicTableMixin(BaseModel): str, int, slice, + "NDArray", Tuple[int, Union[int, str]], Tuple[Union[int, slice], ...], ], @@ -287,7 +288,7 @@ class DynamicTableMixin(BaseModel): if isinstance(item, str): return self._columns[item] if isinstance(item, (int, slice, np.integer, np.ndarray)): - return DataFrame.from_dict(self._slice_range(item)) + data = self._slice_range(item) elif isinstance(item, tuple): if len(item) != 2: raise ValueError( @@ -305,10 +306,12 @@ class DynamicTableMixin(BaseModel): return self._columns[cols][rows] data = self._slice_range(rows, cols) - return DataFrame.from_dict(data) else: raise ValueError(f"Unsure how to get item with key {item}") + # cast to DF + return DataFrame(data) + def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None ) -> Dict[str, Union[list, "NDArray", "VectorData"]]: @@ -323,14 +326,10 @@ class DynamicTableMixin(BaseModel): else: val = self._columns[k][rows] - if isinstance(val, BaseModel): - # special case where pandas will unpack a pydantic model - # into {n_fields} rows, rather than keeping it in a dict - val = Series([val]) - elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1: - # special case where we are returning a row in a ragged array, - # same as above - prevent pandas pivoting to long + # scalars need to be wrapped in series for pandas + if not isinstance(rows, (Iterable, slice)): val = Series([val]) + data[k] = val return data diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_4_0/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_4_0/hdmf_common_table.py index c3fb548..affd5dc 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_4_0/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_4_0/hdmf_common_table.py @@ -259,7 +259,7 @@ class DynamicTableMixin(BaseModel): ]: ... @overload - def __getitem__(self, item: slice) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__( self, @@ -267,6 +267,7 @@ class DynamicTableMixin(BaseModel): str, int, slice, + "NDArray", Tuple[int, Union[int, str]], Tuple[Union[int, slice], ...], ], @@ -287,7 +288,7 @@ class DynamicTableMixin(BaseModel): if isinstance(item, str): return self._columns[item] if isinstance(item, (int, slice, np.integer, np.ndarray)): - return DataFrame.from_dict(self._slice_range(item)) + data = self._slice_range(item) elif isinstance(item, tuple): if len(item) != 2: raise ValueError( @@ -305,10 +306,12 @@ class DynamicTableMixin(BaseModel): return self._columns[cols][rows] data = self._slice_range(rows, cols) - return DataFrame.from_dict(data) else: raise ValueError(f"Unsure how to get item with key {item}") + # cast to DF + return DataFrame(data) + def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None ) -> Dict[str, Union[list, "NDArray", "VectorData"]]: @@ -323,14 +326,10 @@ class DynamicTableMixin(BaseModel): else: val = self._columns[k][rows] - if isinstance(val, BaseModel): - # special case where pandas will unpack a pydantic model - # into {n_fields} rows, rather than keeping it in a dict - val = Series([val]) - elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1: - # special case where we are returning a row in a ragged array, - # same as above - prevent pandas pivoting to long + # scalars need to be wrapped in series for pandas + if not isinstance(rows, (Iterable, slice)): val = Series([val]) + data[k] = val return data diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_5_0/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_5_0/hdmf_common_table.py index 5dda9ab..5b99f2c 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_5_0/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_5_0/hdmf_common_table.py @@ -259,7 +259,7 @@ class DynamicTableMixin(BaseModel): ]: ... @overload - def __getitem__(self, item: slice) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__( self, @@ -267,6 +267,7 @@ class DynamicTableMixin(BaseModel): str, int, slice, + "NDArray", Tuple[int, Union[int, str]], Tuple[Union[int, slice], ...], ], @@ -287,7 +288,7 @@ class DynamicTableMixin(BaseModel): if isinstance(item, str): return self._columns[item] if isinstance(item, (int, slice, np.integer, np.ndarray)): - return DataFrame.from_dict(self._slice_range(item)) + data = self._slice_range(item) elif isinstance(item, tuple): if len(item) != 2: raise ValueError( @@ -305,10 +306,12 @@ class DynamicTableMixin(BaseModel): return self._columns[cols][rows] data = self._slice_range(rows, cols) - return DataFrame.from_dict(data) else: raise ValueError(f"Unsure how to get item with key {item}") + # cast to DF + return DataFrame(data) + def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None ) -> Dict[str, Union[list, "NDArray", "VectorData"]]: @@ -323,14 +326,10 @@ class DynamicTableMixin(BaseModel): else: val = self._columns[k][rows] - if isinstance(val, BaseModel): - # special case where pandas will unpack a pydantic model - # into {n_fields} rows, rather than keeping it in a dict - val = Series([val]) - elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1: - # special case where we are returning a row in a ragged array, - # same as above - prevent pandas pivoting to long + # scalars need to be wrapped in series for pandas + if not isinstance(rows, (Iterable, slice)): val = Series([val]) + data[k] = val return data diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_5_1/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_5_1/hdmf_common_table.py index 910b294..2eb4675 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_5_1/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_5_1/hdmf_common_table.py @@ -259,7 +259,7 @@ class DynamicTableMixin(BaseModel): ]: ... @overload - def __getitem__(self, item: slice) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__( self, @@ -267,6 +267,7 @@ class DynamicTableMixin(BaseModel): str, int, slice, + "NDArray", Tuple[int, Union[int, str]], Tuple[Union[int, slice], ...], ], @@ -287,7 +288,7 @@ class DynamicTableMixin(BaseModel): if isinstance(item, str): return self._columns[item] if isinstance(item, (int, slice, np.integer, np.ndarray)): - return DataFrame.from_dict(self._slice_range(item)) + data = self._slice_range(item) elif isinstance(item, tuple): if len(item) != 2: raise ValueError( @@ -305,10 +306,12 @@ class DynamicTableMixin(BaseModel): return self._columns[cols][rows] data = self._slice_range(rows, cols) - return DataFrame.from_dict(data) else: raise ValueError(f"Unsure how to get item with key {item}") + # cast to DF + return DataFrame(data) + def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None ) -> Dict[str, Union[list, "NDArray", "VectorData"]]: @@ -323,14 +326,10 @@ class DynamicTableMixin(BaseModel): else: val = self._columns[k][rows] - if isinstance(val, BaseModel): - # special case where pandas will unpack a pydantic model - # into {n_fields} rows, rather than keeping it in a dict - val = Series([val]) - elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1: - # special case where we are returning a row in a ragged array, - # same as above - prevent pandas pivoting to long + # scalars need to be wrapped in series for pandas + if not isinstance(rows, (Iterable, slice)): val = Series([val]) + data[k] = val return data diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_6_0/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_6_0/hdmf_common_table.py index 3df1e78..d578633 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_6_0/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_6_0/hdmf_common_table.py @@ -259,7 +259,7 @@ class DynamicTableMixin(BaseModel): ]: ... @overload - def __getitem__(self, item: slice) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__( self, @@ -267,6 +267,7 @@ class DynamicTableMixin(BaseModel): str, int, slice, + "NDArray", Tuple[int, Union[int, str]], Tuple[Union[int, slice], ...], ], @@ -287,7 +288,7 @@ class DynamicTableMixin(BaseModel): if isinstance(item, str): return self._columns[item] if isinstance(item, (int, slice, np.integer, np.ndarray)): - return DataFrame.from_dict(self._slice_range(item)) + data = self._slice_range(item) elif isinstance(item, tuple): if len(item) != 2: raise ValueError( @@ -305,10 +306,12 @@ class DynamicTableMixin(BaseModel): return self._columns[cols][rows] data = self._slice_range(rows, cols) - return DataFrame.from_dict(data) else: raise ValueError(f"Unsure how to get item with key {item}") + # cast to DF + return DataFrame(data) + def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None ) -> Dict[str, Union[list, "NDArray", "VectorData"]]: @@ -323,14 +326,10 @@ class DynamicTableMixin(BaseModel): else: val = self._columns[k][rows] - if isinstance(val, BaseModel): - # special case where pandas will unpack a pydantic model - # into {n_fields} rows, rather than keeping it in a dict - val = Series([val]) - elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1: - # special case where we are returning a row in a ragged array, - # same as above - prevent pandas pivoting to long + # scalars need to be wrapped in series for pandas + if not isinstance(rows, (Iterable, slice)): val = Series([val]) + data[k] = val return data diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_7_0/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_7_0/hdmf_common_table.py index 3e438ce..1d6e89f 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_7_0/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_7_0/hdmf_common_table.py @@ -259,7 +259,7 @@ class DynamicTableMixin(BaseModel): ]: ... @overload - def __getitem__(self, item: slice) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__( self, @@ -267,6 +267,7 @@ class DynamicTableMixin(BaseModel): str, int, slice, + "NDArray", Tuple[int, Union[int, str]], Tuple[Union[int, slice], ...], ], @@ -287,7 +288,7 @@ class DynamicTableMixin(BaseModel): if isinstance(item, str): return self._columns[item] if isinstance(item, (int, slice, np.integer, np.ndarray)): - return DataFrame.from_dict(self._slice_range(item)) + data = self._slice_range(item) elif isinstance(item, tuple): if len(item) != 2: raise ValueError( @@ -305,10 +306,12 @@ class DynamicTableMixin(BaseModel): return self._columns[cols][rows] data = self._slice_range(rows, cols) - return DataFrame.from_dict(data) else: raise ValueError(f"Unsure how to get item with key {item}") + # cast to DF + return DataFrame(data) + def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None ) -> Dict[str, Union[list, "NDArray", "VectorData"]]: @@ -323,14 +326,10 @@ class DynamicTableMixin(BaseModel): else: val = self._columns[k][rows] - if isinstance(val, BaseModel): - # special case where pandas will unpack a pydantic model - # into {n_fields} rows, rather than keeping it in a dict - val = Series([val]) - elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1: - # special case where we are returning a row in a ragged array, - # same as above - prevent pandas pivoting to long + # scalars need to be wrapped in series for pandas + if not isinstance(rows, (Iterable, slice)): val = Series([val]) + data[k] = val return data diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_8_0/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_8_0/hdmf_common_table.py index c016f61..2c1798b 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_8_0/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_8_0/hdmf_common_table.py @@ -259,7 +259,7 @@ class DynamicTableMixin(BaseModel): ]: ... @overload - def __getitem__(self, item: slice) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__( self, @@ -267,6 +267,7 @@ class DynamicTableMixin(BaseModel): str, int, slice, + "NDArray", Tuple[int, Union[int, str]], Tuple[Union[int, slice], ...], ], @@ -287,7 +288,7 @@ class DynamicTableMixin(BaseModel): if isinstance(item, str): return self._columns[item] if isinstance(item, (int, slice, np.integer, np.ndarray)): - return DataFrame.from_dict(self._slice_range(item)) + data = self._slice_range(item) elif isinstance(item, tuple): if len(item) != 2: raise ValueError( @@ -305,10 +306,12 @@ class DynamicTableMixin(BaseModel): return self._columns[cols][rows] data = self._slice_range(rows, cols) - return DataFrame.from_dict(data) else: raise ValueError(f"Unsure how to get item with key {item}") + # cast to DF + return DataFrame(data) + def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None ) -> Dict[str, Union[list, "NDArray", "VectorData"]]: @@ -323,14 +326,10 @@ class DynamicTableMixin(BaseModel): else: val = self._columns[k][rows] - if isinstance(val, BaseModel): - # special case where pandas will unpack a pydantic model - # into {n_fields} rows, rather than keeping it in a dict - val = Series([val]) - elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1: - # special case where we are returning a row in a ragged array, - # same as above - prevent pandas pivoting to long + # scalars need to be wrapped in series for pandas + if not isinstance(rows, (Iterable, slice)): val = Series([val]) + data[k] = val return data diff --git a/nwb_linkml/tests/test_includes/test_hdmf.py b/nwb_linkml/tests/test_includes/test_hdmf.py index 6ade3a2..2d07d2d 100644 --- a/nwb_linkml/tests/test_includes/test_hdmf.py +++ b/nwb_linkml/tests/test_includes/test_hdmf.py @@ -150,6 +150,24 @@ def test_dynamictable_indexing(electrical_series): assert subsection.dtypes.values.tolist() == dtypes[0:3] +def test_dynamictable_ragged(units): + """ + Should be able to index ragged arrays using an implicit _index column + + Also tests: + - passing arrays directly instead of wrapping in vectordata/index specifically, + if the models in the fixture instantiate then this works + """ + units, spike_times, spike_idx = units + + # ensure we don't pivot to long when indexing + assert units[0].shape[0] == 1 + # check that we got the indexing boundaries corrunect + # (and that we are forwarding attr calls to the dataframe by accessing shape + for i in range(units.shape[0]): + assert np.all(units.iloc[i, 0] == spike_times[i]) + + def test_dynamictable_region_basic(electrical_series): """ DynamicTableRegion should be able to refer to a row or rows of another table @@ -175,7 +193,7 @@ def test_dynamictable_region_basic(electrical_series): # getting a list of table rows is actually correct behavior here because # this list of table rows is actually the cell of another table rows = series.electrodes[0:3] - assert all([row.id == idx for row, idx in zip(rows, [4, 3, 2])]) + assert all([all(row.id == idx) for row, idx in zip(rows, [4, 3, 2])]) def test_dynamictable_region_ragged(): @@ -218,24 +236,6 @@ def test_dynamictable_region_ragged(): assert all([all(row[1].timeseries == i) for i, row in zip([1, 2, 3], rows.iterrows())]) -def test_dynamictable_ragged(units): - """ - Should be able to index ragged arrays using an implicit _index column - - Also tests: - - passing arrays directly instead of wrapping in vectordata/index specifically, - if the models in the fixture instantiate then this works - """ - units, spike_times, spike_idx = units - - # ensure we don't pivot to long when indexing - assert units[0].shape[0] == 1 - # check that we got the indexing boundaries corrunect - # (and that we are forwarding attr calls to the dataframe by accessing shape - for i in range(units.shape[0]): - assert np.all(units.iloc[i, 0] == spike_times[i]) - - def test_dynamictable_append_column(): pass