mirror of
https://github.com/p2p-ld/nwb-linkml.git
synced 2025-01-09 21:54:27 +00:00
initial draft of aligned dynamic table before testing and refining. switch pandas import to module-level
This commit is contained in:
parent
92d28baedd
commit
54c18e333e
15 changed files with 988 additions and 86 deletions
|
@ -305,6 +305,10 @@ There are several different ways to create references between objects in nwb/hdm
|
||||||
target_type: ElectrodeGroup
|
target_type: ElectrodeGroup
|
||||||
reftype: object
|
reftype: object
|
||||||
```
|
```
|
||||||
|
- `TimeSeriesReferenceVectorData` is a compound dtype that behaves like VectorData and VectorIndex combined
|
||||||
|
into a single type. It is slightly different in that each row of the vector can refer to a different table,
|
||||||
|
and has a different way of handling selection (with `start` and `count`
|
||||||
|
rather than a series of indices for the end of each cell)
|
||||||
- Implicitly, hdmf creates references between objects according to some naming conventions, eg.
|
- Implicitly, hdmf creates references between objects according to some naming conventions, eg.
|
||||||
an attribute/dataset that is a `VectorIndex` named `mydata_index` will be linked to a `VectorData`
|
an attribute/dataset that is a `VectorIndex` named `mydata_index` will be linked to a `VectorData`
|
||||||
object `mydata`.
|
object `mydata`.
|
||||||
|
|
|
@ -252,6 +252,8 @@ class AfterGenerateClass:
|
||||||
cls.cls.bases = ["VectorIndexMixin"]
|
cls.cls.bases = ["VectorIndexMixin"]
|
||||||
elif cls.cls.name == "DynamicTableRegion":
|
elif cls.cls.name == "DynamicTableRegion":
|
||||||
cls.cls.bases = ["DynamicTableRegionMixin", "VectorData"]
|
cls.cls.bases = ["DynamicTableRegionMixin", "VectorData"]
|
||||||
|
elif cls.cls.name == "AlignedDynamicTable":
|
||||||
|
cls.cls.bases = ["AlignedDynamicTableMixin", "DynamicTable"]
|
||||||
return cls
|
return cls
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -18,7 +18,7 @@ from typing import (
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from linkml.generators.pydanticgen.template import Import, Imports, ObjectImport
|
from linkml.generators.pydanticgen.template import Import, Imports, ObjectImport
|
||||||
from numpydantic import NDArray, Shape
|
from numpydantic import NDArray, Shape
|
||||||
from pandas import DataFrame, Series
|
import pandas as pd
|
||||||
from pydantic import (
|
from pydantic import (
|
||||||
BaseModel,
|
BaseModel,
|
||||||
ConfigDict,
|
ConfigDict,
|
||||||
|
@ -66,21 +66,21 @@ class DynamicTableMixin(BaseModel):
|
||||||
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: int) -> DataFrame: ...
|
def __getitem__(self, item: int) -> pd.DataFrame: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
||||||
DataFrame,
|
pd.DataFrame,
|
||||||
list,
|
list,
|
||||||
"NDArray",
|
"NDArray",
|
||||||
"VectorDataMixin",
|
"VectorDataMixin",
|
||||||
]: ...
|
]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ...
|
def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
|
||||||
|
|
||||||
def __getitem__(
|
def __getitem__(
|
||||||
self,
|
self,
|
||||||
|
@ -131,7 +131,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
raise ValueError(f"Unsure how to get item with key {item}")
|
raise ValueError(f"Unsure how to get item with key {item}")
|
||||||
|
|
||||||
# cast to DF
|
# cast to DF
|
||||||
return DataFrame(data)
|
return pd.DataFrame(data)
|
||||||
|
|
||||||
def _slice_range(
|
def _slice_range(
|
||||||
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
||||||
|
@ -149,7 +149,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
|
|
||||||
# scalars need to be wrapped in series for pandas
|
# scalars need to be wrapped in series for pandas
|
||||||
if not isinstance(rows, (Iterable, slice)):
|
if not isinstance(rows, (Iterable, slice)):
|
||||||
val = Series([val])
|
val = pd.Series([val])
|
||||||
|
|
||||||
data[k] = val
|
data[k] = val
|
||||||
return data
|
return data
|
||||||
|
@ -180,6 +180,14 @@ class DynamicTableMixin(BaseModel):
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
raise e from None
|
raise e from None
|
||||||
|
|
||||||
|
def __len__(self) -> int:
    """
    Report the length of the table as the length of its ``id`` column.

    The ``id`` column is treated as the source of truth: if it does not
    match the real number of rows, that is a bug and should be fixed there.
    """
    row_ids = self.id
    return len(row_ids)
|
||||||
|
|
||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
||||||
|
@ -467,11 +475,70 @@ class DynamicTableRegionMixin(BaseModel):
|
||||||
self.table[self.value[key]] = value
|
self.table[self.value[key]] = value
|
||||||
|
|
||||||
|
|
||||||
|
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of the
    model and treated as one sub-table. ``__getitem__`` either selects from a
    single category or places the selected rows of every category side by
    side in one dataframe.
    """

    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # Variable-length tuple of field names: ``tuple[str]`` would describe a
    # tuple holding exactly one string.
    # NOTE(review): presumably the fields that must never be treated as
    # categories - not referenced inside this class, confirm against callers.
    NON_CATEGORY_FIELDS: ClassVar[tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Map each name in ``categories`` to the attribute of the same name."""
        return {category: getattr(self, category) for category in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name: that whole category (``category[:]``)
                * ``(rows, str)`` - the result of indexing the named category
                  with ``rows``
                * ``int`` or ``slice`` - the selected rows of ``id`` and of
                  every category, combined into one dataframe

        Returns:
            For a category name or a ``(rows, str)`` tuple, whatever the
            category's own ``__getitem__`` returns. For an ``int`` or
            ``slice``, a dataframe with two-level columns (this table's
            ``name`` or the category name first, then the column name),
            indexed by the ``(name, "id")`` column.

        Raises:
            ValueError: if ``item`` is none of the supported index types
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]
        elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            return self._categories[item[1]][item[0]]
        elif isinstance(item, (int, slice)):
            # get a slice of all the tables
            ids = self.id[item]
            if not isinstance(ids, Iterable):
                # a scalar id has to be wrapped before pandas can make a column of it
                ids = pd.Series([ids])
            ids = pd.DataFrame({"id": ids})
            tables = [ids] + [table[item].reset_index() for table in self._categories.values()]
            names = [self.name] + self.categories
            # construct below in case we need to support array indexing in the future
        else:
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # one top-level column group per table, then promote the ids to the index
        df = pd.concat(tables, axis=1, keys=names)
        df.set_index((self.name, "id"), drop=True, inplace=True)
        return df
|
||||||
|
|
||||||
|
|
||||||
DYNAMIC_TABLE_IMPORTS = Imports(
|
DYNAMIC_TABLE_IMPORTS = Imports(
|
||||||
imports=[
|
imports=[
|
||||||
Import(
|
Import(module="pandas", alias="pd"),
|
||||||
module="pandas", objects=[ObjectImport(name="DataFrame"), ObjectImport(name="Series")]
|
|
||||||
),
|
|
||||||
Import(
|
Import(
|
||||||
module="typing",
|
module="typing",
|
||||||
objects=[
|
objects=[
|
||||||
|
@ -508,4 +575,5 @@ DYNAMIC_TABLE_INJECTS = [
|
||||||
VectorIndexMixin,
|
VectorIndexMixin,
|
||||||
DynamicTableRegionMixin,
|
DynamicTableRegionMixin,
|
||||||
DynamicTableMixin,
|
DynamicTableMixin,
|
||||||
|
AlignedDynamicTableMixin,
|
||||||
]
|
]
|
||||||
|
|
|
@ -4,7 +4,7 @@ from decimal import Decimal
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from pandas import DataFrame, Series
|
import pandas as pd
|
||||||
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
||||||
from numpydantic import NDArray, Shape
|
from numpydantic import NDArray, Shape
|
||||||
from pydantic import (
|
from pydantic import (
|
||||||
|
@ -244,21 +244,21 @@ class DynamicTableMixin(BaseModel):
|
||||||
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: int) -> DataFrame: ...
|
def __getitem__(self, item: int) -> pd.DataFrame: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
||||||
DataFrame,
|
pd.DataFrame,
|
||||||
list,
|
list,
|
||||||
"NDArray",
|
"NDArray",
|
||||||
"VectorDataMixin",
|
"VectorDataMixin",
|
||||||
]: ...
|
]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ...
|
def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
|
||||||
|
|
||||||
def __getitem__(
|
def __getitem__(
|
||||||
self,
|
self,
|
||||||
|
@ -309,7 +309,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
raise ValueError(f"Unsure how to get item with key {item}")
|
raise ValueError(f"Unsure how to get item with key {item}")
|
||||||
|
|
||||||
# cast to DF
|
# cast to DF
|
||||||
return DataFrame(data)
|
return pd.DataFrame(data)
|
||||||
|
|
||||||
def _slice_range(
|
def _slice_range(
|
||||||
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
||||||
|
@ -327,7 +327,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
|
|
||||||
# scalars need to be wrapped in series for pandas
|
# scalars need to be wrapped in series for pandas
|
||||||
if not isinstance(rows, (Iterable, slice)):
|
if not isinstance(rows, (Iterable, slice)):
|
||||||
val = Series([val])
|
val = pd.Series([val])
|
||||||
|
|
||||||
data[k] = val
|
data[k] = val
|
||||||
return data
|
return data
|
||||||
|
@ -358,6 +358,14 @@ class DynamicTableMixin(BaseModel):
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
raise e from None
|
raise e from None
|
||||||
|
|
||||||
|
def __len__(self) -> int:
    """
    Report the length of the table as the length of its ``id`` column.

    The ``id`` column is treated as the source of truth: if it does not
    match the real number of rows, that is a bug and should be fixed there.
    """
    row_ids = self.id
    return len(row_ids)
|
||||||
|
|
||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
||||||
|
@ -492,6 +500,67 @@ class DynamicTableMixin(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of the
    model and treated as one sub-table. ``__getitem__`` either selects from a
    single category or places the selected rows of every category side by
    side in one dataframe.
    """

    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # Variable-length tuple of field names: ``tuple[str]`` would describe a
    # tuple holding exactly one string.
    # NOTE(review): presumably the fields that must never be treated as
    # categories - not referenced inside this class, confirm against callers.
    NON_CATEGORY_FIELDS: ClassVar[tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Map each name in ``categories`` to the attribute of the same name."""
        return {category: getattr(self, category) for category in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name: that whole category (``category[:]``)
                * ``(rows, str)`` - the result of indexing the named category
                  with ``rows``
                * ``int`` or ``slice`` - the selected rows of ``id`` and of
                  every category, combined into one dataframe

        Returns:
            For a category name or a ``(rows, str)`` tuple, whatever the
            category's own ``__getitem__`` returns. For an ``int`` or
            ``slice``, a dataframe with two-level columns (this table's
            ``name`` or the category name first, then the column name),
            indexed by the ``(name, "id")`` column.

        Raises:
            ValueError: if ``item`` is none of the supported index types
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]
        elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            return self._categories[item[1]][item[0]]
        elif isinstance(item, (int, slice)):
            # get a slice of all the tables
            ids = self.id[item]
            if not isinstance(ids, Iterable):
                # a scalar id has to be wrapped before pandas can make a column of it
                ids = pd.Series([ids])
            ids = pd.DataFrame({"id": ids})
            tables = [ids] + [table[item].reset_index() for table in self._categories.values()]
            names = [self.name] + self.categories
            # construct below in case we need to support array indexing in the future
        else:
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # one top-level column group per table, then promote the ids to the index
        df = pd.concat(tables, axis=1, keys=names)
        df.set_index((self.name, "id"), drop=True, inplace=True)
        return df
|
||||||
|
|
||||||
|
|
||||||
linkml_meta = LinkMLMeta(
|
linkml_meta = LinkMLMeta(
|
||||||
{
|
{
|
||||||
"annotations": {
|
"annotations": {
|
||||||
|
|
|
@ -4,7 +4,7 @@ from decimal import Decimal
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from pandas import DataFrame, Series
|
import pandas as pd
|
||||||
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
||||||
from numpydantic import NDArray, Shape
|
from numpydantic import NDArray, Shape
|
||||||
from pydantic import (
|
from pydantic import (
|
||||||
|
@ -244,21 +244,21 @@ class DynamicTableMixin(BaseModel):
|
||||||
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: int) -> DataFrame: ...
|
def __getitem__(self, item: int) -> pd.DataFrame: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
||||||
DataFrame,
|
pd.DataFrame,
|
||||||
list,
|
list,
|
||||||
"NDArray",
|
"NDArray",
|
||||||
"VectorDataMixin",
|
"VectorDataMixin",
|
||||||
]: ...
|
]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ...
|
def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
|
||||||
|
|
||||||
def __getitem__(
|
def __getitem__(
|
||||||
self,
|
self,
|
||||||
|
@ -309,7 +309,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
raise ValueError(f"Unsure how to get item with key {item}")
|
raise ValueError(f"Unsure how to get item with key {item}")
|
||||||
|
|
||||||
# cast to DF
|
# cast to DF
|
||||||
return DataFrame(data)
|
return pd.DataFrame(data)
|
||||||
|
|
||||||
def _slice_range(
|
def _slice_range(
|
||||||
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
||||||
|
@ -327,7 +327,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
|
|
||||||
# scalars need to be wrapped in series for pandas
|
# scalars need to be wrapped in series for pandas
|
||||||
if not isinstance(rows, (Iterable, slice)):
|
if not isinstance(rows, (Iterable, slice)):
|
||||||
val = Series([val])
|
val = pd.Series([val])
|
||||||
|
|
||||||
data[k] = val
|
data[k] = val
|
||||||
return data
|
return data
|
||||||
|
@ -358,6 +358,14 @@ class DynamicTableMixin(BaseModel):
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
raise e from None
|
raise e from None
|
||||||
|
|
||||||
|
def __len__(self) -> int:
    """
    Report the length of the table as the length of its ``id`` column.

    The ``id`` column is treated as the source of truth: if it does not
    match the real number of rows, that is a bug and should be fixed there.
    """
    row_ids = self.id
    return len(row_ids)
|
||||||
|
|
||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
||||||
|
@ -492,6 +500,67 @@ class DynamicTableMixin(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of the
    model and treated as one sub-table. ``__getitem__`` either selects from a
    single category or places the selected rows of every category side by
    side in one dataframe.
    """

    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # Variable-length tuple of field names: ``tuple[str]`` would describe a
    # tuple holding exactly one string.
    # NOTE(review): presumably the fields that must never be treated as
    # categories - not referenced inside this class, confirm against callers.
    NON_CATEGORY_FIELDS: ClassVar[tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Map each name in ``categories`` to the attribute of the same name."""
        return {category: getattr(self, category) for category in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name: that whole category (``category[:]``)
                * ``(rows, str)`` - the result of indexing the named category
                  with ``rows``
                * ``int`` or ``slice`` - the selected rows of ``id`` and of
                  every category, combined into one dataframe

        Returns:
            For a category name or a ``(rows, str)`` tuple, whatever the
            category's own ``__getitem__`` returns. For an ``int`` or
            ``slice``, a dataframe with two-level columns (this table's
            ``name`` or the category name first, then the column name),
            indexed by the ``(name, "id")`` column.

        Raises:
            ValueError: if ``item`` is none of the supported index types
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]
        elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            return self._categories[item[1]][item[0]]
        elif isinstance(item, (int, slice)):
            # get a slice of all the tables
            ids = self.id[item]
            if not isinstance(ids, Iterable):
                # a scalar id has to be wrapped before pandas can make a column of it
                ids = pd.Series([ids])
            ids = pd.DataFrame({"id": ids})
            tables = [ids] + [table[item].reset_index() for table in self._categories.values()]
            names = [self.name] + self.categories
            # construct below in case we need to support array indexing in the future
        else:
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # one top-level column group per table, then promote the ids to the index
        df = pd.concat(tables, axis=1, keys=names)
        df.set_index((self.name, "id"), drop=True, inplace=True)
        return df
|
||||||
|
|
||||||
|
|
||||||
linkml_meta = LinkMLMeta(
|
linkml_meta = LinkMLMeta(
|
||||||
{
|
{
|
||||||
"annotations": {
|
"annotations": {
|
||||||
|
|
|
@ -4,7 +4,7 @@ from decimal import Decimal
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from pandas import DataFrame, Series
|
import pandas as pd
|
||||||
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
||||||
from numpydantic import NDArray, Shape
|
from numpydantic import NDArray, Shape
|
||||||
from pydantic import (
|
from pydantic import (
|
||||||
|
@ -244,21 +244,21 @@ class DynamicTableMixin(BaseModel):
|
||||||
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: int) -> DataFrame: ...
|
def __getitem__(self, item: int) -> pd.DataFrame: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
||||||
DataFrame,
|
pd.DataFrame,
|
||||||
list,
|
list,
|
||||||
"NDArray",
|
"NDArray",
|
||||||
"VectorDataMixin",
|
"VectorDataMixin",
|
||||||
]: ...
|
]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ...
|
def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
|
||||||
|
|
||||||
def __getitem__(
|
def __getitem__(
|
||||||
self,
|
self,
|
||||||
|
@ -309,7 +309,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
raise ValueError(f"Unsure how to get item with key {item}")
|
raise ValueError(f"Unsure how to get item with key {item}")
|
||||||
|
|
||||||
# cast to DF
|
# cast to DF
|
||||||
return DataFrame(data)
|
return pd.DataFrame(data)
|
||||||
|
|
||||||
def _slice_range(
|
def _slice_range(
|
||||||
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
||||||
|
@ -327,7 +327,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
|
|
||||||
# scalars need to be wrapped in series for pandas
|
# scalars need to be wrapped in series for pandas
|
||||||
if not isinstance(rows, (Iterable, slice)):
|
if not isinstance(rows, (Iterable, slice)):
|
||||||
val = Series([val])
|
val = pd.Series([val])
|
||||||
|
|
||||||
data[k] = val
|
data[k] = val
|
||||||
return data
|
return data
|
||||||
|
@ -358,6 +358,14 @@ class DynamicTableMixin(BaseModel):
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
raise e from None
|
raise e from None
|
||||||
|
|
||||||
|
def __len__(self) -> int:
    """
    Report the length of the table as the length of its ``id`` column.

    The ``id`` column is treated as the source of truth: if it does not
    match the real number of rows, that is a bug and should be fixed there.
    """
    row_ids = self.id
    return len(row_ids)
|
||||||
|
|
||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
||||||
|
@ -492,6 +500,67 @@ class DynamicTableMixin(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of the
    model and treated as one sub-table. ``__getitem__`` either selects from a
    single category or places the selected rows of every category side by
    side in one dataframe.
    """

    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # Variable-length tuple of field names: ``tuple[str]`` would describe a
    # tuple holding exactly one string.
    # NOTE(review): presumably the fields that must never be treated as
    # categories - not referenced inside this class, confirm against callers.
    NON_CATEGORY_FIELDS: ClassVar[tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Map each name in ``categories`` to the attribute of the same name."""
        return {category: getattr(self, category) for category in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name: that whole category (``category[:]``)
                * ``(rows, str)`` - the result of indexing the named category
                  with ``rows``
                * ``int`` or ``slice`` - the selected rows of ``id`` and of
                  every category, combined into one dataframe

        Returns:
            For a category name or a ``(rows, str)`` tuple, whatever the
            category's own ``__getitem__`` returns. For an ``int`` or
            ``slice``, a dataframe with two-level columns (this table's
            ``name`` or the category name first, then the column name),
            indexed by the ``(name, "id")`` column.

        Raises:
            ValueError: if ``item`` is none of the supported index types
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]
        elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            return self._categories[item[1]][item[0]]
        elif isinstance(item, (int, slice)):
            # get a slice of all the tables
            ids = self.id[item]
            if not isinstance(ids, Iterable):
                # a scalar id has to be wrapped before pandas can make a column of it
                ids = pd.Series([ids])
            ids = pd.DataFrame({"id": ids})
            tables = [ids] + [table[item].reset_index() for table in self._categories.values()]
            names = [self.name] + self.categories
            # construct below in case we need to support array indexing in the future
        else:
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # one top-level column group per table, then promote the ids to the index
        df = pd.concat(tables, axis=1, keys=names)
        df.set_index((self.name, "id"), drop=True, inplace=True)
        return df
|
||||||
|
|
||||||
|
|
||||||
linkml_meta = LinkMLMeta(
|
linkml_meta = LinkMLMeta(
|
||||||
{
|
{
|
||||||
"annotations": {
|
"annotations": {
|
||||||
|
|
|
@ -5,7 +5,7 @@ from enum import Enum
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from ...hdmf_common.v1_2_0.hdmf_common_base import Data, Container
|
from ...hdmf_common.v1_2_0.hdmf_common_base import Data, Container
|
||||||
from pandas import DataFrame, Series
|
import pandas as pd
|
||||||
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
||||||
from numpydantic import NDArray, Shape
|
from numpydantic import NDArray, Shape
|
||||||
from pydantic import (
|
from pydantic import (
|
||||||
|
@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel):
|
||||||
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: int) -> DataFrame: ...
|
def __getitem__(self, item: int) -> pd.DataFrame: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
||||||
DataFrame,
|
pd.DataFrame,
|
||||||
list,
|
list,
|
||||||
"NDArray",
|
"NDArray",
|
||||||
"VectorDataMixin",
|
"VectorDataMixin",
|
||||||
]: ...
|
]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ...
|
def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
|
||||||
|
|
||||||
def __getitem__(
|
def __getitem__(
|
||||||
self,
|
self,
|
||||||
|
@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
raise ValueError(f"Unsure how to get item with key {item}")
|
raise ValueError(f"Unsure how to get item with key {item}")
|
||||||
|
|
||||||
# cast to DF
|
# cast to DF
|
||||||
return DataFrame(data)
|
return pd.DataFrame(data)
|
||||||
|
|
||||||
def _slice_range(
|
def _slice_range(
|
||||||
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
||||||
|
@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
|
|
||||||
# scalars need to be wrapped in series for pandas
|
# scalars need to be wrapped in series for pandas
|
||||||
if not isinstance(rows, (Iterable, slice)):
|
if not isinstance(rows, (Iterable, slice)):
|
||||||
val = Series([val])
|
val = pd.Series([val])
|
||||||
|
|
||||||
data[k] = val
|
data[k] = val
|
||||||
return data
|
return data
|
||||||
|
@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel):
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
raise e from None
|
raise e from None
|
||||||
|
|
||||||
|
def __len__(self) -> int:
    """
    Report the length of the table as the length of its ``id`` column.

    The ``id`` column is treated as the source of truth: if it does not
    match the real number of rows, that is a bug and should be fixed there.
    """
    row_ids = self.id
    return len(row_ids)
|
||||||
|
|
||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
||||||
|
@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of the
    model and treated as one sub-table. ``__getitem__`` either selects from a
    single category or places the selected rows of every category side by
    side in one dataframe.
    """

    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # Variable-length tuple of field names: ``tuple[str]`` would describe a
    # tuple holding exactly one string.
    # NOTE(review): presumably the fields that must never be treated as
    # categories - not referenced inside this class, confirm against callers.
    NON_CATEGORY_FIELDS: ClassVar[tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Map each name in ``categories`` to the attribute of the same name."""
        return {category: getattr(self, category) for category in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name: that whole category (``category[:]``)
                * ``(rows, str)`` - the result of indexing the named category
                  with ``rows``
                * ``int`` or ``slice`` - the selected rows of ``id`` and of
                  every category, combined into one dataframe

        Returns:
            For a category name or a ``(rows, str)`` tuple, whatever the
            category's own ``__getitem__`` returns. For an ``int`` or
            ``slice``, a dataframe with two-level columns (this table's
            ``name`` or the category name first, then the column name),
            indexed by the ``(name, "id")`` column.

        Raises:
            ValueError: if ``item`` is none of the supported index types
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]
        elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            return self._categories[item[1]][item[0]]
        elif isinstance(item, (int, slice)):
            # get a slice of all the tables
            ids = self.id[item]
            if not isinstance(ids, Iterable):
                # a scalar id has to be wrapped before pandas can make a column of it
                ids = pd.Series([ids])
            ids = pd.DataFrame({"id": ids})
            tables = [ids] + [table[item].reset_index() for table in self._categories.values()]
            names = [self.name] + self.categories
            # construct below in case we need to support array indexing in the future
        else:
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # one top-level column group per table, then promote the ids to the index
        df = pd.concat(tables, axis=1, keys=names)
        df.set_index((self.name, "id"), drop=True, inplace=True)
        return df
|
||||||
|
|
||||||
|
|
||||||
linkml_meta = LinkMLMeta(
|
linkml_meta = LinkMLMeta(
|
||||||
{
|
{
|
||||||
"annotations": {
|
"annotations": {
|
||||||
|
|
|
@ -5,7 +5,7 @@ from enum import Enum
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from ...hdmf_common.v1_2_1.hdmf_common_base import Data, Container
|
from ...hdmf_common.v1_2_1.hdmf_common_base import Data, Container
|
||||||
from pandas import DataFrame, Series
|
import pandas as pd
|
||||||
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
||||||
from numpydantic import NDArray, Shape
|
from numpydantic import NDArray, Shape
|
||||||
from pydantic import (
|
from pydantic import (
|
||||||
|
@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel):
|
||||||
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: int) -> DataFrame: ...
|
def __getitem__(self, item: int) -> pd.DataFrame: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
||||||
DataFrame,
|
pd.DataFrame,
|
||||||
list,
|
list,
|
||||||
"NDArray",
|
"NDArray",
|
||||||
"VectorDataMixin",
|
"VectorDataMixin",
|
||||||
]: ...
|
]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ...
|
def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
|
||||||
|
|
||||||
def __getitem__(
|
def __getitem__(
|
||||||
self,
|
self,
|
||||||
|
@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
raise ValueError(f"Unsure how to get item with key {item}")
|
raise ValueError(f"Unsure how to get item with key {item}")
|
||||||
|
|
||||||
# cast to DF
|
# cast to DF
|
||||||
return DataFrame(data)
|
return pd.DataFrame(data)
|
||||||
|
|
||||||
def _slice_range(
|
def _slice_range(
|
||||||
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
||||||
|
@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
|
|
||||||
# scalars need to be wrapped in series for pandas
|
# scalars need to be wrapped in series for pandas
|
||||||
if not isinstance(rows, (Iterable, slice)):
|
if not isinstance(rows, (Iterable, slice)):
|
||||||
val = Series([val])
|
val = pd.Series([val])
|
||||||
|
|
||||||
data[k] = val
|
data[k] = val
|
||||||
return data
|
return data
|
||||||
|
@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel):
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
raise e from None
|
raise e from None
|
||||||
|
|
||||||
|
def __len__(self) -> int:
|
||||||
|
"""
|
||||||
|
Use the id column to determine length.
|
||||||
|
|
||||||
|
If the id column doesn't represent length accurately, it's a bug
|
||||||
|
"""
|
||||||
|
return len(self.id)
|
||||||
|
|
||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
||||||
|
@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AlignedDynamicTableMixin(DynamicTableMixin):
|
||||||
|
"""
|
||||||
|
Mixin to allow indexing multiple tables that are aligned on a common ID
|
||||||
|
"""
|
||||||
|
|
||||||
|
__pydantic_extra__: Dict[str, "DynamicTableMixin"]
|
||||||
|
|
||||||
|
NON_CATEGORY_FIELDS: ClassVar[tuple[str]] = (
|
||||||
|
"name",
|
||||||
|
"categories",
|
||||||
|
"colnames",
|
||||||
|
"description",
|
||||||
|
)
|
||||||
|
|
||||||
|
name: str = "aligned_table"
|
||||||
|
categories: List[str] = Field(default_factory=list)
|
||||||
|
id: Optional[NDArray[Shape["* num_rows"], int]] = None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def _categories(self) -> Dict[str, "DynamicTableMixin"]:
|
||||||
|
return {k: getattr(self, k) for i, k in enumerate(self.categories)}
|
||||||
|
|
||||||
|
def __getitem__(
|
||||||
|
self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
|
||||||
|
) -> pd.DataFrame:
|
||||||
|
"""
|
||||||
|
Mimic hdmf:
|
||||||
|
|
||||||
|
https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261
|
||||||
|
Args:
|
||||||
|
item:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
if isinstance(item, str):
|
||||||
|
# get a single table
|
||||||
|
return self._categories[item][:]
|
||||||
|
elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
|
||||||
|
# get a slice of a single table
|
||||||
|
return self._categories[item[1]][item[0]]
|
||||||
|
elif isinstance(item, (int, slice)):
|
||||||
|
# get a slice of all the tables
|
||||||
|
ids = self.id[item]
|
||||||
|
if not isinstance(ids, Iterable):
|
||||||
|
ids = pd.Series([ids])
|
||||||
|
ids = pd.DataFrame({"id": ids})
|
||||||
|
tables = [ids] + [table[item].reset_index() for table in self._categories.values()]
|
||||||
|
names = [self.name] + self.categories
|
||||||
|
# construct below in case we need to support array indexing in the future
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
f"Dont know how to index with {item}, "
|
||||||
|
"need an int, string, slice, or tuple[int | slice, str]"
|
||||||
|
)
|
||||||
|
|
||||||
|
df = pd.concat(tables, axis=1, keys=names)
|
||||||
|
df.set_index((self.name, "id"), drop=True, inplace=True)
|
||||||
|
return df
|
||||||
|
|
||||||
|
|
||||||
linkml_meta = LinkMLMeta(
|
linkml_meta = LinkMLMeta(
|
||||||
{
|
{
|
||||||
"annotations": {
|
"annotations": {
|
||||||
|
|
|
@ -5,7 +5,7 @@ from enum import Enum
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from ...hdmf_common.v1_3_0.hdmf_common_base import Data, Container
|
from ...hdmf_common.v1_3_0.hdmf_common_base import Data, Container
|
||||||
from pandas import DataFrame, Series
|
import pandas as pd
|
||||||
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
||||||
from numpydantic import NDArray, Shape
|
from numpydantic import NDArray, Shape
|
||||||
from pydantic import (
|
from pydantic import (
|
||||||
|
@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel):
|
||||||
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: int) -> DataFrame: ...
|
def __getitem__(self, item: int) -> pd.DataFrame: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
||||||
DataFrame,
|
pd.DataFrame,
|
||||||
list,
|
list,
|
||||||
"NDArray",
|
"NDArray",
|
||||||
"VectorDataMixin",
|
"VectorDataMixin",
|
||||||
]: ...
|
]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ...
|
def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
|
||||||
|
|
||||||
def __getitem__(
|
def __getitem__(
|
||||||
self,
|
self,
|
||||||
|
@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
raise ValueError(f"Unsure how to get item with key {item}")
|
raise ValueError(f"Unsure how to get item with key {item}")
|
||||||
|
|
||||||
# cast to DF
|
# cast to DF
|
||||||
return DataFrame(data)
|
return pd.DataFrame(data)
|
||||||
|
|
||||||
def _slice_range(
|
def _slice_range(
|
||||||
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
||||||
|
@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
|
|
||||||
# scalars need to be wrapped in series for pandas
|
# scalars need to be wrapped in series for pandas
|
||||||
if not isinstance(rows, (Iterable, slice)):
|
if not isinstance(rows, (Iterable, slice)):
|
||||||
val = Series([val])
|
val = pd.Series([val])
|
||||||
|
|
||||||
data[k] = val
|
data[k] = val
|
||||||
return data
|
return data
|
||||||
|
@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel):
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
raise e from None
|
raise e from None
|
||||||
|
|
||||||
|
def __len__(self) -> int:
|
||||||
|
"""
|
||||||
|
Use the id column to determine length.
|
||||||
|
|
||||||
|
If the id column doesn't represent length accurately, it's a bug
|
||||||
|
"""
|
||||||
|
return len(self.id)
|
||||||
|
|
||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
||||||
|
@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AlignedDynamicTableMixin(DynamicTableMixin):
|
||||||
|
"""
|
||||||
|
Mixin to allow indexing multiple tables that are aligned on a common ID
|
||||||
|
"""
|
||||||
|
|
||||||
|
__pydantic_extra__: Dict[str, "DynamicTableMixin"]
|
||||||
|
|
||||||
|
NON_CATEGORY_FIELDS: ClassVar[tuple[str]] = (
|
||||||
|
"name",
|
||||||
|
"categories",
|
||||||
|
"colnames",
|
||||||
|
"description",
|
||||||
|
)
|
||||||
|
|
||||||
|
name: str = "aligned_table"
|
||||||
|
categories: List[str] = Field(default_factory=list)
|
||||||
|
id: Optional[NDArray[Shape["* num_rows"], int]] = None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def _categories(self) -> Dict[str, "DynamicTableMixin"]:
|
||||||
|
return {k: getattr(self, k) for i, k in enumerate(self.categories)}
|
||||||
|
|
||||||
|
def __getitem__(
|
||||||
|
self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
|
||||||
|
) -> pd.DataFrame:
|
||||||
|
"""
|
||||||
|
Mimic hdmf:
|
||||||
|
|
||||||
|
https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261
|
||||||
|
Args:
|
||||||
|
item:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
if isinstance(item, str):
|
||||||
|
# get a single table
|
||||||
|
return self._categories[item][:]
|
||||||
|
elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
|
||||||
|
# get a slice of a single table
|
||||||
|
return self._categories[item[1]][item[0]]
|
||||||
|
elif isinstance(item, (int, slice)):
|
||||||
|
# get a slice of all the tables
|
||||||
|
ids = self.id[item]
|
||||||
|
if not isinstance(ids, Iterable):
|
||||||
|
ids = pd.Series([ids])
|
||||||
|
ids = pd.DataFrame({"id": ids})
|
||||||
|
tables = [ids] + [table[item].reset_index() for table in self._categories.values()]
|
||||||
|
names = [self.name] + self.categories
|
||||||
|
# construct below in case we need to support array indexing in the future
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
f"Dont know how to index with {item}, "
|
||||||
|
"need an int, string, slice, or tuple[int | slice, str]"
|
||||||
|
)
|
||||||
|
|
||||||
|
df = pd.concat(tables, axis=1, keys=names)
|
||||||
|
df.set_index((self.name, "id"), drop=True, inplace=True)
|
||||||
|
return df
|
||||||
|
|
||||||
|
|
||||||
linkml_meta = LinkMLMeta(
|
linkml_meta = LinkMLMeta(
|
||||||
{
|
{
|
||||||
"annotations": {
|
"annotations": {
|
||||||
|
|
|
@ -5,7 +5,7 @@ from enum import Enum
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from ...hdmf_common.v1_4_0.hdmf_common_base import Data, Container
|
from ...hdmf_common.v1_4_0.hdmf_common_base import Data, Container
|
||||||
from pandas import DataFrame, Series
|
import pandas as pd
|
||||||
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
||||||
from numpydantic import NDArray, Shape
|
from numpydantic import NDArray, Shape
|
||||||
from pydantic import (
|
from pydantic import (
|
||||||
|
@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel):
|
||||||
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: int) -> DataFrame: ...
|
def __getitem__(self, item: int) -> pd.DataFrame: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
||||||
DataFrame,
|
pd.DataFrame,
|
||||||
list,
|
list,
|
||||||
"NDArray",
|
"NDArray",
|
||||||
"VectorDataMixin",
|
"VectorDataMixin",
|
||||||
]: ...
|
]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ...
|
def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
|
||||||
|
|
||||||
def __getitem__(
|
def __getitem__(
|
||||||
self,
|
self,
|
||||||
|
@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
raise ValueError(f"Unsure how to get item with key {item}")
|
raise ValueError(f"Unsure how to get item with key {item}")
|
||||||
|
|
||||||
# cast to DF
|
# cast to DF
|
||||||
return DataFrame(data)
|
return pd.DataFrame(data)
|
||||||
|
|
||||||
def _slice_range(
|
def _slice_range(
|
||||||
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
||||||
|
@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
|
|
||||||
# scalars need to be wrapped in series for pandas
|
# scalars need to be wrapped in series for pandas
|
||||||
if not isinstance(rows, (Iterable, slice)):
|
if not isinstance(rows, (Iterable, slice)):
|
||||||
val = Series([val])
|
val = pd.Series([val])
|
||||||
|
|
||||||
data[k] = val
|
data[k] = val
|
||||||
return data
|
return data
|
||||||
|
@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel):
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
raise e from None
|
raise e from None
|
||||||
|
|
||||||
|
def __len__(self) -> int:
|
||||||
|
"""
|
||||||
|
Use the id column to determine length.
|
||||||
|
|
||||||
|
If the id column doesn't represent length accurately, it's a bug
|
||||||
|
"""
|
||||||
|
return len(self.id)
|
||||||
|
|
||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
||||||
|
@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AlignedDynamicTableMixin(DynamicTableMixin):
|
||||||
|
"""
|
||||||
|
Mixin to allow indexing multiple tables that are aligned on a common ID
|
||||||
|
"""
|
||||||
|
|
||||||
|
__pydantic_extra__: Dict[str, "DynamicTableMixin"]
|
||||||
|
|
||||||
|
NON_CATEGORY_FIELDS: ClassVar[tuple[str]] = (
|
||||||
|
"name",
|
||||||
|
"categories",
|
||||||
|
"colnames",
|
||||||
|
"description",
|
||||||
|
)
|
||||||
|
|
||||||
|
name: str = "aligned_table"
|
||||||
|
categories: List[str] = Field(default_factory=list)
|
||||||
|
id: Optional[NDArray[Shape["* num_rows"], int]] = None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def _categories(self) -> Dict[str, "DynamicTableMixin"]:
|
||||||
|
return {k: getattr(self, k) for i, k in enumerate(self.categories)}
|
||||||
|
|
||||||
|
def __getitem__(
|
||||||
|
self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
|
||||||
|
) -> pd.DataFrame:
|
||||||
|
"""
|
||||||
|
Mimic hdmf:
|
||||||
|
|
||||||
|
https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261
|
||||||
|
Args:
|
||||||
|
item:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
if isinstance(item, str):
|
||||||
|
# get a single table
|
||||||
|
return self._categories[item][:]
|
||||||
|
elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
|
||||||
|
# get a slice of a single table
|
||||||
|
return self._categories[item[1]][item[0]]
|
||||||
|
elif isinstance(item, (int, slice)):
|
||||||
|
# get a slice of all the tables
|
||||||
|
ids = self.id[item]
|
||||||
|
if not isinstance(ids, Iterable):
|
||||||
|
ids = pd.Series([ids])
|
||||||
|
ids = pd.DataFrame({"id": ids})
|
||||||
|
tables = [ids] + [table[item].reset_index() for table in self._categories.values()]
|
||||||
|
names = [self.name] + self.categories
|
||||||
|
# construct below in case we need to support array indexing in the future
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
f"Dont know how to index with {item}, "
|
||||||
|
"need an int, string, slice, or tuple[int | slice, str]"
|
||||||
|
)
|
||||||
|
|
||||||
|
df = pd.concat(tables, axis=1, keys=names)
|
||||||
|
df.set_index((self.name, "id"), drop=True, inplace=True)
|
||||||
|
return df
|
||||||
|
|
||||||
|
|
||||||
linkml_meta = LinkMLMeta(
|
linkml_meta = LinkMLMeta(
|
||||||
{
|
{
|
||||||
"annotations": {
|
"annotations": {
|
||||||
|
|
|
@ -5,7 +5,7 @@ from enum import Enum
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from ...hdmf_common.v1_5_0.hdmf_common_base import Data, Container
|
from ...hdmf_common.v1_5_0.hdmf_common_base import Data, Container
|
||||||
from pandas import DataFrame, Series
|
import pandas as pd
|
||||||
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
||||||
from pydantic import (
|
from pydantic import (
|
||||||
BaseModel,
|
BaseModel,
|
||||||
|
@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel):
|
||||||
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: int) -> DataFrame: ...
|
def __getitem__(self, item: int) -> pd.DataFrame: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
||||||
DataFrame,
|
pd.DataFrame,
|
||||||
list,
|
list,
|
||||||
"NDArray",
|
"NDArray",
|
||||||
"VectorDataMixin",
|
"VectorDataMixin",
|
||||||
]: ...
|
]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ...
|
def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
|
||||||
|
|
||||||
def __getitem__(
|
def __getitem__(
|
||||||
self,
|
self,
|
||||||
|
@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
raise ValueError(f"Unsure how to get item with key {item}")
|
raise ValueError(f"Unsure how to get item with key {item}")
|
||||||
|
|
||||||
# cast to DF
|
# cast to DF
|
||||||
return DataFrame(data)
|
return pd.DataFrame(data)
|
||||||
|
|
||||||
def _slice_range(
|
def _slice_range(
|
||||||
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
||||||
|
@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
|
|
||||||
# scalars need to be wrapped in series for pandas
|
# scalars need to be wrapped in series for pandas
|
||||||
if not isinstance(rows, (Iterable, slice)):
|
if not isinstance(rows, (Iterable, slice)):
|
||||||
val = Series([val])
|
val = pd.Series([val])
|
||||||
|
|
||||||
data[k] = val
|
data[k] = val
|
||||||
return data
|
return data
|
||||||
|
@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel):
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
raise e from None
|
raise e from None
|
||||||
|
|
||||||
|
def __len__(self) -> int:
|
||||||
|
"""
|
||||||
|
Use the id column to determine length.
|
||||||
|
|
||||||
|
If the id column doesn't represent length accurately, it's a bug
|
||||||
|
"""
|
||||||
|
return len(self.id)
|
||||||
|
|
||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
||||||
|
@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AlignedDynamicTableMixin(DynamicTableMixin):
|
||||||
|
"""
|
||||||
|
Mixin to allow indexing multiple tables that are aligned on a common ID
|
||||||
|
"""
|
||||||
|
|
||||||
|
__pydantic_extra__: Dict[str, "DynamicTableMixin"]
|
||||||
|
|
||||||
|
NON_CATEGORY_FIELDS: ClassVar[tuple[str]] = (
|
||||||
|
"name",
|
||||||
|
"categories",
|
||||||
|
"colnames",
|
||||||
|
"description",
|
||||||
|
)
|
||||||
|
|
||||||
|
name: str = "aligned_table"
|
||||||
|
categories: List[str] = Field(default_factory=list)
|
||||||
|
id: Optional[NDArray[Shape["* num_rows"], int]] = None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def _categories(self) -> Dict[str, "DynamicTableMixin"]:
|
||||||
|
return {k: getattr(self, k) for i, k in enumerate(self.categories)}
|
||||||
|
|
||||||
|
def __getitem__(
|
||||||
|
self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
|
||||||
|
) -> pd.DataFrame:
|
||||||
|
"""
|
||||||
|
Mimic hdmf:
|
||||||
|
|
||||||
|
https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261
|
||||||
|
Args:
|
||||||
|
item:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
if isinstance(item, str):
|
||||||
|
# get a single table
|
||||||
|
return self._categories[item][:]
|
||||||
|
elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
|
||||||
|
# get a slice of a single table
|
||||||
|
return self._categories[item[1]][item[0]]
|
||||||
|
elif isinstance(item, (int, slice)):
|
||||||
|
# get a slice of all the tables
|
||||||
|
ids = self.id[item]
|
||||||
|
if not isinstance(ids, Iterable):
|
||||||
|
ids = pd.Series([ids])
|
||||||
|
ids = pd.DataFrame({"id": ids})
|
||||||
|
tables = [ids] + [table[item].reset_index() for table in self._categories.values()]
|
||||||
|
names = [self.name] + self.categories
|
||||||
|
# construct below in case we need to support array indexing in the future
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
f"Dont know how to index with {item}, "
|
||||||
|
"need an int, string, slice, or tuple[int | slice, str]"
|
||||||
|
)
|
||||||
|
|
||||||
|
df = pd.concat(tables, axis=1, keys=names)
|
||||||
|
df.set_index((self.name, "id"), drop=True, inplace=True)
|
||||||
|
return df
|
||||||
|
|
||||||
|
|
||||||
linkml_meta = LinkMLMeta(
|
linkml_meta = LinkMLMeta(
|
||||||
{
|
{
|
||||||
"annotations": {
|
"annotations": {
|
||||||
|
@ -617,7 +686,7 @@ class DynamicTable(DynamicTableMixin):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AlignedDynamicTable(DynamicTable):
|
class AlignedDynamicTable(AlignedDynamicTableMixin, DynamicTable):
|
||||||
"""
|
"""
|
||||||
DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group.
|
DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group.
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -5,7 +5,7 @@ from enum import Enum
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from ...hdmf_common.v1_5_1.hdmf_common_base import Data, Container
|
from ...hdmf_common.v1_5_1.hdmf_common_base import Data, Container
|
||||||
from pandas import DataFrame, Series
|
import pandas as pd
|
||||||
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
||||||
from pydantic import (
|
from pydantic import (
|
||||||
BaseModel,
|
BaseModel,
|
||||||
|
@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel):
|
||||||
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: int) -> DataFrame: ...
|
def __getitem__(self, item: int) -> pd.DataFrame: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
||||||
DataFrame,
|
pd.DataFrame,
|
||||||
list,
|
list,
|
||||||
"NDArray",
|
"NDArray",
|
||||||
"VectorDataMixin",
|
"VectorDataMixin",
|
||||||
]: ...
|
]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ...
|
def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
|
||||||
|
|
||||||
def __getitem__(
|
def __getitem__(
|
||||||
self,
|
self,
|
||||||
|
@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
raise ValueError(f"Unsure how to get item with key {item}")
|
raise ValueError(f"Unsure how to get item with key {item}")
|
||||||
|
|
||||||
# cast to DF
|
# cast to DF
|
||||||
return DataFrame(data)
|
return pd.DataFrame(data)
|
||||||
|
|
||||||
def _slice_range(
|
def _slice_range(
|
||||||
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
||||||
|
@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
|
|
||||||
# scalars need to be wrapped in series for pandas
|
# scalars need to be wrapped in series for pandas
|
||||||
if not isinstance(rows, (Iterable, slice)):
|
if not isinstance(rows, (Iterable, slice)):
|
||||||
val = Series([val])
|
val = pd.Series([val])
|
||||||
|
|
||||||
data[k] = val
|
data[k] = val
|
||||||
return data
|
return data
|
||||||
|
@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel):
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
raise e from None
|
raise e from None
|
||||||
|
|
||||||
|
def __len__(self) -> int:
|
||||||
|
"""
|
||||||
|
Use the id column to determine length.
|
||||||
|
|
||||||
|
If the id column doesn't represent length accurately, it's a bug
|
||||||
|
"""
|
||||||
|
return len(self.id)
|
||||||
|
|
||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
||||||
|
@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AlignedDynamicTableMixin(DynamicTableMixin):
|
||||||
|
"""
|
||||||
|
Mixin to allow indexing multiple tables that are aligned on a common ID
|
||||||
|
"""
|
||||||
|
|
||||||
|
__pydantic_extra__: Dict[str, "DynamicTableMixin"]
|
||||||
|
|
||||||
|
NON_CATEGORY_FIELDS: ClassVar[tuple[str]] = (
|
||||||
|
"name",
|
||||||
|
"categories",
|
||||||
|
"colnames",
|
||||||
|
"description",
|
||||||
|
)
|
||||||
|
|
||||||
|
name: str = "aligned_table"
|
||||||
|
categories: List[str] = Field(default_factory=list)
|
||||||
|
id: Optional[NDArray[Shape["* num_rows"], int]] = None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def _categories(self) -> Dict[str, "DynamicTableMixin"]:
|
||||||
|
return {k: getattr(self, k) for i, k in enumerate(self.categories)}
|
||||||
|
|
||||||
|
def __getitem__(
|
||||||
|
self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
|
||||||
|
) -> pd.DataFrame:
|
||||||
|
"""
|
||||||
|
Mimic hdmf:
|
||||||
|
|
||||||
|
https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261
|
||||||
|
Args:
|
||||||
|
item:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
if isinstance(item, str):
|
||||||
|
# get a single table
|
||||||
|
return self._categories[item][:]
|
||||||
|
elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
|
||||||
|
# get a slice of a single table
|
||||||
|
return self._categories[item[1]][item[0]]
|
||||||
|
elif isinstance(item, (int, slice)):
|
||||||
|
# get a slice of all the tables
|
||||||
|
ids = self.id[item]
|
||||||
|
if not isinstance(ids, Iterable):
|
||||||
|
ids = pd.Series([ids])
|
||||||
|
ids = pd.DataFrame({"id": ids})
|
||||||
|
tables = [ids] + [table[item].reset_index() for table in self._categories.values()]
|
||||||
|
names = [self.name] + self.categories
|
||||||
|
# construct below in case we need to support array indexing in the future
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
f"Dont know how to index with {item}, "
|
||||||
|
"need an int, string, slice, or tuple[int | slice, str]"
|
||||||
|
)
|
||||||
|
|
||||||
|
df = pd.concat(tables, axis=1, keys=names)
|
||||||
|
df.set_index((self.name, "id"), drop=True, inplace=True)
|
||||||
|
return df
|
||||||
|
|
||||||
|
|
||||||
linkml_meta = LinkMLMeta(
|
linkml_meta = LinkMLMeta(
|
||||||
{
|
{
|
||||||
"annotations": {
|
"annotations": {
|
||||||
|
@ -617,7 +686,7 @@ class DynamicTable(DynamicTableMixin):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AlignedDynamicTable(DynamicTable):
|
class AlignedDynamicTable(AlignedDynamicTableMixin, DynamicTable):
|
||||||
"""
|
"""
|
||||||
DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group.
|
DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group.
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -5,7 +5,7 @@ from enum import Enum
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from ...hdmf_common.v1_6_0.hdmf_common_base import Data, Container
|
from ...hdmf_common.v1_6_0.hdmf_common_base import Data, Container
|
||||||
from pandas import DataFrame, Series
|
import pandas as pd
|
||||||
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
||||||
from pydantic import (
|
from pydantic import (
|
||||||
BaseModel,
|
BaseModel,
|
||||||
|
@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel):
|
||||||
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: int) -> DataFrame: ...
|
def __getitem__(self, item: int) -> pd.DataFrame: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
||||||
DataFrame,
|
pd.DataFrame,
|
||||||
list,
|
list,
|
||||||
"NDArray",
|
"NDArray",
|
||||||
"VectorDataMixin",
|
"VectorDataMixin",
|
||||||
]: ...
|
]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ...
|
def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
|
||||||
|
|
||||||
def __getitem__(
|
def __getitem__(
|
||||||
self,
|
self,
|
||||||
|
@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
raise ValueError(f"Unsure how to get item with key {item}")
|
raise ValueError(f"Unsure how to get item with key {item}")
|
||||||
|
|
||||||
# cast to DF
|
# cast to DF
|
||||||
return DataFrame(data)
|
return pd.DataFrame(data)
|
||||||
|
|
||||||
def _slice_range(
|
def _slice_range(
|
||||||
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
||||||
|
@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
|
|
||||||
# scalars need to be wrapped in series for pandas
|
# scalars need to be wrapped in series for pandas
|
||||||
if not isinstance(rows, (Iterable, slice)):
|
if not isinstance(rows, (Iterable, slice)):
|
||||||
val = Series([val])
|
val = pd.Series([val])
|
||||||
|
|
||||||
data[k] = val
|
data[k] = val
|
||||||
return data
|
return data
|
||||||
|
@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel):
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
raise e from None
|
raise e from None
|
||||||
|
|
||||||
|
def __len__(self) -> int:
    """
    Number of rows, taken from the length of the ``id`` column.

    An ``id`` column that does not reflect the real row count is
    considered a bug and is not compensated for here.
    """
    row_ids = self.id
    return len(row_ids)
|
||||||
|
|
||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
||||||
|
@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of the
    same name and treated as one of the aligned sub-tables.
    """

    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # Variadic tuple: the value holds four names, not one.
    # Not referenced inside this class - presumably used elsewhere to tell
    # ordinary fields apart from category tables (TODO confirm against callers).
    NON_CATEGORY_FIELDS: ClassVar[Tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Map each name in ``categories`` to the attribute of the same name."""
        # plain iteration: the positional index from ``enumerate`` was never used
        return {category: getattr(self, category) for category in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name; the whole category table is
                  returned (the category indexed with ``[:]``)
                * ``(rows, str)`` - ``rows`` is applied to the named category
                * ``int`` or ``slice`` - those rows are taken from ``id`` and
                  from every category and joined side by side

        Returns:
            For ``int``/``slice``: a dataframe whose columns have two levels
            (category name, column name) and whose index is the selected ids.
            For the other two forms: whatever the category's own indexing
            returns.

        Raises:
            ValueError: if ``item`` is none of the supported forms
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]
        elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            return self._categories[item[1]][item[0]]
        elif isinstance(item, (int, slice)):
            # get a slice of all the tables
            ids = self.id[item]
            if not isinstance(ids, Iterable):
                # pandas needs a sequence, so wrap a scalar id
                ids = pd.Series([ids])
            ids = pd.DataFrame({"id": ids})
            tables = [ids] + [table[item].reset_index() for table in self._categories.values()]
            names = [self.name] + self.categories
            # the frame is assembled after the branch so that a future
            # array-indexing branch can reuse the same construction
        else:
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # the ids sit under ``self.name`` in the column index; promote them to the row index
        df = pd.concat(tables, axis=1, keys=names)
        df.set_index((self.name, "id"), drop=True, inplace=True)
        return df
|
||||||
|
|
||||||
|
|
||||||
linkml_meta = LinkMLMeta(
|
linkml_meta = LinkMLMeta(
|
||||||
{
|
{
|
||||||
"annotations": {
|
"annotations": {
|
||||||
|
@ -617,7 +686,7 @@ class DynamicTable(DynamicTableMixin):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AlignedDynamicTable(DynamicTable):
|
class AlignedDynamicTable(AlignedDynamicTableMixin, DynamicTable):
|
||||||
"""
|
"""
|
||||||
DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group.
|
DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group.
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -5,7 +5,7 @@ from enum import Enum
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from ...hdmf_common.v1_7_0.hdmf_common_base import Data, Container
|
from ...hdmf_common.v1_7_0.hdmf_common_base import Data, Container
|
||||||
from pandas import DataFrame, Series
|
import pandas as pd
|
||||||
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
||||||
from pydantic import (
|
from pydantic import (
|
||||||
BaseModel,
|
BaseModel,
|
||||||
|
@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel):
|
||||||
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: int) -> DataFrame: ...
|
def __getitem__(self, item: int) -> pd.DataFrame: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
||||||
DataFrame,
|
pd.DataFrame,
|
||||||
list,
|
list,
|
||||||
"NDArray",
|
"NDArray",
|
||||||
"VectorDataMixin",
|
"VectorDataMixin",
|
||||||
]: ...
|
]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ...
|
def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
|
||||||
|
|
||||||
def __getitem__(
|
def __getitem__(
|
||||||
self,
|
self,
|
||||||
|
@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
raise ValueError(f"Unsure how to get item with key {item}")
|
raise ValueError(f"Unsure how to get item with key {item}")
|
||||||
|
|
||||||
# cast to DF
|
# cast to DF
|
||||||
return DataFrame(data)
|
return pd.DataFrame(data)
|
||||||
|
|
||||||
def _slice_range(
|
def _slice_range(
|
||||||
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
||||||
|
@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
|
|
||||||
# scalars need to be wrapped in series for pandas
|
# scalars need to be wrapped in series for pandas
|
||||||
if not isinstance(rows, (Iterable, slice)):
|
if not isinstance(rows, (Iterable, slice)):
|
||||||
val = Series([val])
|
val = pd.Series([val])
|
||||||
|
|
||||||
data[k] = val
|
data[k] = val
|
||||||
return data
|
return data
|
||||||
|
@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel):
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
raise e from None
|
raise e from None
|
||||||
|
|
||||||
|
def __len__(self) -> int:
    """
    Number of rows, taken from the length of the ``id`` column.

    An ``id`` column that does not reflect the real row count is
    considered a bug and is not compensated for here.
    """
    row_ids = self.id
    return len(row_ids)
|
||||||
|
|
||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
||||||
|
@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of the
    same name and treated as one of the aligned sub-tables.
    """

    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # Variadic tuple: the value holds four names, not one.
    # Not referenced inside this class - presumably used elsewhere to tell
    # ordinary fields apart from category tables (TODO confirm against callers).
    NON_CATEGORY_FIELDS: ClassVar[Tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Map each name in ``categories`` to the attribute of the same name."""
        # plain iteration: the positional index from ``enumerate`` was never used
        return {category: getattr(self, category) for category in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name; the whole category table is
                  returned (the category indexed with ``[:]``)
                * ``(rows, str)`` - ``rows`` is applied to the named category
                * ``int`` or ``slice`` - those rows are taken from ``id`` and
                  from every category and joined side by side

        Returns:
            For ``int``/``slice``: a dataframe whose columns have two levels
            (category name, column name) and whose index is the selected ids.
            For the other two forms: whatever the category's own indexing
            returns.

        Raises:
            ValueError: if ``item`` is none of the supported forms
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]
        elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            return self._categories[item[1]][item[0]]
        elif isinstance(item, (int, slice)):
            # get a slice of all the tables
            ids = self.id[item]
            if not isinstance(ids, Iterable):
                # pandas needs a sequence, so wrap a scalar id
                ids = pd.Series([ids])
            ids = pd.DataFrame({"id": ids})
            tables = [ids] + [table[item].reset_index() for table in self._categories.values()]
            names = [self.name] + self.categories
            # the frame is assembled after the branch so that a future
            # array-indexing branch can reuse the same construction
        else:
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # the ids sit under ``self.name`` in the column index; promote them to the row index
        df = pd.concat(tables, axis=1, keys=names)
        df.set_index((self.name, "id"), drop=True, inplace=True)
        return df
|
||||||
|
|
||||||
|
|
||||||
linkml_meta = LinkMLMeta(
|
linkml_meta = LinkMLMeta(
|
||||||
{
|
{
|
||||||
"annotations": {
|
"annotations": {
|
||||||
|
@ -617,7 +686,7 @@ class DynamicTable(DynamicTableMixin):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AlignedDynamicTable(DynamicTable):
|
class AlignedDynamicTable(AlignedDynamicTableMixin, DynamicTable):
|
||||||
"""
|
"""
|
||||||
DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group.
|
DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group.
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -5,7 +5,7 @@ from enum import Enum
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from ...hdmf_common.v1_8_0.hdmf_common_base import Data, Container
|
from ...hdmf_common.v1_8_0.hdmf_common_base import Data, Container
|
||||||
from pandas import DataFrame, Series
|
import pandas as pd
|
||||||
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
|
||||||
from pydantic import (
|
from pydantic import (
|
||||||
BaseModel,
|
BaseModel,
|
||||||
|
@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel):
|
||||||
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: int) -> DataFrame: ...
|
def __getitem__(self, item: int) -> pd.DataFrame: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
|
||||||
DataFrame,
|
pd.DataFrame,
|
||||||
list,
|
list,
|
||||||
"NDArray",
|
"NDArray",
|
||||||
"VectorDataMixin",
|
"VectorDataMixin",
|
||||||
]: ...
|
]: ...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ...
|
def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
|
||||||
|
|
||||||
def __getitem__(
|
def __getitem__(
|
||||||
self,
|
self,
|
||||||
|
@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
raise ValueError(f"Unsure how to get item with key {item}")
|
raise ValueError(f"Unsure how to get item with key {item}")
|
||||||
|
|
||||||
# cast to DF
|
# cast to DF
|
||||||
return DataFrame(data)
|
return pd.DataFrame(data)
|
||||||
|
|
||||||
def _slice_range(
|
def _slice_range(
|
||||||
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
|
||||||
|
@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel):
|
||||||
|
|
||||||
# scalars need to be wrapped in series for pandas
|
# scalars need to be wrapped in series for pandas
|
||||||
if not isinstance(rows, (Iterable, slice)):
|
if not isinstance(rows, (Iterable, slice)):
|
||||||
val = Series([val])
|
val = pd.Series([val])
|
||||||
|
|
||||||
data[k] = val
|
data[k] = val
|
||||||
return data
|
return data
|
||||||
|
@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel):
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
raise e from None
|
raise e from None
|
||||||
|
|
||||||
|
def __len__(self) -> int:
    """
    Number of rows, taken from the length of the ``id`` column.

    An ``id`` column that does not reflect the real row count is
    considered a bug and is not compensated for here.
    """
    row_ids = self.id
    return len(row_ids)
|
||||||
|
|
||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
def create_id(cls, model: Dict[str, Any]) -> Dict:
|
||||||
|
@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of the
    same name and treated as one of the aligned sub-tables.
    """

    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # Variadic tuple: the value holds four names, not one.
    # Not referenced inside this class - presumably used elsewhere to tell
    # ordinary fields apart from category tables (TODO confirm against callers).
    NON_CATEGORY_FIELDS: ClassVar[Tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Map each name in ``categories`` to the attribute of the same name."""
        # plain iteration: the positional index from ``enumerate`` was never used
        return {category: getattr(self, category) for category in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name; the whole category table is
                  returned (the category indexed with ``[:]``)
                * ``(rows, str)`` - ``rows`` is applied to the named category
                * ``int`` or ``slice`` - those rows are taken from ``id`` and
                  from every category and joined side by side

        Returns:
            For ``int``/``slice``: a dataframe whose columns have two levels
            (category name, column name) and whose index is the selected ids.
            For the other two forms: whatever the category's own indexing
            returns.

        Raises:
            ValueError: if ``item`` is none of the supported forms
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]
        elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            return self._categories[item[1]][item[0]]
        elif isinstance(item, (int, slice)):
            # get a slice of all the tables
            ids = self.id[item]
            if not isinstance(ids, Iterable):
                # pandas needs a sequence, so wrap a scalar id
                ids = pd.Series([ids])
            ids = pd.DataFrame({"id": ids})
            tables = [ids] + [table[item].reset_index() for table in self._categories.values()]
            names = [self.name] + self.categories
            # the frame is assembled after the branch so that a future
            # array-indexing branch can reuse the same construction
        else:
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # the ids sit under ``self.name`` in the column index; promote them to the row index
        df = pd.concat(tables, axis=1, keys=names)
        df.set_index((self.name, "id"), drop=True, inplace=True)
        return df
|
||||||
|
|
||||||
|
|
||||||
linkml_meta = LinkMLMeta(
|
linkml_meta = LinkMLMeta(
|
||||||
{
|
{
|
||||||
"annotations": {
|
"annotations": {
|
||||||
|
@ -617,7 +686,7 @@ class DynamicTable(DynamicTableMixin):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AlignedDynamicTable(DynamicTable):
|
class AlignedDynamicTable(AlignedDynamicTableMixin, DynamicTable):
|
||||||
"""
|
"""
|
||||||
DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group.
|
DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group.
|
||||||
"""
|
"""
|
||||||
|
|
Loading…
Reference in a new issue