initial draft of aligned dynamic table before testing and refining. switch pandas import to module-level

This commit is contained in:
sneakers-the-rat 2024-08-08 00:31:41 -07:00
parent 92d28baedd
commit 54c18e333e
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
15 changed files with 988 additions and 86 deletions

View file

@ -305,6 +305,10 @@ There are several different ways to create references between objects in nwb/hdm
target_type: ElectrodeGroup target_type: ElectrodeGroup
reftype: object reftype: object
``` ```
- `TimeSeriesReferenceVectorData` is a compound dtype that behaves like `VectorData` and `VectorIndex` combined
into a single type. It differs slightly in that each row of the vector can refer to a different table,
and in that it handles selection differently (with `start` and `count`
rather than a series of indices for the end of each cell).
- Implicitly, hdmf creates references between objects according to some naming conventions, eg. - Implicitly, hdmf creates references between objects according to some naming conventions, eg.
an attribute/dataset that is a `VectorIndex` named `mydata_index` will be linked to a `VectorData` an attribute/dataset that is a `VectorIndex` named `mydata_index` will be linked to a `VectorData`
object `mydata`. object `mydata`.

View file

@ -252,6 +252,8 @@ class AfterGenerateClass:
cls.cls.bases = ["VectorIndexMixin"] cls.cls.bases = ["VectorIndexMixin"]
elif cls.cls.name == "DynamicTableRegion": elif cls.cls.name == "DynamicTableRegion":
cls.cls.bases = ["DynamicTableRegionMixin", "VectorData"] cls.cls.bases = ["DynamicTableRegionMixin", "VectorData"]
elif cls.cls.name == "AlignedDynamicTable":
cls.cls.bases = ["AlignedDynamicTableMixin", "DynamicTable"]
return cls return cls

View file

@ -18,7 +18,7 @@ from typing import (
import numpy as np import numpy as np
from linkml.generators.pydanticgen.template import Import, Imports, ObjectImport from linkml.generators.pydanticgen.template import Import, Imports, ObjectImport
from numpydantic import NDArray, Shape from numpydantic import NDArray, Shape
from pandas import DataFrame, Series import pandas as pd
from pydantic import ( from pydantic import (
BaseModel, BaseModel,
ConfigDict, ConfigDict,
@ -66,21 +66,21 @@ class DynamicTableMixin(BaseModel):
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
@overload @overload
def __getitem__(self, item: int) -> DataFrame: ... def __getitem__(self, item: int) -> pd.DataFrame: ...
@overload @overload
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
@overload @overload
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
DataFrame, pd.DataFrame,
list, list,
"NDArray", "NDArray",
"VectorDataMixin", "VectorDataMixin",
]: ... ]: ...
@overload @overload
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
def __getitem__( def __getitem__(
self, self,
@ -131,7 +131,7 @@ class DynamicTableMixin(BaseModel):
raise ValueError(f"Unsure how to get item with key {item}") raise ValueError(f"Unsure how to get item with key {item}")
# cast to DF # cast to DF
return DataFrame(data) return pd.DataFrame(data)
def _slice_range( def _slice_range(
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
@ -149,7 +149,7 @@ class DynamicTableMixin(BaseModel):
# scalars need to be wrapped in series for pandas # scalars need to be wrapped in series for pandas
if not isinstance(rows, (Iterable, slice)): if not isinstance(rows, (Iterable, slice)):
val = Series([val]) val = pd.Series([val])
data[k] = val data[k] = val
return data return data
@ -180,6 +180,14 @@ class DynamicTableMixin(BaseModel):
except AttributeError: except AttributeError:
raise e from None raise e from None
def __len__(self) -> int:
    """
    Number of rows in the table, taken from the length of the ``id`` column.

    An ``id`` column whose length does not match the table is considered a bug
    and is not compensated for here.
    """
    row_ids = self.id
    return len(row_ids)
@model_validator(mode="before") @model_validator(mode="before")
@classmethod @classmethod
def create_id(cls, model: Dict[str, Any]) -> Dict: def create_id(cls, model: Dict[str, Any]) -> Dict:
@ -467,11 +475,70 @@ class DynamicTableRegionMixin(BaseModel):
self.table[self.value[key]] = value self.table[self.value[key]] = value
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of this
    model and treated as one sub-table. Indexing by row gathers the same rows
    from each sub-table and concatenates them column-wise.
    """

    # Type annotation for pydantic's extra fields: extra values are expected
    # to be tables. NOTE(review): presumably the category tables are stored as
    # extra fields -- confirm against the model config.
    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # NOTE(review): not read anywhere in this class; presumably the declared
    # fields that should never be treated as category tables -- confirm usage.
    NON_CATEGORY_FIELDS: ClassVar[tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Map each name in ``categories`` to the attribute of the same name."""
        return {name: getattr(self, name) for name in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name; that table is indexed with ``[:]``
                * ``(int | slice, str)`` - rows (first element) of the single
                  category table named by the second element
                * ``int`` or ``slice`` - the same rows of ``id`` and of every
                  category table, concatenated column-wise

        Returns:
            For the ``int``/``slice`` form, a :class:`pandas.DataFrame` whose
            columns are grouped under the table names (``self.name`` for the
            id column) and whose index is the ``(self.name, "id")`` column.
            For the single-table forms, whatever the category table's own
            ``__getitem__`` returns.

        Raises:
            ValueError: if ``item`` is none of the supported forms
            KeyError: if a category name is not present in ``categories``
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]
        if isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            return self._categories[item[1]][item[0]]
        if not isinstance(item, (int, slice)):
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # get a slice of all the tables
        ids = self.id[item]
        if not isinstance(ids, Iterable):
            # a scalar id has to be wrapped before pandas can make a column of it
            ids = pd.Series([ids])
        ids = pd.DataFrame({"id": ids})
        tables = [ids] + [table[item].reset_index() for table in self._categories.values()]
        names = [self.name] + self.categories

        # ``keys`` adds an outer column level holding the table names, so the
        # id column is addressed as ``(self.name, "id")``
        df = pd.concat(tables, axis=1, keys=names)
        df.set_index((self.name, "id"), drop=True, inplace=True)
        return df
DYNAMIC_TABLE_IMPORTS = Imports( DYNAMIC_TABLE_IMPORTS = Imports(
imports=[ imports=[
Import( Import(module="pandas", alias="pd"),
module="pandas", objects=[ObjectImport(name="DataFrame"), ObjectImport(name="Series")]
),
Import( Import(
module="typing", module="typing",
objects=[ objects=[
@ -508,4 +575,5 @@ DYNAMIC_TABLE_INJECTS = [
VectorIndexMixin, VectorIndexMixin,
DynamicTableRegionMixin, DynamicTableRegionMixin,
DynamicTableMixin, DynamicTableMixin,
AlignedDynamicTableMixin,
] ]

View file

@ -4,7 +4,7 @@ from decimal import Decimal
from enum import Enum from enum import Enum
import re import re
import sys import sys
from pandas import DataFrame, Series import pandas as pd
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
from numpydantic import NDArray, Shape from numpydantic import NDArray, Shape
from pydantic import ( from pydantic import (
@ -244,21 +244,21 @@ class DynamicTableMixin(BaseModel):
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
@overload @overload
def __getitem__(self, item: int) -> DataFrame: ... def __getitem__(self, item: int) -> pd.DataFrame: ...
@overload @overload
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
@overload @overload
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
DataFrame, pd.DataFrame,
list, list,
"NDArray", "NDArray",
"VectorDataMixin", "VectorDataMixin",
]: ... ]: ...
@overload @overload
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
def __getitem__( def __getitem__(
self, self,
@ -309,7 +309,7 @@ class DynamicTableMixin(BaseModel):
raise ValueError(f"Unsure how to get item with key {item}") raise ValueError(f"Unsure how to get item with key {item}")
# cast to DF # cast to DF
return DataFrame(data) return pd.DataFrame(data)
def _slice_range( def _slice_range(
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
@ -327,7 +327,7 @@ class DynamicTableMixin(BaseModel):
# scalars need to be wrapped in series for pandas # scalars need to be wrapped in series for pandas
if not isinstance(rows, (Iterable, slice)): if not isinstance(rows, (Iterable, slice)):
val = Series([val]) val = pd.Series([val])
data[k] = val data[k] = val
return data return data
@ -358,6 +358,14 @@ class DynamicTableMixin(BaseModel):
except AttributeError: except AttributeError:
raise e from None raise e from None
def __len__(self) -> int:
    """
    Number of rows in the table, taken from the length of the ``id`` column.

    An ``id`` column whose length does not match the table is considered a bug
    and is not compensated for here.
    """
    row_ids = self.id
    return len(row_ids)
@model_validator(mode="before") @model_validator(mode="before")
@classmethod @classmethod
def create_id(cls, model: Dict[str, Any]) -> Dict: def create_id(cls, model: Dict[str, Any]) -> Dict:
@ -492,6 +500,67 @@ class DynamicTableMixin(BaseModel):
) )
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of this
    model and treated as one sub-table. Indexing by row gathers the same rows
    from each sub-table and concatenates them column-wise.
    """

    # Type annotation for pydantic's extra fields: extra values are expected
    # to be tables. NOTE(review): presumably the category tables are stored as
    # extra fields -- confirm against the model config.
    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # NOTE(review): not read anywhere in this class; presumably the declared
    # fields that should never be treated as category tables -- confirm usage.
    NON_CATEGORY_FIELDS: ClassVar[tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Map each name in ``categories`` to the attribute of the same name."""
        return {name: getattr(self, name) for name in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name; that table is indexed with ``[:]``
                * ``(int | slice, str)`` - rows (first element) of the single
                  category table named by the second element
                * ``int`` or ``slice`` - the same rows of ``id`` and of every
                  category table, concatenated column-wise

        Returns:
            For the ``int``/``slice`` form, a :class:`pandas.DataFrame` whose
            columns are grouped under the table names (``self.name`` for the
            id column) and whose index is the ``(self.name, "id")`` column.
            For the single-table forms, whatever the category table's own
            ``__getitem__`` returns.

        Raises:
            ValueError: if ``item`` is none of the supported forms
            KeyError: if a category name is not present in ``categories``
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]
        if isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            return self._categories[item[1]][item[0]]
        if not isinstance(item, (int, slice)):
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # get a slice of all the tables
        ids = self.id[item]
        if not isinstance(ids, Iterable):
            # a scalar id has to be wrapped before pandas can make a column of it
            ids = pd.Series([ids])
        ids = pd.DataFrame({"id": ids})
        tables = [ids] + [table[item].reset_index() for table in self._categories.values()]
        names = [self.name] + self.categories

        # ``keys`` adds an outer column level holding the table names, so the
        # id column is addressed as ``(self.name, "id")``
        df = pd.concat(tables, axis=1, keys=names)
        df.set_index((self.name, "id"), drop=True, inplace=True)
        return df
linkml_meta = LinkMLMeta( linkml_meta = LinkMLMeta(
{ {
"annotations": { "annotations": {

View file

@ -4,7 +4,7 @@ from decimal import Decimal
from enum import Enum from enum import Enum
import re import re
import sys import sys
from pandas import DataFrame, Series import pandas as pd
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
from numpydantic import NDArray, Shape from numpydantic import NDArray, Shape
from pydantic import ( from pydantic import (
@ -244,21 +244,21 @@ class DynamicTableMixin(BaseModel):
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
@overload @overload
def __getitem__(self, item: int) -> DataFrame: ... def __getitem__(self, item: int) -> pd.DataFrame: ...
@overload @overload
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
@overload @overload
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
DataFrame, pd.DataFrame,
list, list,
"NDArray", "NDArray",
"VectorDataMixin", "VectorDataMixin",
]: ... ]: ...
@overload @overload
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
def __getitem__( def __getitem__(
self, self,
@ -309,7 +309,7 @@ class DynamicTableMixin(BaseModel):
raise ValueError(f"Unsure how to get item with key {item}") raise ValueError(f"Unsure how to get item with key {item}")
# cast to DF # cast to DF
return DataFrame(data) return pd.DataFrame(data)
def _slice_range( def _slice_range(
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
@ -327,7 +327,7 @@ class DynamicTableMixin(BaseModel):
# scalars need to be wrapped in series for pandas # scalars need to be wrapped in series for pandas
if not isinstance(rows, (Iterable, slice)): if not isinstance(rows, (Iterable, slice)):
val = Series([val]) val = pd.Series([val])
data[k] = val data[k] = val
return data return data
@ -358,6 +358,14 @@ class DynamicTableMixin(BaseModel):
except AttributeError: except AttributeError:
raise e from None raise e from None
def __len__(self) -> int:
    """
    Number of rows in the table, taken from the length of the ``id`` column.

    An ``id`` column whose length does not match the table is considered a bug
    and is not compensated for here.
    """
    row_ids = self.id
    return len(row_ids)
@model_validator(mode="before") @model_validator(mode="before")
@classmethod @classmethod
def create_id(cls, model: Dict[str, Any]) -> Dict: def create_id(cls, model: Dict[str, Any]) -> Dict:
@ -492,6 +500,67 @@ class DynamicTableMixin(BaseModel):
) )
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of this
    model and treated as one sub-table. Indexing by row gathers the same rows
    from each sub-table and concatenates them column-wise.
    """

    # Type annotation for pydantic's extra fields: extra values are expected
    # to be tables. NOTE(review): presumably the category tables are stored as
    # extra fields -- confirm against the model config.
    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # NOTE(review): not read anywhere in this class; presumably the declared
    # fields that should never be treated as category tables -- confirm usage.
    NON_CATEGORY_FIELDS: ClassVar[tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Map each name in ``categories`` to the attribute of the same name."""
        return {name: getattr(self, name) for name in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name; that table is indexed with ``[:]``
                * ``(int | slice, str)`` - rows (first element) of the single
                  category table named by the second element
                * ``int`` or ``slice`` - the same rows of ``id`` and of every
                  category table, concatenated column-wise

        Returns:
            For the ``int``/``slice`` form, a :class:`pandas.DataFrame` whose
            columns are grouped under the table names (``self.name`` for the
            id column) and whose index is the ``(self.name, "id")`` column.
            For the single-table forms, whatever the category table's own
            ``__getitem__`` returns.

        Raises:
            ValueError: if ``item`` is none of the supported forms
            KeyError: if a category name is not present in ``categories``
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]
        if isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            return self._categories[item[1]][item[0]]
        if not isinstance(item, (int, slice)):
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # get a slice of all the tables
        ids = self.id[item]
        if not isinstance(ids, Iterable):
            # a scalar id has to be wrapped before pandas can make a column of it
            ids = pd.Series([ids])
        ids = pd.DataFrame({"id": ids})
        tables = [ids] + [table[item].reset_index() for table in self._categories.values()]
        names = [self.name] + self.categories

        # ``keys`` adds an outer column level holding the table names, so the
        # id column is addressed as ``(self.name, "id")``
        df = pd.concat(tables, axis=1, keys=names)
        df.set_index((self.name, "id"), drop=True, inplace=True)
        return df
linkml_meta = LinkMLMeta( linkml_meta = LinkMLMeta(
{ {
"annotations": { "annotations": {

View file

@ -4,7 +4,7 @@ from decimal import Decimal
from enum import Enum from enum import Enum
import re import re
import sys import sys
from pandas import DataFrame, Series import pandas as pd
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
from numpydantic import NDArray, Shape from numpydantic import NDArray, Shape
from pydantic import ( from pydantic import (
@ -244,21 +244,21 @@ class DynamicTableMixin(BaseModel):
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
@overload @overload
def __getitem__(self, item: int) -> DataFrame: ... def __getitem__(self, item: int) -> pd.DataFrame: ...
@overload @overload
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
@overload @overload
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
DataFrame, pd.DataFrame,
list, list,
"NDArray", "NDArray",
"VectorDataMixin", "VectorDataMixin",
]: ... ]: ...
@overload @overload
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
def __getitem__( def __getitem__(
self, self,
@ -309,7 +309,7 @@ class DynamicTableMixin(BaseModel):
raise ValueError(f"Unsure how to get item with key {item}") raise ValueError(f"Unsure how to get item with key {item}")
# cast to DF # cast to DF
return DataFrame(data) return pd.DataFrame(data)
def _slice_range( def _slice_range(
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
@ -327,7 +327,7 @@ class DynamicTableMixin(BaseModel):
# scalars need to be wrapped in series for pandas # scalars need to be wrapped in series for pandas
if not isinstance(rows, (Iterable, slice)): if not isinstance(rows, (Iterable, slice)):
val = Series([val]) val = pd.Series([val])
data[k] = val data[k] = val
return data return data
@ -358,6 +358,14 @@ class DynamicTableMixin(BaseModel):
except AttributeError: except AttributeError:
raise e from None raise e from None
def __len__(self) -> int:
    """
    Number of rows in the table, taken from the length of the ``id`` column.

    An ``id`` column whose length does not match the table is considered a bug
    and is not compensated for here.
    """
    row_ids = self.id
    return len(row_ids)
@model_validator(mode="before") @model_validator(mode="before")
@classmethod @classmethod
def create_id(cls, model: Dict[str, Any]) -> Dict: def create_id(cls, model: Dict[str, Any]) -> Dict:
@ -492,6 +500,67 @@ class DynamicTableMixin(BaseModel):
) )
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of this
    model and treated as one sub-table. Indexing by row gathers the same rows
    from each sub-table and concatenates them column-wise.
    """

    # Type annotation for pydantic's extra fields: extra values are expected
    # to be tables. NOTE(review): presumably the category tables are stored as
    # extra fields -- confirm against the model config.
    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # NOTE(review): not read anywhere in this class; presumably the declared
    # fields that should never be treated as category tables -- confirm usage.
    NON_CATEGORY_FIELDS: ClassVar[tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Map each name in ``categories`` to the attribute of the same name."""
        return {name: getattr(self, name) for name in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name; that table is indexed with ``[:]``
                * ``(int | slice, str)`` - rows (first element) of the single
                  category table named by the second element
                * ``int`` or ``slice`` - the same rows of ``id`` and of every
                  category table, concatenated column-wise

        Returns:
            For the ``int``/``slice`` form, a :class:`pandas.DataFrame` whose
            columns are grouped under the table names (``self.name`` for the
            id column) and whose index is the ``(self.name, "id")`` column.
            For the single-table forms, whatever the category table's own
            ``__getitem__`` returns.

        Raises:
            ValueError: if ``item`` is none of the supported forms
            KeyError: if a category name is not present in ``categories``
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]
        if isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            return self._categories[item[1]][item[0]]
        if not isinstance(item, (int, slice)):
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # get a slice of all the tables
        ids = self.id[item]
        if not isinstance(ids, Iterable):
            # a scalar id has to be wrapped before pandas can make a column of it
            ids = pd.Series([ids])
        ids = pd.DataFrame({"id": ids})
        tables = [ids] + [table[item].reset_index() for table in self._categories.values()]
        names = [self.name] + self.categories

        # ``keys`` adds an outer column level holding the table names, so the
        # id column is addressed as ``(self.name, "id")``
        df = pd.concat(tables, axis=1, keys=names)
        df.set_index((self.name, "id"), drop=True, inplace=True)
        return df
linkml_meta = LinkMLMeta( linkml_meta = LinkMLMeta(
{ {
"annotations": { "annotations": {

View file

@ -5,7 +5,7 @@ from enum import Enum
import re import re
import sys import sys
from ...hdmf_common.v1_2_0.hdmf_common_base import Data, Container from ...hdmf_common.v1_2_0.hdmf_common_base import Data, Container
from pandas import DataFrame, Series import pandas as pd
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
from numpydantic import NDArray, Shape from numpydantic import NDArray, Shape
from pydantic import ( from pydantic import (
@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel):
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
@overload @overload
def __getitem__(self, item: int) -> DataFrame: ... def __getitem__(self, item: int) -> pd.DataFrame: ...
@overload @overload
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
@overload @overload
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
DataFrame, pd.DataFrame,
list, list,
"NDArray", "NDArray",
"VectorDataMixin", "VectorDataMixin",
]: ... ]: ...
@overload @overload
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
def __getitem__( def __getitem__(
self, self,
@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel):
raise ValueError(f"Unsure how to get item with key {item}") raise ValueError(f"Unsure how to get item with key {item}")
# cast to DF # cast to DF
return DataFrame(data) return pd.DataFrame(data)
def _slice_range( def _slice_range(
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel):
# scalars need to be wrapped in series for pandas # scalars need to be wrapped in series for pandas
if not isinstance(rows, (Iterable, slice)): if not isinstance(rows, (Iterable, slice)):
val = Series([val]) val = pd.Series([val])
data[k] = val data[k] = val
return data return data
@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel):
except AttributeError: except AttributeError:
raise e from None raise e from None
def __len__(self) -> int:
    """
    Number of rows in the table, taken from the length of the ``id`` column.

    An ``id`` column whose length does not match the table is considered a bug
    and is not compensated for here.
    """
    row_ids = self.id
    return len(row_ids)
@model_validator(mode="before") @model_validator(mode="before")
@classmethod @classmethod
def create_id(cls, model: Dict[str, Any]) -> Dict: def create_id(cls, model: Dict[str, Any]) -> Dict:
@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel):
) )
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of this
    model and treated as one sub-table. Indexing by row gathers the same rows
    from each sub-table and concatenates them column-wise.
    """

    # Type annotation for pydantic's extra fields: extra values are expected
    # to be tables. NOTE(review): presumably the category tables are stored as
    # extra fields -- confirm against the model config.
    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # NOTE(review): not read anywhere in this class; presumably the declared
    # fields that should never be treated as category tables -- confirm usage.
    NON_CATEGORY_FIELDS: ClassVar[tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Map each name in ``categories`` to the attribute of the same name."""
        return {name: getattr(self, name) for name in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name; that table is indexed with ``[:]``
                * ``(int | slice, str)`` - rows (first element) of the single
                  category table named by the second element
                * ``int`` or ``slice`` - the same rows of ``id`` and of every
                  category table, concatenated column-wise

        Returns:
            For the ``int``/``slice`` form, a :class:`pandas.DataFrame` whose
            columns are grouped under the table names (``self.name`` for the
            id column) and whose index is the ``(self.name, "id")`` column.
            For the single-table forms, whatever the category table's own
            ``__getitem__`` returns.

        Raises:
            ValueError: if ``item`` is none of the supported forms
            KeyError: if a category name is not present in ``categories``
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]
        if isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            return self._categories[item[1]][item[0]]
        if not isinstance(item, (int, slice)):
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # get a slice of all the tables
        ids = self.id[item]
        if not isinstance(ids, Iterable):
            # a scalar id has to be wrapped before pandas can make a column of it
            ids = pd.Series([ids])
        ids = pd.DataFrame({"id": ids})
        tables = [ids] + [table[item].reset_index() for table in self._categories.values()]
        names = [self.name] + self.categories

        # ``keys`` adds an outer column level holding the table names, so the
        # id column is addressed as ``(self.name, "id")``
        df = pd.concat(tables, axis=1, keys=names)
        df.set_index((self.name, "id"), drop=True, inplace=True)
        return df
linkml_meta = LinkMLMeta( linkml_meta = LinkMLMeta(
{ {
"annotations": { "annotations": {

View file

@ -5,7 +5,7 @@ from enum import Enum
import re import re
import sys import sys
from ...hdmf_common.v1_2_1.hdmf_common_base import Data, Container from ...hdmf_common.v1_2_1.hdmf_common_base import Data, Container
from pandas import DataFrame, Series import pandas as pd
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
from numpydantic import NDArray, Shape from numpydantic import NDArray, Shape
from pydantic import ( from pydantic import (
@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel):
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
@overload @overload
def __getitem__(self, item: int) -> DataFrame: ... def __getitem__(self, item: int) -> pd.DataFrame: ...
@overload @overload
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
@overload @overload
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
DataFrame, pd.DataFrame,
list, list,
"NDArray", "NDArray",
"VectorDataMixin", "VectorDataMixin",
]: ... ]: ...
@overload @overload
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
def __getitem__( def __getitem__(
self, self,
@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel):
raise ValueError(f"Unsure how to get item with key {item}") raise ValueError(f"Unsure how to get item with key {item}")
# cast to DF # cast to DF
return DataFrame(data) return pd.DataFrame(data)
def _slice_range( def _slice_range(
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel):
# scalars need to be wrapped in series for pandas # scalars need to be wrapped in series for pandas
if not isinstance(rows, (Iterable, slice)): if not isinstance(rows, (Iterable, slice)):
val = Series([val]) val = pd.Series([val])
data[k] = val data[k] = val
return data return data
@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel):
except AttributeError: except AttributeError:
raise e from None raise e from None
def __len__(self) -> int:
    """
    Number of rows in the table, taken from the length of the ``id`` column.

    An ``id`` column whose length does not match the table is considered a bug
    and is not compensated for here.
    """
    row_ids = self.id
    return len(row_ids)
@model_validator(mode="before") @model_validator(mode="before")
@classmethod @classmethod
def create_id(cls, model: Dict[str, Any]) -> Dict: def create_id(cls, model: Dict[str, Any]) -> Dict:
@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel):
) )
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of this
    model and treated as one sub-table. Indexing by row gathers the same rows
    from each sub-table and concatenates them column-wise.
    """

    # Type annotation for pydantic's extra fields: extra values are expected
    # to be tables. NOTE(review): presumably the category tables are stored as
    # extra fields -- confirm against the model config.
    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # NOTE(review): not read anywhere in this class; presumably the declared
    # fields that should never be treated as category tables -- confirm usage.
    NON_CATEGORY_FIELDS: ClassVar[tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Map each name in ``categories`` to the attribute of the same name."""
        return {name: getattr(self, name) for name in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name; that table is indexed with ``[:]``
                * ``(int | slice, str)`` - rows (first element) of the single
                  category table named by the second element
                * ``int`` or ``slice`` - the same rows of ``id`` and of every
                  category table, concatenated column-wise

        Returns:
            For the ``int``/``slice`` form, a :class:`pandas.DataFrame` whose
            columns are grouped under the table names (``self.name`` for the
            id column) and whose index is the ``(self.name, "id")`` column.
            For the single-table forms, whatever the category table's own
            ``__getitem__`` returns.

        Raises:
            ValueError: if ``item`` is none of the supported forms
            KeyError: if a category name is not present in ``categories``
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]
        if isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            return self._categories[item[1]][item[0]]
        if not isinstance(item, (int, slice)):
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # get a slice of all the tables
        ids = self.id[item]
        if not isinstance(ids, Iterable):
            # a scalar id has to be wrapped before pandas can make a column of it
            ids = pd.Series([ids])
        ids = pd.DataFrame({"id": ids})
        tables = [ids] + [table[item].reset_index() for table in self._categories.values()]
        names = [self.name] + self.categories

        # ``keys`` adds an outer column level holding the table names, so the
        # id column is addressed as ``(self.name, "id")``
        df = pd.concat(tables, axis=1, keys=names)
        df.set_index((self.name, "id"), drop=True, inplace=True)
        return df
linkml_meta = LinkMLMeta( linkml_meta = LinkMLMeta(
{ {
"annotations": { "annotations": {

View file

@ -5,7 +5,7 @@ from enum import Enum
import re import re
import sys import sys
from ...hdmf_common.v1_3_0.hdmf_common_base import Data, Container from ...hdmf_common.v1_3_0.hdmf_common_base import Data, Container
from pandas import DataFrame, Series import pandas as pd
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
from numpydantic import NDArray, Shape from numpydantic import NDArray, Shape
from pydantic import ( from pydantic import (
@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel):
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
@overload @overload
def __getitem__(self, item: int) -> DataFrame: ... def __getitem__(self, item: int) -> pd.DataFrame: ...
@overload @overload
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
@overload @overload
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
DataFrame, pd.DataFrame,
list, list,
"NDArray", "NDArray",
"VectorDataMixin", "VectorDataMixin",
]: ... ]: ...
@overload @overload
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
def __getitem__( def __getitem__(
self, self,
@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel):
raise ValueError(f"Unsure how to get item with key {item}") raise ValueError(f"Unsure how to get item with key {item}")
# cast to DF # cast to DF
return DataFrame(data) return pd.DataFrame(data)
def _slice_range( def _slice_range(
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel):
# scalars need to be wrapped in series for pandas # scalars need to be wrapped in series for pandas
if not isinstance(rows, (Iterable, slice)): if not isinstance(rows, (Iterable, slice)):
val = Series([val]) val = pd.Series([val])
data[k] = val data[k] = val
return data return data
@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel):
except AttributeError: except AttributeError:
raise e from None raise e from None
def __len__(self) -> int:
    """
    Number of rows in the table, measured as the length of the ``id`` column.

    The ``id`` column is treated as authoritative; if it does not reflect the
    true row count, that is a bug rather than something handled here.
    """
    row_ids = self.id
    return len(row_ids)
@model_validator(mode="before") @model_validator(mode="before")
@classmethod @classmethod
def create_id(cls, model: Dict[str, Any]) -> Dict: def create_id(cls, model: Dict[str, Any]) -> Dict:
@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel):
) )
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of this model
    and treated as one of the aligned sub-tables. Indexing returns pandas objects:
    selecting rows across all categories produces a frame whose columns are keyed by
    ``(category name, column name)`` and whose index is the ``(self.name, "id")`` column.
    """

    # Extra (undeclared) fields are annotated as sub-tables.
    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # NOTE(review): not referenced inside this class; presumably the field names that
    # must not be interpreted as category tables - confirm against callers.
    NON_CATEGORY_FIELDS: ClassVar[Tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    # NOTE(review): defaults to None, in which case row indexing in ``__getitem__``
    # fails on ``self.id[item]`` - presumably populated before use; confirm.
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Map each name in ``categories`` to the attribute of the same name."""
        return {category: getattr(self, category) for category in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name; the whole category table is returned
                  (the category is indexed with ``[:]``)
                * ``(rows, category)`` - a 2-tuple whose second element is a category
                  name; ``rows`` is forwarded to that category's own ``__getitem__``
                * ``int`` or ``slice`` - the same rows are taken from ``id`` and from
                  every category and joined column-wise

        Returns:
            The selection produced by the category table for the ``str`` and tuple
            forms; for ``int``/``slice`` a :class:`pandas.DataFrame` with one column
            group per category, indexed by ``(self.name, "id")``.

        Raises:
            ValueError: if ``item`` is none of the supported forms
            KeyError: if a category name is not present in ``categories``
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]
        if isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            return self._categories[item[1]][item[0]]
        if not isinstance(item, (int, slice)):
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # get a slice of all the tables
        ids = self.id[item]
        if not isinstance(ids, Iterable):
            # scalars need to be wrapped in a series for pandas
            ids = pd.Series([ids])

        # Build the tables and their concat keys from the same mapping so the two
        # sequences cannot drift apart (e.g. a category name listed twice).
        categories = self._categories
        tables = [pd.DataFrame({"id": ids})]
        tables.extend(table[item].reset_index() for table in categories.values())
        names = [self.name, *categories]

        df = pd.concat(tables, axis=1, keys=names)
        df.set_index((self.name, "id"), drop=True, inplace=True)
        return df
linkml_meta = LinkMLMeta( linkml_meta = LinkMLMeta(
{ {
"annotations": { "annotations": {

View file

@ -5,7 +5,7 @@ from enum import Enum
import re import re
import sys import sys
from ...hdmf_common.v1_4_0.hdmf_common_base import Data, Container from ...hdmf_common.v1_4_0.hdmf_common_base import Data, Container
from pandas import DataFrame, Series import pandas as pd
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
from numpydantic import NDArray, Shape from numpydantic import NDArray, Shape
from pydantic import ( from pydantic import (
@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel):
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
@overload @overload
def __getitem__(self, item: int) -> DataFrame: ... def __getitem__(self, item: int) -> pd.DataFrame: ...
@overload @overload
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
@overload @overload
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
DataFrame, pd.DataFrame,
list, list,
"NDArray", "NDArray",
"VectorDataMixin", "VectorDataMixin",
]: ... ]: ...
@overload @overload
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
def __getitem__( def __getitem__(
self, self,
@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel):
raise ValueError(f"Unsure how to get item with key {item}") raise ValueError(f"Unsure how to get item with key {item}")
# cast to DF # cast to DF
return DataFrame(data) return pd.DataFrame(data)
def _slice_range( def _slice_range(
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel):
# scalars need to be wrapped in series for pandas # scalars need to be wrapped in series for pandas
if not isinstance(rows, (Iterable, slice)): if not isinstance(rows, (Iterable, slice)):
val = Series([val]) val = pd.Series([val])
data[k] = val data[k] = val
return data return data
@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel):
except AttributeError: except AttributeError:
raise e from None raise e from None
def __len__(self) -> int:
    """
    Number of rows in the table, measured as the length of the ``id`` column.

    The ``id`` column is treated as authoritative; if it does not reflect the
    true row count, that is a bug rather than something handled here.
    """
    row_ids = self.id
    return len(row_ids)
@model_validator(mode="before") @model_validator(mode="before")
@classmethod @classmethod
def create_id(cls, model: Dict[str, Any]) -> Dict: def create_id(cls, model: Dict[str, Any]) -> Dict:
@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel):
) )
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of this model
    and treated as one of the aligned sub-tables. Indexing returns pandas objects:
    selecting rows across all categories produces a frame whose columns are keyed by
    ``(category name, column name)`` and whose index is the ``(self.name, "id")`` column.
    """

    # Extra (undeclared) fields are annotated as sub-tables.
    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # NOTE(review): not referenced inside this class; presumably the field names that
    # must not be interpreted as category tables - confirm against callers.
    NON_CATEGORY_FIELDS: ClassVar[Tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    # NOTE(review): defaults to None, in which case row indexing in ``__getitem__``
    # fails on ``self.id[item]`` - presumably populated before use; confirm.
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Map each name in ``categories`` to the attribute of the same name."""
        return {category: getattr(self, category) for category in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name; the whole category table is returned
                  (the category is indexed with ``[:]``)
                * ``(rows, category)`` - a 2-tuple whose second element is a category
                  name; ``rows`` is forwarded to that category's own ``__getitem__``
                * ``int`` or ``slice`` - the same rows are taken from ``id`` and from
                  every category and joined column-wise

        Returns:
            The selection produced by the category table for the ``str`` and tuple
            forms; for ``int``/``slice`` a :class:`pandas.DataFrame` with one column
            group per category, indexed by ``(self.name, "id")``.

        Raises:
            ValueError: if ``item`` is none of the supported forms
            KeyError: if a category name is not present in ``categories``
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]
        if isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            return self._categories[item[1]][item[0]]
        if not isinstance(item, (int, slice)):
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # get a slice of all the tables
        ids = self.id[item]
        if not isinstance(ids, Iterable):
            # scalars need to be wrapped in a series for pandas
            ids = pd.Series([ids])

        # Build the tables and their concat keys from the same mapping so the two
        # sequences cannot drift apart (e.g. a category name listed twice).
        categories = self._categories
        tables = [pd.DataFrame({"id": ids})]
        tables.extend(table[item].reset_index() for table in categories.values())
        names = [self.name, *categories]

        df = pd.concat(tables, axis=1, keys=names)
        df.set_index((self.name, "id"), drop=True, inplace=True)
        return df
linkml_meta = LinkMLMeta( linkml_meta = LinkMLMeta(
{ {
"annotations": { "annotations": {

View file

@ -5,7 +5,7 @@ from enum import Enum
import re import re
import sys import sys
from ...hdmf_common.v1_5_0.hdmf_common_base import Data, Container from ...hdmf_common.v1_5_0.hdmf_common_base import Data, Container
from pandas import DataFrame, Series import pandas as pd
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
from pydantic import ( from pydantic import (
BaseModel, BaseModel,
@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel):
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
@overload @overload
def __getitem__(self, item: int) -> DataFrame: ... def __getitem__(self, item: int) -> pd.DataFrame: ...
@overload @overload
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
@overload @overload
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
DataFrame, pd.DataFrame,
list, list,
"NDArray", "NDArray",
"VectorDataMixin", "VectorDataMixin",
]: ... ]: ...
@overload @overload
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
def __getitem__( def __getitem__(
self, self,
@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel):
raise ValueError(f"Unsure how to get item with key {item}") raise ValueError(f"Unsure how to get item with key {item}")
# cast to DF # cast to DF
return DataFrame(data) return pd.DataFrame(data)
def _slice_range( def _slice_range(
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel):
# scalars need to be wrapped in series for pandas # scalars need to be wrapped in series for pandas
if not isinstance(rows, (Iterable, slice)): if not isinstance(rows, (Iterable, slice)):
val = Series([val]) val = pd.Series([val])
data[k] = val data[k] = val
return data return data
@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel):
except AttributeError: except AttributeError:
raise e from None raise e from None
def __len__(self) -> int:
    """
    Number of rows in the table, measured as the length of the ``id`` column.

    The ``id`` column is treated as authoritative; if it does not reflect the
    true row count, that is a bug rather than something handled here.
    """
    row_ids = self.id
    return len(row_ids)
@model_validator(mode="before") @model_validator(mode="before")
@classmethod @classmethod
def create_id(cls, model: Dict[str, Any]) -> Dict: def create_id(cls, model: Dict[str, Any]) -> Dict:
@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel):
) )
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of this model
    and treated as one of the aligned sub-tables. Indexing returns pandas objects:
    selecting rows across all categories produces a frame whose columns are keyed by
    ``(category name, column name)`` and whose index is the ``(self.name, "id")`` column.
    """

    # Extra (undeclared) fields are annotated as sub-tables.
    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # NOTE(review): not referenced inside this class; presumably the field names that
    # must not be interpreted as category tables - confirm against callers.
    NON_CATEGORY_FIELDS: ClassVar[Tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    # NOTE(review): defaults to None, in which case row indexing in ``__getitem__``
    # fails on ``self.id[item]`` - presumably populated before use; confirm.
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Map each name in ``categories`` to the attribute of the same name."""
        return {category: getattr(self, category) for category in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name; the whole category table is returned
                  (the category is indexed with ``[:]``)
                * ``(rows, category)`` - a 2-tuple whose second element is a category
                  name; ``rows`` is forwarded to that category's own ``__getitem__``
                * ``int`` or ``slice`` - the same rows are taken from ``id`` and from
                  every category and joined column-wise

        Returns:
            The selection produced by the category table for the ``str`` and tuple
            forms; for ``int``/``slice`` a :class:`pandas.DataFrame` with one column
            group per category, indexed by ``(self.name, "id")``.

        Raises:
            ValueError: if ``item`` is none of the supported forms
            KeyError: if a category name is not present in ``categories``
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]
        if isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            return self._categories[item[1]][item[0]]
        if not isinstance(item, (int, slice)):
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # get a slice of all the tables
        ids = self.id[item]
        if not isinstance(ids, Iterable):
            # scalars need to be wrapped in a series for pandas
            ids = pd.Series([ids])

        # Build the tables and their concat keys from the same mapping so the two
        # sequences cannot drift apart (e.g. a category name listed twice).
        categories = self._categories
        tables = [pd.DataFrame({"id": ids})]
        tables.extend(table[item].reset_index() for table in categories.values())
        names = [self.name, *categories]

        df = pd.concat(tables, axis=1, keys=names)
        df.set_index((self.name, "id"), drop=True, inplace=True)
        return df
linkml_meta = LinkMLMeta( linkml_meta = LinkMLMeta(
{ {
"annotations": { "annotations": {
@ -617,7 +686,7 @@ class DynamicTable(DynamicTableMixin):
) )
class AlignedDynamicTable(DynamicTable): class AlignedDynamicTable(AlignedDynamicTableMixin, DynamicTable):
""" """
DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group. DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group.
""" """

View file

@ -5,7 +5,7 @@ from enum import Enum
import re import re
import sys import sys
from ...hdmf_common.v1_5_1.hdmf_common_base import Data, Container from ...hdmf_common.v1_5_1.hdmf_common_base import Data, Container
from pandas import DataFrame, Series import pandas as pd
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
from pydantic import ( from pydantic import (
BaseModel, BaseModel,
@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel):
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
@overload @overload
def __getitem__(self, item: int) -> DataFrame: ... def __getitem__(self, item: int) -> pd.DataFrame: ...
@overload @overload
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
@overload @overload
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
DataFrame, pd.DataFrame,
list, list,
"NDArray", "NDArray",
"VectorDataMixin", "VectorDataMixin",
]: ... ]: ...
@overload @overload
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
def __getitem__( def __getitem__(
self, self,
@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel):
raise ValueError(f"Unsure how to get item with key {item}") raise ValueError(f"Unsure how to get item with key {item}")
# cast to DF # cast to DF
return DataFrame(data) return pd.DataFrame(data)
def _slice_range( def _slice_range(
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel):
# scalars need to be wrapped in series for pandas # scalars need to be wrapped in series for pandas
if not isinstance(rows, (Iterable, slice)): if not isinstance(rows, (Iterable, slice)):
val = Series([val]) val = pd.Series([val])
data[k] = val data[k] = val
return data return data
@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel):
except AttributeError: except AttributeError:
raise e from None raise e from None
def __len__(self) -> int:
    """
    Number of rows in the table, measured as the length of the ``id`` column.

    The ``id`` column is treated as authoritative; if it does not reflect the
    true row count, that is a bug rather than something handled here.
    """
    row_ids = self.id
    return len(row_ids)
@model_validator(mode="before") @model_validator(mode="before")
@classmethod @classmethod
def create_id(cls, model: Dict[str, Any]) -> Dict: def create_id(cls, model: Dict[str, Any]) -> Dict:
@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel):
) )
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of this model
    and treated as one of the aligned sub-tables. Indexing returns pandas objects:
    selecting rows across all categories produces a frame whose columns are keyed by
    ``(category name, column name)`` and whose index is the ``(self.name, "id")`` column.
    """

    # Extra (undeclared) fields are annotated as sub-tables.
    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # NOTE(review): not referenced inside this class; presumably the field names that
    # must not be interpreted as category tables - confirm against callers.
    NON_CATEGORY_FIELDS: ClassVar[Tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    # NOTE(review): defaults to None, in which case row indexing in ``__getitem__``
    # fails on ``self.id[item]`` - presumably populated before use; confirm.
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Map each name in ``categories`` to the attribute of the same name."""
        return {category: getattr(self, category) for category in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name; the whole category table is returned
                  (the category is indexed with ``[:]``)
                * ``(rows, category)`` - a 2-tuple whose second element is a category
                  name; ``rows`` is forwarded to that category's own ``__getitem__``
                * ``int`` or ``slice`` - the same rows are taken from ``id`` and from
                  every category and joined column-wise

        Returns:
            The selection produced by the category table for the ``str`` and tuple
            forms; for ``int``/``slice`` a :class:`pandas.DataFrame` with one column
            group per category, indexed by ``(self.name, "id")``.

        Raises:
            ValueError: if ``item`` is none of the supported forms
            KeyError: if a category name is not present in ``categories``
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]
        if isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            return self._categories[item[1]][item[0]]
        if not isinstance(item, (int, slice)):
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # get a slice of all the tables
        ids = self.id[item]
        if not isinstance(ids, Iterable):
            # scalars need to be wrapped in a series for pandas
            ids = pd.Series([ids])

        # Build the tables and their concat keys from the same mapping so the two
        # sequences cannot drift apart (e.g. a category name listed twice).
        categories = self._categories
        tables = [pd.DataFrame({"id": ids})]
        tables.extend(table[item].reset_index() for table in categories.values())
        names = [self.name, *categories]

        df = pd.concat(tables, axis=1, keys=names)
        df.set_index((self.name, "id"), drop=True, inplace=True)
        return df
linkml_meta = LinkMLMeta( linkml_meta = LinkMLMeta(
{ {
"annotations": { "annotations": {
@ -617,7 +686,7 @@ class DynamicTable(DynamicTableMixin):
) )
class AlignedDynamicTable(DynamicTable): class AlignedDynamicTable(AlignedDynamicTableMixin, DynamicTable):
""" """
DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group. DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group.
""" """

View file

@ -5,7 +5,7 @@ from enum import Enum
import re import re
import sys import sys
from ...hdmf_common.v1_6_0.hdmf_common_base import Data, Container from ...hdmf_common.v1_6_0.hdmf_common_base import Data, Container
from pandas import DataFrame, Series import pandas as pd
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
from pydantic import ( from pydantic import (
BaseModel, BaseModel,
@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel):
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
@overload @overload
def __getitem__(self, item: int) -> DataFrame: ... def __getitem__(self, item: int) -> pd.DataFrame: ...
@overload @overload
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
@overload @overload
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
DataFrame, pd.DataFrame,
list, list,
"NDArray", "NDArray",
"VectorDataMixin", "VectorDataMixin",
]: ... ]: ...
@overload @overload
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
def __getitem__( def __getitem__(
self, self,
@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel):
raise ValueError(f"Unsure how to get item with key {item}") raise ValueError(f"Unsure how to get item with key {item}")
# cast to DF # cast to DF
return DataFrame(data) return pd.DataFrame(data)
def _slice_range( def _slice_range(
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel):
# scalars need to be wrapped in series for pandas # scalars need to be wrapped in series for pandas
if not isinstance(rows, (Iterable, slice)): if not isinstance(rows, (Iterable, slice)):
val = Series([val]) val = pd.Series([val])
data[k] = val data[k] = val
return data return data
@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel):
except AttributeError: except AttributeError:
raise e from None raise e from None
def __len__(self) -> int:
    """
    Number of rows in the table, measured as the length of the ``id`` column.

    The ``id`` column is treated as authoritative; if it does not reflect the
    true row count, that is a bug rather than something handled here.
    """
    row_ids = self.id
    return len(row_ids)
@model_validator(mode="before") @model_validator(mode="before")
@classmethod @classmethod
def create_id(cls, model: Dict[str, Any]) -> Dict: def create_id(cls, model: Dict[str, Any]) -> Dict:
@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel):
) )
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of this model
    and treated as one of the aligned sub-tables. Indexing returns pandas objects:
    selecting rows across all categories produces a frame whose columns are keyed by
    ``(category name, column name)`` and whose index is the ``(self.name, "id")`` column.
    """

    # Extra (undeclared) fields are annotated as sub-tables.
    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # NOTE(review): not referenced inside this class; presumably the field names that
    # must not be interpreted as category tables - confirm against callers.
    NON_CATEGORY_FIELDS: ClassVar[Tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    # NOTE(review): defaults to None, in which case row indexing in ``__getitem__``
    # fails on ``self.id[item]`` - presumably populated before use; confirm.
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Map each name in ``categories`` to the attribute of the same name."""
        return {category: getattr(self, category) for category in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name; the whole category table is returned
                  (the category is indexed with ``[:]``)
                * ``(rows, category)`` - a 2-tuple whose second element is a category
                  name; ``rows`` is forwarded to that category's own ``__getitem__``
                * ``int`` or ``slice`` - the same rows are taken from ``id`` and from
                  every category and joined column-wise

        Returns:
            The selection produced by the category table for the ``str`` and tuple
            forms; for ``int``/``slice`` a :class:`pandas.DataFrame` with one column
            group per category, indexed by ``(self.name, "id")``.

        Raises:
            ValueError: if ``item`` is none of the supported forms
            KeyError: if a category name is not present in ``categories``
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]
        if isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            return self._categories[item[1]][item[0]]
        if not isinstance(item, (int, slice)):
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # get a slice of all the tables
        ids = self.id[item]
        if not isinstance(ids, Iterable):
            # scalars need to be wrapped in a series for pandas
            ids = pd.Series([ids])

        # Build the tables and their concat keys from the same mapping so the two
        # sequences cannot drift apart (e.g. a category name listed twice).
        categories = self._categories
        tables = [pd.DataFrame({"id": ids})]
        tables.extend(table[item].reset_index() for table in categories.values())
        names = [self.name, *categories]

        df = pd.concat(tables, axis=1, keys=names)
        df.set_index((self.name, "id"), drop=True, inplace=True)
        return df
linkml_meta = LinkMLMeta( linkml_meta = LinkMLMeta(
{ {
"annotations": { "annotations": {
@ -617,7 +686,7 @@ class DynamicTable(DynamicTableMixin):
) )
class AlignedDynamicTable(DynamicTable): class AlignedDynamicTable(AlignedDynamicTableMixin, DynamicTable):
""" """
DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group. DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group.
""" """

View file

@ -5,7 +5,7 @@ from enum import Enum
import re import re
import sys import sys
from ...hdmf_common.v1_7_0.hdmf_common_base import Data, Container from ...hdmf_common.v1_7_0.hdmf_common_base import Data, Container
from pandas import DataFrame, Series import pandas as pd
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
from pydantic import ( from pydantic import (
BaseModel, BaseModel,
@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel):
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
@overload @overload
def __getitem__(self, item: int) -> DataFrame: ... def __getitem__(self, item: int) -> pd.DataFrame: ...
@overload @overload
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
@overload @overload
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
DataFrame, pd.DataFrame,
list, list,
"NDArray", "NDArray",
"VectorDataMixin", "VectorDataMixin",
]: ... ]: ...
@overload @overload
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
def __getitem__( def __getitem__(
self, self,
@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel):
raise ValueError(f"Unsure how to get item with key {item}") raise ValueError(f"Unsure how to get item with key {item}")
# cast to DF # cast to DF
return DataFrame(data) return pd.DataFrame(data)
def _slice_range( def _slice_range(
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel):
# scalars need to be wrapped in series for pandas # scalars need to be wrapped in series for pandas
if not isinstance(rows, (Iterable, slice)): if not isinstance(rows, (Iterable, slice)):
val = Series([val]) val = pd.Series([val])
data[k] = val data[k] = val
return data return data
@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel):
except AttributeError: except AttributeError:
raise e from None raise e from None
def __len__(self) -> int:
    """
    Number of rows in the table, measured as the length of the ``id`` column.

    The ``id`` column is treated as authoritative; if it does not reflect the
    true row count, that is a bug rather than something handled here.
    """
    row_ids = self.id
    return len(row_ids)
@model_validator(mode="before") @model_validator(mode="before")
@classmethod @classmethod
def create_id(cls, model: Dict[str, Any]) -> Dict: def create_id(cls, model: Dict[str, Any]) -> Dict:
@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel):
) )
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of this model
    and treated as one of the aligned sub-tables. Indexing returns pandas objects:
    selecting rows across all categories produces a frame whose columns are keyed by
    ``(category name, column name)`` and whose index is the ``(self.name, "id")`` column.
    """

    # Extra (undeclared) fields are annotated as sub-tables.
    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # NOTE(review): not referenced inside this class; presumably the field names that
    # must not be interpreted as category tables - confirm against callers.
    NON_CATEGORY_FIELDS: ClassVar[Tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    # NOTE(review): defaults to None, in which case row indexing in ``__getitem__``
    # fails on ``self.id[item]`` - presumably populated before use; confirm.
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Map each name in ``categories`` to the attribute of the same name."""
        return {category: getattr(self, category) for category in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name; the whole category table is returned
                  (the category is indexed with ``[:]``)
                * ``(rows, category)`` - a 2-tuple whose second element is a category
                  name; ``rows`` is forwarded to that category's own ``__getitem__``
                * ``int`` or ``slice`` - the same rows are taken from ``id`` and from
                  every category and joined column-wise

        Returns:
            The selection produced by the category table for the ``str`` and tuple
            forms; for ``int``/``slice`` a :class:`pandas.DataFrame` with one column
            group per category, indexed by ``(self.name, "id")``.

        Raises:
            ValueError: if ``item`` is none of the supported forms
            KeyError: if a category name is not present in ``categories``
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]
        if isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            return self._categories[item[1]][item[0]]
        if not isinstance(item, (int, slice)):
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # get a slice of all the tables
        ids = self.id[item]
        if not isinstance(ids, Iterable):
            # scalars need to be wrapped in a series for pandas
            ids = pd.Series([ids])

        # Build the tables and their concat keys from the same mapping so the two
        # sequences cannot drift apart (e.g. a category name listed twice).
        categories = self._categories
        tables = [pd.DataFrame({"id": ids})]
        tables.extend(table[item].reset_index() for table in categories.values())
        names = [self.name, *categories]

        df = pd.concat(tables, axis=1, keys=names)
        df.set_index((self.name, "id"), drop=True, inplace=True)
        return df
linkml_meta = LinkMLMeta( linkml_meta = LinkMLMeta(
{ {
"annotations": { "annotations": {
@ -617,7 +686,7 @@ class DynamicTable(DynamicTableMixin):
) )
class AlignedDynamicTable(DynamicTable): class AlignedDynamicTable(AlignedDynamicTableMixin, DynamicTable):
""" """
DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group. DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group.
""" """

View file

@ -5,7 +5,7 @@ from enum import Enum
import re import re
import sys import sys
from ...hdmf_common.v1_8_0.hdmf_common_base import Data, Container from ...hdmf_common.v1_8_0.hdmf_common_base import Data, Container
from pandas import DataFrame, Series import pandas as pd
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload
from pydantic import ( from pydantic import (
BaseModel, BaseModel,
@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel):
def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ...
@overload @overload
def __getitem__(self, item: int) -> DataFrame: ... def __getitem__(self, item: int) -> pd.DataFrame: ...
@overload @overload
def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ...
@overload @overload
def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[
DataFrame, pd.DataFrame,
list, list,
"NDArray", "NDArray",
"VectorDataMixin", "VectorDataMixin",
]: ... ]: ...
@overload @overload
def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ...
def __getitem__( def __getitem__(
self, self,
@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel):
raise ValueError(f"Unsure how to get item with key {item}") raise ValueError(f"Unsure how to get item with key {item}")
# cast to DF # cast to DF
return DataFrame(data) return pd.DataFrame(data)
def _slice_range( def _slice_range(
self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None
@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel):
# scalars need to be wrapped in series for pandas # scalars need to be wrapped in series for pandas
if not isinstance(rows, (Iterable, slice)): if not isinstance(rows, (Iterable, slice)):
val = Series([val]) val = pd.Series([val])
data[k] = val data[k] = val
return data return data
@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel):
except AttributeError: except AttributeError:
raise e from None raise e from None
def __len__(self) -> int:
    """
    Number of rows in the table, as given by the ``id`` column.

    The ``id`` column is treated as the source of truth for length:
    if it does not match the real number of rows, that is a bug.
    """
    row_ids = self.id
    return len(row_ids)
@model_validator(mode="before") @model_validator(mode="before")
@classmethod @classmethod
def create_id(cls, model: Dict[str, Any]) -> Dict: def create_id(cls, model: Dict[str, Any]) -> Dict:
@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel):
) )
class AlignedDynamicTableMixin(DynamicTableMixin):
    """
    Mixin to allow indexing multiple tables that are aligned on a common ID

    Every name listed in ``categories`` is looked up as an attribute of the
    model (see :attr:`._categories`) and is indexed as a table by
    :meth:`.__getitem__`.
    """

    # Extra (undeclared) fields are typed as tables.
    # NOTE(review): presumably the category tables are stored here - confirm
    __pydantic_extra__: Dict[str, "DynamicTableMixin"]

    # variable-length tuple of field names; ``tuple[str]`` would mean a 1-tuple
    NON_CATEGORY_FIELDS: ClassVar[Tuple[str, ...]] = (
        "name",
        "categories",
        "colnames",
        "description",
    )

    name: str = "aligned_table"
    categories: List[str] = Field(default_factory=list)
    id: Optional[NDArray[Shape["* num_rows"], int]] = None

    @property
    def _categories(self) -> Dict[str, "DynamicTableMixin"]:
        """Category tables keyed by name, in the order given by ``categories``"""
        return {category: getattr(self, category) for category in self.categories}

    def __getitem__(
        self, item: Union[int, str, slice, Tuple[Union[int, slice], str]]
    ) -> pd.DataFrame:
        """
        Mimic hdmf:

        https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261

        Args:
            item: One of

                * ``str`` - a category name: every row (``[:]``) of that category table
                * ``tuple[int | slice, str]`` - ``(rows, category)``: the given
                  rows of that category table
                * ``int`` or ``slice`` - the given rows of every category table,
                  concatenated column-wise

        Returns:
            For a ``str`` or tuple key, whatever indexing the category table returns.
            For an ``int`` or ``slice`` key, a :class:`pandas.DataFrame` whose columns
            are grouped under ``self.name`` and each category name,
            indexed by the selected ``id`` values.

        Raises:
            KeyError: if a category name is not present in ``categories``
            ValueError: if ``item`` is not one of the supported key types
        """
        if isinstance(item, str):
            # get a single table
            return self._categories[item][:]

        if isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str):
            # get a slice of a single table
            rows, category = item
            return self._categories[category][rows]

        if not isinstance(item, (int, slice)):
            raise ValueError(
                f"Dont know how to index with {item}, "
                "need an int, string, slice, or tuple[int | slice, str]"
            )

        # get a slice of all the tables
        ids = self.id[item]
        if not isinstance(ids, Iterable):
            # scalars need to be wrapped in a series for pandas
            ids = pd.Series([ids])
        ids = pd.DataFrame({"id": ids})
        tables = [ids] + [table[item].reset_index() for table in self._categories.values()]
        names = [self.name] + self.categories

        # the frame is assembled separately from the row selection above
        # in case we need to support array indexing in the future
        df = pd.concat(tables, axis=1, keys=names)
        return df.set_index((self.name, "id"), drop=True)
linkml_meta = LinkMLMeta( linkml_meta = LinkMLMeta(
{ {
"annotations": { "annotations": {
@ -617,7 +686,7 @@ class DynamicTable(DynamicTableMixin):
) )
class AlignedDynamicTable(DynamicTable): class AlignedDynamicTable(AlignedDynamicTableMixin, DynamicTable):
""" """
DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group. DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group.
""" """