From 54c18e333e4e7947a932c7f56d7bca4b557e1f09 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Thu, 8 Aug 2024 00:31:41 -0700 Subject: [PATCH] initial draft of aligned dynamic table before testing and refining. switch pandas import to module-level --- docs/intro/translation.md | 4 + .../src/nwb_linkml/generators/pydantic.py | 2 + nwb_linkml/src/nwb_linkml/includes/hdmf.py | 86 +++++++++++++++++-- .../hdmf_common/v1_1_0/hdmf_common_table.py | 81 +++++++++++++++-- .../hdmf_common/v1_1_2/hdmf_common_table.py | 81 +++++++++++++++-- .../hdmf_common/v1_1_3/hdmf_common_table.py | 81 +++++++++++++++-- .../hdmf_common/v1_2_0/hdmf_common_table.py | 81 +++++++++++++++-- .../hdmf_common/v1_2_1/hdmf_common_table.py | 81 +++++++++++++++-- .../hdmf_common/v1_3_0/hdmf_common_table.py | 81 +++++++++++++++-- .../hdmf_common/v1_4_0/hdmf_common_table.py | 81 +++++++++++++++-- .../hdmf_common/v1_5_0/hdmf_common_table.py | 83 ++++++++++++++++-- .../hdmf_common/v1_5_1/hdmf_common_table.py | 83 ++++++++++++++++-- .../hdmf_common/v1_6_0/hdmf_common_table.py | 83 ++++++++++++++++-- .../hdmf_common/v1_7_0/hdmf_common_table.py | 83 ++++++++++++++++-- .../hdmf_common/v1_8_0/hdmf_common_table.py | 83 ++++++++++++++++-- 15 files changed, 988 insertions(+), 86 deletions(-) diff --git a/docs/intro/translation.md b/docs/intro/translation.md index 613b93f..899dfbe 100644 --- a/docs/intro/translation.md +++ b/docs/intro/translation.md @@ -305,6 +305,10 @@ There are several different ways to create references between objects in nwb/hdm target_type: ElectrodeGroup reftype: object ``` +- `TimeSeriesReferenceVectorData` is a compound dtype that behaves like VectorData and VectorIndex combined + into a single type. It is slightly different in that each row of the vector can refer to a different table, + and has a different way of handling selection (with `start` and `count` + rather than a series of indices for the end of each cell) - Implicitly, hdmf creates references between objects according to some naming conventions, eg. an attribute/dataset that is a `VectorIndex` named `mydata_index` will be linked to a `VectorData` object `mydata`. diff --git a/nwb_linkml/src/nwb_linkml/generators/pydantic.py b/nwb_linkml/src/nwb_linkml/generators/pydantic.py index e1f07af..109ce5b 100644 --- a/nwb_linkml/src/nwb_linkml/generators/pydantic.py +++ b/nwb_linkml/src/nwb_linkml/generators/pydantic.py @@ -252,6 +252,8 @@ class AfterGenerateClass: cls.cls.bases = ["VectorIndexMixin"] elif cls.cls.name == "DynamicTableRegion": cls.cls.bases = ["DynamicTableRegionMixin", "VectorData"] + elif cls.cls.name == "AlignedDynamicTable": + cls.cls.bases = ["AlignedDynamicTableMixin", "DynamicTable"] return cls diff --git a/nwb_linkml/src/nwb_linkml/includes/hdmf.py b/nwb_linkml/src/nwb_linkml/includes/hdmf.py index c34bf9a..addc32c 100644 --- a/nwb_linkml/src/nwb_linkml/includes/hdmf.py +++ b/nwb_linkml/src/nwb_linkml/includes/hdmf.py @@ -18,7 +18,7 @@ from typing import ( import numpy as np from linkml.generators.pydanticgen.template import Import, Imports, ObjectImport from numpydantic import NDArray, Shape -from pandas import DataFrame, Series +import pandas as pd from pydantic import ( BaseModel, ConfigDict, @@ -66,21 +66,21 @@ class DynamicTableMixin(BaseModel): def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... @overload - def __getitem__(self, item: int) -> DataFrame: ... + def __getitem__(self, item: int) -> pd.DataFrame: ... @overload def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... @overload def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ - DataFrame, + pd.DataFrame, list, "NDArray", "VectorDataMixin", ]: ... @overload - def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ... def __getitem__( self, @@ -131,7 +131,7 @@ class DynamicTableMixin(BaseModel): raise ValueError(f"Unsure how to get item with key {item}") # cast to DF - return DataFrame(data) + return pd.DataFrame(data) def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None @@ -149,7 +149,7 @@ class DynamicTableMixin(BaseModel): # scalars need to be wrapped in series for pandas if not isinstance(rows, (Iterable, slice)): - val = Series([val]) + val = pd.Series([val]) data[k] = val return data @@ -180,6 +180,14 @@ class DynamicTableMixin(BaseModel): except AttributeError: raise e from None + def __len__(self) -> int: + """ + Use the id column to determine length. + + If the id column doesn't represent length accurately, it's a bug + """ + return len(self.id) + @model_validator(mode="before") @classmethod def create_id(cls, model: Dict[str, Any]) -> Dict: @@ -467,11 +475,70 @@ class DynamicTableRegionMixin(BaseModel): self.table[self.value[key]] = value +class AlignedDynamicTableMixin(DynamicTableMixin): + """ + Mixin to allow indexing multiple tables that are aligned on a common ID + """ + + __pydantic_extra__: Dict[str, "DynamicTableMixin"] + + NON_CATEGORY_FIELDS: ClassVar[tuple[str]] = ( + "name", + "categories", + "colnames", + "description", + ) + + name: str = "aligned_table" + categories: List[str] = Field(default_factory=list) + id: Optional[NDArray[Shape["* num_rows"], int]] = None + + @property + def _categories(self) -> Dict[str, "DynamicTableMixin"]: + return {k: getattr(self, k) for i, k in enumerate(self.categories)} + + def __getitem__( + self, item: Union[int, str, slice, Tuple[Union[int, slice], str]] + ) -> pd.DataFrame: + """ + Mimic hdmf: + + https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261 + Args: + item: + + Returns: + + """ + if isinstance(item, str): + # get a single table + return self._categories[item][:] + elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str): + # get a slice of a single table + return self._categories[item[1]][item[0]] + elif isinstance(item, (int, slice)): + # get a slice of all the tables + ids = self.id[item] + if not isinstance(ids, Iterable): + ids = pd.Series([ids]) + ids = pd.DataFrame({"id": ids}) + tables = [ids] + [table[item].reset_index() for table in self._categories.values()] + names = [self.name] + self.categories + # construct below in case we need to support array indexing in the future + else: + raise ValueError( + f"Dont know how to index with {item}, " + "need an int, string, slice, or tuple[int | slice, str]" + ) + + df = pd.concat(tables, axis=1, keys=names) + df.set_index((self.name, "id"), drop=True, inplace=True) + return df + + DYNAMIC_TABLE_IMPORTS = Imports( imports=[ - Import( - module="pandas", objects=[ObjectImport(name="DataFrame"), ObjectImport(name="Series")] - ), + Import(module="pandas", alias="pd"), Import( module="typing", objects=[ @@ -508,4 +575,5 @@ DYNAMIC_TABLE_INJECTS = [ VectorIndexMixin, DynamicTableRegionMixin, DynamicTableMixin, + AlignedDynamicTableMixin, ] diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_0/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_0/hdmf_common_table.py index 1c07ed3..0c982dd 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_0/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_0/hdmf_common_table.py @@ -4,7 +4,7 @@ from decimal import Decimal from enum import Enum import re import sys -from pandas import DataFrame, Series +import pandas as pd from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from numpydantic import NDArray, Shape from pydantic import ( @@ -244,21 +244,21 @@ class DynamicTableMixin(BaseModel): def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... @overload - def __getitem__(self, item: int) -> DataFrame: ... + def __getitem__(self, item: int) -> pd.DataFrame: ... @overload def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... @overload def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ - DataFrame, + pd.DataFrame, list, "NDArray", "VectorDataMixin", ]: ... @overload - def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ... def __getitem__( self, @@ -309,7 +309,7 @@ class DynamicTableMixin(BaseModel): raise ValueError(f"Unsure how to get item with key {item}") # cast to DF - return DataFrame(data) + return pd.DataFrame(data) def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None @@ -327,7 +327,7 @@ class DynamicTableMixin(BaseModel): # scalars need to be wrapped in series for pandas if not isinstance(rows, (Iterable, slice)): - val = Series([val]) + val = pd.Series([val]) data[k] = val return data @@ -358,6 +358,14 @@ class DynamicTableMixin(BaseModel): except AttributeError: raise e from None + def __len__(self) -> int: + """ + Use the id column to determine length. + + If the id column doesn't represent length accurately, it's a bug + """ + return len(self.id) + @model_validator(mode="before") @classmethod def create_id(cls, model: Dict[str, Any]) -> Dict: @@ -492,6 +500,67 @@ class DynamicTableMixin(BaseModel): ) +class AlignedDynamicTableMixin(DynamicTableMixin): + """ + Mixin to allow indexing multiple tables that are aligned on a common ID + """ + + __pydantic_extra__: Dict[str, "DynamicTableMixin"] + + NON_CATEGORY_FIELDS: ClassVar[tuple[str]] = ( + "name", + "categories", + "colnames", + "description", + ) + + name: str = "aligned_table" + categories: List[str] = Field(default_factory=list) + id: Optional[NDArray[Shape["* num_rows"], int]] = None + + @property + def _categories(self) -> Dict[str, "DynamicTableMixin"]: + return {k: getattr(self, k) for i, k in enumerate(self.categories)} + + def __getitem__( + self, item: Union[int, str, slice, Tuple[Union[int, slice], str]] + ) -> pd.DataFrame: + """ + Mimic hdmf: + + https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261 + Args: + item: + + Returns: + + """ + if isinstance(item, str): + # get a single table + return self._categories[item][:] + elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str): + # get a slice of a single table + return self._categories[item[1]][item[0]] + elif isinstance(item, (int, slice)): + # get a slice of all the tables + ids = self.id[item] + if not isinstance(ids, Iterable): + ids = pd.Series([ids]) + ids = pd.DataFrame({"id": ids}) + tables = [ids] + [table[item].reset_index() for table in self._categories.values()] + names = [self.name] + self.categories + # construct below in case we need to support array indexing in the future + else: + raise ValueError( + f"Dont know how to index with {item}, " + "need an int, string, slice, or tuple[int | slice, str]" + ) + + df = pd.concat(tables, axis=1, keys=names) + df.set_index((self.name, "id"), drop=True, inplace=True) + return df + + linkml_meta = LinkMLMeta( { "annotations": { diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_2/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_2/hdmf_common_table.py index 36a79bb..9a2696f 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_2/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_2/hdmf_common_table.py @@ -4,7 +4,7 @@ from decimal import Decimal from enum import Enum import re import sys -from pandas import DataFrame, Series +import pandas as pd from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from numpydantic import NDArray, Shape from pydantic import ( @@ -244,21 +244,21 @@ class DynamicTableMixin(BaseModel): def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... @overload - def __getitem__(self, item: int) -> DataFrame: ... + def __getitem__(self, item: int) -> pd.DataFrame: ... @overload def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... @overload def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ - DataFrame, + pd.DataFrame, list, "NDArray", "VectorDataMixin", ]: ... @overload - def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ... def __getitem__( self, @@ -309,7 +309,7 @@ class DynamicTableMixin(BaseModel): raise ValueError(f"Unsure how to get item with key {item}") # cast to DF - return DataFrame(data) + return pd.DataFrame(data) def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None @@ -327,7 +327,7 @@ class DynamicTableMixin(BaseModel): # scalars need to be wrapped in series for pandas if not isinstance(rows, (Iterable, slice)): - val = Series([val]) + val = pd.Series([val]) data[k] = val return data @@ -358,6 +358,14 @@ class DynamicTableMixin(BaseModel): except AttributeError: raise e from None + def __len__(self) -> int: + """ + Use the id column to determine length. + + If the id column doesn't represent length accurately, it's a bug + """ + return len(self.id) + @model_validator(mode="before") @classmethod def create_id(cls, model: Dict[str, Any]) -> Dict: @@ -492,6 +500,67 @@ class DynamicTableMixin(BaseModel): ) +class AlignedDynamicTableMixin(DynamicTableMixin): + """ + Mixin to allow indexing multiple tables that are aligned on a common ID + """ + + __pydantic_extra__: Dict[str, "DynamicTableMixin"] + + NON_CATEGORY_FIELDS: ClassVar[tuple[str]] = ( + "name", + "categories", + "colnames", + "description", + ) + + name: str = "aligned_table" + categories: List[str] = Field(default_factory=list) + id: Optional[NDArray[Shape["* num_rows"], int]] = None + + @property + def _categories(self) -> Dict[str, "DynamicTableMixin"]: + return {k: getattr(self, k) for i, k in enumerate(self.categories)} + + def __getitem__( + self, item: Union[int, str, slice, Tuple[Union[int, slice], str]] + ) -> pd.DataFrame: + """ + Mimic hdmf: + + https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261 + Args: + item: + + Returns: + + """ + if isinstance(item, str): + # get a single table + return self._categories[item][:] + elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str): + # get a slice of a single table + return self._categories[item[1]][item[0]] + elif isinstance(item, (int, slice)): + # get a slice of all the tables + ids = self.id[item] + if not isinstance(ids, Iterable): + ids = pd.Series([ids]) + ids = pd.DataFrame({"id": ids}) + tables = [ids] + [table[item].reset_index() for table in self._categories.values()] + names = [self.name] + self.categories + # construct below in case we need to support array indexing in the future + else: + raise ValueError( + f"Dont know how to index with {item}, " + "need an int, string, slice, or tuple[int | slice, str]" + ) + + df = pd.concat(tables, axis=1, keys=names) + df.set_index((self.name, "id"), drop=True, inplace=True) + return df + + linkml_meta = LinkMLMeta( { "annotations": { diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_3/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_3/hdmf_common_table.py index a5477d8..35fe280 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_3/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_1_3/hdmf_common_table.py @@ -4,7 +4,7 @@ from decimal import Decimal from enum import Enum import re import sys -from pandas import DataFrame, Series +import pandas as pd from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from numpydantic import NDArray, Shape from pydantic import ( @@ -244,21 +244,21 @@ class DynamicTableMixin(BaseModel): def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... @overload - def __getitem__(self, item: int) -> DataFrame: ... + def __getitem__(self, item: int) -> pd.DataFrame: ... @overload def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... @overload def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ - DataFrame, + pd.DataFrame, list, "NDArray", "VectorDataMixin", ]: ... @overload - def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ... def __getitem__( self, @@ -309,7 +309,7 @@ class DynamicTableMixin(BaseModel): raise ValueError(f"Unsure how to get item with key {item}") # cast to DF - return DataFrame(data) + return pd.DataFrame(data) def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None @@ -327,7 +327,7 @@ class DynamicTableMixin(BaseModel): # scalars need to be wrapped in series for pandas if not isinstance(rows, (Iterable, slice)): - val = Series([val]) + val = pd.Series([val]) data[k] = val return data @@ -358,6 +358,14 @@ class DynamicTableMixin(BaseModel): except AttributeError: raise e from None + def __len__(self) -> int: + """ + Use the id column to determine length. + + If the id column doesn't represent length accurately, it's a bug + """ + return len(self.id) + @model_validator(mode="before") @classmethod def create_id(cls, model: Dict[str, Any]) -> Dict: @@ -492,6 +500,67 @@ class DynamicTableMixin(BaseModel): ) +class AlignedDynamicTableMixin(DynamicTableMixin): + """ + Mixin to allow indexing multiple tables that are aligned on a common ID + """ + + __pydantic_extra__: Dict[str, "DynamicTableMixin"] + + NON_CATEGORY_FIELDS: ClassVar[tuple[str]] = ( + "name", + "categories", + "colnames", + "description", + ) + + name: str = "aligned_table" + categories: List[str] = Field(default_factory=list) + id: Optional[NDArray[Shape["* num_rows"], int]] = None + + @property + def _categories(self) -> Dict[str, "DynamicTableMixin"]: + return {k: getattr(self, k) for i, k in enumerate(self.categories)} + + def __getitem__( + self, item: Union[int, str, slice, Tuple[Union[int, slice], str]] + ) -> pd.DataFrame: + """ + Mimic hdmf: + + https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261 + Args: + item: + + Returns: + + """ + if isinstance(item, str): + # get a single table + return self._categories[item][:] + elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str): + # get a slice of a single table + return self._categories[item[1]][item[0]] + elif isinstance(item, (int, slice)): + # get a slice of all the tables + ids = self.id[item] + if not isinstance(ids, Iterable): + ids = pd.Series([ids]) + ids = pd.DataFrame({"id": ids}) + tables = [ids] + [table[item].reset_index() for table in self._categories.values()] + names = [self.name] + self.categories + # construct below in case we need to support array indexing in the future + else: + raise ValueError( + f"Dont know how to index with {item}, " + "need an int, string, slice, or tuple[int | slice, str]" + ) + + df = pd.concat(tables, axis=1, keys=names) + df.set_index((self.name, "id"), drop=True, inplace=True) + return df + + linkml_meta = LinkMLMeta( { "annotations": { diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_2_0/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_2_0/hdmf_common_table.py index bd03453..d12fd85 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_2_0/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_2_0/hdmf_common_table.py @@ -5,7 +5,7 @@ from enum import Enum import re import sys from ...hdmf_common.v1_2_0.hdmf_common_base import Data, Container -from pandas import DataFrame, Series +import pandas as pd from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from numpydantic import NDArray, Shape from pydantic import ( @@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel): def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... @overload - def __getitem__(self, item: int) -> DataFrame: ... + def __getitem__(self, item: int) -> pd.DataFrame: ... @overload def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... @overload def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ - DataFrame, + pd.DataFrame, list, "NDArray", "VectorDataMixin", ]: ... @overload - def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ... def __getitem__( self, @@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel): raise ValueError(f"Unsure how to get item with key {item}") # cast to DF - return DataFrame(data) + return pd.DataFrame(data) def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None @@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel): # scalars need to be wrapped in series for pandas if not isinstance(rows, (Iterable, slice)): - val = Series([val]) + val = pd.Series([val]) data[k] = val return data @@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel): except AttributeError: raise e from None + def __len__(self) -> int: + """ + Use the id column to determine length. + + If the id column doesn't represent length accurately, it's a bug + """ + return len(self.id) + @model_validator(mode="before") @classmethod def create_id(cls, model: Dict[str, Any]) -> Dict: @@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel): ) +class AlignedDynamicTableMixin(DynamicTableMixin): + """ + Mixin to allow indexing multiple tables that are aligned on a common ID + """ + + __pydantic_extra__: Dict[str, "DynamicTableMixin"] + + NON_CATEGORY_FIELDS: ClassVar[tuple[str]] = ( + "name", + "categories", + "colnames", + "description", + ) + + name: str = "aligned_table" + categories: List[str] = Field(default_factory=list) + id: Optional[NDArray[Shape["* num_rows"], int]] = None + + @property + def _categories(self) -> Dict[str, "DynamicTableMixin"]: + return {k: getattr(self, k) for i, k in enumerate(self.categories)} + + def __getitem__( + self, item: Union[int, str, slice, Tuple[Union[int, slice], str]] + ) -> pd.DataFrame: + """ + Mimic hdmf: + + https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261 + Args: + item: + + Returns: + + """ + if isinstance(item, str): + # get a single table + return self._categories[item][:] + elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str): + # get a slice of a single table + return self._categories[item[1]][item[0]] + elif isinstance(item, (int, slice)): + # get a slice of all the tables + ids = self.id[item] + if not isinstance(ids, Iterable): + ids = pd.Series([ids]) + ids = pd.DataFrame({"id": ids}) + tables = [ids] + [table[item].reset_index() for table in self._categories.values()] + names = [self.name] + self.categories + # construct below in case we need to support array indexing in the future + else: + raise ValueError( + f"Dont know how to index with {item}, " + "need an int, string, slice, or tuple[int | slice, str]" + ) + + df = pd.concat(tables, axis=1, keys=names) + df.set_index((self.name, "id"), drop=True, inplace=True) + return df + + linkml_meta = LinkMLMeta( { "annotations": { diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_2_1/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_2_1/hdmf_common_table.py index 82c84bf..1bfb82c 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_2_1/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_2_1/hdmf_common_table.py @@ -5,7 +5,7 @@ from enum import Enum import re import sys from ...hdmf_common.v1_2_1.hdmf_common_base import Data, Container -from pandas import DataFrame, Series +import pandas as pd from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from numpydantic import NDArray, Shape from pydantic import ( @@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel): def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... @overload - def __getitem__(self, item: int) -> DataFrame: ... + def __getitem__(self, item: int) -> pd.DataFrame: ... @overload def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... @overload def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ - DataFrame, + pd.DataFrame, list, "NDArray", "VectorDataMixin", ]: ... @overload - def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ... def __getitem__( self, @@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel): raise ValueError(f"Unsure how to get item with key {item}") # cast to DF - return DataFrame(data) + return pd.DataFrame(data) def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None @@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel): # scalars need to be wrapped in series for pandas if not isinstance(rows, (Iterable, slice)): - val = Series([val]) + val = pd.Series([val]) data[k] = val return data @@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel): except AttributeError: raise e from None + def __len__(self) -> int: + """ + Use the id column to determine length. + + If the id column doesn't represent length accurately, it's a bug + """ + return len(self.id) + @model_validator(mode="before") @classmethod def create_id(cls, model: Dict[str, Any]) -> Dict: @@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel): ) +class AlignedDynamicTableMixin(DynamicTableMixin): + """ + Mixin to allow indexing multiple tables that are aligned on a common ID + """ + + __pydantic_extra__: Dict[str, "DynamicTableMixin"] + + NON_CATEGORY_FIELDS: ClassVar[tuple[str]] = ( + "name", + "categories", + "colnames", + "description", + ) + + name: str = "aligned_table" + categories: List[str] = Field(default_factory=list) + id: Optional[NDArray[Shape["* num_rows"], int]] = None + + @property + def _categories(self) -> Dict[str, "DynamicTableMixin"]: + return {k: getattr(self, k) for i, k in enumerate(self.categories)} + + def __getitem__( + self, item: Union[int, str, slice, Tuple[Union[int, slice], str]] + ) -> pd.DataFrame: + """ + Mimic hdmf: + + https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261 + Args: + item: + + Returns: + + """ + if isinstance(item, str): + # get a single table + return self._categories[item][:] + elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str): + # get a slice of a single table + return self._categories[item[1]][item[0]] + elif isinstance(item, (int, slice)): + # get a slice of all the tables + ids = self.id[item] + if not isinstance(ids, Iterable): + ids = pd.Series([ids]) + ids = pd.DataFrame({"id": ids}) + tables = [ids] + [table[item].reset_index() for table in self._categories.values()] + names = [self.name] + self.categories + # construct below in case we need to support array indexing in the future + else: + raise ValueError( + f"Dont know how to index with {item}, " + "need an int, string, slice, or tuple[int | slice, str]" + ) + + df = pd.concat(tables, axis=1, keys=names) + df.set_index((self.name, "id"), drop=True, inplace=True) + return df + + linkml_meta = LinkMLMeta( { "annotations": { diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_3_0/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_3_0/hdmf_common_table.py index 23f75ee..b19b63a 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_3_0/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_3_0/hdmf_common_table.py @@ -5,7 +5,7 @@ from enum import Enum import re import sys from ...hdmf_common.v1_3_0.hdmf_common_base import Data, Container -from pandas import DataFrame, Series +import pandas as pd from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from numpydantic import NDArray, Shape from pydantic import ( @@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel): def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... @overload - def __getitem__(self, item: int) -> DataFrame: ... + def __getitem__(self, item: int) -> pd.DataFrame: ... @overload def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... @overload def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ - DataFrame, + pd.DataFrame, list, "NDArray", "VectorDataMixin", ]: ... @overload - def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ... def __getitem__( self, @@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel): raise ValueError(f"Unsure how to get item with key {item}") # cast to DF - return DataFrame(data) + return pd.DataFrame(data) def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None @@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel): # scalars need to be wrapped in series for pandas if not isinstance(rows, (Iterable, slice)): - val = Series([val]) + val = pd.Series([val]) data[k] = val return data @@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel): except AttributeError: raise e from None + def __len__(self) -> int: + """ + Use the id column to determine length. + + If the id column doesn't represent length accurately, it's a bug + """ + return len(self.id) + @model_validator(mode="before") @classmethod def create_id(cls, model: Dict[str, Any]) -> Dict: @@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel): ) +class AlignedDynamicTableMixin(DynamicTableMixin): + """ + Mixin to allow indexing multiple tables that are aligned on a common ID + """ + + __pydantic_extra__: Dict[str, "DynamicTableMixin"] + + NON_CATEGORY_FIELDS: ClassVar[tuple[str]] = ( + "name", + "categories", + "colnames", + "description", + ) + + name: str = "aligned_table" + categories: List[str] = Field(default_factory=list) + id: Optional[NDArray[Shape["* num_rows"], int]] = None + + @property + def _categories(self) -> Dict[str, "DynamicTableMixin"]: + return {k: getattr(self, k) for i, k in enumerate(self.categories)} + + def __getitem__( + self, item: Union[int, str, slice, Tuple[Union[int, slice], str]] + ) -> pd.DataFrame: + """ + Mimic hdmf: + + https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261 + Args: + item: + + Returns: + + """ + if isinstance(item, str): + # get a single table + return self._categories[item][:] + elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str): + # get a slice of a single table + return self._categories[item[1]][item[0]] + elif isinstance(item, (int, slice)): + # get a slice of all the tables + ids = self.id[item] + if not isinstance(ids, Iterable): + ids = pd.Series([ids]) + ids = pd.DataFrame({"id": ids}) + tables = [ids] + [table[item].reset_index() for table in self._categories.values()] + names = [self.name] + self.categories + # construct below in case we need to support array indexing in the future + else: + raise ValueError( + f"Dont know how to index with {item}, " + "need an int, string, slice, or tuple[int | slice, str]" + ) + + df = pd.concat(tables, axis=1, keys=names) + df.set_index((self.name, "id"), drop=True, inplace=True) + return df + + linkml_meta = LinkMLMeta( { "annotations": { diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_4_0/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_4_0/hdmf_common_table.py index e5d4abc..b3d2f89 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_4_0/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_4_0/hdmf_common_table.py @@ -5,7 +5,7 @@ from enum import Enum import re import sys from ...hdmf_common.v1_4_0.hdmf_common_base import Data, Container -from pandas import DataFrame, Series +import pandas as pd from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from numpydantic import NDArray, Shape from pydantic import ( @@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel): def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... @overload - def __getitem__(self, item: int) -> DataFrame: ... + def __getitem__(self, item: int) -> pd.DataFrame: ... @overload def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... @overload def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ - DataFrame, + pd.DataFrame, list, "NDArray", "VectorDataMixin", ]: ... @overload - def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ... def __getitem__( self, @@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel): raise ValueError(f"Unsure how to get item with key {item}") # cast to DF - return DataFrame(data) + return pd.DataFrame(data) def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None @@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel): # scalars need to be wrapped in series for pandas if not isinstance(rows, (Iterable, slice)): - val = Series([val]) + val = pd.Series([val]) data[k] = val return data @@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel): except AttributeError: raise e from None + def __len__(self) -> int: + """ + Use the id column to determine length. + + If the id column doesn't represent length accurately, it's a bug + """ + return len(self.id) + @model_validator(mode="before") @classmethod def create_id(cls, model: Dict[str, Any]) -> Dict: @@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel): ) +class AlignedDynamicTableMixin(DynamicTableMixin): + """ + Mixin to allow indexing multiple tables that are aligned on a common ID + """ + + __pydantic_extra__: Dict[str, "DynamicTableMixin"] + + NON_CATEGORY_FIELDS: ClassVar[tuple[str]] = ( + "name", + "categories", + "colnames", + "description", + ) + + name: str = "aligned_table" + categories: List[str] = Field(default_factory=list) + id: Optional[NDArray[Shape["* num_rows"], int]] = None + + @property + def _categories(self) -> Dict[str, "DynamicTableMixin"]: + return {k: getattr(self, k) for i, k in enumerate(self.categories)} + + def __getitem__( + self, item: Union[int, str, slice, Tuple[Union[int, slice], str]] + ) -> pd.DataFrame: + """ + Mimic hdmf: + + https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261 + Args: + item: + + Returns: + + """ + if isinstance(item, str): + # get a single table + return self._categories[item][:] + elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str): + # get a slice of a single table + return self._categories[item[1]][item[0]] + elif isinstance(item, (int, slice)): + # get a slice of all the tables + ids = self.id[item] + if not isinstance(ids, Iterable): + ids = pd.Series([ids]) + ids = pd.DataFrame({"id": ids}) + tables = [ids] + [table[item].reset_index() for table in self._categories.values()] + names = [self.name] + self.categories + # construct below in case we need to support array indexing in the future + else: + raise ValueError( + f"Dont know how to index with {item}, " + "need an int, string, slice, or tuple[int | slice, str]" + ) + + df = pd.concat(tables, axis=1, keys=names) + df.set_index((self.name, "id"), drop=True, inplace=True) + return df + + linkml_meta = LinkMLMeta( { "annotations": { diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_5_0/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_5_0/hdmf_common_table.py index 46796a1..53e9996 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_5_0/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_5_0/hdmf_common_table.py @@ -5,7 +5,7 @@ from enum import Enum import re import sys from ...hdmf_common.v1_5_0.hdmf_common_base import Data, Container -from pandas import DataFrame, Series +import pandas as pd from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from pydantic import ( BaseModel, @@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel): def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... @overload - def __getitem__(self, item: int) -> DataFrame: ... + def __getitem__(self, item: int) -> pd.DataFrame: ... @overload def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... @overload def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ - DataFrame, + pd.DataFrame, list, "NDArray", "VectorDataMixin", ]: ... @overload - def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ... def __getitem__( self, @@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel): raise ValueError(f"Unsure how to get item with key {item}") # cast to DF - return DataFrame(data) + return pd.DataFrame(data) def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None @@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel): # scalars need to be wrapped in series for pandas if not isinstance(rows, (Iterable, slice)): - val = Series([val]) + val = pd.Series([val]) data[k] = val return data @@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel): except AttributeError: raise e from None + def __len__(self) -> int: + """ + Use the id column to determine length. + + If the id column doesn't represent length accurately, it's a bug + """ + return len(self.id) + @model_validator(mode="before") @classmethod def create_id(cls, model: Dict[str, Any]) -> Dict: @@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel): ) +class AlignedDynamicTableMixin(DynamicTableMixin): + """ + Mixin to allow indexing multiple tables that are aligned on a common ID + """ + + __pydantic_extra__: Dict[str, "DynamicTableMixin"] + + NON_CATEGORY_FIELDS: ClassVar[tuple[str]] = ( + "name", + "categories", + "colnames", + "description", + ) + + name: str = "aligned_table" + categories: List[str] = Field(default_factory=list) + id: Optional[NDArray[Shape["* num_rows"], int]] = None + + @property + def _categories(self) -> Dict[str, "DynamicTableMixin"]: + return {k: getattr(self, k) for i, k in enumerate(self.categories)} + + def __getitem__( + self, item: Union[int, str, slice, Tuple[Union[int, slice], str]] + ) -> pd.DataFrame: + """ + Mimic hdmf: + + https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261 + Args: + item: + + Returns: + + """ + if isinstance(item, str): + # get a single table + return self._categories[item][:] + elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str): + # get a slice of a single table + return self._categories[item[1]][item[0]] + elif isinstance(item, (int, slice)): + # get a slice of all the tables + ids = self.id[item] + if not isinstance(ids, Iterable): + ids = pd.Series([ids]) + ids = pd.DataFrame({"id": ids}) + tables = [ids] + [table[item].reset_index() for table in self._categories.values()] + names = [self.name] + self.categories + # construct below in case we need to support array indexing in the future + else: + raise ValueError( + f"Dont know how to index with {item}, " + "need an int, string, slice, or tuple[int | slice, str]" + ) + + df = pd.concat(tables, axis=1, keys=names) + df.set_index((self.name, "id"), drop=True, inplace=True) + return df + + linkml_meta = LinkMLMeta( { "annotations": { @@ -617,7 +686,7 @@ class DynamicTable(DynamicTableMixin): ) -class AlignedDynamicTable(DynamicTable): +class AlignedDynamicTable(AlignedDynamicTableMixin, DynamicTable): """ DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group. """ diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_5_1/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_5_1/hdmf_common_table.py index 9880ee8..641f20c 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_5_1/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_5_1/hdmf_common_table.py @@ -5,7 +5,7 @@ from enum import Enum import re import sys from ...hdmf_common.v1_5_1.hdmf_common_base import Data, Container -from pandas import DataFrame, Series +import pandas as pd from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from pydantic import ( BaseModel, @@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel): def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... @overload - def __getitem__(self, item: int) -> DataFrame: ... + def __getitem__(self, item: int) -> pd.DataFrame: ... @overload def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... @overload def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ - DataFrame, + pd.DataFrame, list, "NDArray", "VectorDataMixin", ]: ... @overload - def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ... def __getitem__( self, @@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel): raise ValueError(f"Unsure how to get item with key {item}") # cast to DF - return DataFrame(data) + return pd.DataFrame(data) def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None @@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel): # scalars need to be wrapped in series for pandas if not isinstance(rows, (Iterable, slice)): - val = Series([val]) + val = pd.Series([val]) data[k] = val return data @@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel): except AttributeError: raise e from None + def __len__(self) -> int: + """ + Use the id column to determine length. + + If the id column doesn't represent length accurately, it's a bug + """ + return len(self.id) + @model_validator(mode="before") @classmethod def create_id(cls, model: Dict[str, Any]) -> Dict: @@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel): ) +class AlignedDynamicTableMixin(DynamicTableMixin): + """ + Mixin to allow indexing multiple tables that are aligned on a common ID + """ + + __pydantic_extra__: Dict[str, "DynamicTableMixin"] + + NON_CATEGORY_FIELDS: ClassVar[tuple[str]] = ( + "name", + "categories", + "colnames", + "description", + ) + + name: str = "aligned_table" + categories: List[str] = Field(default_factory=list) + id: Optional[NDArray[Shape["* num_rows"], int]] = None + + @property + def _categories(self) -> Dict[str, "DynamicTableMixin"]: + return {k: getattr(self, k) for i, k in enumerate(self.categories)} + + def __getitem__( + self, item: Union[int, str, slice, Tuple[Union[int, slice], str]] + ) -> pd.DataFrame: + """ + Mimic hdmf: + + https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261 + Args: + item: + + Returns: + + """ + if isinstance(item, str): + # get a single table + return self._categories[item][:] + elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str): + # get a slice of a single table + return self._categories[item[1]][item[0]] + elif isinstance(item, (int, slice)): + # get a slice of all the tables + ids = self.id[item] + if not isinstance(ids, Iterable): + ids = pd.Series([ids]) + ids = pd.DataFrame({"id": ids}) + tables = [ids] + [table[item].reset_index() for table in self._categories.values()] + names = [self.name] + self.categories + # construct below in case we need to support array indexing in the future + else: + raise ValueError( + f"Dont know how to index with {item}, " + "need an int, string, slice, or tuple[int | slice, str]" + ) + + df = pd.concat(tables, axis=1, keys=names) + df.set_index((self.name, "id"), drop=True, inplace=True) + return df + + linkml_meta = LinkMLMeta( { "annotations": { @@ -617,7 +686,7 @@ class DynamicTable(DynamicTableMixin): ) -class AlignedDynamicTable(DynamicTable): +class AlignedDynamicTable(AlignedDynamicTableMixin, DynamicTable): """ DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group. """ diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_6_0/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_6_0/hdmf_common_table.py index 158f8c1..45e4269 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_6_0/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_6_0/hdmf_common_table.py @@ -5,7 +5,7 @@ from enum import Enum import re import sys from ...hdmf_common.v1_6_0.hdmf_common_base import Data, Container -from pandas import DataFrame, Series +import pandas as pd from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from pydantic import ( BaseModel, @@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel): def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... @overload - def __getitem__(self, item: int) -> DataFrame: ... + def __getitem__(self, item: int) -> pd.DataFrame: ... @overload def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... @overload def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ - DataFrame, + pd.DataFrame, list, "NDArray", "VectorDataMixin", ]: ... @overload - def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ... def __getitem__( self, @@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel): raise ValueError(f"Unsure how to get item with key {item}") # cast to DF - return DataFrame(data) + return pd.DataFrame(data) def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None @@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel): # scalars need to be wrapped in series for pandas if not isinstance(rows, (Iterable, slice)): - val = Series([val]) + val = pd.Series([val]) data[k] = val return data @@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel): except AttributeError: raise e from None + def __len__(self) -> int: + """ + Use the id column to determine length. + + If the id column doesn't represent length accurately, it's a bug + """ + return len(self.id) + @model_validator(mode="before") @classmethod def create_id(cls, model: Dict[str, Any]) -> Dict: @@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel): ) +class AlignedDynamicTableMixin(DynamicTableMixin): + """ + Mixin to allow indexing multiple tables that are aligned on a common ID + """ + + __pydantic_extra__: Dict[str, "DynamicTableMixin"] + + NON_CATEGORY_FIELDS: ClassVar[tuple[str]] = ( + "name", + "categories", + "colnames", + "description", + ) + + name: str = "aligned_table" + categories: List[str] = Field(default_factory=list) + id: Optional[NDArray[Shape["* num_rows"], int]] = None + + @property + def _categories(self) -> Dict[str, "DynamicTableMixin"]: + return {k: getattr(self, k) for i, k in enumerate(self.categories)} + + def __getitem__( + self, item: Union[int, str, slice, Tuple[Union[int, slice], str]] + ) -> pd.DataFrame: + """ + Mimic hdmf: + + https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261 + Args: + item: + + Returns: + + """ + if isinstance(item, str): + # get a single table + return self._categories[item][:] + elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str): + # get a slice of a single table + return self._categories[item[1]][item[0]] + elif isinstance(item, (int, slice)): + # get a slice of all the tables + ids = self.id[item] + if not isinstance(ids, Iterable): + ids = pd.Series([ids]) + ids = pd.DataFrame({"id": ids}) + tables = [ids] + [table[item].reset_index() for table in self._categories.values()] + names = [self.name] + self.categories + # construct below in case we need to support array indexing in the future + else: + raise ValueError( + f"Dont know how to index with {item}, " + "need an int, string, slice, or tuple[int | slice, str]" + ) + + df = pd.concat(tables, axis=1, keys=names) + df.set_index((self.name, "id"), drop=True, inplace=True) + return df + + linkml_meta = LinkMLMeta( { "annotations": { @@ -617,7 +686,7 @@ class DynamicTable(DynamicTableMixin): ) -class AlignedDynamicTable(DynamicTable): +class AlignedDynamicTable(AlignedDynamicTableMixin, DynamicTable): """ DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group. """ diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_7_0/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_7_0/hdmf_common_table.py index 3ffb25d..0ca3130 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_7_0/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_7_0/hdmf_common_table.py @@ -5,7 +5,7 @@ from enum import Enum import re import sys from ...hdmf_common.v1_7_0.hdmf_common_base import Data, Container -from pandas import DataFrame, Series +import pandas as pd from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from pydantic import ( BaseModel, @@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel): def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... @overload - def __getitem__(self, item: int) -> DataFrame: ... + def __getitem__(self, item: int) -> pd.DataFrame: ... @overload def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... @overload def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ - DataFrame, + pd.DataFrame, list, "NDArray", "VectorDataMixin", ]: ... @overload - def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ... def __getitem__( self, @@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel): raise ValueError(f"Unsure how to get item with key {item}") # cast to DF - return DataFrame(data) + return pd.DataFrame(data) def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None @@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel): # scalars need to be wrapped in series for pandas if not isinstance(rows, (Iterable, slice)): - val = Series([val]) + val = pd.Series([val]) data[k] = val return data @@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel): except AttributeError: raise e from None + def __len__(self) -> int: + """ + Use the id column to determine length. + + If the id column doesn't represent length accurately, it's a bug + """ + return len(self.id) + @model_validator(mode="before") @classmethod def create_id(cls, model: Dict[str, Any]) -> Dict: @@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel): ) +class AlignedDynamicTableMixin(DynamicTableMixin): + """ + Mixin to allow indexing multiple tables that are aligned on a common ID + """ + + __pydantic_extra__: Dict[str, "DynamicTableMixin"] + + NON_CATEGORY_FIELDS: ClassVar[tuple[str]] = ( + "name", + "categories", + "colnames", + "description", + ) + + name: str = "aligned_table" + categories: List[str] = Field(default_factory=list) + id: Optional[NDArray[Shape["* num_rows"], int]] = None + + @property + def _categories(self) -> Dict[str, "DynamicTableMixin"]: + return {k: getattr(self, k) for i, k in enumerate(self.categories)} + + def __getitem__( + self, item: Union[int, str, slice, Tuple[Union[int, slice], str]] + ) -> pd.DataFrame: + """ + Mimic hdmf: + + https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261 + Args: + item: + + Returns: + + """ + if isinstance(item, str): + # get a single table + return self._categories[item][:] + elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str): + # get a slice of a single table + return self._categories[item[1]][item[0]] + elif isinstance(item, (int, slice)): + # get a slice of all the tables + ids = self.id[item] + if not isinstance(ids, Iterable): + ids = pd.Series([ids]) + ids = pd.DataFrame({"id": ids}) + tables = [ids] + [table[item].reset_index() for table in self._categories.values()] + names = [self.name] + self.categories + # construct below in case we need to support array indexing in the future + else: + raise ValueError( + f"Dont know how to index with {item}, " + "need an int, string, slice, or tuple[int | slice, str]" + ) + + df = pd.concat(tables, axis=1, keys=names) + df.set_index((self.name, "id"), drop=True, inplace=True) + return df + + linkml_meta = LinkMLMeta( { "annotations": { @@ -617,7 +686,7 @@ class DynamicTable(DynamicTableMixin): ) -class AlignedDynamicTable(DynamicTable): +class AlignedDynamicTable(AlignedDynamicTableMixin, DynamicTable): """ DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group. """ diff --git a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_8_0/hdmf_common_table.py b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_8_0/hdmf_common_table.py index de43571..d6e1081 100644 --- a/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_8_0/hdmf_common_table.py +++ b/nwb_linkml/src/nwb_linkml/models/pydantic/hdmf_common/v1_8_0/hdmf_common_table.py @@ -5,7 +5,7 @@ from enum import Enum import re import sys from ...hdmf_common.v1_8_0.hdmf_common_base import Data, Container -from pandas import DataFrame, Series +import pandas as pd from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, Iterable, Tuple, overload from pydantic import ( BaseModel, @@ -245,21 +245,21 @@ class DynamicTableMixin(BaseModel): def __getitem__(self, item: str) -> Union[list, "NDArray", "VectorDataMixin"]: ... @overload - def __getitem__(self, item: int) -> DataFrame: ... + def __getitem__(self, item: int) -> pd.DataFrame: ... @overload def __getitem__(self, item: Tuple[int, Union[int, str]]) -> Any: ... @overload def __getitem__(self, item: Tuple[Union[int, slice], ...]) -> Union[ - DataFrame, + pd.DataFrame, list, "NDArray", "VectorDataMixin", ]: ... @overload - def __getitem__(self, item: Union[slice, "NDArray"]) -> DataFrame: ... + def __getitem__(self, item: Union[slice, "NDArray"]) -> pd.DataFrame: ... def __getitem__( self, @@ -310,7 +310,7 @@ class DynamicTableMixin(BaseModel): raise ValueError(f"Unsure how to get item with key {item}") # cast to DF - return DataFrame(data) + return pd.DataFrame(data) def _slice_range( self, rows: Union[int, slice, np.ndarray], cols: Optional[Union[str, List[str]]] = None @@ -328,7 +328,7 @@ class DynamicTableMixin(BaseModel): # scalars need to be wrapped in series for pandas if not isinstance(rows, (Iterable, slice)): - val = Series([val]) + val = pd.Series([val]) data[k] = val return data @@ -359,6 +359,14 @@ class DynamicTableMixin(BaseModel): except AttributeError: raise e from None + def __len__(self) -> int: + """ + Use the id column to determine length. + + If the id column doesn't represent length accurately, it's a bug + """ + return len(self.id) + @model_validator(mode="before") @classmethod def create_id(cls, model: Dict[str, Any]) -> Dict: @@ -493,6 +501,67 @@ class DynamicTableMixin(BaseModel): ) +class AlignedDynamicTableMixin(DynamicTableMixin): + """ + Mixin to allow indexing multiple tables that are aligned on a common ID + """ + + __pydantic_extra__: Dict[str, "DynamicTableMixin"] + + NON_CATEGORY_FIELDS: ClassVar[tuple[str]] = ( + "name", + "categories", + "colnames", + "description", + ) + + name: str = "aligned_table" + categories: List[str] = Field(default_factory=list) + id: Optional[NDArray[Shape["* num_rows"], int]] = None + + @property + def _categories(self) -> Dict[str, "DynamicTableMixin"]: + return {k: getattr(self, k) for i, k in enumerate(self.categories)} + + def __getitem__( + self, item: Union[int, str, slice, Tuple[Union[int, slice], str]] + ) -> pd.DataFrame: + """ + Mimic hdmf: + + https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/alignedtable.py#L261 + Args: + item: + + Returns: + + """ + if isinstance(item, str): + # get a single table + return self._categories[item][:] + elif isinstance(item, tuple) and len(item) == 2 and isinstance(item[1], str): + # get a slice of a single table + return self._categories[item[1]][item[0]] + elif isinstance(item, (int, slice)): + # get a slice of all the tables + ids = self.id[item] + if not isinstance(ids, Iterable): + ids = pd.Series([ids]) + ids = pd.DataFrame({"id": ids}) + tables = [ids] + [table[item].reset_index() for table in self._categories.values()] + names = [self.name] + self.categories + # construct below in case we need to support array indexing in the future + else: + raise ValueError( + f"Dont know how to index with {item}, " + "need an int, string, slice, or tuple[int | slice, str]" + ) + + df = pd.concat(tables, axis=1, keys=names) + df.set_index((self.name, "id"), drop=True, inplace=True) + return df + + linkml_meta = LinkMLMeta( { "annotations": { @@ -617,7 +686,7 @@ class DynamicTable(DynamicTableMixin): ) -class AlignedDynamicTable(DynamicTable): +class AlignedDynamicTable(AlignedDynamicTableMixin, DynamicTable): """ DynamicTable container that supports storing a collection of sub-tables. Each sub-table is a DynamicTable itself that is aligned with the main table by row index. I.e., all DynamicTables stored in this group MUST have the same number of rows. This type effectively defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being represented by a separate DynamicTable stored within the group. """