continue removing nptyping, actually fix indexing

This commit is contained in:
sneakers-the-rat 2024-08-06 21:40:23 -07:00
parent 3ee7c68e15
commit a993ee10f2
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
24 changed files with 863 additions and 813 deletions

View file

@ -23,7 +23,7 @@ Cleanup
- [ ] Make a minimal pydanticgen-only package to slim linkml deps?
- [ ] Disambiguate "maps" terminology - split out simple maps from, e.g., the dataset mapping classes
- [ ] Remove unnecessary imports
- [x] Remove unnecessary imports
- dask
- nptyping
- [ ] Adapt the split generation to the new split generator style

File diff suppressed because it is too large Load diff

View file

@ -14,7 +14,6 @@ dependencies = [
"furo>=2023.8.19",
"myst-parser>=2.0.0",
"autodoc-pydantic>=2.0.1",
"nptyping>=2.5.0",
"sphinx-autobuild>=2021.3.14",
"sphinx-design>=0.5.0",
"sphinx-togglebutton>=0.3.2",

View file

@ -266,10 +266,7 @@ class NamespacesAdapter(Adapter):
else:
ns = ns[0]
schema_names = []
for sch in ns.schema_:
if sch.source is not None:
schema_names.append(sch.source)
schema_names = [sch.source for sch in ns.schema_ if sch.source is not None]
return schema_names
def schema_namespace(self, name: str) -> Optional[str]:

View file

@ -133,7 +133,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict
val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long
val = Series([val])
@ -382,6 +382,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin to allow indexing references to regions of dynamictables

    Item access and item assignment are both forwarded, unchanged, to the
    table held in :attr:`table`.
    """

    # the table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever ``self.table[item]`` yields for the given index."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Store ``value`` in the underlying table under ``key``."""
        referenced = self.table
        referenced[key] = value
DYNAMIC_TABLE_IMPORTS = Imports(
imports=[
Import(
@ -417,7 +431,9 @@ Imports required for the dynamic table mixin
VectorData is purposefully excluded as an import or an inject so that it will be
resolved to the VectorData definition in the generated module
"""
DYNAMIC_TABLE_INJECTS = [VectorDataMixin, VectorIndexMixin, DynamicTableMixin]
# class VectorDataMixin(BaseModel):
# index: Optional[BaseModel] = None
# Mixin classes injected into the generated module. VectorData itself is
# deliberately absent (see the note above) so that the generated module's own
# VectorData definition is the one that gets resolved.
DYNAMIC_TABLE_INJECTS = [
VectorDataMixin,
VectorIndexMixin,
DynamicTableRegionMixin,
DynamicTableMixin,
]

View file

@ -242,10 +242,7 @@ def find_references(h5f: h5py.File, path: str) -> List[str]:
def _find_references(name: str, obj: h5py.Group | h5py.Dataset) -> None:
pbar.update()
refs = []
for attr in obj.attrs.values():
if isinstance(attr, h5py.h5r.Reference):
refs.append(attr)
refs = [attr for attr in obj.attrs.values() if isinstance(attr, h5py.h5r.Reference)]
if isinstance(obj, h5py.Dataset):
# dataset is all references

View file

@ -100,10 +100,9 @@ np_to_python = {
np.float64,
np.single,
np.double,
np.float_,
)
},
**{n: str for n in (np.character, np.str_, np.string_, np.unicode_)},
**{n: str for n in (np.character, np.str_)},
}
allowed_precisions = {

View file

@ -171,6 +171,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin to allow indexing references to regions of dynamictables

    Item access and item assignment are both forwarded, unchanged, to the
    table held in :attr:`table`.
    """

    # the table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever ``self.table[item]`` yields for the given index."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Store ``value`` in the underlying table under ``key``."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel):
"""
Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -281,7 +295,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict
val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long
val = Series([val])

View file

@ -171,6 +171,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin to allow indexing references to regions of dynamictables

    Item access and item assignment are both forwarded, unchanged, to the
    table held in :attr:`table`.
    """

    # the table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever ``self.table[item]`` yields for the given index."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Store ``value`` in the underlying table under ``key``."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel):
"""
Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -281,7 +295,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict
val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long
val = Series([val])

View file

@ -171,6 +171,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin to allow indexing references to regions of dynamictables

    Item access and item assignment are both forwarded, unchanged, to the
    table held in :attr:`table`.
    """

    # the table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever ``self.table[item]`` yields for the given index."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Store ``value`` in the underlying table under ``key``."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel):
"""
Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -281,7 +295,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict
val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long
val = Series([val])

View file

@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin to allow indexing references to regions of dynamictables

    Item access and item assignment are both forwarded, unchanged, to the
    table held in :attr:`table`.
    """

    # the table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever ``self.table[item]`` yields for the given index."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Store ``value`` in the underlying table under ``key``."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel):
"""
Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict
val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long
val = Series([val])

View file

@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin to allow indexing references to regions of dynamictables

    Item access and item assignment are both forwarded, unchanged, to the
    table held in :attr:`table`.
    """

    # the table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever ``self.table[item]`` yields for the given index."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Store ``value`` in the underlying table under ``key``."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel):
"""
Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict
val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long
val = Series([val])

View file

@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin to allow indexing references to regions of dynamictables

    Item access and item assignment are both forwarded, unchanged, to the
    table held in :attr:`table`.
    """

    # the table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever ``self.table[item]`` yields for the given index."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Store ``value`` in the underlying table under ``key``."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel):
"""
Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict
val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long
val = Series([val])

View file

@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin to allow indexing references to regions of dynamictables

    Item access and item assignment are both forwarded, unchanged, to the
    table held in :attr:`table`.
    """

    # the table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever ``self.table[item]`` yields for the given index."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Store ``value`` in the underlying table under ``key``."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel):
"""
Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict
val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long
val = Series([val])

View file

@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin to allow indexing references to regions of dynamictables

    Item access and item assignment are both forwarded, unchanged, to the
    table held in :attr:`table`.
    """

    # the table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever ``self.table[item]`` yields for the given index."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Store ``value`` in the underlying table under ``key``."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel):
"""
Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict
val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long
val = Series([val])

View file

@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin to allow indexing references to regions of dynamictables

    Item access and item assignment are both forwarded, unchanged, to the
    table held in :attr:`table`.
    """

    # the table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever ``self.table[item]`` yields for the given index."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Store ``value`` in the underlying table under ``key``."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel):
"""
Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict
val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long
val = Series([val])

View file

@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin to allow indexing references to regions of dynamictables

    Item access and item assignment are both forwarded, unchanged, to the
    table held in :attr:`table`.
    """

    # the table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever ``self.table[item]`` yields for the given index."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Store ``value`` in the underlying table under ``key``."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel):
"""
Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict
val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long
val = Series([val])

View file

@ -172,6 +172,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin to allow indexing references to regions of dynamictables

    Item access and item assignment are both forwarded, unchanged, to the
    table held in :attr:`table`.
    """

    # the table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever ``self.table[item]`` yields for the given index."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Store ``value`` in the underlying table under ``key``."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel):
"""
Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -282,7 +296,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict
val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and len(val) > 1:
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long
val = Series([val])

View file

@ -1,7 +1,12 @@
from __future__ import annotations
from ...hdmf_common.v1_8_0.hdmf_common_base import Data
from datetime import datetime, date
from decimal import Decimal
from enum import Enum
import re
import sys
from ...hdmf_common.v1_8_0.hdmf_common_base import Data, Container
from pandas import DataFrame, Series
from typing import Any, ClassVar, List, Dict, Optional, Union, overload, Tuple
from typing import Any, ClassVar, List, Literal, Dict, Optional, Union, overload, Tuple
from pydantic import (
BaseModel,
ConfigDict,
@ -167,6 +172,20 @@ class VectorIndexMixin(BaseModel):
return len(self.value)
class DynamicTableRegionMixin(BaseModel):
    """
    Mixin to allow indexing references to regions of dynamictables

    Item access and item assignment are both forwarded, unchanged, to the
    table held in :attr:`table`.
    """

    # the table that every lookup and assignment is delegated to
    table: "DynamicTableMixin"

    def __getitem__(self, item: Union[str, int, slice, Tuple[Union[str, int, slice], ...]]) -> Any:
        """Return whatever ``self.table[item]`` yields for the given index."""
        referenced = self.table
        return referenced[item]

    def __setitem__(self, key: Union[int, str, slice], value: Any) -> None:
        """Store ``value`` in the underlying table under ``key``."""
        referenced = self.table
        referenced[key] = value
class DynamicTableMixin(BaseModel):
"""
Mixin to make DynamicTable subclasses behave like tables/dataframes
@ -277,7 +296,7 @@ class DynamicTableMixin(BaseModel):
# special case where pandas will unpack a pydantic model
# into {n_fields} rows, rather than keeping it in a dict
val = Series([val])
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and val.shape[0] > 1:
elif isinstance(rows, int) and hasattr(val, "shape") and val.shape and len(val) > 1:
# special case where we are returning a row in a ragged array,
# same as above - prevent pandas pivoting to long
val = Series([val])

View file

@ -14,180 +14,163 @@ Pydantic models that behave like pandas dataframes
left in this module since it is necessary for it to make sense.
"""
import ast
from typing import Any, Dict, Optional, Type
#
# class DataFrame(BaseModel, pd.DataFrame):
# """
# Pydantic model root class that mimics a pandas dataframe.
#
# Notes:
#
# The synchronization between the underlying lists in the pydantic model
# and the derived dataframe is partial, and at the moment unidirectional.
# This class is primarily intended for reading from tables stored in
# NWB files rather than being able to manipulate them.
#
# The dataframe IS updated when new values are *assigned* to a field.
#
# eg.::
#
# MyModel.fieldval = [1,2,3]
#
# But the dataframe is NOT updated when existing values are updated.
#
# eg.::
#
# MyModel.fieldval.append(4)
#
# In that case you need to call :meth:`.update_df` manually.
#
# Additionally, if the dataframe is modified, the underlying lists are NOT updated,
# but when the model is dumped to a dictionary or serialized, the dataframe IS used,
# so changes will be reflected then.
#
# Fields that shadow pandas methods WILL prevent them from being usable, except
# by directly accessing the dataframe like ``mymodel._df``
#
# """
#
# _df: pd.DataFrame = None
# model_config = ConfigDict(validate_assignment=True)
#
# def __init__(self, **kwargs):
# # pdb.set_trace()
# super().__init__(**kwargs)
#
# self._df = self.__make_df()
#
# def __make_df(self) -> pd.DataFrame:
# # make dict that can handle ragged arrays and NoneTypes
# items = {k: v for k, v in self.__dict__.items() if k in self.model_fields}
#
# df_dict = {
# k: (pd.Series(v) if isinstance(v, list) else pd.Series([v])) for k, v in items.items()
# }
# df = pd.DataFrame(df_dict)
# # replace Nans with None
# df = df.fillna(np.nan).replace([np.nan], [None])
# return df
#
# def update_df(self) -> None:
# """
# Update the internal dataframe in the case that the model values are changed
# in a way that we can't detect, like appending to one of the lists.
#
# """
# self._df = self.__make_df()
#
# def __getattr__(self, item: str):
# """
# Mimic pandas dataframe and pydantic model behavior
# """
# if item in ("df", "_df"):
# return self.__pydantic_private__["_df"]
# elif item in self.model_fields:
# return self._df[item]
# else:
# try:
# return object.__getattribute__(self._df, item)
# except AttributeError:
# return object.__getattribute__(self, item)
#
# @model_validator(mode="after")
# def recreate_df(self) -> None:
# """
# Remake DF when validating (eg. when updating values on assignment)
# """
# self.update_df()
#
# @model_serializer(mode="wrap", when_used="always")
# def serialize_model(self, nxt: SerializerFunctionWrapHandler) -> Dict[str, Any]:
# """
# We don't handle values that are changed on the dataframe by directly
# updating the underlying model lists, but we implicitly handle them
# by using the dataframe as the source when serializing
# """
# if self._df is None:
# return nxt(self)
# else:
# out = self._df.to_dict("list")
# # remove Nones
# out = {k: [inner_v for inner_v in v if inner_v is not None] for k, v in out.items()}
# return nxt(self.__class__(**out))
import h5py
import numpy as np
import pandas as pd
from pydantic import (
BaseModel,
ConfigDict,
SerializerFunctionWrapHandler,
model_serializer,
model_validator,
)
from nwb_linkml.maps.hdmf import dereference_reference_vector, model_from_dynamictable
from nwb_linkml.types.hdf5 import HDF5_Path
class DataFrame(BaseModel, pd.DataFrame):
    """
    Pydantic model root class that mimics a pandas dataframe.

    Notes:

        The synchronization between the underlying lists in the pydantic model
        and the derived dataframe is partial, and at the moment unidirectional.
        This class is primarily intended for reading from tables stored in
        NWB files rather than being able to manipulate them.

        The dataframe IS updated when new values are *assigned* to a field.

        eg.::

            MyModel.fieldval = [1,2,3]

        But the dataframe is NOT updated when existing values are updated.

        eg.::

            MyModel.fieldval.append(4)

        In that case you need to call :meth:`.update_df` manually.

        Additionally, if the dataframe is modified, the underlying lists are NOT updated,
        but when the model is dumped to a dictionary or serialized, the dataframe IS used,
        so changes will be reflected then.

        Fields that shadow pandas methods WILL prevent them from being usable, except
        by directly accessing the dataframe like ``mymodel._df``

    """

    # cached dataframe built from the model fields; rebuilt by update_df
    _df: pd.DataFrame = None
    model_config = ConfigDict(validate_assignment=True)

    def __init__(self, **kwargs: Any) -> None:
        """Validate ``kwargs`` as model fields, then build the cached dataframe."""
        super().__init__(**kwargs)

        self._df = self.__make_df()

    def __make_df(self) -> pd.DataFrame:
        """Build a dataframe with one column per model field."""
        # make dict that can handle ragged arrays and NoneTypes
        items = {k: v for k, v in self.__dict__.items() if k in self.model_fields}

        # lists become multi-row Series, anything else a single-row Series
        df_dict = {
            k: (pd.Series(v) if isinstance(v, list) else pd.Series([v])) for k, v in items.items()
        }
        df = pd.DataFrame(df_dict)
        # replace Nans with None
        df = df.fillna(np.nan).replace([np.nan], [None])
        return df

    def update_df(self) -> None:
        """
        Update the internal dataframe in the case that the model values are changed
        in a way that we can't detect, like appending to one of the lists.

        """
        self._df = self.__make_df()

    def __getattr__(self, item: str) -> Any:
        """
        Mimic pandas dataframe and pydantic model behavior

        Lookup order: the private dataframe itself, then model fields (returned
        as dataframe columns), then attributes of the dataframe, and finally
        attributes of the model.
        """
        if item in ("df", "_df"):
            return self.__pydantic_private__["_df"]
        elif item in self.model_fields:
            return self._df[item]
        else:
            try:
                return object.__getattribute__(self._df, item)
            except AttributeError:
                return object.__getattribute__(self, item)

    @model_validator(mode="after")
    def recreate_df(self) -> "DataFrame":
        """
        Remake DF when validating (eg. when updating values on assignment)

        Returns:
            The validated instance. An ``after`` model validator must hand the
            instance back, otherwise ``model_validate`` would yield ``None``.
        """
        self.update_df()
        return self

    @model_serializer(mode="wrap", when_used="always")
    def serialize_model(self, nxt: SerializerFunctionWrapHandler) -> Dict[str, Any]:
        """
        We don't handle values that are changed on the dataframe by directly
        updating the underlying model lists, but we implicitly handle them
        by using the dataframe as the source when serializing
        """
        if self._df is None:
            return nxt(self)
        else:
            out = self._df.to_dict("list")
            # remove Nones
            out = {k: [inner_v for inner_v in v if inner_v is not None] for k, v in out.items()}
            return nxt(self.__class__(**out))
def dynamictable_to_df(
    group: h5py.Group, model: Optional[Type[DataFrame]] = None, base: Optional[BaseModel] = None
) -> DataFrame:
    """
    Generate a dataframe from an NWB DynamicTable

    Args:
        group: HDF5 group holding the table's column datasets
        model: dataframe model class to instantiate; when ``None`` one is
            created with ``model_from_dynamictable(group, base)``
        base: passed through to ``model_from_dynamictable`` when ``model``
            is not given

    Returns:
        An instance of ``model`` populated with one entry per column that is
        present in ``group``
    """
    if model is None:
        model = model_from_dynamictable(group, base)

    items = {}
    for col, _col_type in model.model_fields.items():
        if col not in group:
            continue
        idxname = col + "_index"
        if idxname in group:
            # a companion "<col>_index" dataset selects which entries to read
            idx = group.get(idxname)[:]
            data = group.get(col)[idx - 1]
        else:
            data = group.get(col)[:]

        # Handle typing inside of list
        if isinstance(data[0], bytes):
            data = data.astype("unicode")
        if isinstance(data[0], str):
            # lists and other compound data types can get flattened out to strings when stored
            # so we try and literal eval and recover them
            try:
                eval_type = type(ast.literal_eval(data[0]))
            except (ValueError, SyntaxError):
                eval_type = str

            # if we've found one of those, get the data type within it.
            if eval_type is not str:
                eval_list = []
                for item in data.tolist():
                    try:
                        eval_list.append(ast.literal_eval(item))
                    except (ValueError, SyntaxError):
                        # literal_eval raises SyntaxError for malformed input too;
                        # treat it like the probe above so one bad cell
                        # doesn't abort the whole table
                        eval_list.append(None)
                data = eval_list
        elif isinstance(data[0], h5py.h5r.Reference):
            data = [HDF5_Path(group[d].name) for d in data]
        elif isinstance(data[0], tuple) and any(
            isinstance(d, h5py.h5r.Reference) for d in data[0]
        ):
            # references stored inside a tuple, reference + location.
            # dereference them!?
            dset = group.get(col)
            names = dset.dtype.names
            if names is not None and names[0] == "idx_start" and names[1] == "count":
                data = dereference_reference_vector(dset, data)

        else:
            data = data.tolist()

        # After list, check if we need to put this thing inside of
        # another class, as indicated by the enclosing model

        items[col] = data

    return model(hdf5_path=group.name, name=group.name.split("/")[-1], **items)
#
# def dynamictable_to_df(
# group: h5py.Group, model: Optional[Type[DataFrame]] = None, base: Optional[BaseModel] = None
# ) -> DataFrame:
# """Generate a dataframe from an NDB DynamicTable"""
# if model is None:
# model = model_from_dynamictable(group, base)
#
# items = {}
# for col, _col_type in model.model_fields.items():
# if col not in group:
# continue
# idxname = col + "_index"
# if idxname in group:
# idx = group.get(idxname)[:]
# data = group.get(col)[idx - 1]
# else:
# data = group.get(col)[:]
#
# # Handle typing inside of list
# if isinstance(data[0], bytes):
# data = data.astype("unicode")
# if isinstance(data[0], str):
# # lists and other compound data types can get flattened out to strings when stored
# # so we try and literal eval and recover them
# try:
# eval_type = type(ast.literal_eval(data[0]))
# except (ValueError, SyntaxError):
# eval_type = str
#
# # if we've found one of those, get the data type within it.
# if eval_type is not str:
# eval_list = []
# for item in data.tolist():
# try:
# eval_list.append(ast.literal_eval(item))
# except ValueError:
# eval_list.append(None)
# data = eval_list
# elif isinstance(data[0], h5py.h5r.Reference):
# data = [HDF5_Path(group[d].name) for d in data]
# elif isinstance(data[0], tuple) and any(
# [isinstance(d, h5py.h5r.Reference) for d in data[0]]
# ):
# # references stored inside a tuple, reference + location.
# # dereference them!?
# dset = group.get(col)
# names = dset.dtype.names
# if names is not None and names[0] == "idx_start" and names[1] == "count":
# data = dereference_reference_vector(dset, data)
#
# else:
# data = data.tolist()
#
# # After list, check if we need to put this thing inside of
# # another class, as indicated by the enclosing model
#
# items[col] = data
#
# return model(hdf5_path=group.name, name=group.name.split("/")[-1], **items)

View file

@ -67,8 +67,9 @@ def units(request) -> Tuple[Units, list[np.ndarray], np.ndarray]:
"""
n_units = 24
generator = np.random.default_rng()
spike_times = [
np.full(shape=np.random.randint(10, 50), fill_value=i, dtype=float) for i in range(n_units)
np.full(shape=generator.integers(10, 50), fill_value=i, dtype=float) for i in range(n_units)
]
spike_idx = []
for i in range(n_units):
@ -141,6 +142,19 @@ def test_dynamictable_indexing(electrical_series):
assert subsection.dtypes.values.tolist() == dtypes[0:3]
def test_dynamictable_region(electrical_series):
    """
    Placeholder for DynamicTableRegion checks.

    Currently this only unpacks the ``electrical_series`` fixture into
    ``series`` and ``electrodes``; no assertions are made yet.

    Args:
        electrical_series: fixture value that unpacks into a
            ``(series, electrodes)`` pair

    TODO: assert the expected region-indexing behavior once it is settled.
    """
    series, electrodes = electrical_series
def test_dynamictable_ragged_arrays(units):
"""
Should be able to index ragged arrays using an implicit _index column

View file

@ -4,8 +4,9 @@ from pathlib import Path
from typing import Optional
import pytest
from nptyping import Shape, UByte
from numpydantic import NDArray
from numpydantic import NDArray, Shape
import numpy as np
import nwb_linkml
from nwb_linkml.maps.naming import version_module_case
@ -77,7 +78,7 @@ def test_linkml_build_from_yaml(tmp_output_dir):
"comments": Optional[str],
"data": "TimeSeriesData",
"timestamps": "Optional", # __name__ just gets the first part of Optional[TimeSeriesTimestamps]
"control": Optional[NDArray[Shape["* num_times"], UByte]],
"control": Optional[NDArray[Shape["* num_times"], np.uint8]],
},
)
],

View file

@ -58,6 +58,10 @@ select = [
"D210", "D211",
# emptiness
"D419",
# perf
"PERF",
# numpy
"NPY",
]
ignore = [
# annotations for *args and **kwargs

View file

@ -3,6 +3,7 @@ import os
import sys
import traceback
from pdb import post_mortem
import subprocess
from argparse import ArgumentParser
from pathlib import Path
@ -179,6 +180,8 @@ def generate_versions(
with open(pydantic_path / "__init__.py", "w") as initfile:
initfile.write(f"from .pydantic.core.{latest_version.name}.namespace import *")
subprocess.run(["black", "."])
finally:
if len(failed_versions) > 0:
print("Failed Building Versions:")