diff --git a/nwb_linkml/pdm.lock b/nwb_linkml/pdm.lock index f6f2c7c..36e3896 100644 --- a/nwb_linkml/pdm.lock +++ b/nwb_linkml/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev", "plot", "tests"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:1c297e11f6dc9e4f6b8d29df872177d2ce65bbd334c0b65aa5175dfb125c4d9f" +content_hash = "sha256:14dd3d0b396dc25e554b924825664346d2644f265e48346180f1cfdf833a8c92" [[metadata.targets]] requires_python = ">=3.10,<3.13" @@ -1038,9 +1038,9 @@ files = [ [[package]] name = "numpydantic" -version = "1.3.3" +version = "1.6.0" requires_python = "<4.0,>=3.9" -summary = "Type and shape validation and serialization for numpy arrays in pydantic models" +summary = "Type and shape validation and serialization for arbitrary array types in pydantic models" groups = ["default"] dependencies = [ "numpy>=1.24.0", @@ -1048,13 +1048,13 @@ dependencies = [ "typing-extensions>=4.11.0; python_version < \"3.11\"", ] files = [ - {file = "numpydantic-1.3.3-py3-none-any.whl", hash = "sha256:e002767252b1b77abb7715834ab7cbf58964baddae44863710f09e71b23287e4"}, - {file = "numpydantic-1.3.3.tar.gz", hash = "sha256:1cc2744f7b5fbcecd51a64fafaf8c9a564bb296336a566a16be97ba7b1c28698"}, + {file = "numpydantic-1.6.0-py3-none-any.whl", hash = "sha256:72f3ef0bc8a5801bac6fb79920467d763d51cddec8476875efeb5064c11c04cf"}, + {file = "numpydantic-1.6.0.tar.gz", hash = "sha256:9785ba7eb5489b9e5438109e9b2dcd1cc0aa87d1b6b5df71fb906dc0708df83c"}, ] [[package]] name = "nwb-models" -version = "0.1.0" +version = "0.2.0" requires_python = ">=3.10" summary = "Pydantic/LinkML models for Neurodata Without Borders" groups = ["default"] @@ -1064,23 +1064,23 @@ dependencies = [ "pydantic>=2.3.0", ] files = [ - {file = "nwb_models-0.1.0-py3-none-any.whl", hash = "sha256:d485422865f6762586e8f8389d67bce17a3e66d07f6273385a751145afbbbfea"}, - {file = "nwb_models-0.1.0.tar.gz", hash = "sha256:3c3ccfc6c2ac03dffe26ba7f180aecc650d6593c05d4f306f84b90fabc3ff2b8"}, + {file = "nwb_models-0.2.0-py3-none-any.whl", hash = "sha256:72bb8a8879261488071d4e8eff35f2cbb20c44ac4bb7f67806c6329b4f8b2068"}, + {file = "nwb_models-0.2.0.tar.gz", hash = "sha256:7e7f280378c668e1695dd9d53b32073d85615e90fee0ec417888dd83bdb9cbb3"}, ] [[package]] name = "nwb-schema-language" -version = "0.1.3" -requires_python = ">=3.9,<4.0" +version = "0.2.0" +requires_python = "<3.13,>=3.10" summary = "Translation of the nwb-schema-language to LinkML" groups = ["default"] dependencies = [ - "linkml-runtime<2.0.0,>=1.1.24", - "pydantic<3.0.0,>=2.3.0", + "linkml-runtime>=1.7.7", + "pydantic>=2.3.0", ] files = [ - {file = "nwb_schema_language-0.1.3-py3-none-any.whl", hash = "sha256:2eb86aac6614d490f7ec3fa68634bb9dceb3834d9820f5afc5645a9f3b0c3401"}, - {file = "nwb_schema_language-0.1.3.tar.gz", hash = "sha256:ad290e2896a9cde7e2f353bc3b8ddf42be865238d991167d397ff2e0d03c88ba"}, + {file = "nwb_schema_language-0.2.0-py3-none-any.whl", hash = "sha256:354afb0abfbc61a6d6b227695b9a4312df5030f2746b517fc5849ac085c8e5f2"}, + {file = "nwb_schema_language-0.2.0.tar.gz", hash = "sha256:59beda56ea52a55f4514d7e4b73e30ceaee1c60b7ddf4fc80afd48777acf9e50"}, ] [[package]] diff --git a/nwb_linkml/pyproject.toml b/nwb_linkml/pyproject.toml index edf3579..2670310 100644 --- a/nwb_linkml/pyproject.toml +++ b/nwb_linkml/pyproject.toml @@ -22,7 +22,7 @@ dependencies = [ "pydantic-settings>=2.0.3", "tqdm>=4.66.1", 'typing-extensions>=4.12.2;python_version<"3.11"', - "numpydantic>=1.5.0", + "numpydantic>=1.6.0", "black>=24.4.2", "pandas>=2.2.2", "networkx>=3.3", diff --git a/nwb_linkml/src/nwb_linkml/generators/pydantic.py b/nwb_linkml/src/nwb_linkml/generators/pydantic.py index f4c1c9e..336bbf8 100644 --- a/nwb_linkml/src/nwb_linkml/generators/pydantic.py +++ b/nwb_linkml/src/nwb_linkml/generators/pydantic.py @@ -9,7 +9,7 @@ import re from dataclasses import dataclass, field from pathlib import Path from types import ModuleType -from typing import Callable, ClassVar, Dict, List, Literal, Optional, Tuple +from typing import Callable, ClassVar, Dict, List, Optional, Tuple from linkml.generators import PydanticGenerator from linkml.generators.pydanticgen.array import ArrayRepresentation, NumpydanticArray @@ -72,7 +72,7 @@ class NWBPydanticGenerator(PydanticGenerator): emit_metadata: bool = True gen_classvars: bool = True gen_slots: bool = True - extra_fields: Literal["allow", "forbid", "ignore"] = "allow" + # extra_fields: Literal["allow", "forbid", "ignore"] = "allow" skip_meta: ClassVar[Tuple[str]] = ("domain_of", "alias") @@ -269,7 +269,7 @@ class AfterGenerateClass: """ if cls.cls.name == "DynamicTable": - cls.cls.bases = ["DynamicTableMixin", "ConfiguredBaseModel"] + cls.cls.bases = ["DynamicTableMixin"] if ( cls.injected_classes is None @@ -287,18 +287,18 @@ class AfterGenerateClass: else: # pragma: no cover - for completeness, shouldn't happen cls.imports = DYNAMIC_TABLE_IMPORTS.model_copy() elif cls.cls.name == "VectorData": - cls.cls.bases = ["VectorDataMixin", "ConfiguredBaseModel"] + cls.cls.bases = ["VectorDataMixin"] # make ``value`` generic on T if "value" in cls.cls.attributes: cls.cls.attributes["value"].range = "Optional[T]" elif cls.cls.name == "VectorIndex": - cls.cls.bases = ["VectorIndexMixin", "ConfiguredBaseModel"] + cls.cls.bases = ["VectorIndexMixin"] elif cls.cls.name == "DynamicTableRegion": - cls.cls.bases = ["DynamicTableRegionMixin", "VectorData", "ConfiguredBaseModel"] + cls.cls.bases = ["DynamicTableRegionMixin", "VectorData"] elif cls.cls.name == "AlignedDynamicTable": cls.cls.bases = ["AlignedDynamicTableMixin", "DynamicTable"] elif cls.cls.name == "ElementIdentifiers": - cls.cls.bases = ["ElementIdentifiersMixin", "Data", "ConfiguredBaseModel"] + cls.cls.bases = ["ElementIdentifiersMixin", "Data"] # make ``value`` generic on T if "value" in cls.cls.attributes: cls.cls.attributes["value"].range = "Optional[T]" diff --git a/nwb_linkml/src/nwb_linkml/includes/base.py b/nwb_linkml/src/nwb_linkml/includes/base.py index 75b5ca6..d77a759 100644 --- a/nwb_linkml/src/nwb_linkml/includes/base.py +++ b/nwb_linkml/src/nwb_linkml/includes/base.py @@ -30,7 +30,8 @@ BASEMODEL_COERCE_VALUE = """ raise ValueError( f"coerce_value: Could not use the value field of {type(v)} " f"to construct {cls.__name__}.{info.field_name}, " - f"expected type: {cls.model_fields[info.field_name].annotation}" + f"expected type: {cls.model_fields[info.field_name].annotation}\\n" + f"inner error: {str(e1)}" ) from e1 """ @@ -48,7 +49,8 @@ BASEMODEL_CAST_WITH_VALUE = """ raise ValueError( f"cast_with_value: Could not cast {type(v)} as value field for " f"{cls.__name__}.{info.field_name}," - f" expected_type: {cls.model_fields[info.field_name].annotation}" + f" expected_type: {cls.model_fields[info.field_name].annotation}\\n" + f"inner error: {str(e1)}" ) from e1 """ diff --git a/nwb_linkml/src/nwb_linkml/includes/hdmf.py b/nwb_linkml/src/nwb_linkml/includes/hdmf.py index 7a7d294..df73d68 100644 --- a/nwb_linkml/src/nwb_linkml/includes/hdmf.py +++ b/nwb_linkml/src/nwb_linkml/includes/hdmf.py @@ -39,8 +39,30 @@ if TYPE_CHECKING: # pragma: no cover T = TypeVar("T", bound=NDArray) T_INJECT = 'T = TypeVar("T", bound=NDArray)' +if "pytest" in sys.modules: + from nwb_models.models import ConfiguredBaseModel +else: -class DynamicTableMixin(BaseModel): + class ConfiguredBaseModel(BaseModel): + """ + Dummy ConfiguredBaseModel (without its methods from :mod:`.includes.base` ) + used so that the injected mixins inherit from the `ConfiguredBaseModel` + and we get a linear inheritance MRO (rather than needing to inherit + from the mixins *and* the configured base model) so that the + model_config is correctly resolved (ie. to allow extra args) + """ + + model_config = ConfigDict( + validate_assignment=True, + validate_default=True, + extra="forbid", + arbitrary_types_allowed=True, + use_enum_values=True, + strict=False, + ) + + +class DynamicTableMixin(ConfiguredBaseModel): """ Mixin to make DynamicTable subclasses behave like tables/dataframes @@ -295,13 +317,19 @@ class DynamicTableMixin(BaseModel): model[key] = to_cast(name=key, description="", value=val) except ValidationError as e: # pragma: no cover raise ValidationError.from_exception_data( - title=f"field {key} cannot be cast to VectorData from {val}", + title="cast_extra_columns", line_errors=[ { - "type": "ValueError", - "loc": ("DynamicTableMixin", "cast_extra_columns"), + "type": "value_error", "input": val, - } + "loc": ("DynamicTableMixin", "cast_extra_columns"), + "ctx": { + "error": ValueError( + f"field {key} cannot be cast to {to_cast} from {val}" + ) + }, + }, + *e.errors(), ], ) from e return model @@ -364,18 +392,21 @@ class DynamicTableMixin(BaseModel): # should pass if we're supposed to be a VectorData column # don't want to override intention here by insisting that it is # *actually* a VectorData column in case an NDArray has been specified for now + description = cls.model_fields[info.field_name].description + description = description if description is not None else "" + return handler( annotation( val, name=info.field_name, - description=cls.model_fields[info.field_name].description, + description=description, ) ) except Exception: raise e from None -class VectorDataMixin(BaseModel, Generic[T]): +class VectorDataMixin(ConfiguredBaseModel, Generic[T]): """ Mixin class to give VectorData indexing abilities """ @@ -426,7 +457,7 @@ class VectorDataMixin(BaseModel, Generic[T]): return len(self.value) -class VectorIndexMixin(BaseModel, Generic[T]): +class VectorIndexMixin(ConfiguredBaseModel, Generic[T]): """ Mixin class to give VectorIndex indexing abilities """ @@ -518,7 +549,7 @@ class VectorIndexMixin(BaseModel, Generic[T]): return len(self.value) -class DynamicTableRegionMixin(BaseModel): +class DynamicTableRegionMixin(ConfiguredBaseModel): """ Mixin to allow indexing references to regions of dynamictables """ @@ -574,7 +605,7 @@ class DynamicTableRegionMixin(BaseModel): ) # pragma: no cover -class AlignedDynamicTableMixin(BaseModel): +class AlignedDynamicTableMixin(ConfiguredBaseModel): """ Mixin to allow indexing multiple tables that are aligned on a common ID @@ -927,12 +958,18 @@ if "pytest" in sys.modules: class VectorData(VectorDataMixin): """VectorData subclass for testing""" - pass + name: str = Field(...) + description: str = Field( + ..., description="""Description of what these vectors represent.""" + ) class VectorIndex(VectorIndexMixin): """VectorIndex subclass for testing""" - pass + name: str = Field(...) + description: str = Field( + ..., description="""Description of what these vectors represent.""" + ) class DynamicTableRegion(DynamicTableRegionMixin, VectorData): """DynamicTableRegion subclass for testing""" diff --git a/nwb_linkml/src/nwb_linkml/lang_elements.py b/nwb_linkml/src/nwb_linkml/lang_elements.py index c199062..fdde634 100644 --- a/nwb_linkml/src/nwb_linkml/lang_elements.py +++ b/nwb_linkml/src/nwb_linkml/lang_elements.py @@ -12,7 +12,7 @@ from linkml_runtime.linkml_model import ( TypeDefinition, ) -from nwb_linkml.maps import flat_to_linkml +from nwb_linkml.maps import flat_to_linkml, linkml_reprs def _make_dtypes() -> List[TypeDefinition]: @@ -36,6 +36,7 @@ def _make_dtypes() -> List[TypeDefinition]: name=nwbtype, minimum_value=amin, typeof=linkmltype, # repr=repr_string + repr=linkml_reprs.get(nwbtype, None), ) DTypeTypes.append(atype) return DTypeTypes diff --git a/nwb_linkml/src/nwb_linkml/maps/__init__.py b/nwb_linkml/src/nwb_linkml/maps/__init__.py index 8b01447..cdad7d0 100644 --- a/nwb_linkml/src/nwb_linkml/maps/__init__.py +++ b/nwb_linkml/src/nwb_linkml/maps/__init__.py @@ -2,7 +2,7 @@ Mapping from one domain to another """ -from nwb_linkml.maps.dtype import flat_to_linkml, flat_to_np +from nwb_linkml.maps.dtype import flat_to_linkml, flat_to_np, linkml_reprs from nwb_linkml.maps.map import Map from nwb_linkml.maps.postload import MAP_HDMF_DATATYPE_DEF, MAP_HDMF_DATATYPE_INC from nwb_linkml.maps.quantity import QUANTITY_MAP @@ -14,4 +14,5 @@ __all__ = [ "Map", "flat_to_linkml", "flat_to_np", + "linkml_reprs", ] diff --git a/nwb_linkml/src/nwb_linkml/maps/dtype.py b/nwb_linkml/src/nwb_linkml/maps/dtype.py index 6d944dd..95cb296 100644 --- a/nwb_linkml/src/nwb_linkml/maps/dtype.py +++ b/nwb_linkml/src/nwb_linkml/maps/dtype.py @@ -39,6 +39,12 @@ flat_to_linkml = { Map between the flat data types and the simpler linkml base types """ +linkml_reprs = {"numeric": "float | int"} +""" +``repr`` fields used in the nwb language elements injected in every namespace +that give the nwb type a specific representation in the generated pydantic models +""" + flat_to_np = { "float": float, "float32": np.float32, diff --git a/nwb_linkml/tests/test_includes/test_hdmf.py b/nwb_linkml/tests/test_includes/test_hdmf.py index a8b14b7..349a93f 100644 --- a/nwb_linkml/tests/test_includes/test_hdmf.py +++ b/nwb_linkml/tests/test_includes/test_hdmf.py @@ -149,8 +149,8 @@ def test_dynamictable_mixin_colnames_index(): cols = { "existing_col": np.arange(10), - "new_col_1": hdmf.VectorData(value=np.arange(10)), - "new_col_2": hdmf.VectorData(value=np.arange(10)), + "new_col_1": hdmf.VectorData(name="new_col_1", description="", value=np.arange(10)), + "new_col_2": hdmf.VectorData(name="new_col_2", description="", value=np.arange(10)), } # explicit index with mismatching name cols["weirdname_index"] = VectorIndexMixin(value=np.arange(10), target=cols["new_col_1"]) @@ -171,9 +171,9 @@ def test_dynamictable_mixin_colnames_ordered(): cols = { "existing_col": np.arange(10), - "new_col_1": hdmf.VectorData(value=np.arange(10)), - "new_col_2": hdmf.VectorData(value=np.arange(10)), - "new_col_3": hdmf.VectorData(value=np.arange(10)), + "new_col_1": hdmf.VectorData(name="new_col_1", description="", value=np.arange(10)), + "new_col_2": hdmf.VectorData(name="new_col_2", description="", value=np.arange(10)), + "new_col_3": hdmf.VectorData(name="new_col_2", description="", value=np.arange(10)), } order = ["new_col_2", "existing_col", "new_col_1", "new_col_3"] @@ -198,7 +198,7 @@ def test_dynamictable_mixin_getattr(): class MyDT(DynamicTableMixin): existing_col: hdmf.VectorData[NDArray[Shape["* col"], int]] - col = hdmf.VectorData(value=np.arange(10)) + col = hdmf.VectorData(name="existing_col", description="", value=np.arange(10)) inst = MyDT(existing_col=col) # regular lookup for attrs that exist @@ -257,13 +257,17 @@ def test_dynamictable_resolve_index(): cols = { "existing_col": np.arange(10), - "new_col_1": hdmf.VectorData(value=np.arange(10)), - "new_col_2": hdmf.VectorData(value=np.arange(10)), + "new_col_1": hdmf.VectorData(name="new_col_1", description="", value=np.arange(10)), + "new_col_2": hdmf.VectorData(name="new_col_2", description="", value=np.arange(10)), } # explicit index with mismatching name - cols["weirdname_index"] = hdmf.VectorIndex(value=np.arange(10), target=cols["new_col_1"]) + cols["weirdname_index"] = hdmf.VectorIndex( + name="weirdname_index", description="", value=np.arange(10), target=cols["new_col_1"] + ) # implicit index with matching name - cols["new_col_2_index"] = hdmf.VectorIndex(value=np.arange(10)) + cols["new_col_2_index"] = hdmf.VectorIndex( + name="new_col_2_index", description="", value=np.arange(10) + ) inst = MyDT(**cols) assert inst.weirdname_index.target is inst.new_col_1 @@ -282,14 +286,14 @@ def test_dynamictable_assert_equal_length(): cols = { "existing_col": np.arange(10), - "new_col_1": hdmf.VectorData(value=np.arange(11)), + "new_col_1": hdmf.VectorData(name="new_col_1", description="", value=np.arange(11)), } with pytest.raises(ValidationError, match="columns are not of equal length"): _ = MyDT(**cols) cols = { "existing_col": np.arange(11), - "new_col_1": hdmf.VectorData(value=np.arange(10)), + "new_col_1": hdmf.VectorData(name="new_col_1", description="", value=np.arange(10)), } with pytest.raises(ValidationError, match="columns are not of equal length"): _ = MyDT(**cols) @@ -297,16 +301,20 @@ def test_dynamictable_assert_equal_length(): # wrong lengths are fine as long as the index is good cols = { "existing_col": np.arange(10), - "new_col_1": hdmf.VectorData(value=np.arange(100)), - "new_col_1_index": hdmf.VectorIndex(value=np.arange(0, 100, 10) + 10), + "new_col_1": hdmf.VectorData(name="new_col_1", description="", value=np.arange(100)), + "new_col_1_index": hdmf.VectorIndex( + name="new_col_1_index", description="", value=np.arange(0, 100, 10) + 10 + ), } _ = MyDT(**cols) # but not fine if the index is not good cols = { "existing_col": np.arange(10), - "new_col_1": hdmf.VectorData(value=np.arange(100)), - "new_col_1_index": hdmf.VectorIndex(value=np.arange(0, 100, 5) + 5), + "new_col_1": hdmf.VectorData(name="new_col_1", description="", value=np.arange(100)), + "new_col_1_index": hdmf.VectorIndex( + name="new_col_1_index", description="", value=np.arange(0, 100, 5) + 5 + ), } with pytest.raises(ValidationError, match="columns are not of equal length"): _ = MyDT(**cols) @@ -321,8 +329,8 @@ def test_dynamictable_setattr(): existing_col: hdmf.VectorData[NDArray[Shape["* col"], int]] cols = { - "existing_col": hdmf.VectorData(value=np.arange(10)), - "new_col_1": hdmf.VectorData(value=np.arange(10)), + "existing_col": hdmf.VectorData(name="existing_col", description="", value=np.arange(10)), + "new_col_1": hdmf.VectorData(name="new_col_1", description="", value=np.arange(10)), } inst = MyDT(existing_col=cols["existing_col"]) assert inst.colnames == ["existing_col"] @@ -335,7 +343,7 @@ def test_dynamictable_setattr(): # model validators should be called to ensure equal length with pytest.raises(ValidationError): - inst.new_col_2 = hdmf.VectorData(value=np.arange(11)) + inst.new_col_2 = hdmf.VectorData(name="new_col_2", description="", value=np.arange(11)) def test_vectordata_indexing(): @@ -346,7 +354,7 @@ def test_vectordata_indexing(): value_array, index_array = _ragged_array(n_rows) value_array = np.concatenate(value_array) - data = hdmf.VectorData(value=value_array) + data = hdmf.VectorData(name="data", description="", value=value_array) # before we have an index, things should work as normal, indexing a 1D array assert data[0] == 0 @@ -356,7 +364,7 @@ def test_vectordata_indexing(): data[0] = 0 # indexes by themselves are the same - index_notarget = hdmf.VectorIndex(value=index_array) + index_notarget = hdmf.VectorIndex(name="no_target_index", description="", value=index_array) assert index_notarget[0] == index_array[0] assert all(index_notarget[0:3] == index_array[0:3]) oldval = index_array[0] @@ -364,7 +372,7 @@ def test_vectordata_indexing(): assert index_notarget[0] == 5 index_notarget[0] = oldval - index = hdmf.VectorIndex(value=index_array, target=data) + index = hdmf.VectorIndex(name="data_index", description="", value=index_array, target=data) data._index = index # after an index, both objects should index raggedly @@ -396,8 +404,10 @@ def test_vectordata_getattr(): """ VectorData and VectorIndex both forward getattr to ``value`` """ - data = hdmf.VectorData(value=np.arange(100)) - index = hdmf.VectorIndex(value=np.arange(10, 101, 10), target=data) + data = hdmf.VectorData(name="data", description="", value=np.arange(100)) + index = hdmf.VectorIndex( + name="data_index", description="", value=np.arange(10, 101, 10), target=data + ) # get attrs that we defined on the models # i.e. no attribute errors here @@ -447,7 +457,9 @@ def test_dynamictable_region_indexing(basic_table): index = np.array([9, 4, 8, 3, 7, 2, 6, 1, 5, 0]) - table_region = hdmf.DynamicTableRegion(value=index, table=inst) + table_region = hdmf.DynamicTableRegion( + name="table_region", description="", value=index, table=inst + ) row = table_region[1] assert all(row.iloc[0] == index[1]) @@ -499,10 +511,14 @@ def test_dynamictable_region_ragged(): timeseries_index=spike_idx, ) region = hdmf.DynamicTableRegion( + name="region", + description="a table region what else would it be", table=table, value=value, ) - index = hdmf.VectorIndex(name="index", description="hgggggggjjjj", target=region, value=idx) + index = hdmf.VectorIndex( + name="region_index", description="hgggggggjjjj", target=region, value=idx + ) region._index = index rows = region[1] @@ -594,8 +610,8 @@ def test_mixed_aligned_dynamictable(aligned_table): value_array, index_array = _ragged_array(10) value_array = np.concatenate(value_array) - data = hdmf.VectorData(value=value_array) - index = hdmf.VectorIndex(value=index_array) + data = hdmf.VectorData(name="data", description="", value=value_array) + index = hdmf.VectorIndex(name="data_index", description="", value=index_array) atable = AlignedTable(**cols, extra_col=data, extra_col_index=index) atable[0]