# nwb-linkml/nwb_linkml/tests/test_includes/test_hdmf.py

import numpy as np
import pandas as pd
from numpydantic import NDArray, Shape

from nwb_linkml.includes import hdmf
from nwb_linkml.includes.hdmf import DynamicTableMixin, VectorDataMixin, VectorIndexMixin

# FIXME: Make this just be the output of the provider by patching into import machinery
from nwb_linkml.models.pydantic.core.v2_7_0.namespace import (
    DynamicTable,
    DynamicTableRegion,
    ElectrodeGroup,
    VectorIndex,
    VoltageClampStimulusSeries,
)

from .conftest import _ragged_array


def test_dynamictable_indexing(electrical_series):
    """
    A dynamictable can be indexed by row, by column, by cell, and by 2D slice.

    Exercises the electrodes table that the ``electrical_series`` fixture
    provides as the second item of its tuple.
    """
    _series, electrodes = electrical_series

    expected_columns = ["id", "x", "y", "group", "group_name", "location", "extra_column"]
    expected_dtypes = [
        np.dtype("int64"),
        *([np.dtype("float64")] * 2),
        *([np.dtype("O")] * 4),
    ]

    # a single row and a range of rows both come back as full-width frames
    for selector, n_rows in ((0, 1), (slice(0, 3), 3)):
        frame = electrodes[selector]
        assert frame.shape == (n_rows, 7)
        assert frame.dtypes.values.tolist() == expected_dtypes
        assert frame.columns.tolist() == expected_columns

    # a column name returns the column object, whose data is read from ``.value``
    y_column = electrodes["y"]
    assert all(y_column.value == [5, 6, 7, 8, 9])

    # a single cell, addressing the column by name and then by position ("y" is column 2)
    for column_key in ("y", 2):
        assert electrodes[0, column_key] == 5

    # a slice of rows and columns keeps the leading columns and their dtypes
    block = electrodes[0:3, 0:3]
    assert block.shape == (3, 3)
    assert block.columns.tolist() == expected_columns[0:3]
    assert block.dtypes.values.tolist() == expected_dtypes[0:3]


def test_dynamictable_ragged(units):
    """
    Ragged arrays should be indexable through an implicit ``_index`` column.

    Also tests:
    - passing arrays directly instead of wrapping them in vectordata/index objects:
      if the models in the fixture instantiate, then this works
    """
    table, spike_times, _spike_idx = units

    # indexing a single row must give a single row, not a pivot to long format
    first_row = table[0]
    assert first_row.shape[0] == 1

    # check that we got the indexing boundaries correct
    # (reading ``shape`` off the table also checks that attr calls are forwarded
    # to the dataframe)
    n_rows = table.shape[0]
    for row_number in range(n_rows):
        assert np.all(table.iloc[row_number, 0] == spike_times[row_number])


def test_dynamictable_region_basic(electrical_series):
    """
    A DynamicTableRegion, itself a column within a table, should be able to
    refer to a row or rows of another table.
    """
    series, _electrodes = electrical_series
    region = series.electrodes

    # The fixture builds the region with inverted indexes, so position 0 of the
    # region must resolve to the row with id 4 rather than the row with id 0.
    # This is deliberately a shallow check because
    # a) the indexing behavior of the indexed objects is tested above, and
    # b) every other object in the chain is strictly validated,
    # so a correctly shaped df is assumed to be the correct df.
    first = region[0]
    assert all(first.id == 4)
    assert first.shape == (1, 7)

    # The cell contents must still be the original model: the "group" column of
    # this row holds ElectrodeGroup objects. ElectrodeGroup is strictly
    # validating, so an instance check is as far as this basic test recurses.
    assert isinstance(first.group.values[0], ElectrodeGroup)

    # Slicing the region gives a list of table rows. That is the correct
    # behavior here, because the list is the cell of another table.
    selected = region[0:3]
    for frame, expected_id in zip(selected, [4, 3, 2]):
        assert all(frame.id == expected_id)


def test_dynamictable_region_ragged():
    """
    A DynamicTableRegion can itself carry an index, so that each of its entries
    is a ragged group of rows from the referenced table.
    """
    ragged_times, ragged_idx = _ragged_array(24)

    # the referenced table: one ragged "timeseries" column plus ids
    source_table = DynamicTable(
        name="table",
        description="a table what else would it be",
        id=np.arange(len(ragged_idx)),
        timeseries=np.concatenate(ragged_times),
        timeseries_index=ragged_idx,
    )

    # a secondary index that selects overlapping segments of the first table:
    # the row numbers are grouped in threes by the boundaries 3, 6, 9
    region = DynamicTableRegion(
        name="dynamictableregion",
        description="this field should be optional",
        table=source_table,
        value=np.array([0, 1, 2, 1, 2, 3, 2, 3, 4]),
    )
    region._index = VectorIndex(
        name="index",
        description="hgggggggjjjj",
        target=region,
        value=np.array([3, 6, 9]),
    )

    selected = region[1]
    # The second group should be three rows of the table, with the ragged array
    # column timeseries, like...
    #
    #    id                                         timeseries
    # 0   1  [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...
    # 1   2  [2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, ...
    # 2   3  [3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, ...
    assert selected.shape == (3, 2)
    assert all(selected.id == [1, 2, 3])
    for expected_value, (_label, record) in zip([1, 2, 3], selected.iterrows()):
        assert all(record.timeseries == expected_value)


def test_dynamictable_append_column():
    """
    Placeholder with no assertions yet, so it always passes.

    TODO: presumably meant to cover appending a column to a dynamictable - confirm
    """


def test_dynamictable_append_row():
    """
    Placeholder with no assertions yet, so it always passes.

    TODO: presumably meant to cover appending a row to a dynamictable - confirm
    """


def test_dynamictable_extra_coercion():
    """
    Extra fields passed as plain arrays should be coerced to VectorData,
    and their indexing relationships should be handled.

    NOTE: this test has no body yet, so nothing is asserted here.
    """


def test_aligned_dynamictable(intracellular_recordings_table):
    """
    Multiple aligned dynamictables should be indexable with a multiindex.
    """
    expected_columns = pd.MultiIndex.from_tuples(
        [
            ("electrodes", "index"),
            ("electrodes", "electrode"),
            ("stimuli", "index"),
            ("stimuli", "stimulus"),
            ("responses", "index"),
            ("responses", "response"),
        ]
    )

    # getting a single row must not raise (its contents are checked via the slice below)
    _ = intracellular_recordings_table[0]

    # a member table can be fetched by its name
    stimuli_table = intracellular_recordings_table["stimuli"]
    assert stimuli_table.shape == (10, 1)

    # a slice of rows gives one dataframe whose columns are (table, column) pairs
    frame = intracellular_recordings_table[0:3]
    assert all(frame.columns == expected_columns)

    # The cells should hold the actual values referenced by the
    # TimeSeriesReferenceVectorData (also tested separately): each cell is an
    # array of VoltageClampStimulusSeries, and each of those is indexable too.
    first_cell = frame["stimuli", "stimulus"][0]
    for position in range(len(first_cell)):
        stimulus = first_cell[position]
        assert isinstance(stimulus, VoltageClampStimulusSeries)
        # every value in the series is expected to equal its position in the cell
        assert all(position == sample for sample in stimulus[:])


# --------------------------------------------------
# Direct mixin tests
# --------------------------------------------------


def test_dynamictable_mixin_indexing():
    """
    Intentionally empty: indexing is tested above with actual model objects.

    This stub only exists so that searching the file for a mixin indexing test
    lands on this note.
    """


def test_dynamictable_mixin_colnames():
    """
    ``colnames`` should be inferred from the declared column followed by the
    extra columns, in the order they were passed.
    """

    class MyDT(DynamicTableMixin):
        existing_col: NDArray[Shape["* col"], int]

    # two columns that are not declared on the model
    extra_columns = {
        "new_col_1": VectorDataMixin(value=np.arange(10)),
        "new_col_2": VectorDataMixin(value=np.arange(10)),
    }

    table = MyDT(existing_col=np.arange(10), **extra_columns)
    assert table.colnames == ["existing_col", "new_col_1", "new_col_2"]


def test_dynamictable_mixin_colnames_index():
    """
    Index columns must be left out of ``colnames``.
    """

    class MyDT(DynamicTableMixin):
        existing_col: NDArray[Shape["* col"], int]

    explicitly_indexed = hdmf.VectorData(value=np.arange(10))
    columns = {
        "existing_col": np.arange(10),
        "new_col_1": explicitly_indexed,
        "new_col_2": hdmf.VectorData(value=np.arange(10)),
        # explicit index: the name does not match its column, so a target is given
        "weirdname_index": VectorIndexMixin(value=np.arange(10), target=explicitly_indexed),
        # implicit index: no target, only the name matches the column
        "new_col_2_index": VectorIndexMixin(value=np.arange(10)),
    }

    table = MyDT(**columns)
    assert table.colnames == ["existing_col", "new_col_1", "new_col_2"]


def test_dynamictable_mixin_colnames_ordered():
    """
    An explicit column order can be passed through ``colnames``.
    """

    class MyDT(DynamicTableMixin):
        existing_col: NDArray[Shape["* col"], int]

    columns = {
        "existing_col": np.arange(10),
        "new_col_1": hdmf.VectorData(value=np.arange(10)),
        "new_col_2": hdmf.VectorData(value=np.arange(10)),
        "new_col_3": hdmf.VectorData(value=np.arange(10)),
    }

    # a complete ordering is taken as given
    full_order = ["new_col_2", "existing_col", "new_col_1", "new_col_3"]
    table = MyDT(**columns, colnames=full_order)
    assert table.colnames == full_order

    # the order should be reflected in the columns selector and in the df it produces
    assert all(wanted == actual for wanted, actual in zip(full_order, table._columns))
    assert all(table[0].columns == full_order)

    # partial lists should append the unnamed columns at the end
    partial_order = ["new_col_3", "new_col_2"]
    table = MyDT(**columns, colnames=partial_order)
    assert table.colnames == partial_order + ["existing_col", "new_col_1"]


def test_dynamictable_mixin_getattr():
    """
    Dynamictable should forward unknown getattr requests to the df.

    NOTE: the getattr assertions are currently disabled (see the TODO at the
    end). As written, this only checks that the two models can be declared and
    that ``MyDT`` can be instantiated from a VectorData column.
    """

    class MyDT(DynamicTableMixin):
        existing_col: NDArray[Shape["* col"], int]

    # declared but never instantiated: only the class definition is exercised
    class AModel(DynamicTableMixin):
        col: hdmf.VectorData[NDArray[Shape["3, 3"], int]]

    column = hdmf.VectorData(value=np.arange(10))
    MyDT(existing_col=column)

    # TODO: re-enable the checks this test is named for:
    #   regular lookup for attrs that exist
    #     assert inst.existing_col == col
    #   df lookup otherwise
    #     inst.columns
wtf and a third commit, i gotta figure out what thats about 2024-07-29 23:22:29 +00:00			`import numpy as np`
working aligned dynamic table and TimeSeriesReferenceVectorData 2024-08-13 05:57:00 +00:00			`import pandas as pd`
Make VectorData and VectorIndex generics to ensure coercion to VectorData for declared columns 2024-08-14 04:25:56 +00:00			`from numpydantic import NDArray, Shape`

			`from nwb_linkml.includes import hdmf`
			`from nwb_linkml.includes.hdmf import DynamicTableMixin, VectorDataMixin, VectorIndexMixin`
lint 2024-07-29 23:28:48 +00:00
add dynamictable, vectordata, vectorindex mixins 2024-07-31 08:13:31 +00:00			`# FIXME: Make this just be the output of the provider by patching into import machinery`
lint 2024-07-29 23:28:48 +00:00			`from nwb_linkml.models.pydantic.core.v2_7_0.namespace import (`
my god it works but what have i done 2024-08-07 09:03:04 +00:00			`DynamicTable,`
first impl of dynamictable working! 2024-08-06 03:51:52 +00:00			`DynamicTableRegion,`
add dynamictable, vectordata, vectorindex mixins 2024-07-31 08:13:31 +00:00			`ElectrodeGroup,`
my god it works but what have i done 2024-08-07 09:03:04 +00:00			`VectorIndex,`
working aligned dynamic table and TimeSeriesReferenceVectorData 2024-08-13 05:57:00 +00:00			`VoltageClampStimulusSeries,`
lint 2024-07-29 23:28:48 +00:00			`)`
lint 2024-08-13 05:59:15 +00:00
add logging. less janky adapter instantiation using model validators. correctly propagate properties from ancestor classes when building 2024-08-13 01:48:59 +00:00			`from .conftest import _ragged_array`
working ragged array indexing before rebuilding models 2024-08-07 02:44:04 +00:00

first impl of dynamictable working! 2024-08-06 03:51:52 +00:00			`def test_dynamictable_indexing(electrical_series):`
			`"""`
			`Can index values from a dynamictable`
			`"""`
			`series, electrodes = electrical_series`

			`colnames = [`
			`"id",`
			`"x",`
			`"y",`
			`"group",`
			`"group_name",`
			`"location",`
			`"extra_column",`
			`]`
			`dtypes = [`
			`np.dtype("int64"),`
			`np.dtype("float64"),`
			`np.dtype("float64"),`
			`] + ([np.dtype("O")] * 4)`

			`row = electrodes[0]`
			`# successfully get a single row :)`
			`assert row.shape == (1, 7)`
			`assert row.dtypes.values.tolist() == dtypes`
			`assert row.columns.tolist() == colnames`

			`# slice a range of rows`
			`rows = electrodes[0:3]`
			`assert rows.shape == (3, 7)`
			`assert rows.dtypes.values.tolist() == dtypes`
			`assert rows.columns.tolist() == colnames`

			`# get a single column`
			`col = electrodes["y"]`
Make VectorData and VectorIndex generics to ensure coercion to VectorData for declared columns 2024-08-14 04:25:56 +00:00			`assert all(col.value == [5, 6, 7, 8, 9])`
first impl of dynamictable working! 2024-08-06 03:51:52 +00:00
			`# get a single cell`
			`val = electrodes[0, "y"]`
			`assert val == 5`
			`val = electrodes[0, 2]`
			`assert val == 5`

			`# get a slice of rows and columns`
			`subsection = electrodes[0:3, 0:3]`
			`assert subsection.shape == (3, 3)`
			`assert subsection.columns.tolist() == colnames[0:3]`
			`assert subsection.dtypes.values.tolist() == dtypes[0:3]`


actually fix indexing 2024-08-08 02:22:29 +00:00			`def test_dynamictable_ragged(units):`
			`"""`
			`Should be able to index ragged arrays using an implicit _index column`

			`Also tests:`
			`- passing arrays directly instead of wrapping in vectordata/index specifically,`
			`if the models in the fixture instantiate then this works`
			`"""`
			`units, spike_times, spike_idx = units`

			`# ensure we don't pivot to long when indexing`
			`assert units[0].shape[0] == 1`
			`# check that we got the indexing boundaries corrunect`
			`# (and that we are forwarding attr calls to the dataframe by accessing shape`
			`for i in range(units.shape[0]):`
			`assert np.all(units.iloc[i, 0] == spike_times[i])`


my god it works but what have i done 2024-08-07 09:03:04 +00:00			`def test_dynamictable_region_basic(electrical_series):`
			`"""`
			`DynamicTableRegion should be able to refer to a row or rows of another table`
			`itself as a column within a table`
continue removing nptyping, actually fix indexing 2024-08-07 04:40:23 +00:00			`"""`
my god it works but what have i done 2024-08-07 09:03:04 +00:00			`series, electrodes = electrical_series`
			`row = series.electrodes[0]`
			`# check that we correctly got the 4th row instead of the 0th row,`
			`# since the indexed table was constructed with inverted indexes because it's a test, ya dummy.`
			`# we will only vaguely check the basic functionality here bc`
			`# a) the indexing behavior of the indexed objects is tested above, and`
			`# b) every other object in the chain is strictly validated,`
			`# so we assume if we got a right shaped df that it is the correct one.`
			`# feel free to @ me when i am wrong about this`
correct test for equality for series 2024-08-08 01:56:01 +00:00			`assert all(row.id == 4)`
my god it works but what have i done 2024-08-07 09:03:04 +00:00			`assert row.shape == (1, 7)`
			`# and we should still be preserving the model that is the contents of the cell of this row`
			`# so this is a dataframe row with a column "group" that contains an array of ElectrodeGroup`
			`# objects and that's as far as we are going to chase the recursion in this basic indexing test`
			`# ElectrodeGroup is strictly validating so an instance check is all we need.`
			`assert isinstance(row.group.values[0], ElectrodeGroup)`

			`# getting a list of table rows is actually correct behavior here because`
			`# this list of table rows is actually the cell of another table`
			`rows = series.electrodes[0:3]`
actually fix indexing 2024-08-08 02:22:29 +00:00			`assert all([all(row.id == idx) for row, idx in zip(rows, [4, 3, 2])])`
continue removing nptyping, actually fix indexing 2024-08-07 04:40:23 +00:00

my god it works but what have i done 2024-08-07 09:03:04 +00:00			`def test_dynamictable_region_ragged():`
continue removing nptyping, actually fix indexing 2024-08-07 04:40:23 +00:00			`"""`
my god it works but what have i done 2024-08-07 09:03:04 +00:00			`Dynamictables can also have indexes so that they are ragged arrays of column rows`
			`"""`
			`spike_times, spike_idx = _ragged_array(24)`
			`spike_times_flat = np.concatenate(spike_times)`
continue removing nptyping, actually fix indexing 2024-08-07 04:40:23 +00:00
my god it works but what have i done 2024-08-07 09:03:04 +00:00			`# construct a secondary index that selects overlapping segments of the first table`
			`value = np.array([0, 1, 2, 1, 2, 3, 2, 3, 4])`
			`idx = np.array([3, 6, 9])`
continue removing nptyping, actually fix indexing 2024-08-07 04:40:23 +00:00
my god it works but what have i done 2024-08-07 09:03:04 +00:00			`table = DynamicTable(`
			`name="table",`
			`description="a table what else would it be",`
			`id=np.arange(len(spike_idx)),`
coercion for extra columns passed as arrays 2024-08-08 03:23:18 +00:00			`timeseries=spike_times_flat,`
my god it works but what have i done 2024-08-07 09:03:04 +00:00			`timeseries_index=spike_idx,`
			`)`
			`region = DynamicTableRegion(`
			`name="dynamictableregion",`
			`description="this field should be optional",`
			`table=table,`
			`value=value,`
			`)`
			`index = VectorIndex(name="index", description="hgggggggjjjj", target=region, value=idx)`
			`region._index = index`
			`rows = region[1]`
			`# i guess this is right?`
			`# the region should be a set of three rows of the table, with a ragged array column timeseries`
			`# like...`
			`#`
			`# id timeseries`
			`# 0 1 [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...`
			`# 1 2 [2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, ...`
			`# 2 3 [3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, ...`
oop 2024-08-07 09:28:06 +00:00			`assert rows.shape == (3, 2)`
my god it works but what have i done 2024-08-07 09:03:04 +00:00			`assert all(rows.id == [1, 2, 3])`
			`assert all([all(row[1].timeseries == i) for i, row in zip([1, 2, 3], rows.iterrows())])`


first impl of dynamictable working! 2024-08-06 03:51:52 +00:00			`def test_dynamictable_append_column():`
			`pass`


			`def test_dynamictable_append_row():`
			`pass`
coercion for extra columns passed as arrays 2024-08-08 03:23:18 +00:00

			`def test_dynamictable_extra_coercion():`
			`"""`
			`Extra fields should be coerced to VectorData and have their`
			`indexing relationships handled when passed as plain arrays.`
			`"""`
working aligned dynamic table and TimeSeriesReferenceVectorData 2024-08-13 05:57:00 +00:00

			`def test_aligned_dynamictable(intracellular_recordings_table):`
			`"""`
			`Multiple aligned dynamictables should be indexable with a multiindex`
			`"""`
			`# can get a single row.. (check correctness below)`
			`row = intracellular_recordings_table[0]`
			`# can get a single table with its name`
			`stimuli = intracellular_recordings_table["stimuli"]`
			`assert stimuli.shape == (10, 1)`

			`# nab a few rows to make the dataframe`
			`rows = intracellular_recordings_table[0:3]`
			`assert all(`
			`rows.columns`
			`== pd.MultiIndex.from_tuples(`
			`[`
			`("electrodes", "index"),`
			`("electrodes", "electrode"),`
			`("stimuli", "index"),`
			`("stimuli", "stimulus"),`
			`("responses", "index"),`
			`("responses", "response"),`
			`]`
			`)`
			`)`

			`# ensure that we get the actual values from the TimeSeriesReferenceVectorData`
			`# also tested separately`
			`# each individual cell should be an array of VoltageClampStimulusSeries...`
			`# and then we should be able to index within that as well`
			`stims = rows["stimuli", "stimulus"][0]`
			`for i in range(len(stims)):`
			`assert isinstance(stims[i], VoltageClampStimulusSeries)`
			`assert all([i == val for val in stims[i][:]])`
Make VectorData and VectorIndex generics to ensure coercion to VectorData for declared columns 2024-08-14 04:25:56 +00:00

			`# --------------------------------------------------`
			`# Direct mixin tests`
			`# --------------------------------------------------`


			`def test_dynamictable_mixin_indexing():`
			`"""`
			`This is just a placeholder test to say that indexing is tested above`
			`with actual model objects in case i ever ctrl+f for this`
			`"""`
			`pass`


			`def test_dynamictable_mixin_colnames():`
			`"""`
			`Should correctly infer colnames`
			`"""`

			`class MyDT(DynamicTableMixin):`
			`existing_col: NDArray[Shape["* col"], int]`

			`new_col_1 = VectorDataMixin(value=np.arange(10))`
			`new_col_2 = VectorDataMixin(value=np.arange(10))`

			`inst = MyDT(existing_col=np.arange(10), new_col_1=new_col_1, new_col_2=new_col_2)`
			`assert inst.colnames == ["existing_col", "new_col_1", "new_col_2"]`


			`def test_dynamictable_mixin_colnames_index():`
			`"""`
			`Exclude index columns in colnames`
			`"""`

			`class MyDT(DynamicTableMixin):`
			`existing_col: NDArray[Shape["* col"], int]`

			`cols = {`
			`"existing_col": np.arange(10),`
			`"new_col_1": hdmf.VectorData(value=np.arange(10)),`
			`"new_col_2": hdmf.VectorData(value=np.arange(10)),`
			`}`
			`# explicit index with mismatching name`
			`cols["weirdname_index"] = VectorIndexMixin(value=np.arange(10), target=cols["new_col_1"])`
			`# implicit index with matching name`
			`cols["new_col_2_index"] = VectorIndexMixin(value=np.arange(10))`

			`inst = MyDT(**cols)`
			`assert inst.colnames == ["existing_col", "new_col_1", "new_col_2"]`


			`def test_dynamictable_mixin_colnames_ordered():`
			`"""`
			`Should be able to pass explicit order to colnames`
			`"""`

			`class MyDT(DynamicTableMixin):`
			`existing_col: NDArray[Shape["* col"], int]`

			`cols = {`
			`"existing_col": np.arange(10),`
			`"new_col_1": hdmf.VectorData(value=np.arange(10)),`
			`"new_col_2": hdmf.VectorData(value=np.arange(10)),`
			`"new_col_3": hdmf.VectorData(value=np.arange(10)),`
			`}`
			`order = ["new_col_2", "existing_col", "new_col_1", "new_col_3"]`

			`inst = MyDT(**cols, colnames=order)`
			`assert inst.colnames == order`

			`# this should get reflected in the columns selector and the df produces`
			`assert all([key1 == key2 for key1, key2 in zip(order, inst._columns)])`
			`assert all(inst[0].columns == order)`

			`# partial lists should append unnamed columsn at the end`
			`partial_order = ["new_col_3", "new_col_2"]`
			`inst = MyDT(**cols, colnames=partial_order)`
			`assert inst.colnames == [*partial_order, "existing_col", "new_col_1"]`


			`def test_dynamictable_mixin_getattr():`
			`"""`
			`Dynamictable should forward unknown getattr requests to the df`
			`"""`

			`class MyDT(DynamicTableMixin):`
			`existing_col: NDArray[Shape["* col"], int]`

			`class AModel(DynamicTableMixin):`
			`col: hdmf.VectorData[NDArray[Shape["3, 3"], int]]`

			`col = hdmf.VectorData(value=np.arange(10))`
			`inst = MyDT(existing_col=col)`
			`# regular lookup for attrs that exist`

			`# pdb.set_trace()`
			`# inst.existing_col`
			`# assert inst.existing_col == col`
			`# df lookup otherwise`
			`# inst.columns`