import pdb
from pathlib import Path

import h5py
import numpy as np
import pytest

from nwb_linkml.io.hdf5 import HDF5IO, truncate_file

from ..fixtures import data_dir, tmp_output_dir


@pytest.mark.parametrize('dset', ['aibs.nwb'])
def test_hdf_read(data_dir, dset):
    nwbfile_path = data_dir / dset
    io = HDF5IO(path=nwbfile_path)
    # for now the test is just whether we can read the file at all
    model = io.read()
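    # hedged sanity check: assumes read() returns a non-None model on success
    # rather than raising, so this only guards against a silent None
    assert model is not None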


def test_truncate_file(tmp_output_dir):
    source = tmp_output_dir / 'truncate_source.hdf5'

    # create an hdf5 file with some big datasets and object references between
    # them, and make sure we truncate the datasets while preserving the
    # references
    h5f = h5py.File(str(source), 'w')
    h5f.create_group('data')
    dataset_contig = h5f.create_dataset(
        '/data/dataset_contig',
        data=np.zeros((1000, 30, 40), dtype=np.float64),
        compression="gzip",
        compression_opts=9,
    )
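    # NB: gzip compression requires chunked storage in HDF5, so despite its
    # name this dataset is auto-chunked too; the dataset below just opts into
    # chunking explicitly with chunks=True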
    dataset_chunked = h5f.create_dataset(
        '/data/dataset_chunked',
        data=np.zeros((1000, 40, 50), dtype=np.float64),
        compression="gzip",
        compression_opts=9,
        chunks=True,
    )

    # cross-link the datasets through object references stored in attributes
    dataset_contig.attrs['reference_other'] = dataset_chunked.ref
    dataset_chunked.attrs['reference_other'] = dataset_contig.ref
    dataset_contig.attrs['anattr'] = 1

    # references from a separate group should also survive truncation
    link_group = h5f.create_group('link/child')
    link_group.attrs['reference_contig'] = dataset_contig.ref
    link_group.attrs['reference_chunked'] = dataset_chunked.ref
    h5f.flush()
    h5f.close()

    source_size = source.stat().st_size

    # call without an explicit target to check that the default output
    # filename is constructed correctly
    n = 10
    target_output = truncate_file(source, n=n)
    assert target_output == source.parent / (source.stem + '_truncated.hdf5')
    # check that we actually made the file smaller
    target_size = target_output.stat().st_size
    # empirically, the source file is ~125KB and the truncated file is ~17KB
    assert target_size < source_size / 5

    # then check that we have what's expected in the truncated file
    target_h5f = h5py.File(target_output, 'r')

    # truncation happened
    assert target_h5f['data']['dataset_contig'].shape == (n, 30, 40)
    assert target_h5f['data']['dataset_chunked'].shape == (n, 40, 50)
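
    # a hedged extra check: the data we wrote was all zeros, so whichever n
    # rows truncation kept should still read back as zeros
    assert np.all(target_h5f['data']['dataset_contig'][:] == 0)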

    # references still work. we can't compare object identity with `is`
    # across file handles, so instead check that each reference dereferences
    # to the expected object by name
    assert (
        target_h5f[target_h5f['data']['dataset_contig'].attrs['reference_other']].name
        == target_h5f['data']['dataset_chunked'].name
    )
    assert (
        target_h5f[target_h5f['data']['dataset_chunked'].attrs['reference_other']].name
        == target_h5f['data']['dataset_contig'].name
    )
    assert (
        target_h5f[target_h5f['link']['child'].attrs['reference_contig']].name
        == target_h5f['data']['dataset_contig'].name
    )
    assert (
        target_h5f[target_h5f['link']['child'].attrs['reference_chunked']].name
        == target_h5f['data']['dataset_chunked'].name
    )
    # other attributes are carried over
    assert target_h5f['data']['dataset_contig'].attrs['anattr'] == 1
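
    # close the handle explicitly so the temp file can be cleaned up on
    # platforms that lock open files (not strictly required for the asserts)
    target_h5f.close()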


@pytest.mark.skip(reason='Stub for local testing, finish me!')
def test_flatten_hdf():
    from nwb_linkml.maps.hdf5 import flatten_hdf

    path = '/Users/jonny/Dropbox/lab/p2p_ld/data/nwb/sub-738651046_ses-760693773.nwb'
    h5f = h5py.File(path, 'r')
    flat = flatten_hdf(h5f)
    assert not any('specifications' in v.path for v in flat.values())

    # intentional breakpoint: this stub is for interactive local exploration
    pdb.set_trace()

    raise NotImplementedError('Just a stub for local testing for now, finish me!')