import pdb
from pathlib import Path

import h5py
import numpy as np
import pytest

from nwb_linkml.io.hdf5 import HDF5IO, truncate_file

from ..fixtures import tmp_output_dir, set_config_vars


@pytest.mark.skip()
def test_hdf_read():
    NWBFILE = Path('/Users/jonny/Dropbox/lab/p2p_ld/data/nwb/sub-738651046_ses-760693773.nwb')
    if not NWBFILE.exists():
        return
    io = HDF5IO(path=NWBFILE)
    model = io.read()

    # local-only stub: drop into the debugger to inspect the model that was read
    pdb.set_trace()


@pytest.mark.skip()
def test_truncate_file(tmp_output_dir):
    source = tmp_output_dir / 'truncate_source.hdf5'

    # create an hdf5 file with big datasets and some cross-object links, and make sure
    # we truncate the datasets and preserve the links

    h5f = h5py.File(str(source), 'w')
    data_group = h5f.create_group('data')
    dataset_contig = h5f.create_dataset(
        '/data/dataset_contig',
        data=np.zeros((1000, 30, 40), dtype=np.float64),
        compression="gzip",
        compression_opts=9
    )
    dataset_chunked = h5f.create_dataset(
        '/data/dataset_chunked',
        data=np.zeros((1000, 40, 50), dtype=np.float64),
        compression="gzip",
        compression_opts=9,
        chunks=True
    )
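    # note: gzip compression implies chunked storage in HDF5, so "dataset_contig" is only
    # nominally contiguous; the second dataset just opts into auto-chunking explicitly with chunks=True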
    dataset_contig.attrs['reference_other'] = dataset_chunked.ref
    dataset_chunked.attrs['reference_other'] = dataset_contig.ref
    dataset_contig.attrs['anattr'] = 1
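    # `.ref` yields an h5py object reference; storing it in an attribute and dereferencing
    # it later with `file[ref]` is how these cross-object links round-trip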
    link_group = h5f.create_group('link/child')
    link_group.attrs['reference_contig'] = dataset_contig.ref
    link_group.attrs['reference_chunked'] = dataset_chunked.ref
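    # if a true HDF5 soft link were wanted rather than reference attributes, h5py would create
    # one with e.g. `h5f['link/soft'] = h5py.SoftLink('/data/dataset_contig')`; this test
    # only exercises object references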
    h5f.flush()
    h5f.close()

    source_size = source.stat().st_size

    # call without a target to check that the output filename gets constructed correctly
    n = 10
    target_output = truncate_file(source, n=n)
    assert target_output == source.parent / (source.stem + '_truncated.hdf5')
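    # (truncate_file should copy the file next to the source, keeping only the first `n` rows
    # of each dataset; the shape assertions below check exactly that)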
    # check that we actually made it smaller
    target_size = target_output.stat().st_size
    # empirically, the source file is ~125KB and the truncated file is ~17KB
    assert target_size < source_size / 5

    # then check that we have what's expected in the file
    target_h5f = h5py.File(target_output, 'r')

    # truncation happened
    assert target_h5f['data']['dataset_contig'].shape == (n, 30, 40)
    assert target_h5f['data']['dataset_chunked'].shape == (n, 40, 50)

    # references still work
    # we can't directly test object identity with "is", so instead check that the references
    # dereference at all, and that they dereference to the right objects
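    # (`.name` is an object's absolute path within the file, so matching names means the
    # references resolve to the intended datasets)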
    assert target_h5f[target_h5f['data']['dataset_contig'].attrs['reference_other']].name == target_h5f['data']['dataset_chunked'].name
    assert target_h5f[target_h5f['data']['dataset_chunked'].attrs['reference_other']].name == target_h5f['data']['dataset_contig'].name
    assert target_h5f[target_h5f['link']['child'].attrs['reference_contig']].name == target_h5f['data']['dataset_contig'].name
    assert target_h5f[target_h5f['link']['child'].attrs['reference_chunked']].name == target_h5f['data']['dataset_chunked'].name
    assert target_h5f['data']['dataset_contig'].attrs['anattr'] == 1


@pytest.mark.skip()
def test_flatten_hdf():
    from nwb_linkml.maps.hdf5 import flatten_hdf

    path = '/Users/jonny/Dropbox/lab/p2p_ld/data/nwb/sub-738651046_ses-760693773.nwb'
    h5f = h5py.File(path)
    flat = flatten_hdf(h5f)
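    # the /specifications group (where NWB caches its schema) should be excluded from the flattened map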
    assert not any(['specifications' in v.path for v in flat.values()])
    pdb.set_trace()
    raise NotImplementedError('Just a stub for local testing for now, finish me!')