working on graph loading of NWB files

This commit is contained in:
sneakers-the-rat 2024-08-31 01:47:42 -07:00
parent b555ccb199
commit 49585e467a
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
8 changed files with 197 additions and 45 deletions

View file

@ -5,7 +5,7 @@
groups = ["default", "dev", "plot", "tests"] groups = ["default", "dev", "plot", "tests"]
strategy = ["inherit_metadata"] strategy = ["inherit_metadata"]
lock_version = "4.5.0" lock_version = "4.5.0"
content_hash = "sha256:aaf3c34a5f39fc7db0c5dce91a0693eb78358a255d6b0a72f2e1f988eb7e899f" content_hash = "sha256:1c297e11f6dc9e4f6b8d29df872177d2ce65bbd334c0b65aa5175dfb125c4d9f"
[[metadata.targets]] [[metadata.targets]]
requires_python = ">=3.10,<3.13" requires_python = ">=3.10,<3.13"
@ -996,7 +996,7 @@ name = "networkx"
version = "3.3" version = "3.3"
requires_python = ">=3.10" requires_python = ">=3.10"
summary = "Python package for creating and manipulating graphs and networks" summary = "Python package for creating and manipulating graphs and networks"
groups = ["dev", "tests"] groups = ["default", "dev", "tests"]
files = [ files = [
{file = "networkx-3.3-py3-none-any.whl", hash = "sha256:28575580c6ebdaf4505b22c6256a2b9de86b316dc63ba9e93abde3d78dfdbcf2"}, {file = "networkx-3.3-py3-none-any.whl", hash = "sha256:28575580c6ebdaf4505b22c6256a2b9de86b316dc63ba9e93abde3d78dfdbcf2"},
{file = "networkx-3.3.tar.gz", hash = "sha256:0c127d8b2f4865f59ae9cb8aafcd60b5c70f3241ebd66f7defad7c4ab90126c9"}, {file = "networkx-3.3.tar.gz", hash = "sha256:0c127d8b2f4865f59ae9cb8aafcd60b5c70f3241ebd66f7defad7c4ab90126c9"},

View file

@ -25,6 +25,7 @@ dependencies = [
"numpydantic>=1.3.3", "numpydantic>=1.3.3",
"black>=24.4.2", "black>=24.4.2",
"pandas>=2.2.2", "pandas>=2.2.2",
"networkx>=3.3",
] ]
[project.urls] [project.urls]

View file

@ -22,6 +22,7 @@ Other TODO:
import json import json
import os import os
import re
import shutil import shutil
import subprocess import subprocess
import sys import sys
@ -31,11 +32,12 @@ from types import ModuleType
from typing import TYPE_CHECKING, Dict, List, Optional, Union, overload from typing import TYPE_CHECKING, Dict, List, Optional, Union, overload
import h5py import h5py
import networkx as nx
import numpy as np import numpy as np
from pydantic import BaseModel from pydantic import BaseModel
from tqdm import tqdm from tqdm import tqdm
from nwb_linkml.maps.hdf5 import ReadPhases, ReadQueue, flatten_hdf from nwb_linkml.maps.hdf5 import ReadPhases, ReadQueue, flatten_hdf, get_references
if TYPE_CHECKING: if TYPE_CHECKING:
from nwb_linkml.providers.schema import SchemaProvider from nwb_linkml.providers.schema import SchemaProvider
@ -47,6 +49,85 @@ else:
from typing_extensions import Never from typing_extensions import Never
def hdf_dependency_graph(h5f: Path | h5py.File) -> nx.DiGraph:
    """
    Directed dependency graph of dataset and group nodes in an NWBFile such that
    each node ``n_i`` is connected to node ``n_j`` if

    * ``n_j`` is ``n_i``'s child
    * ``n_i`` contains a reference to ``n_j``

    Resolve references in

    * Attributes
    * Dataset columns
    * Compound dtypes

    Nodes are keyed by their absolute HDF5 path, and each visited node's HDF5
    attributes are copied into its graph node data. Groups and datasets under
    ``/specifications`` are skipped.

    Args:
        h5f (:class:`pathlib.Path` | :class:`h5py.File`): NWB file to graph.
            A path is opened read-only and closed again before returning;
            an already-open file is left open for the caller.

    Returns:
        :class:`networkx.DiGraph`
    """
    # detect nodes to skip
    skip_pattern = re.compile(r"^/specifications.*")

    # only close the file if this function was the one to open it
    opened_here = isinstance(h5f, (Path, str))
    if opened_here:
        h5f = h5py.File(h5f, "r")

    g = nx.DiGraph()

    def _visit_item(name: str, node: h5py.Dataset | h5py.Group) -> None:
        # ``visititems`` passes ``name`` relative to the visited group (no leading
        # "/"), so the pattern has to be tested against the absolute ``node.name``
        if skip_pattern.match(node.name):
            return

        # find references in attributes
        refs = get_references(node)

        if isinstance(node, h5py.Group):
            # children are dependencies too, except the ones we skip entirely --
            # otherwise they would reappear as bare nodes via the edge list
            refs.extend(
                child.name for child in node.values() if not skip_pattern.match(child.name)
            )

        # add edges, deduplicating targets
        g.add_edges_from((node.name, ref) for ref in set(refs))

        # ensure node is in the graph even when it has no outbound edges
        # (a no-op if an edge already created it)
        g.add_node(node.name)

        # store attrs in node
        g.nodes[node.name].update(node.attrs)

    try:
        # apply to root, which ``visititems`` does not visit itself
        _visit_item(h5f.name, h5f)
        h5f.visititems(_visit_item)
    finally:
        if opened_here:
            h5f.close()

    return g
def filter_dependency_graph(g: nx.DiGraph) -> nx.DiGraph:
    """
    Prune a dependency graph in place and return it.

    A node is removed when either

    * its ``neurodata_type`` node attribute equals ``"VectorData"``, OR
    * it has no (truthy) ``neurodata_type`` AND no outbound edges

    Pruning is a single pass: out-degrees are evaluated before any node is removed.

    NOTE(review): an earlier description of this function said ``VectorIndex``
    nodes are the ones dropped (as handled by the dynamictable mixins), but the
    comparison is against ``"VectorData"`` -- confirm which is intended.

    Args:
        g (:class:`networkx.DiGraph`): graph to prune; modified in place

    Returns:
        :class:`networkx.DiGraph`: the same graph object, after pruning
    """

    def _should_drop(name: str) -> bool:
        neurodata_type = g.nodes[name].get("neurodata_type", None)
        if neurodata_type == "VectorData":
            return True
        return not neurodata_type and g.out_degree(name) == 0

    # collect first, then remove, so the graph is not mutated while iterating
    doomed = [name for name in g.nodes if _should_drop(name)]
    g.remove_nodes_from(doomed)
    return g
class HDF5IO: class HDF5IO:
""" """
Read (and eventually write) from an NWB HDF5 file. Read (and eventually write) from an NWB HDF5 file.

View file

@ -859,7 +859,7 @@ def get_references(obj: h5py.Dataset | h5py.Group) -> List[str]:
# scalar # scalar
if isinstance(obj[()], h5py.h5r.Reference): if isinstance(obj[()], h5py.h5r.Reference):
refs.append(obj[()]) refs.append(obj[()])
elif isinstance(obj[0], h5py.h5r.Reference): elif len(obj) > 0 and isinstance(obj[0], h5py.h5r.Reference):
# single-column # single-column
refs.extend(obj[:].tolist()) refs.extend(obj[:].tolist())
elif len(obj.dtype) > 1: elif len(obj.dtype) > 1:

View file

@ -1,46 +1,28 @@
# manually transcribed target version of nwb-linkml dataset # manually transcribed target version of nwb-linkml dataset
# matching the one created by fixtures.py:nwb_file # matching the one created by fixtures.py:nwb_file
--- meta:
id: my_dataset id: my_dataset
prefixes: prefixes:
nwbfile: nwbfile:
- path: "test_nwb.nwb" - path: "test_nwb.nwb"
- hash: "blake2b:blahblahblahblah" - hash: "blake2b:blahblahblahblah"
imports: imports:
core: core:
as: nwb as: nwb
version: "2.7.0" version: "2.7.0"
from: from:
- pypi: - pypi:
package: nwb-models package: nwb-models
---
hdmf-common: hdmf-common:
as: hdmf as: hdmf
version: "1.8.0" version: "1.8.0"
from: from:
- pypi: - pypi:
package: nwb-models package: nwb-models
---
extracellular_ephys: &ecephys extracellular_ephys: &ecephys
electrodes:
group:
- @shank{{i}}
- @shank{{i}}
- @shank{{i}}
# could have expression here like { range(3) } => i
# - ... { range(3) } => i
# or blank ... implies use expression from outer scope
- ...
shank{{i}}:
device: @general.devices.array
...: { range(3) } => i
# expands to
extracellular_ephys:
electrodes: electrodes:
group: group:
- @shank0 - @shank0
@ -54,7 +36,7 @@ extracellular_ephys:
device: @general.devices.array device: @general.devices.array
# etc. # etc.
data: !{{ nwb.NWBFile }} <== :nwbfile data: !nwb.NWBFile
file_create_date: [ 2024-01-01 ] file_create_date: [ 2024-01-01 ]
identifier: "1111-1111-1111-1111" identifier: "1111-1111-1111-1111"
session_description: All that you touch, you change. session_description: All that you touch, you change.
@ -63,11 +45,12 @@ data: !{{ nwb.NWBFile }} <== :nwbfile
devices: devices:
- Heka ITC-1600: - Heka ITC-1600:
- Microscope: - Microscope:
description: My two-photon microscope
manufacturer: The best microscope manufacturer
- array: - array:
description: old reliable description: old reliable
manufacturer: diy manufacturer: diy
extracellular_ephys: *ecephys extracellular_ephys: nwbfile:/general/extracellular_ephys
experiment_description: All that you change, changes you. experiment_description: All that you change, changes you.
experimenter: [ "Lauren Oya Olamina" ] experimenter: [ "Lauren Oya Olamina" ]
institution: Earthseed Research Institute institution: Earthseed Research Institute

View file

@ -0,0 +1,76 @@
# Sketch of a condensed expression syntax for creation with nwb-linkml
# just a sketch! keeping here for continued work but currently unused.
---
id: my_dataset
prefixes:
nwbfile:
- path: "test_nwb.nwb"
- hash: "blake2b:blahblahblahblah"
imports:
core:
as: nwb
version: "2.7.0"
from:
- pypi:
package: nwb-models
hdmf-common:
as: hdmf
version: "1.8.0"
from:
- pypi:
package: nwb-models
---
extracellular_ephys: &ecephys
electrodes:
group:
- @shank{{i}}
- @shank{{i}}
- @shank{{i}}
# could have expression here like { range(3) } => i
# - ... { range(3) } => i
# or blank ... implies use expression from outer scope
- ...
shank{{i}}:
device: @general.devices.array
...: { range(3) } => i
# expands to
extracellular_ephys:
electrodes:
group:
- @shank0
- @shank0
- @shank0
- @shank1
- # etc.
shank0:
device: @general.devices.array
shank1:
device: @general.devices.array
# etc.
data: !{{ nwb.NWBFile }} <== :nwbfile
file_create_date: [ 2024-01-01 ]
identifier: "1111-1111-1111-1111"
session_description: All that you touch, you change.
session_start_time: 2024-01-01T01:01:01
general:
devices:
- Heka ITC-1600:
- Microscope:
- array:
description: old reliable
manufacturer: diy
extracellular_ephys: *ecephys
experiment_description: All that you change, changes you.
experimenter: [ "Lauren Oya Olamina" ]
institution: Earthseed Research Institute
keywords:
- behavior
- belief
related_publications: doi:10.1016/j.neuron.2016.12.011

View file

@ -349,6 +349,8 @@ def nwb_file(tmp_output_dir) -> Path:
generator = np.random.default_rng() generator = np.random.default_rng()
nwb_path = tmp_output_dir / "test_nwb.nwb" nwb_path = tmp_output_dir / "test_nwb.nwb"
if nwb_path.exists():
return nwb_path
nwbfile = NWBFile( nwbfile = NWBFile(
session_description="All that you touch, you change.", # required session_description="All that you touch, you change.", # required

View file

@ -4,7 +4,7 @@ import h5py
import numpy as np import numpy as np
import pytest import pytest
from nwb_linkml.io.hdf5 import HDF5IO, truncate_file from nwb_linkml.io.hdf5 import HDF5IO, truncate_file, hdf_dependency_graph, filter_dependency_graph
@pytest.mark.skip() @pytest.mark.skip()
@ -98,3 +98,12 @@ def test_flatten_hdf():
assert not any(["specifications" in v.path for v in flat.values()]) assert not any(["specifications" in v.path for v in flat.values()])
pdb.set_trace() pdb.set_trace()
raise NotImplementedError("Just a stub for local testing for now, finish me!") raise NotImplementedError("Just a stub for local testing for now, finish me!")
def test_dependency_graph(nwb_file):
    """
    dependency graph is correctly constructed from an HDF5 file
    """
    graph = hdf_dependency_graph(nwb_file)
    graph = filter_dependency_graph(graph)

    # the root group has children (outbound edges), so filtering must keep it
    assert "/" in graph.nodes
    # filtering removes every node whose neurodata_type is VectorData
    assert all(
        attrs.get("neurodata_type") != "VectorData" for _, attrs in graph.nodes(data=True)
    )