working on graph loading of nwb files

This commit is contained in:
sneakers-the-rat 2024-08-31 01:47:42 -07:00
parent b555ccb199
commit 49585e467a
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
8 changed files with 197 additions and 45 deletions

View file

@ -5,7 +5,7 @@
groups = ["default", "dev", "plot", "tests"]
strategy = ["inherit_metadata"]
lock_version = "4.5.0"
content_hash = "sha256:aaf3c34a5f39fc7db0c5dce91a0693eb78358a255d6b0a72f2e1f988eb7e899f"
content_hash = "sha256:1c297e11f6dc9e4f6b8d29df872177d2ce65bbd334c0b65aa5175dfb125c4d9f"
[[metadata.targets]]
requires_python = ">=3.10,<3.13"
@ -996,7 +996,7 @@ name = "networkx"
version = "3.3"
requires_python = ">=3.10"
summary = "Python package for creating and manipulating graphs and networks"
groups = ["dev", "tests"]
groups = ["default", "dev", "tests"]
files = [
{file = "networkx-3.3-py3-none-any.whl", hash = "sha256:28575580c6ebdaf4505b22c6256a2b9de86b316dc63ba9e93abde3d78dfdbcf2"},
{file = "networkx-3.3.tar.gz", hash = "sha256:0c127d8b2f4865f59ae9cb8aafcd60b5c70f3241ebd66f7defad7c4ab90126c9"},

View file

@ -25,6 +25,7 @@ dependencies = [
"numpydantic>=1.3.3",
"black>=24.4.2",
"pandas>=2.2.2",
"networkx>=3.3",
]
[project.urls]

View file

@ -22,6 +22,7 @@ Other TODO:
import json
import os
import re
import shutil
import subprocess
import sys
@ -31,11 +32,12 @@ from types import ModuleType
from typing import TYPE_CHECKING, Dict, List, Optional, Union, overload
import h5py
import networkx as nx
import numpy as np
from pydantic import BaseModel
from tqdm import tqdm
from nwb_linkml.maps.hdf5 import ReadPhases, ReadQueue, flatten_hdf
from nwb_linkml.maps.hdf5 import ReadPhases, ReadQueue, flatten_hdf, get_references
if TYPE_CHECKING:
from nwb_linkml.providers.schema import SchemaProvider
@ -47,6 +49,85 @@ else:
from typing_extensions import Never
def hdf_dependency_graph(h5f: Path | h5py.File) -> nx.DiGraph:
    """
    Directed dependency graph of dataset and group nodes in an NWBFile such that
    each node ``n_i`` is connected to node ``n_j`` if

    * ``n_j`` is ``n_i``'s child
    * ``n_i`` contains a reference to ``n_j``

    Resolve references in

    * Attributes
    * Dataset columns
    * Compound dtypes

    Nodes whose absolute path matches ``^/specifications.*`` are skipped, and
    no child edges are created that point at them.

    Args:
        h5f (:class:`pathlib.Path` | :class:`h5py.File`): NWB file to graph.
            A path (or string) is opened read-only and closed again before
            returning; an already-open :class:`h5py.File` is left open for
            the caller to manage.

    Returns:
        :class:`networkx.DiGraph`
    """
    # detect nodes to skip
    skip_pattern = re.compile(r"^/specifications.*")

    g = nx.DiGraph()

    def _visit_item(name: str, node: h5py.Dataset | h5py.Group) -> None:
        # ``visititems`` passes ``name`` relative to the visited group (no leading
        # "/"), so the skip pattern has to be tested against the absolute ``node.name``
        if skip_pattern.match(node.name):
            return

        # find references in attributes
        refs = get_references(node)
        if isinstance(node, h5py.Group):
            # ``values()`` yields None for dangling links; ignore those, and do not
            # create edges into the skipped subtree
            refs.extend(
                child.name
                for child in node.values()
                if child is not None and not skip_pattern.match(child.name)
            )

        # ensure node added to graph even when it has no outgoing edges
        g.add_node(node.name)
        # add edges, deduplicated
        g.add_edges_from((node.name, ref) for ref in set(refs))
        # store attrs in node
        g.nodes[node.name].update(node.attrs)

    # only close the file if it was opened here
    opened_here = isinstance(h5f, (Path, str))
    if opened_here:
        h5f = h5py.File(h5f, "r")

    try:
        # apply to root, which ``visititems`` does not visit itself
        _visit_item(h5f.name, h5f)
        h5f.visititems(_visit_item)
    finally:
        if opened_here:
            h5f.close()
    return g
def filter_dependency_graph(g: nx.DiGraph) -> nx.DiGraph:
    """
    Prune a dependency graph in place and hand the same graph back.

    A node is removed when either

    * its ``neurodata_type`` attribute equals ``"VectorData"``, OR
    * it has no (truthy) ``neurodata_type`` attribute AND no outbound edges

    NOTE(review): confirm whether ``VectorIndex`` nodes (handled by the
    dynamictable mixins) were meant to be removed here instead of, or in
    addition to, ``VectorData``.

    Args:
        g (:class:`networkx.DiGraph`): graph whose nodes carry their HDF5
            attributes as node data; modified in place.

    Returns:
        :class:`networkx.DiGraph`: the same ``g`` object, after removal
    """

    def _is_prunable(name: str) -> bool:
        neurodata_type = g.nodes[name].get("neurodata_type", None)
        if neurodata_type == "VectorData":
            return True
        return not neurodata_type and g.out_degree(name) == 0

    # collect first, remove afterwards: the graph must not change while iterating
    doomed = [name for name in g.nodes if _is_prunable(name)]
    g.remove_nodes_from(doomed)
    return g
class HDF5IO:
"""
Read (and eventually write) from an NWB HDF5 file.

View file

@ -859,7 +859,7 @@ def get_references(obj: h5py.Dataset | h5py.Group) -> List[str]:
# scalar
if isinstance(obj[()], h5py.h5r.Reference):
refs.append(obj[()])
elif isinstance(obj[0], h5py.h5r.Reference):
elif len(obj) > 0 and isinstance(obj[0], h5py.h5r.Reference):
# single-column
refs.extend(obj[:].tolist())
elif len(obj.dtype) > 1:

View file

@ -1,46 +1,28 @@
# manually transcribed target version of nwb-linkml dataset
# matching the one created by fixtures.py:nwb_file
---
id: my_dataset
meta:
id: my_dataset
prefixes:
prefixes:
nwbfile:
- path: "test_nwb.nwb"
- hash: "blake2b:blahblahblahblah"
imports:
imports:
core:
as: nwb
version: "2.7.0"
from:
- pypi:
package: nwb-models
---
hdmf-common:
as: hdmf
version: "1.8.0"
from:
- pypi:
package: nwb-models
---
extracellular_ephys: &ecephys
electrodes:
group:
- @shank{{i}}
- @shank{{i}}
- @shank{{i}}
# could have expression here like { range(3) } => i
# - ... { range(3) } => i
# or blank ... implies use expression from outer scope
- ...
shank{{i}}:
device: @general.devices.array
...: { range(3) } => i
# expands to
extracellular_ephys:
electrodes:
group:
- @shank0
@ -54,7 +36,7 @@ extracellular_ephys:
device: @general.devices.array
# etc.
data: !{{ nwb.NWBFile }} <== :nwbfile
data: !nwb.NWBFile
file_create_date: [ 2024-01-01 ]
identifier: "1111-1111-1111-1111"
session_description: All that you touch, you change.
@ -63,11 +45,12 @@ data: !{{ nwb.NWBFile }} <== :nwbfile
devices:
- Heka ITC-1600:
- Microscope:
description: My two-photon microscope
manufacturer: The best microscope manufacturer
- array:
description: old reliable
manufacturer: diy
extracellular_ephys: *ecephys
extracellular_ephys: nwbfile:/general/extracellular_ephys
experiment_description: All that you change, changes you.
experimenter: [ "Lauren Oya Olamina" ]
institution: Earthseed Research Institute

View file

@ -0,0 +1,76 @@
# Sketch of a condensed expression syntax for creation with nwb-linkml
# just a sketch! keeping here for continued work but currently unused.
---
id: my_dataset
prefixes:
nwbfile:
- path: "test_nwb.nwb"
- hash: "blake2b:blahblahblahblah"
imports:
core:
as: nwb
version: "2.7.0"
from:
- pypi:
package: nwb-models
hdmf-common:
as: hdmf
version: "1.8.0"
from:
- pypi:
package: nwb-models
---
extracellular_ephys: &ecephys
electrodes:
group:
- @shank{{i}}
- @shank{{i}}
- @shank{{i}}
# could have expression here like { range(3) } => i
# - ... { range(3) } => i
# or blank ... implies use expression from outer scope
- ...
shank{{i}}:
device: @general.devices.array
...: { range(3) } => i
# expands to
extracellular_ephys:
electrodes:
group:
- @shank0
- @shank0
- @shank0
- @shank1
- # etc.
shank0:
device: @general.devices.array
shank1:
device: @general.devices.array
# etc.
data: !{{ nwb.NWBFile }} <== :nwbfile
file_create_date: [ 2024-01-01 ]
identifier: "1111-1111-1111-1111"
session_description: All that you touch, you change.
session_start_time: 2024-01-01T01:01:01
general:
devices:
- Heka ITC-1600:
- Microscope:
- array:
description: old reliable
manufacturer: diy
extracellular_ephys: *ecephys
experiment_description: All that you change, changes you.
experimenter: [ "Lauren Oya Olamina" ]
institution: Earthseed Research Institute
keywords:
- behavior
- belief
related_publications: doi:10.1016/j.neuron.2016.12.011

View file

@ -349,6 +349,8 @@ def nwb_file(tmp_output_dir) -> Path:
generator = np.random.default_rng()
nwb_path = tmp_output_dir / "test_nwb.nwb"
if nwb_path.exists():
return nwb_path
nwbfile = NWBFile(
session_description="All that you touch, you change.", # required

View file

@ -4,7 +4,7 @@ import h5py
import numpy as np
import pytest
from nwb_linkml.io.hdf5 import HDF5IO, truncate_file
from nwb_linkml.io.hdf5 import HDF5IO, truncate_file, hdf_dependency_graph, filter_dependency_graph
@pytest.mark.skip()
@ -98,3 +98,12 @@ def test_flatten_hdf():
assert not any(["specifications" in v.path for v in flat.values()])
pdb.set_trace()
raise NotImplementedError("Just a stub for local testing for now, finish me!")
def test_dependency_graph(nwb_file):
    """
    Smoke test: a dependency graph can be built from the fixture NWB file
    and then filtered without raising.

    No assertions are made about the graph's contents yet.
    """
    filter_dependency_graph(hdf_dependency_graph(nwb_file))