mirror of
https://github.com/p2p-ld/nwb-linkml.git
synced 2025-01-09 21:54:27 +00:00
working on graph loading of nwb files
This commit is contained in:
parent
b555ccb199
commit
49585e467a
8 changed files with 197 additions and 45 deletions
|
@ -5,7 +5,7 @@
|
|||
groups = ["default", "dev", "plot", "tests"]
|
||||
strategy = ["inherit_metadata"]
|
||||
lock_version = "4.5.0"
|
||||
content_hash = "sha256:aaf3c34a5f39fc7db0c5dce91a0693eb78358a255d6b0a72f2e1f988eb7e899f"
|
||||
content_hash = "sha256:1c297e11f6dc9e4f6b8d29df872177d2ce65bbd334c0b65aa5175dfb125c4d9f"
|
||||
|
||||
[[metadata.targets]]
|
||||
requires_python = ">=3.10,<3.13"
|
||||
|
@ -996,7 +996,7 @@ name = "networkx"
|
|||
version = "3.3"
|
||||
requires_python = ">=3.10"
|
||||
summary = "Python package for creating and manipulating graphs and networks"
|
||||
groups = ["dev", "tests"]
|
||||
groups = ["default", "dev", "tests"]
|
||||
files = [
|
||||
{file = "networkx-3.3-py3-none-any.whl", hash = "sha256:28575580c6ebdaf4505b22c6256a2b9de86b316dc63ba9e93abde3d78dfdbcf2"},
|
||||
{file = "networkx-3.3.tar.gz", hash = "sha256:0c127d8b2f4865f59ae9cb8aafcd60b5c70f3241ebd66f7defad7c4ab90126c9"},
|
||||
|
|
|
@ -25,6 +25,7 @@ dependencies = [
|
|||
"numpydantic>=1.3.3",
|
||||
"black>=24.4.2",
|
||||
"pandas>=2.2.2",
|
||||
"networkx>=3.3",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
|
|
|
@ -22,6 +22,7 @@ Other TODO:
|
|||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
|
@ -31,11 +32,12 @@ from types import ModuleType
|
|||
from typing import TYPE_CHECKING, Dict, List, Optional, Union, overload
|
||||
|
||||
import h5py
|
||||
import networkx as nx
|
||||
import numpy as np
|
||||
from pydantic import BaseModel
|
||||
from tqdm import tqdm
|
||||
|
||||
from nwb_linkml.maps.hdf5 import ReadPhases, ReadQueue, flatten_hdf
|
||||
from nwb_linkml.maps.hdf5 import ReadPhases, ReadQueue, flatten_hdf, get_references
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from nwb_linkml.providers.schema import SchemaProvider
|
||||
|
@ -47,6 +49,85 @@ else:
|
|||
from typing_extensions import Never
|
||||
|
||||
|
||||
def hdf_dependency_graph(h5f: Path | h5py.File) -> nx.DiGraph:
    """
    Directed dependency graph of dataset and group nodes in an NWBFile such that
    each node ``n_i`` is connected to node ``n_j`` if

    * ``n_j`` is ``n_i``'s child
    * ``n_i`` contains a reference to ``n_j``

    References are collected with :func:`.get_references`; the attributes of
    every visited node are stored on the corresponding graph node.

    Everything beneath ``/specifications`` is skipped: those nodes are not
    visited and are not added as children of their parent.

    Args:
        h5f (:class:`pathlib.Path` | :class:`h5py.File`): NWB file to graph.
            When a path is given, the file is opened read-only and closed again
            before returning; an already-open file is left open.

    Returns:
        :class:`networkx.DiGraph`
    """
    # detect nodes to skip
    skip_pattern = re.compile(r"^/specifications.*")

    # only close the file if we were the ones to open it
    opened_here = isinstance(h5f, (Path, str))
    if opened_here:
        h5f = h5py.File(h5f, "r")

    g = nx.DiGraph()

    def _visit_item(name: str, node: h5py.Dataset | h5py.Group) -> None:
        # ``visititems`` passes ``name`` relative to the visited group (without a
        # leading "/"), so the anchored pattern has to be tested against the
        # absolute path in ``node.name`` instead
        if skip_pattern.match(node.name):
            return

        # find references held by this node
        refs = get_references(node)
        if isinstance(node, h5py.Group):
            # children count as dependencies too, except for skipped subtrees
            refs.extend(
                child.name for child in node.values() if not skip_pattern.match(child.name)
            )

        # add edges (deduplicated)
        edges = [(node.name, ref) for ref in set(refs)]
        g.add_edges_from(edges)

        # ensure node added to graph even when it has no dependencies
        if not edges:
            g.add_node(node.name)

        # store attrs in node
        g.nodes[node.name].update(node.attrs)

    try:
        # ``visititems`` does not call the visitor on the starting group itself,
        # so apply it to the root explicitly
        _visit_item(h5f.name, h5f)
        h5f.visititems(_visit_item)
    finally:
        if opened_here:
            h5f.close()

    return g
|
||||
|
||||
|
||||
def filter_dependency_graph(g: nx.DiGraph) -> nx.DiGraph:
    """
    Prune a dependency graph in place and return it.

    A node is dropped when either

    * its ``neurodata_type`` attribute equals ``"VectorData"``, or
    * it has no (truthy) ``neurodata_type`` and no outbound edges

    Out-degrees are evaluated on the graph as passed in; all selected nodes
    are removed together afterwards.

    NOTE(review): earlier documentation of this function named ``VectorIndex``
    (handled by the dynamictable mixins) as the type to drop, while the check
    is against ``"VectorData"`` - confirm which one is intended.

    Args:
        g (:class:`networkx.DiGraph`): graph whose nodes may carry a
            ``neurodata_type`` attribute

    Returns:
        :class:`networkx.DiGraph`: the same graph object, with nodes removed
    """

    def _is_removable(name: str) -> bool:
        kind = g.nodes[name].get("neurodata_type", None)
        if kind == "VectorData":
            return True
        return not kind and g.out_degree(name) == 0

    # collect first, remove afterwards, so the graph is not mutated mid-iteration
    doomed = [name for name in g.nodes.keys() if _is_removable(name)]
    g.remove_nodes_from(doomed)
    return g
|
||||
|
||||
|
||||
class HDF5IO:
|
||||
"""
|
||||
Read (and eventually write) from an NWB HDF5 file.
|
||||
|
|
|
@ -859,7 +859,7 @@ def get_references(obj: h5py.Dataset | h5py.Group) -> List[str]:
|
|||
# scalar
|
||||
if isinstance(obj[()], h5py.h5r.Reference):
|
||||
refs.append(obj[()])
|
||||
elif isinstance(obj[0], h5py.h5r.Reference):
|
||||
elif len(obj) > 0 and isinstance(obj[0], h5py.h5r.Reference):
|
||||
# single-column
|
||||
refs.extend(obj[:].tolist())
|
||||
elif len(obj.dtype) > 1:
|
||||
|
|
|
@ -1,46 +1,28 @@
|
|||
# manually transcribed target version of nwb-linkml dataset
|
||||
# matching the one created by fixtures.py:nwb_file
|
||||
---
|
||||
id: my_dataset
|
||||
meta:
|
||||
id: my_dataset
|
||||
|
||||
prefixes:
|
||||
prefixes:
|
||||
nwbfile:
|
||||
- path: "test_nwb.nwb"
|
||||
- hash: "blake2b:blahblahblahblah"
|
||||
|
||||
imports:
|
||||
imports:
|
||||
core:
|
||||
as: nwb
|
||||
version: "2.7.0"
|
||||
from:
|
||||
- pypi:
|
||||
package: nwb-models
|
||||
---
|
||||
|
||||
hdmf-common:
|
||||
as: hdmf
|
||||
version: "1.8.0"
|
||||
from:
|
||||
- pypi:
|
||||
package: nwb-models
|
||||
---
|
||||
|
||||
extracellular_ephys: &ecephys
|
||||
electrodes:
|
||||
group:
|
||||
- @shank{{i}}
|
||||
- @shank{{i}}
|
||||
- @shank{{i}}
|
||||
# could have expression here like { range(3) } => i
|
||||
# - ... { range(3) } => i
|
||||
# or blank ... implies use expression from outer scope
|
||||
- ...
|
||||
shank{{i}}:
|
||||
device: @general.devices.array
|
||||
...: { range(3) } => i
|
||||
|
||||
# expands to
|
||||
extracellular_ephys:
|
||||
electrodes:
|
||||
group:
|
||||
- @shank0
|
||||
|
@ -54,7 +36,7 @@ extracellular_ephys:
|
|||
device: @general.devices.array
|
||||
# etc.
|
||||
|
||||
data: !{{ nwb.NWBFile }} <== :nwbfile
|
||||
data: !nwb.NWBFile
|
||||
file_create_date: [ 2024-01-01 ]
|
||||
identifier: "1111-1111-1111-1111"
|
||||
session_description: All that you touch, you change.
|
||||
|
@ -63,11 +45,12 @@ data: !{{ nwb.NWBFile }} <== :nwbfile
|
|||
devices:
|
||||
- Heka ITC-1600:
|
||||
- Microscope:
|
||||
description: My two-photon microscope
|
||||
manufacturer: The best microscope manufacturer
|
||||
- array:
|
||||
description: old reliable
|
||||
manufacturer: diy
|
||||
extracellular_ephys: *ecephys
|
||||
|
||||
extracellular_ephys: nwbfile:/general/extracellular_ephys
|
||||
experiment_description: All that you change, changes you.
|
||||
experimenter: [ "Lauren Oya Olamina" ]
|
||||
institution: Earthseed Research Institute
|
||||
|
|
76
nwb_linkml/tests/data/test_nwb_condensed_sketch.yaml
Normal file
76
nwb_linkml/tests/data/test_nwb_condensed_sketch.yaml
Normal file
|
@ -0,0 +1,76 @@
|
|||
# Sketch of a condensed expression syntax for creation with nwb-linkml
|
||||
# just a sketch! keeping here for continued work but currently unused.
|
||||
---
|
||||
id: my_dataset
|
||||
|
||||
prefixes:
|
||||
nwbfile:
|
||||
- path: "test_nwb.nwb"
|
||||
- hash: "blake2b:blahblahblahblah"
|
||||
|
||||
imports:
|
||||
core:
|
||||
as: nwb
|
||||
version: "2.7.0"
|
||||
from:
|
||||
- pypi:
|
||||
package: nwb-models
|
||||
hdmf-common:
|
||||
as: hdmf
|
||||
version: "1.8.0"
|
||||
from:
|
||||
- pypi:
|
||||
package: nwb-models
|
||||
---
|
||||
|
||||
extracellular_ephys: &ecephys
|
||||
electrodes:
|
||||
group:
|
||||
- @shank{{i}}
|
||||
- @shank{{i}}
|
||||
- @shank{{i}}
|
||||
# could have expression here like { range(3) } => i
|
||||
# - ... { range(3) } => i
|
||||
# or blank ... implies use expression from outer scope
|
||||
- ...
|
||||
shank{{i}}:
|
||||
device: @general.devices.array
|
||||
...: { range(3) } => i
|
||||
|
||||
# expands to
|
||||
extracellular_ephys:
|
||||
electrodes:
|
||||
group:
|
||||
- @shank0
|
||||
- @shank0
|
||||
- @shank0
|
||||
- @shank1
|
||||
- # etc.
|
||||
shank0:
|
||||
device: @general.devices.array
|
||||
shank1:
|
||||
device: @general.devices.array
|
||||
# etc.
|
||||
|
||||
data: !{{ nwb.NWBFile }} <== :nwbfile
|
||||
file_create_date: [ 2024-01-01 ]
|
||||
identifier: "1111-1111-1111-1111"
|
||||
session_description: All that you touch, you change.
|
||||
session_start_time: 2024-01-01T01:01:01
|
||||
general:
|
||||
devices:
|
||||
- Heka ITC-1600:
|
||||
- Microscope:
|
||||
- array:
|
||||
description: old reliable
|
||||
manufacturer: diy
|
||||
extracellular_ephys: *ecephys
|
||||
|
||||
experiment_description: All that you change, changes you.
|
||||
experimenter: [ "Lauren Oya Olamina" ]
|
||||
institution: Earthseed Research Institute
|
||||
keywords:
|
||||
- behavior
|
||||
- belief
|
||||
related_publications: doi:10.1016/j.neuron.2016.12.011
|
||||
|
|
@ -349,6 +349,8 @@ def nwb_file(tmp_output_dir) -> Path:
|
|||
generator = np.random.default_rng()
|
||||
|
||||
nwb_path = tmp_output_dir / "test_nwb.nwb"
|
||||
if nwb_path.exists():
|
||||
return nwb_path
|
||||
|
||||
nwbfile = NWBFile(
|
||||
session_description="All that you touch, you change.", # required
|
||||
|
|
|
@ -4,7 +4,7 @@ import h5py
|
|||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from nwb_linkml.io.hdf5 import HDF5IO, truncate_file
|
||||
from nwb_linkml.io.hdf5 import HDF5IO, truncate_file, hdf_dependency_graph, filter_dependency_graph
|
||||
|
||||
|
||||
@pytest.mark.skip()
|
||||
|
@ -98,3 +98,12 @@ def test_flatten_hdf():
|
|||
assert not any(["specifications" in v.path for v in flat.values()])
|
||||
pdb.set_trace()
|
||||
raise NotImplementedError("Just a stub for local testing for now, finish me!")
|
||||
|
||||
|
||||
def test_dependency_graph(nwb_file):
    """
    dependency graph is correctly constructed from an HDF5 file
    """
    graph = hdf_dependency_graph(nwb_file)
    graph = filter_dependency_graph(graph)

    # the root group always has children, so it survives filtering
    # and the graph can never be empty
    assert graph.number_of_nodes() > 0

    # filtering removes every node whose neurodata type is VectorData
    assert all(
        attrs.get("neurodata_type", None) != "VectorData"
        for _, attrs in graph.nodes(data=True)
    )
|
||||
|
|
Loading…
Reference in a new issue