mirror of
https://github.com/p2p-ld/nwb-linkml.git
synced 2025-01-09 21:54:27 +00:00
working on graph loading of nwb files
This commit is contained in:
parent
b555ccb199
commit
49585e467a
8 changed files with 197 additions and 45 deletions
|
@ -5,7 +5,7 @@
|
||||||
groups = ["default", "dev", "plot", "tests"]
|
groups = ["default", "dev", "plot", "tests"]
|
||||||
strategy = ["inherit_metadata"]
|
strategy = ["inherit_metadata"]
|
||||||
lock_version = "4.5.0"
|
lock_version = "4.5.0"
|
||||||
content_hash = "sha256:aaf3c34a5f39fc7db0c5dce91a0693eb78358a255d6b0a72f2e1f988eb7e899f"
|
content_hash = "sha256:1c297e11f6dc9e4f6b8d29df872177d2ce65bbd334c0b65aa5175dfb125c4d9f"
|
||||||
|
|
||||||
[[metadata.targets]]
|
[[metadata.targets]]
|
||||||
requires_python = ">=3.10,<3.13"
|
requires_python = ">=3.10,<3.13"
|
||||||
|
@ -996,7 +996,7 @@ name = "networkx"
|
||||||
version = "3.3"
|
version = "3.3"
|
||||||
requires_python = ">=3.10"
|
requires_python = ">=3.10"
|
||||||
summary = "Python package for creating and manipulating graphs and networks"
|
summary = "Python package for creating and manipulating graphs and networks"
|
||||||
groups = ["dev", "tests"]
|
groups = ["default", "dev", "tests"]
|
||||||
files = [
|
files = [
|
||||||
{file = "networkx-3.3-py3-none-any.whl", hash = "sha256:28575580c6ebdaf4505b22c6256a2b9de86b316dc63ba9e93abde3d78dfdbcf2"},
|
{file = "networkx-3.3-py3-none-any.whl", hash = "sha256:28575580c6ebdaf4505b22c6256a2b9de86b316dc63ba9e93abde3d78dfdbcf2"},
|
||||||
{file = "networkx-3.3.tar.gz", hash = "sha256:0c127d8b2f4865f59ae9cb8aafcd60b5c70f3241ebd66f7defad7c4ab90126c9"},
|
{file = "networkx-3.3.tar.gz", hash = "sha256:0c127d8b2f4865f59ae9cb8aafcd60b5c70f3241ebd66f7defad7c4ab90126c9"},
|
||||||
|
|
|
@ -25,6 +25,7 @@ dependencies = [
|
||||||
"numpydantic>=1.3.3",
|
"numpydantic>=1.3.3",
|
||||||
"black>=24.4.2",
|
"black>=24.4.2",
|
||||||
"pandas>=2.2.2",
|
"pandas>=2.2.2",
|
||||||
|
"networkx>=3.3",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
|
|
|
@ -22,6 +22,7 @@ Other TODO:
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
@ -31,11 +32,12 @@ from types import ModuleType
|
||||||
from typing import TYPE_CHECKING, Dict, List, Optional, Union, overload
|
from typing import TYPE_CHECKING, Dict, List, Optional, Union, overload
|
||||||
|
|
||||||
import h5py
|
import h5py
|
||||||
|
import networkx as nx
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from nwb_linkml.maps.hdf5 import ReadPhases, ReadQueue, flatten_hdf
|
from nwb_linkml.maps.hdf5 import ReadPhases, ReadQueue, flatten_hdf, get_references
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from nwb_linkml.providers.schema import SchemaProvider
|
from nwb_linkml.providers.schema import SchemaProvider
|
||||||
|
@ -47,6 +49,85 @@ else:
|
||||||
from typing_extensions import Never
|
from typing_extensions import Never
|
||||||
|
|
||||||
|
|
||||||
|
def hdf_dependency_graph(h5f: Path | h5py.File) -> nx.DiGraph:
    """
    Directed dependency graph of dataset and group nodes in an NWBFile such that
    each node ``n_i`` is connected to node ``n_j`` if

    * ``n_j`` is ``n_i``'s child
    * ``n_i`` contains a reference to ``n_j``

    References are collected with ``get_references``, which is intended to
    resolve references in

    * Attributes
    * Dataset columns
    * Compound dtypes

    Everything under ``/specifications`` is left out of the graph, both as a
    visited node and as the target of an edge.

    Args:
        h5f (:class:`pathlib.Path` | :class:`h5py.File`): NWB file to graph.
            A path (``Path`` or ``str``) is opened read-only and closed again
            before returning; an already-open file is left open for the caller.

    Returns:
        :class:`networkx.DiGraph` whose nodes are keyed by absolute HDF5 path and
        carry the HDF5 attributes of the corresponding object as node data.
    """
    # detect nodes to skip
    skip_pattern = re.compile(r"^/specifications.*")

    # only close the file afterwards if we were the ones who opened it
    opened_here = isinstance(h5f, (Path, str))
    if opened_here:
        h5f = h5py.File(h5f, "r")

    g = nx.DiGraph()

    def _visit_item(name: str, node: h5py.Dataset | h5py.Group) -> None:
        # ``visititems`` passes ``name`` relative to the visited group (no leading
        # slash), so the pattern has to be matched against the absolute ``node.name``
        if skip_pattern.match(node.name):
            return

        # find references in attributes
        refs = get_references(node)
        if isinstance(node, h5py.Group):
            refs.extend([child.name for child in node.values()])
        # deduplicate, and don't let skipped nodes back in as edge targets
        refs = {ref for ref in refs if not skip_pattern.match(ref)}

        # ensure node is in the graph even when it has no outbound edges
        # (a no-op if an earlier edge already created it)
        g.add_node(node.name)
        g.add_edges_from((node.name, ref) for ref in refs)

        # store attrs in node
        g.nodes[node.name].update(node.attrs)

    try:
        # apply to root, which ``visititems`` does not visit itself
        _visit_item(h5f.name, h5f)
        h5f.visititems(_visit_item)
    finally:
        if opened_here:
            h5f.close()

    return g
|
||||||
|
|
||||||
|
|
||||||
|
def filter_dependency_graph(g: nx.DiGraph) -> nx.DiGraph:
    """
    Prune a dependency graph in place and return it.

    A node is removed when either

    * its ``neurodata_type`` attribute equals ``"VectorData"``, OR
    * it has no (truthy) ``neurodata_type`` AND no outbound edges

    Out-degrees are evaluated on the graph as passed in: all removals are
    collected first and applied in one step, so removing one node never
    causes another to be removed in the same call.

    NOTE(review): earlier documentation for this function described the first
    rule as removing ``VectorIndex`` nodes (handled by the dynamictable mixins),
    while the check is against ``"VectorData"`` - confirm which is intended.
    """

    def _should_drop(name: str, attrs: dict) -> bool:
        ndtype = attrs.get("neurodata_type")
        return ndtype == "VectorData" or (not ndtype and g.out_degree(name) == 0)

    doomed = [name for name, attrs in g.nodes(data=True) if _should_drop(name, attrs)]
    g.remove_nodes_from(doomed)
    return g
|
||||||
|
|
||||||
|
|
||||||
class HDF5IO:
|
class HDF5IO:
|
||||||
"""
|
"""
|
||||||
Read (and eventually write) from an NWB HDF5 file.
|
Read (and eventually write) from an NWB HDF5 file.
|
||||||
|
|
|
@ -859,7 +859,7 @@ def get_references(obj: h5py.Dataset | h5py.Group) -> List[str]:
|
||||||
# scalar
|
# scalar
|
||||||
if isinstance(obj[()], h5py.h5r.Reference):
|
if isinstance(obj[()], h5py.h5r.Reference):
|
||||||
refs.append(obj[()])
|
refs.append(obj[()])
|
||||||
elif isinstance(obj[0], h5py.h5r.Reference):
|
elif len(obj) > 0 and isinstance(obj[0], h5py.h5r.Reference):
|
||||||
# single-column
|
# single-column
|
||||||
refs.extend(obj[:].tolist())
|
refs.extend(obj[:].tolist())
|
||||||
elif len(obj.dtype) > 1:
|
elif len(obj.dtype) > 1:
|
||||||
|
|
|
@ -1,46 +1,28 @@
|
||||||
# manually transcribed target version of nwb-linkml dataset
|
# manually transcribed target version of nwb-linkml dataset
|
||||||
# matching the one created by fixtures.py:nwb_file
|
# matching the one created by fixtures.py:nwb_file
|
||||||
---
|
meta:
|
||||||
id: my_dataset
|
id: my_dataset
|
||||||
|
|
||||||
prefixes:
|
prefixes:
|
||||||
nwbfile:
|
nwbfile:
|
||||||
- path: "test_nwb.nwb"
|
- path: "test_nwb.nwb"
|
||||||
- hash: "blake2b:blahblahblahblah"
|
- hash: "blake2b:blahblahblahblah"
|
||||||
|
|
||||||
imports:
|
imports:
|
||||||
core:
|
core:
|
||||||
as: nwb
|
as: nwb
|
||||||
version: "2.7.0"
|
version: "2.7.0"
|
||||||
from:
|
from:
|
||||||
- pypi:
|
- pypi:
|
||||||
package: nwb-models
|
package: nwb-models
|
||||||
---
|
hdmf-common:
|
||||||
|
as: hdmf
|
||||||
hdmf-common:
|
version: "1.8.0"
|
||||||
as: hdmf
|
from:
|
||||||
version: "1.8.0"
|
- pypi:
|
||||||
from:
|
package: nwb-models
|
||||||
- pypi:
|
|
||||||
package: nwb-models
|
|
||||||
---
|
|
||||||
|
|
||||||
extracellular_ephys: &ecephys
|
extracellular_ephys: &ecephys
|
||||||
electrodes:
|
|
||||||
group:
|
|
||||||
- @shank{{i}}
|
|
||||||
- @shank{{i}}
|
|
||||||
- @shank{{i}}
|
|
||||||
# could have expression here like { range(3) } => i
|
|
||||||
# - ... { range(3) } => i
|
|
||||||
# or blank ... implies use expression from outer scope
|
|
||||||
- ...
|
|
||||||
shank{{i}}:
|
|
||||||
device: @general.devices.array
|
|
||||||
...: { range(3) } => i
|
|
||||||
|
|
||||||
# expands to
|
|
||||||
extracellular_ephys:
|
|
||||||
electrodes:
|
electrodes:
|
||||||
group:
|
group:
|
||||||
- @shank0
|
- @shank0
|
||||||
|
@ -54,7 +36,7 @@ extracellular_ephys:
|
||||||
device: @general.devices.array
|
device: @general.devices.array
|
||||||
# etc.
|
# etc.
|
||||||
|
|
||||||
data: !{{ nwb.NWBFile }} <== :nwbfile
|
data: !nwb.NWBFile
|
||||||
file_create_date: [ 2024-01-01 ]
|
file_create_date: [ 2024-01-01 ]
|
||||||
identifier: "1111-1111-1111-1111"
|
identifier: "1111-1111-1111-1111"
|
||||||
session_description: All that you touch, you change.
|
session_description: All that you touch, you change.
|
||||||
|
@ -63,11 +45,12 @@ data: !{{ nwb.NWBFile }} <== :nwbfile
|
||||||
devices:
|
devices:
|
||||||
- Heka ITC-1600:
|
- Heka ITC-1600:
|
||||||
- Microscope:
|
- Microscope:
|
||||||
|
description: My two-photon microscope
|
||||||
|
manufacturer: The best microscope manufacturer
|
||||||
- array:
|
- array:
|
||||||
description: old reliable
|
description: old reliable
|
||||||
manufacturer: diy
|
manufacturer: diy
|
||||||
extracellular_ephys: *ecephys
|
extracellular_ephys: nwbfile:/general/extracellular_ephys
|
||||||
|
|
||||||
experiment_description: All that you change, changes you.
|
experiment_description: All that you change, changes you.
|
||||||
experimenter: [ "Lauren Oya Olamina" ]
|
experimenter: [ "Lauren Oya Olamina" ]
|
||||||
institution: Earthseed Research Institute
|
institution: Earthseed Research Institute
|
||||||
|
|
76
nwb_linkml/tests/data/test_nwb_condensed_sketch.yaml
Normal file
76
nwb_linkml/tests/data/test_nwb_condensed_sketch.yaml
Normal file
|
@ -0,0 +1,76 @@
|
||||||
|
# Sketch of a condensed expression syntax for creation with nwb-linkml
|
||||||
|
# just a sketch! keeping here for continued work but currently unused.
|
||||||
|
---
|
||||||
|
id: my_dataset
|
||||||
|
|
||||||
|
prefixes:
|
||||||
|
nwbfile:
|
||||||
|
- path: "test_nwb.nwb"
|
||||||
|
- hash: "blake2b:blahblahblahblah"
|
||||||
|
|
||||||
|
imports:
|
||||||
|
core:
|
||||||
|
as: nwb
|
||||||
|
version: "2.7.0"
|
||||||
|
from:
|
||||||
|
- pypi:
|
||||||
|
package: nwb-models
|
||||||
|
hdmf-common:
|
||||||
|
as: hdmf
|
||||||
|
version: "1.8.0"
|
||||||
|
from:
|
||||||
|
- pypi:
|
||||||
|
package: nwb-models
|
||||||
|
---
|
||||||
|
|
||||||
|
extracellular_ephys: &ecephys
|
||||||
|
electrodes:
|
||||||
|
group:
|
||||||
|
- @shank{{i}}
|
||||||
|
- @shank{{i}}
|
||||||
|
- @shank{{i}}
|
||||||
|
# could have expression here like { range(3) } => i
|
||||||
|
# - ... { range(3) } => i
|
||||||
|
# or blank ... implies use expression from outer scope
|
||||||
|
- ...
|
||||||
|
shank{{i}}:
|
||||||
|
device: @general.devices.array
|
||||||
|
...: { range(3) } => i
|
||||||
|
|
||||||
|
# expands to
|
||||||
|
extracellular_ephys:
|
||||||
|
electrodes:
|
||||||
|
group:
|
||||||
|
- @shank0
|
||||||
|
- @shank0
|
||||||
|
- @shank0
|
||||||
|
- @shank1
|
||||||
|
- # etc.
|
||||||
|
shank0:
|
||||||
|
device: @general.devices.array
|
||||||
|
shank1:
|
||||||
|
device: @general.devices.array
|
||||||
|
# etc.
|
||||||
|
|
||||||
|
data: !{{ nwb.NWBFile }} <== :nwbfile
|
||||||
|
file_create_date: [ 2024-01-01 ]
|
||||||
|
identifier: "1111-1111-1111-1111"
|
||||||
|
session_description: All that you touch, you change.
|
||||||
|
session_start_time: 2024-01-01T01:01:01
|
||||||
|
general:
|
||||||
|
devices:
|
||||||
|
- Heka ITC-1600:
|
||||||
|
- Microscope:
|
||||||
|
- array:
|
||||||
|
description: old reliable
|
||||||
|
manufacturer: diy
|
||||||
|
extracellular_ephys: *ecephys
|
||||||
|
|
||||||
|
experiment_description: All that you change, changes you.
|
||||||
|
experimenter: [ "Lauren Oya Olamina" ]
|
||||||
|
institution: Earthseed Research Institute
|
||||||
|
keywords:
|
||||||
|
- behavior
|
||||||
|
- belief
|
||||||
|
related_publications: doi:10.1016/j.neuron.2016.12.011
|
||||||
|
|
|
@ -349,6 +349,8 @@ def nwb_file(tmp_output_dir) -> Path:
|
||||||
generator = np.random.default_rng()
|
generator = np.random.default_rng()
|
||||||
|
|
||||||
nwb_path = tmp_output_dir / "test_nwb.nwb"
|
nwb_path = tmp_output_dir / "test_nwb.nwb"
|
||||||
|
if nwb_path.exists():
|
||||||
|
return nwb_path
|
||||||
|
|
||||||
nwbfile = NWBFile(
|
nwbfile = NWBFile(
|
||||||
session_description="All that you touch, you change.", # required
|
session_description="All that you touch, you change.", # required
|
||||||
|
|
|
@ -4,7 +4,7 @@ import h5py
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from nwb_linkml.io.hdf5 import HDF5IO, truncate_file
|
from nwb_linkml.io.hdf5 import HDF5IO, truncate_file, hdf_dependency_graph, filter_dependency_graph
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skip()
|
@pytest.mark.skip()
|
||||||
|
@ -98,3 +98,12 @@ def test_flatten_hdf():
|
||||||
assert not any(["specifications" in v.path for v in flat.values()])
|
assert not any(["specifications" in v.path for v in flat.values()])
|
||||||
pdb.set_trace()
|
pdb.set_trace()
|
||||||
raise NotImplementedError("Just a stub for local testing for now, finish me!")
|
raise NotImplementedError("Just a stub for local testing for now, finish me!")
|
||||||
|
|
||||||
|
|
||||||
|
def test_dependency_graph(nwb_file):
    """
    Smoke test: a dependency graph can be built from an HDF5 NWB file
    and then filtered without raising.
    """
    # TODO: assert on the expected nodes and edges once the graph format settles
    unfiltered = hdf_dependency_graph(nwb_file)
    filter_dependency_graph(unfiltered)
|
||||||
|
|
Loading…
Reference in a new issue