mirror of https://github.com/p2p-ld/nwb-linkml.git
synced 2025-01-10 14:14:27 +00:00

Refactor dataset adapter to use check/apply style maps!

General cleaning around maps module, getting ready to remove translate module

This commit is contained in:
parent adaf939497
commit 42e64dce75

11 changed files with 414 additions and 377 deletions
@@ -152,7 +152,8 @@ class ClassAdapter(Adapter):

         return name

-    def handle_dtype(self, dtype: DTypeType | None) -> str:
+    @classmethod
+    def handle_dtype(cls, dtype: DTypeType | None) -> str:
         if isinstance(dtype, ReferenceDtype):
             return dtype.target_type
         elif dtype is None or dtype == []:
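Worth pausing on this hunk: making handle_dtype a @classmethod is what lets the new map classes in dataset.py below call it as ClassAdapter.handle_dtype(cls.dtype) without holding an adapter instance. A toy sketch of the pattern (stand-in names, not the real adapter):

class Adapter:
    # before: handle(self, x) forced callers to construct an Adapter first
    @classmethod
    def handle(cls, x: str) -> str:
        # stateless: depends only on its arguments, never on instance state
        return x.upper()

# callers (like the stateless map classes below) use the class directly:
assert Adapter.handle('vectordata') == 'VECTORDATA'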
@@ -4,6 +4,7 @@ Adapter for NWB datasets to linkml Classes
 import pdb
 from typing import Optional, List
 import warnings
+from abc import abstractmethod

 from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
 from pydantic import PrivateAttr
@@ -11,150 +12,142 @@ from pydantic import PrivateAttr
 from nwb_schema_language import Dataset, ReferenceDtype, CompoundDtype, DTypeType
 from nwb_linkml.adapters.classes import ClassAdapter
 from nwb_linkml.maps.naming import camel_to_snake
+from nwb_linkml.maps.dtype import flat_to_linkml
 from nwb_linkml.adapters.adapter import BuildResult
-from nwb_linkml.maps import QUANTITY_MAP
+from nwb_linkml.maps import QUANTITY_MAP, Map


-class DatasetAdapter(ClassAdapter):
-    cls: Dataset
-
-    _handlers: List[str] = PrivateAttr(default_factory=list)
-    """Keep track of which handlers have been called"""
-
-    def build(self) -> BuildResult:
-        res = self.build_base()
-
-        res = self.drop_dynamic_table(res)
-        res = self.handle_arraylike(res, self.cls, self._get_full_name())
-        res = self.handle_1d_vector(res)
-        res = self.handle_listlike(res)
-        res = self.handle_scalar(res)
-
-        if len(self._handlers) > 1:
-            raise RuntimeError(f"Only one handler should have been triggered, instead triggered {self._handlers}")
-
-        return res
-
-    def handle_scalar(self, res:BuildResult) -> BuildResult:
-
-        # Simplify datasets that are just a single value
-        if self.cls.neurodata_type_inc != 'VectorData' and \
-                not self.cls.neurodata_type_inc and \
-                not self.cls.attributes and \
-                not self.cls.dims and \
-                not self.cls.shape and \
-                self.cls.name:
-            self._handlers.append('scalar')
-
-            # throw out the class that would have been made for us
-            # we just need a slot
-            this_slot = SlotDefinition(
-                name=self.cls.name,
-                description=self.cls.doc,
-                range=self.handle_dtype(self.cls.dtype),
-                **QUANTITY_MAP[self.cls.quantity]
-            )
-            res = BuildResult(slots=[this_slot])
-
-        # if the scalar-valued class has attributes, append a
-        # 'value' slot that holds the (scalar) value of the dataset
-        elif self.cls.neurodata_type_inc != 'VectorData' and \
-                not self.cls.neurodata_type_inc and \
-                self.cls.attributes and \
-                not self.cls.dims and \
-                not self.cls.shape and \
-                self.cls.name:
-            self._handlers.append('scalar_class')
-
-            # quantity (including requirement) is handled by the
-            # parent slot - the value is required if the value class is
-            # supplied.
-            # ie.
-            # Optional[ScalarClass] = None
-            # class ScalarClass:
-            #     value: dtype
-            value_slot = SlotDefinition(
-                name='value',
-                range=self.handle_dtype(self.cls.dtype),
-                required=True
-            )
-            res.classes[0].attributes['value'] = value_slot
-
-        return res
-
-    def handle_1d_vector(self, res: BuildResult) -> BuildResult:
-        # handle the special case where `VectorData` is subclasssed without any dims or attributes
-        # which just gets instantiated as a 1-d array in HDF5
-        if self.cls.neurodata_type_inc == 'VectorData' and \
-                not self.cls.dims and \
-                not self.cls.shape and \
-                not self.cls.attributes \
-                and self.cls.name:
-            self._handlers.append('1d_vector')
-
-            this_slot = SlotDefinition(
-                name=self.cls.name,
-                description=self.cls.doc,
-                range=self.handle_dtype(self.cls.dtype),
-                multivalued=True
-            )
-            # No need to make a class for us, so we replace the existing build results
-            res = BuildResult(slots=[this_slot])
-
-        return res
-
-    def handle_listlike(self, res:BuildResult) -> BuildResult:
-        """
-        Handle cases where the dataset is just a list of a specific type.
-
-        Examples:
-
-            datasets:
-            - name: file_create_date
-              dtype: isodatetime
-              dims:
-              - num_modifications
-              shape:
-              - null
-        """
-        if self.cls.name and len(self.cls.attributes) == 0 and ((
-                # single-layer list
-                not any([isinstance(dim, list) for dim in self.cls.dims]) and
-                len(self.cls.dims) == 1
-            ) or (
-                # nested list
-                all([isinstance(dim, list) for dim in self.cls.dims]) and
-                len(self.cls.dims) == 1 and
-                len(self.cls.dims[0]) == 1
-            )):
-            res = BuildResult(
-                slots=[
-                    SlotDefinition(
-                        name = self.cls.name,
-                        multivalued=True,
-                        range=self.handle_dtype(self.cls.dtype),
-                        description=self.cls.doc,
-                        required=False if self.cls.quantity in ('*', '?') else True
-                    )
-                ]
-            )
-            return res
-        else:
-            return res
-
-    def handle_arraylike(self, res: BuildResult, dataset: Dataset, name: Optional[str] = None) -> BuildResult:
-        """
-        Handling the
-
-        - dims
-        - shape
-        - dtype
-
-        fields as they are used in datasets. We'll use the :class:`.Arraylike` class to imitate them.
-
-        Specifically:
+class DatasetMap(Map):
+
+    @classmethod
+    @abstractmethod
+    def check(c, cls:Dataset) -> bool:
+        pass
+
+    @classmethod
+    @abstractmethod
+    def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult:
+        pass
+
+
+class MapScalar(DatasetMap):
+    """
+    Datasets that are just a single value should just be a scalar value, not an array with size 1
+
+    Replace the built class with
+    """
+    @classmethod
+    def check(c, cls:Dataset) -> bool:
+        if cls.neurodata_type_inc != 'VectorData' and \
+                not cls.neurodata_type_inc and \
+                not cls.attributes and \
+                not cls.dims and \
+                not cls.shape and \
+                cls.name:
+            return True
+        else:
+            return False
+
+    @classmethod
+    def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult:
+        this_slot = SlotDefinition(
+            name=cls.name,
+            description=cls.doc,
+            range=ClassAdapter.handle_dtype(cls.dtype),
+            **QUANTITY_MAP[cls.quantity]
+        )
+        res = BuildResult(slots=[this_slot])
+        return res
+
+
+class MapScalarAttributes(DatasetMap):
+    """
+    A scalar with attributes gets an additional slot "value" that contains the actual scalar
+    value of this field
+    """
+    @classmethod
+    def check(c, cls:Dataset) -> bool:
+        if cls.neurodata_type_inc != 'VectorData' and \
+                not cls.neurodata_type_inc and \
+                cls.attributes and \
+                not cls.dims and \
+                not cls.shape and \
+                cls.name:
+            return True
+        else:
+            return False
+
+    @classmethod
+    def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult:
+        value_slot = SlotDefinition(
+            name='value',
+            range=ClassAdapter.handle_dtype(cls.dtype),
+            required=True
+        )
+        res.classes[0].attributes['value'] = value_slot
+        return res
+
+
+class MapListlike(DatasetMap):
+    """
+    Datasets that refer to other datasets (that handle their own arrays)
+    """
+    @classmethod
+    def check(c, cls:Dataset) -> bool:
+        dtype = ClassAdapter.handle_dtype(cls.dtype)
+        if is_1d(cls) and dtype != 'AnyType' and dtype not in flat_to_linkml.keys():
+            return True
+        else:
+            return False
+
+    @classmethod
+    def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult:
+        dtype = camel_to_snake(ClassAdapter.handle_dtype(cls.dtype))
+        slot = SlotDefinition(
+            name=dtype,
+            multivalued=True,
+            range=ClassAdapter.handle_dtype(cls.dtype),
+            description=cls.doc,
+            required=False if cls.quantity in ('*', '?') else True
+        )
+        res.classes[0].attributes[dtype] = slot
+        return res
+
+
+class MapArraylike(DatasetMap):
+    """
+    Datasets without any additional attributes don't create their own subclass,
+    they're just an array :).
+
+    Replace the base class with the array class, and make a slot that refers to it.
+    """
+    @classmethod
+    def check(c, cls:Dataset) -> bool:
+        if cls.name and all([cls.dims, cls.shape]) and not has_attrs(cls):
+            return True
+        else:
+            return False
+
+    @classmethod
+    def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult:
+        array_class = make_arraylike(cls, name)
+        name = camel_to_snake(cls.name)
+        res = BuildResult(
+            slots=[
+                SlotDefinition(
+                    name=name,
+                    multivalued=False,
+                    range=array_class.name,
+                    description=cls.doc,
+                    required=False if cls.quantity in ('*', '?') else True
+                )
+            ],
+            classes=[array_class]
+        )
+        return res
+
+
+class MapArrayLikeAttributes(DatasetMap):
+    """
+    The most general case - treat everything that isn't handled by one of the special cases
+    as an array!
+
+    Specifically, we make an ``Arraylike`` class such that:

     - Each slot within a subclass indicates a possible dimension.
     - Only dimensions that are present in all the dimension specifiers in the
@@ -162,32 +155,39 @@ class DatasetAdapter(ClassAdapter):
     - Shape requirements are indicated using max/min cardinalities on the slot.
     - The arraylike object should be stored in the `array` slot on the containing class
       (since there are already properties named `data`)
-
-        If any of `dims`, `shape`, or `dtype` are undefined, return `None`
-
-        Args:
-            dataset (:class:`nwb_schema_language.Dataset`): The dataset defining the arraylike
-            name (str): If present, override the name of the class before appending _Array
-                (we don't use _get_full_name here because we want to eventually decouple these functions from this adapter
-                class, which is sort of a development crutch. Ideally all these methods would just work on base nwb schema language types)
     """
-        if not any((dataset.dims, dataset.shape)):
-            # none of the required properties are defined, that's fine.
-            return res
-        elif not all((dataset.dims, dataset.shape)):
-            # need to have both if one is present!
-            warnings.warn(f"A dataset needs both dims and shape to define an arraylike object. This is allowed for compatibility with some badly formatted NWB files, but should in general be avoided. Treating like we dont have an array")
-            return res
-
-        # Special cases
-        if dataset.neurodata_type_inc == 'VectorData':
-            # Handle this in `handle_vectorlike` instead
+    NEEDS_NAME = True
+
+    @classmethod
+    def check(c, cls:Dataset) -> bool:
+        dtype = ClassAdapter.handle_dtype(cls.dtype)
+        if all([cls.dims, cls.shape]) and \
+                cls.neurodata_type_inc != 'VectorData' and \
+                has_attrs(cls) and \
+                (dtype is 'AnyType' or dtype in flat_to_linkml):
+            return True
+        else:
+            return False
+
+    @classmethod
+    def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult:
+        array_class = make_arraylike(cls, name)
+        # make a slot for the arraylike class
+        array_slot = SlotDefinition(
+            name='array',
+            range=array_class.name
+        )
+
+        res.classes.append(array_class)
+        res.classes[0].attributes.update({'array': array_slot})
         return res
+
+
+def make_arraylike(cls:Dataset, name:Optional[str] = None) -> ClassDefinition:
     # The schema language doesn't have a way of specifying a dataset/group is "abstract"
     # and yet hdmf-common says you don't need a dtype if the dataset is "abstract"
     # so....
-        dtype = self.handle_dtype(dataset.dtype)
+    dtype = ClassAdapter.handle_dtype(cls.dtype)

     # dims and shape are lists of lists. First we couple them
     # (so each dim has its corresponding shape)..
@@ -195,7 +195,7 @@ class DatasetAdapter(ClassAdapter):
     # (dicts are ordered by default in recent pythons,
     # while set() doesn't preserve order)
     dims_shape = []
-    for inner_dim, inner_shape in zip(dataset.dims, dataset.shape):
+    for inner_dim, inner_shape in zip(cls.dims, cls.shape):
         if isinstance(inner_dim, list):
             # list of lists
             dims_shape.extend([(dim, shape) for dim, shape in zip(inner_dim, inner_shape)])
@@ -208,20 +208,6 @@ class DatasetAdapter(ClassAdapter):

     dims_shape = tuple(dict.fromkeys(dims_shape).keys())

-    # if we only have one possible dimension, it's equivalent to a list, so we just return the slot
-    # if len(dims_shape) == 1 and self.parent:
-    #     quantity = QUANTITY_MAP[dataset.quantity]
-    #     slot = SlotDefinition(
-    #         name=dataset.name,
-    #         range=dtype,
-    #         description=dataset.doc,
-    #         required=quantity['required'],
-    #         multivalued=True
-    #     )
-    #     res.classes[0].attributes.update({dataset.name: slot})
-    #     self._handlers.append('arraylike-1d')
-    #     return res
-
     # --------------------------------------------------
     # SPECIAL CASE - allen institute's ndx-aibs-ecephys.extension
     # confuses "dims" with "shape" , eg shape = [None], dims = [3].
@@ -230,15 +216,14 @@ class DatasetAdapter(ClassAdapter):
     if len(dims_shape) == 1 and isinstance(dims_shape[0][0], int) and dims_shape[0][1] is None:
         dims_shape = (('dim', dims_shape[0][0]),)

     # now make slots for each of them
     slots = []
     for dims, shape in dims_shape:
         # if there is just a single list of possible dimensions, it's required
-        if not any([isinstance(inner_dim, list) for inner_dim in dataset.dims]):
+        if not any([isinstance(inner_dim, list) for inner_dim in cls.dims]):
             required = True
         # if a dim is present in all possible combinations of dims, make it required
-        elif all([dims in inner_dim for inner_dim in dataset.dims]):
+        elif all([dims in inner_dim for inner_dim in cls.dims]):
             required = True
         else:
             required = False
@@ -260,10 +245,10 @@ class DatasetAdapter(ClassAdapter):
     # and then the class is just a subclass of `Arraylist` (which is imported by default from `nwb.language.yaml`)
     if name:
         pass
-    elif dataset.neurodata_type_def:
-        name = dataset.neurodata_type_def
-    elif dataset.name:
-        name = dataset.name
+    elif cls.neurodata_type_def:
+        name = cls.neurodata_type_def
+    elif cls.name:
+        name = cls.name
     else:
         raise ValueError(f"Dataset has no name or type definition, what do call it?")
@@ -274,58 +259,108 @@ class DatasetAdapter(ClassAdapter):
         is_a="Arraylike",
         attributes=slots
     )
-        # make a slot for the arraylike class
-        array_slot = SlotDefinition(
-            name='array',
-            range=array_class.name
-        )
-
-        res.classes.append(array_class)
-        res.classes[0].attributes.update({'array': array_slot})
-        #res.slots.append(array_slot)
-        self._handlers.append('arraylike')
-
-        return res
-
-    def drop_dynamic_table(self, res:BuildResult) -> BuildResult:
+    return array_class
+
+
+def is_1d(cls:Dataset) -> bool:
+    if (
+        not any([isinstance(dim, list) for dim in cls.dims]) and
+        len(cls.dims) == 1
+    ) or ( # nested list
+        all([isinstance(dim, list) for dim in cls.dims]) and
+        len(cls.dims) == 1 and
+        len(cls.dims[0]) == 1
+    ):
+        return True
+    else:
+        return False
+
+def has_attrs(cls:Dataset) -> bool:
+    if len(cls.attributes) > 0 and \
+            all([not a.value for a in cls.attributes]):
+        return True
+    else:
+        return False
+
+# --------------------------------------------------
+# DynamicTable special cases
+# --------------------------------------------------
+
+class Map1DVector(DatasetMap):
     """
-        DynamicTables in hdmf are so special-cased that we have to just special-case them ourselves.
-
-        Typically they include a '*' quantitied, unnamed VectorData object to contain arbitrary columns,
-        this would normally get converted to its own container class, but since they're unnamed they conflict with
-        names in the containing scope.
-
-        We just convert them into multivalued slots and don't use them
+    ``VectorData`` is subclassed with a name but without dims or attributes, treat this as a normal 1D array
+    slot that replaces any class that would be built for this
     """
-        if self.cls.name is None and \
-            self.cls.neurodata_type_def is None and \
-            self.cls.neurodata_type_inc in ('VectorIndex', 'VectorData') and \
-            self.cls.quantity == '*':
-            self._handlers.append('dynamic_table')
+    @classmethod
+    def check(c, cls:Dataset) -> bool:
+        if cls.neurodata_type_inc == 'VectorData' and \
+                not cls.dims and \
+                not cls.shape and \
+                not cls.attributes \
+                and cls.name:
+            return True
+        else:
+            return False

+    @classmethod
+    def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult:
         this_slot = SlotDefinition(
-                name=camel_to_snake(self.cls.neurodata_type_inc),
-                description=self.cls.doc,
-                range=self.cls.neurodata_type_inc,
-                required=False,
+            name=cls.name,
+            description=cls.doc,
+            range=ClassAdapter.handle_dtype(cls.dtype),
             multivalued=True
         )
         # No need to make a class for us, so we replace the existing build results
         res = BuildResult(slots=[this_slot])
         return res
-        elif self.cls.name is None and \
-            self.cls.neurodata_type_def is None and \
-            self.cls.neurodata_type_inc and \
-            self.cls.quantity in ('*', '+'):
-            self._handlers.append('generic_container')

+class MapNVectors(DatasetMap):
+    """
+    An unnamed container that indicates an arbitrary quantity of some other neurodata type.
+
+    Most commonly: ``VectorData`` is subclassed without a name and with a '*' quantity to indicate
+    arbitrary columns.
+    """
+    @classmethod
+    def check(c, cls:Dataset) -> bool:
+        if cls.name is None and \
+                cls.neurodata_type_def is None and \
+                cls.neurodata_type_inc and \
+                cls.quantity in ('*', '+'):
+            #cls.neurodata_type_inc in ('VectorIndex', 'VectorData') and \
+            return True
+        else:
+            return False
+
+    @classmethod
+    def apply(c, res: BuildResult, cls:Dataset, name:Optional[str] = None) -> BuildResult:
         this_slot = SlotDefinition(
-                name=camel_to_snake(self.cls.neurodata_type_inc),
-                description=self.cls.doc,
-                range=self.cls.neurodata_type_inc,
-                **QUANTITY_MAP[self.cls.quantity]
+            name=camel_to_snake(cls.neurodata_type_inc),
+            description=cls.doc,
+            range=cls.neurodata_type_inc,
+            **QUANTITY_MAP[cls.quantity]
         )
         # No need to make a class for us, so we replace the existing build results
         res = BuildResult(slots=[this_slot])
         return res
-        else:
+
+
+class DatasetAdapter(ClassAdapter):
+    cls: Dataset
+
+    def build(self) -> BuildResult:
+        res = self.build_base()
+
+        # find a map to use
+        matches = [m for m in DatasetMap.__subclasses__() if m.check(self.cls)]
+
+        if len(matches) > 1:
+            raise RuntimeError(f"Only one map should apply to a dataset, you need to refactor the maps! Got maps: {matches}")
+
+        # apply matching maps
+        for m in matches:
+            res = m.apply(res, self.cls, self._get_full_name())

         return res
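The heart of the refactor is the new DatasetAdapter.build at the end of this file: each DatasetMap subclass declares when it applies (check) and what it does (apply), and the adapter dispatches over DatasetMap.__subclasses__(), refusing ambiguous matches. A self-contained sketch of that dispatch shape, with toy Rule classes standing in for the real maps and a plain dict standing in for BuildResult:

from abc import ABC, abstractmethod

class Rule(ABC):
    """Toy stand-in for DatasetMap: each subclass says when it applies and what it does."""
    @classmethod
    @abstractmethod
    def check(cls, item: dict) -> bool: ...

    @classmethod
    @abstractmethod
    def apply(cls, result: dict, item: dict) -> dict: ...

class ScalarRule(Rule):
    @classmethod
    def check(cls, item: dict) -> bool:
        return not item.get('dims')

    @classmethod
    def apply(cls, result: dict, item: dict) -> dict:
        return {**result, 'kind': 'scalar'}

class ArrayRule(Rule):
    @classmethod
    def check(cls, item: dict) -> bool:
        return bool(item.get('dims'))

    @classmethod
    def apply(cls, result: dict, item: dict) -> dict:
        return {**result, 'kind': 'array'}

def build(item: dict) -> dict:
    result = {'name': item['name']}
    # same dispatch shape as DatasetAdapter.build: collect the rules that claim the item...
    matches = [r for r in Rule.__subclasses__() if r.check(item)]
    # ...treat more than one match as a bug in the rule set...
    if len(matches) > 1:
        raise RuntimeError(f"Only one rule should apply, got {matches}")
    # ...and let the single match rewrite the build result
    for rule in matches:
        result = rule.apply(result, item)
    return result

print(build({'name': 'file_create_date', 'dims': ['num_modifications']}))
# -> {'name': 'file_create_date', 'kind': 'array'}

Note the design constraint this implies, enforced by the RuntimeError above: the check conditions across maps must be mutually exclusive, since overlap is treated as a bug rather than resolved by priority.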
@@ -35,7 +35,6 @@ from tqdm import tqdm
 import numpy as np

 from nwb_linkml.maps.hdf5 import H5SourceItem, flatten_hdf, ReadPhases, ReadQueue
-from nwb_linkml.translate import generate_from_nwbfile
 #from nwb_linkml.models.core_nwb_file import NWBFile
 if TYPE_CHECKING:
     from nwb_linkml.models import NWBFile
@@ -50,12 +49,6 @@ class HDF5IO():
         self.path = Path(path)
         self._modules: Dict[str, ModuleType] = {}

-    @property
-    def modules(self) -> Dict[str, ModuleType]:
-        if len(self._modules) == 0:
-            self._modules = generate_from_nwbfile(self.path)
-        return self._modules
-
     @overload
     def read(self, path:None) -> 'NWBFile': ...
@@ -10,7 +10,7 @@ import yaml

 from nwb_schema_language import Namespaces, Group, Dataset
 from nwb_linkml.providers.git import NamespaceRepo, NWB_CORE_REPO, HDMF_COMMON_REPO
-from nwb_linkml.map import PHASES, Map
+from nwb_linkml.maps.postload import PHASES, KeyMap, apply_postload
 from nwb_linkml.adapters.namespaces import NamespacesAdapter
 from nwb_linkml.adapters.schema import SchemaAdapter

@@ -18,11 +18,7 @@ from nwb_linkml.adapters.schema import SchemaAdapter
 def load_yaml(path:Path) -> dict:
     with open(path, 'r') as file:
         ns_dict = yaml.safe_load(file)
-    # apply maps
-    maps = [m for m in Map.instances if m.phase == PHASES.postload]
-    for amap in maps:
-        ns_dict = amap.apply(ns_dict)
+    ns_dict = apply_postload(ns_dict)
     return ns_dict

 def _load_namespaces(path:Path|NamespaceRepo) -> Namespaces:

@@ -38,10 +34,7 @@ def _load_namespaces(path:Path|NamespaceRepo) -> Namespaces:
 def load_schema_file(path:Path, yaml:Optional[dict] = None) -> SchemaAdapter:
     if yaml is not None:
         source = yaml
-        # apply maps
-        maps = [m for m in Map.instances if m.phase == PHASES.postload]
-        for amap in maps:
-            source = amap.apply(source)
+        source = apply_postload(source)
     else:
         source = load_yaml(path)
@@ -1,4 +1,5 @@
 # Import everything so it's defined, but shoudlnt' necessarily be used from here
-from nwb_linkml.maps.preload import MAP_HDMF_DATATYPE_DEF, MAP_HDMF_DATATYPE_INC
+from nwb_linkml.maps.map import Map
+from nwb_linkml.maps.postload import MAP_HDMF_DATATYPE_DEF, MAP_HDMF_DATATYPE_INC
 from nwb_linkml.maps.quantity import QUANTITY_MAP
 from nwb_linkml.maps.dtype import flat_to_linkml, flat_to_npytyping
@@ -6,7 +6,7 @@ so we will make our own mapping class here and re-evaluate whether they should b
 """
 import datetime
 import pdb
-from abc import ABC, abstractmethod
+from abc import abstractmethod
 from pathlib import Path
 from typing import Literal, List, Dict, Optional, Type, Union, Tuple

@@ -16,6 +16,7 @@ from enum import StrEnum
 from pydantic import BaseModel, Field, ConfigDict

 from nwb_linkml.providers.schema import SchemaProvider
+from nwb_linkml.maps import Map
 from nwb_linkml.maps.hdmf import dynamictable_to_model
 from nwb_linkml.types.hdf5 import HDF5_Path
 from nwb_linkml.types.ndarray import NDArrayProxy

@@ -115,7 +116,7 @@ class H5ReadResult(BaseModel):
 FlatH5 = Dict[str, H5SourceItem]


-class HDF5Map(ABC):
+class HDF5Map(Map):
     phase: ReadPhases
     priority: int = 0
     """
nwb_linkml/src/nwb_linkml/maps/map.py (new file, 21 lines)

@@ -0,0 +1,21 @@
+from typing import Any
+from abc import ABC, abstractmethod
+
+
+class Map(ABC):
+    """
+    The generic top-level mapping class is just a classmethod for checking if the map applies and a
+    method for applying the check if it does
+    """
+
+    @classmethod
+    @abstractmethod
+    def check(cls, *args, **kwargs) -> bool:
+        """Check if this map applies to the given item to read"""
+
+    @classmethod
+    @abstractmethod
+    def apply(cls, *args, **kwargs) -> Any:
+        """Actually apply the map!"""
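Both DatasetMap and HDF5Map now hang off this one contract, and because check and apply are abstract classmethods, concrete maps stay stateless: everything a map needs arrives through arguments. A toy subclass just to illustrate the contract (invented example, not part of the codebase):

from typing import Any
from abc import ABC, abstractmethod

class Map(ABC):
    @classmethod
    @abstractmethod
    def check(cls, *args, **kwargs) -> bool:
        """Check if this map applies to the given item to read"""

    @classmethod
    @abstractmethod
    def apply(cls, *args, **kwargs) -> Any:
        """Actually apply the map!"""

class StripWhitespace(Map):
    """Hypothetical map: applies to strings, trims them."""
    @classmethod
    def check(cls, value) -> bool:
        return isinstance(value, str)

    @classmethod
    def apply(cls, value) -> str:
        return value.strip()

assert StripWhitespace.check('  hi  ') and StripWhitespace.apply('  hi  ') == 'hi'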
@@ -1,24 +1,26 @@
+"""
+Maps to change the loaded .yaml from nwb schema before it's given to the nwb_schema_language models
+"""
 from dataclasses import dataclass
-from typing import ClassVar, List, Optional
 from enum import StrEnum
-import ast
+from typing import Optional, ClassVar, List
 import re
+import ast
+
+from nwb_linkml.maps import Map

-class MAP_TYPES(StrEnum):
-    key = 'key'
-    """Mapping the name of one key to another key"""

 class SCOPE_TYPES(StrEnum):
     namespace = 'namespace'


 class PHASES(StrEnum):
     postload = "postload"
     """After the YAML for a model has been loaded"""


 @dataclass
-class Map:
+class KeyMap():
     scope: str
     """The namespace that the map is relevant to"""
     scope_type: SCOPE_TYPES

@@ -36,24 +38,11 @@ class Map:
     phase: Optional[PHASES] = None

-    instances: ClassVar[List['Map']] = []
+    instances: ClassVar[List['KeyMap']] = []
     """
     Maps that get defined!!!
     """

-    def apply(self):
-        raise NotImplementedError('do this in a subclass')
-
-    def __post_init__(self):
-        self.instances.append(self)
-
-
-# def replace_keys(input: dict, source: str, target: str) -> dict:
-#     """Recursively change keys in a dictionary"""
-
-
-class KeyMap(Map):
     def apply(self, input: dict) -> dict:
         """
         Change all keys from source to target in a super naive way.

@@ -65,9 +54,34 @@ class KeyMap(Map):
         out = ast.literal_eval(input_str)
         return out

-def apply_preload(ns_dict) -> dict:
-    maps = [m for m in Map.instances if m.phase == PHASES.postload]
+    def __post_init__(self):
+        self.instances.append(self)
+
+
+MAP_HDMF_DATATYPE_DEF = KeyMap(
+    source="\'data_type_def\'",
+    target="\'neurodata_type_def\'",
+    scope='hdmf-common',
+    scope_type=SCOPE_TYPES.namespace,
+    phase=PHASES.postload
+)
+
+MAP_HDMF_DATATYPE_INC = KeyMap(
+    source="\'data_type_inc\'",
+    target="\'neurodata_type_inc\'",
+    scope='hdmf-common',
+    scope_type=SCOPE_TYPES.namespace,
+    phase=PHASES.postload
+)
+
+
+class MAP_TYPES(StrEnum):
+    key = 'key'
+    """Mapping the name of one key to another key"""
+
+
+def apply_postload(ns_dict) -> dict:
+    maps = [m for m in KeyMap.instances if m.phase == PHASES.postload]
     for amap in maps:
         ns_dict = amap.apply(ns_dict)
     return ns_dict
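For orientation, KeyMap.apply renames keys by round-tripping the whole dict through its string repr: stringify, substitute the quoted key, then ast.literal_eval the result back into a dict. A small illustration of the same mechanism (hypothetical input; the real maps run against loaded namespace YAML):

import ast

def rename_keys_naively(data: dict, source: str, target: str) -> dict:
    # Same "super naive" mechanism as KeyMap.apply: stringify the dict,
    # substitute the quoted key text, and parse the string back into a dict.
    input_str = str(data)
    input_str = input_str.replace(source, target)
    return ast.literal_eval(input_str)

ns = {'groups': [{'data_type_def': 'DynamicTable'}]}
print(rename_keys_naively(ns, "'data_type_def'", "'neurodata_type_def'"))
# -> {'groups': [{'neurodata_type_def': 'DynamicTable'}]}

Quoting the source/target values ('data_type_def' including the quotes) anchors the substitution to complete string literals in the repr rather than bare substrings.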
@@ -1,22 +0,0 @@
-"""
-Maps to change the loaded .yaml from nwb schema before it's given to the nwb_schema_language models
-"""
-
-from nwb_linkml.map import KeyMap, SCOPE_TYPES, PHASES
-
-MAP_HDMF_DATATYPE_DEF = KeyMap(
-    source="\'data_type_def\'",
-    target="\'neurodata_type_def\'",
-    scope='hdmf-common',
-    scope_type=SCOPE_TYPES.namespace,
-    phase=PHASES.postload
-)
-
-MAP_HDMF_DATATYPE_INC = KeyMap(
-    source="\'data_type_inc\'",
-    target="\'neurodata_type_inc\'",
-    scope='hdmf-common',
-    scope_type=SCOPE_TYPES.namespace,
-    phase=PHASES.postload
-)
@@ -16,7 +16,7 @@ from linkml_runtime.dumpers import yaml_dumper

 from nwb_schema_language import Namespaces
 from nwb_linkml.io.schema import load_schema_file
 from nwb_linkml.generators.pydantic import NWBPydanticGenerator
-from nwb_linkml.map import apply_preload
+from nwb_linkml.maps.postload import apply_preload
 from nwb_linkml.adapters import SchemaAdapter, NamespacesAdapter
 #from nwb_linkml.models import core, hdmf_common
@@ -2,11 +2,11 @@ import pdb

 import pytest
 from pathlib import Path
-from nwb_linkml.translate import generate_from_nwbfile
+#from nwb_linkml.translate import generate_from_nwbfile

-def test_generate_pydantic():
-    # pass until we rig up smaller test data
-    pass
+# def test_generate_pydantic():
+#     # pass until we rig up smaller test data
+#     pass

 #NWBFILE = Path('/Users/jonny/Dropbox/lab/p2p_ld/data/nwb/sub-738651046_ses-760693773.nwb')