mirror of
https://github.com/p2p-ld/nwb-linkml.git
synced 2025-01-10 14:14:27 +00:00
Correct schema for hdf5 path type
don't double MRO for basemodels in generated dynamictable
This commit is contained in:
parent
6bbf56d1a0
commit
eca7a5ec2e
4 changed files with 44 additions and 30 deletions
|
@ -20,7 +20,7 @@ Other TODO:
|
||||||
"""
|
"""
|
||||||
import pdb
|
import pdb
|
||||||
import warnings
|
import warnings
|
||||||
from typing import Optional, Dict, overload, Type
|
from typing import Optional, Dict, overload, Type, Union
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from types import ModuleType
|
from types import ModuleType
|
||||||
from typing import TYPE_CHECKING, NamedTuple
|
from typing import TYPE_CHECKING, NamedTuple
|
||||||
|
@ -58,7 +58,7 @@ class HDF5IO():
|
||||||
@overload
|
@overload
|
||||||
def read(self, path:str) -> BaseModel | Dict[str, BaseModel]: ...
|
def read(self, path:str) -> BaseModel | Dict[str, BaseModel]: ...
|
||||||
|
|
||||||
def read(self, path:Optional[str] = None):
|
def read(self, path:Optional[str] = None) -> Union['NWBFile', BaseModel, Dict[str, BaseModel]]:
|
||||||
print('starting read')
|
print('starting read')
|
||||||
provider = self.make_provider()
|
provider = self.make_provider()
|
||||||
print('provider made')
|
print('provider made')
|
||||||
|
@ -95,6 +95,8 @@ class HDF5IO():
|
||||||
|
|
||||||
|
|
||||||
queue.apply_phase(ReadPhases.construct)
|
queue.apply_phase(ReadPhases.construct)
|
||||||
|
|
||||||
|
pdb.set_trace()
|
||||||
# --------------------------------------------------
|
# --------------------------------------------------
|
||||||
# FIXME: Hardcoding top-level file reading just for the win
|
# FIXME: Hardcoding top-level file reading just for the win
|
||||||
# --------------------------------------------------
|
# --------------------------------------------------
|
||||||
|
|
|
@ -4,7 +4,6 @@ Maps for reading and writing from HDF5
|
||||||
We have sort of diverged from the initial idea of a generalized map as in :class:`linkml.map.Map` ,
|
We have sort of diverged from the initial idea of a generalized map as in :class:`linkml.map.Map` ,
|
||||||
so we will make our own mapping class here and re-evaluate whether they should be unified later
|
so we will make our own mapping class here and re-evaluate whether they should be unified later
|
||||||
"""
|
"""
|
||||||
import pdb
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Literal, List, Dict, Optional, Type, Union
|
from typing import Literal, List, Dict, Optional, Type, Union
|
||||||
|
@ -12,8 +11,7 @@ from typing import Literal, List, Dict, Optional, Type, Union
|
||||||
import h5py
|
import h5py
|
||||||
from enum import StrEnum
|
from enum import StrEnum
|
||||||
|
|
||||||
from pydantic import BaseModel, Field, ConfigDict, ValidationError
|
from pydantic import BaseModel, Field, ConfigDict
|
||||||
import dask.array as da
|
|
||||||
|
|
||||||
from nwb_linkml.providers.schema import SchemaProvider
|
from nwb_linkml.providers.schema import SchemaProvider
|
||||||
from nwb_linkml.maps.hdmf import dynamictable_to_model
|
from nwb_linkml.maps.hdmf import dynamictable_to_model
|
||||||
|
@ -30,7 +28,11 @@ class ReadPhases(StrEnum):
|
||||||
"""After reading, casting the results of the read into their models"""
|
"""After reading, casting the results of the read into their models"""
|
||||||
|
|
||||||
class H5SourceItem(BaseModel):
|
class H5SourceItem(BaseModel):
|
||||||
"""Tuple of items for each element when flattening an hdf5 file"""
|
"""
|
||||||
|
Descriptor of items for each element when :func:`.flatten_hdf` flattens an hdf5 file.
|
||||||
|
|
||||||
|
Consumed by :class:`.HDF5Map` classes, orchestrated by :class:`.ReadQueue`
|
||||||
|
"""
|
||||||
path: str
|
path: str
|
||||||
"""Absolute hdf5 path of element"""
|
"""Absolute hdf5 path of element"""
|
||||||
h5f_path: str
|
h5f_path: str
|
||||||
|
@ -55,7 +57,11 @@ class H5SourceItem(BaseModel):
|
||||||
return self.path.split('/')
|
return self.path.split('/')
|
||||||
|
|
||||||
class H5ReadResult(BaseModel):
|
class H5ReadResult(BaseModel):
|
||||||
"""Result returned by each of our mapping operations"""
|
"""
|
||||||
|
Result returned by each of our mapping operations.
|
||||||
|
|
||||||
|
Also used as the source for operations in the ``construct`` :class:`.ReadPhases`
|
||||||
|
"""
|
||||||
path: str
|
path: str
|
||||||
"""absolute hdf5 path of element"""
|
"""absolute hdf5 path of element"""
|
||||||
source: Union[H5SourceItem, 'H5ReadResult']
|
source: Union[H5SourceItem, 'H5ReadResult']
|
||||||
|
@ -78,9 +84,9 @@ class H5ReadResult(BaseModel):
|
||||||
"""
|
"""
|
||||||
The model that this item should be cast into
|
The model that this item should be cast into
|
||||||
"""
|
"""
|
||||||
completes: List[str] = Field(default_factory=list)
|
completes: List[HDF5_Path] = Field(default_factory=list)
|
||||||
"""
|
"""
|
||||||
If this result completes any other fields, we remove them from the build queue
|
If this result completes any other fields, we remove them from the build queue.
|
||||||
"""
|
"""
|
||||||
namespace: Optional[str] = None
|
namespace: Optional[str] = None
|
||||||
"""
|
"""
|
||||||
|
@ -92,7 +98,7 @@ class H5ReadResult(BaseModel):
|
||||||
"""
|
"""
|
||||||
applied: List[str] = Field(default_factory=list)
|
applied: List[str] = Field(default_factory=list)
|
||||||
"""
|
"""
|
||||||
Which stages were applied to this item
|
Which map operations were applied to this item
|
||||||
"""
|
"""
|
||||||
errors: List[str] = Field(default_factory=list)
|
errors: List[str] = Field(default_factory=list)
|
||||||
"""
|
"""
|
||||||
|
@ -109,19 +115,20 @@ FlatH5 = Dict[str, H5SourceItem]
|
||||||
|
|
||||||
class HDF5Map(ABC):
|
class HDF5Map(ABC):
|
||||||
phase: ReadPhases
|
phase: ReadPhases
|
||||||
"""
|
|
||||||
If ``True``, if the check is fulfilled, no other maps can be applied this phase
|
|
||||||
"""
|
|
||||||
priority: int = 0
|
priority: int = 0
|
||||||
|
"""
|
||||||
|
Within a phase, sort mapping operations from low to high priority
|
||||||
|
(maybe this should be renamed, because applying the highest priority last doesn't make a lot of sense)
|
||||||
|
"""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def check(cls, src: H5SourceItem, provider:SchemaProvider, completed: Dict[str, H5ReadResult]) -> bool:
|
def check(cls, src: H5SourceItem|H5ReadResult, provider:SchemaProvider, completed: Dict[str, H5ReadResult]) -> bool:
|
||||||
"""Check if this map applies to the given item to read"""
|
"""Check if this map applies to the given item to read"""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def apply(cls, src: H5SourceItem, provider:SchemaProvider, completed: Dict[str, H5ReadResult]) -> H5ReadResult:
|
def apply(cls, src: H5SourceItem|H5ReadResult, provider:SchemaProvider, completed: Dict[str, H5ReadResult]) -> H5ReadResult:
|
||||||
"""Actually apply the map!"""
|
"""Actually apply the map!"""
|
||||||
|
|
||||||
|
|
||||||
|
@ -157,11 +164,10 @@ class ResolveDynamicTable(HDF5Map):
|
||||||
and then we include the datasets as :class:`~.nwb_linkml.types.ndarray.NDArrayProxy` objects which
|
and then we include the datasets as :class:`~.nwb_linkml.types.ndarray.NDArrayProxy` objects which
|
||||||
lazy load the arrays in a thread/process safe way.
|
lazy load the arrays in a thread/process safe way.
|
||||||
|
|
||||||
This map also resolves
|
This map also resolves the child elements, indicating so by the ``completes`` field in the :class:`.H5ReadResult`
|
||||||
"""
|
"""
|
||||||
phase = ReadPhases.read
|
phase = ReadPhases.read
|
||||||
priority = 1
|
priority = 1
|
||||||
exclusive = True
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def check(cls, src: H5SourceItem, provider:SchemaProvider, completed: Dict[str, H5ReadResult]) -> bool:
|
def check(cls, src: H5SourceItem, provider:SchemaProvider, completed: Dict[str, H5ReadResult]) -> bool:
|
||||||
if src.h5_type == 'dataset':
|
if src.h5_type == 'dataset':
|
||||||
|
@ -191,7 +197,7 @@ class ResolveDynamicTable(HDF5Map):
|
||||||
base_model = provider.get_class(src.namespace, src.neurodata_type)
|
base_model = provider.get_class(src.namespace, src.neurodata_type)
|
||||||
model = dynamictable_to_model(obj, base=base_model)
|
model = dynamictable_to_model(obj, base=base_model)
|
||||||
|
|
||||||
completes = ['/'.join([src.path, child]) for child in obj.keys()]
|
completes = [HDF5_Path(child.name) for child in obj.values()]
|
||||||
|
|
||||||
return H5ReadResult(
|
return H5ReadResult(
|
||||||
path=src.path,
|
path=src.path,
|
||||||
|
|
|
@ -69,12 +69,12 @@ def model_from_dynamictable(group:h5py.Group, base:Optional[BaseModel] = None) -
|
||||||
#types[col] = (List[type_ | None], ...)
|
#types[col] = (List[type_ | None], ...)
|
||||||
types[col] = (type_, None)
|
types[col] = (type_, None)
|
||||||
|
|
||||||
if base is None:
|
# if base is None:
|
||||||
#base = DataFrame
|
# #base = DataFrame
|
||||||
base = BaseModel
|
# base = BaseModel
|
||||||
else:
|
# else:
|
||||||
base = (BaseModel, base)
|
# base = (BaseModel, base)
|
||||||
#base = (DataFrame, base)
|
# #base = (DataFrame, base)
|
||||||
|
|
||||||
|
|
||||||
model = create_model(group.name.split('/')[-1], **types, __base__=base)
|
model = create_model(group.name.split('/')[-1], **types, __base__=base)
|
||||||
|
@ -83,12 +83,12 @@ def model_from_dynamictable(group:h5py.Group, base:Optional[BaseModel] = None) -
|
||||||
|
|
||||||
def dynamictable_to_model(
|
def dynamictable_to_model(
|
||||||
group:h5py.Group,
|
group:h5py.Group,
|
||||||
model:Optional[Type[DataFrame]]=None,
|
model:Optional[Type[BaseModel]]=None,
|
||||||
base:Optional[BaseModel] = None) -> BaseModel:
|
base:Optional[Type[BaseModel]] = None) -> BaseModel:
|
||||||
"""
|
"""
|
||||||
Instantiate a dynamictable model
|
Instantiate a dynamictable model
|
||||||
|
|
||||||
Calls :func:`.model_from_dynamictable` if model is not provided.
|
Calls :func:`.model_from_dynamictable` if ``model`` is not provided.
|
||||||
"""
|
"""
|
||||||
if model is None:
|
if model is None:
|
||||||
model = model_from_dynamictable(group, base)
|
model = model_from_dynamictable(group, base)
|
||||||
|
|
|
@ -1,5 +1,11 @@
|
||||||
from typing import Annotated
|
from typing import Any
|
||||||
|
from pydantic_core import CoreSchema, core_schema
|
||||||
|
from pydantic import GetCoreSchemaHandler
|
||||||
|
|
||||||
class HDF5_Path(str):
|
class HDF5_Path(str):
|
||||||
"""Trivial subclass of string to indicate that it is a reference to a location within an HDF5 file"""
|
"""Trivial subclass of string to indicate that it is a reference to a location within an HDF5 file"""
|
||||||
pass
|
@classmethod
|
||||||
|
def __get_pydantic_core_schema__(
|
||||||
|
cls, source_type: Any, handler: GetCoreSchemaHandler
|
||||||
|
) -> CoreSchema:
|
||||||
|
return core_schema.no_info_after_validator_function(cls, handler(str))
|
Loading…
Reference in a new issue