Mirror of https://github.com/p2p-ld/nwb-linkml.git, synced 2024-11-15 03:04:30 +00:00
a bit of tidying

commit 9560b9f839 (parent 77a852913c)
6 changed files with 35 additions and 197 deletions
@@ -4,10 +4,23 @@ Base class for adapters
 import os
 import sys
-from abc import abstractmethod
+from abc import abstractmethod, ABC
 from dataclasses import dataclass, field
 from logging import Logger
-from typing import Any, Generator, List, Literal, Optional, Tuple, Type, TypeVar, Union, overload
+from typing import (
+    Any,
+    Generator,
+    List,
+    Literal,
+    Optional,
+    Tuple,
+    Type,
+    TypeVar,
+    Union,
+    overload,
+    Sequence,
+    Mapping,
+)
 
 from linkml_runtime.dumpers import yaml_dumper
 from linkml_runtime.linkml_model import (
@@ -273,6 +286,23 @@ class Adapter(BaseModel):
             yield item
 
 
+class Map(ABC):
+    """
+    The generic top-level mapping class is just a classmethod for checking if the map applies and a
+    method for applying the check if it does
+    """
+
+    @classmethod
+    @abstractmethod
+    def check(cls, *args: Sequence, **kwargs: Mapping) -> bool:
+        """Check if this map applies to the given item to read"""
+
+    @classmethod
+    @abstractmethod
+    def apply(cls, *args: Sequence, **kwargs: Mapping) -> Any:
+        """Actually apply the map!"""
+
+
 def is_1d(cls: Dataset | Attribute) -> bool:
     """
     Check if the values of a dataset are 1-dimensional.
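For orientation (not part of the commit): a concrete subclass of the relocated Map ABC implements both classmethods. A minimal sketch, with a hypothetical subclass name and key names:

    class RenameKeyMap(Map):
        """Hypothetical map that renames one dict key (illustration only)."""

        @classmethod
        def check(cls, item: dict) -> bool:
            # the map applies only when the old key is present
            return "old_name" in item

        @classmethod
        def apply(cls, item: dict) -> dict:
            # rename the key and hand the item back
            item["new_name"] = item.pop("old_name")
            return item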
@@ -7,9 +7,8 @@ from typing import ClassVar, Optional, Type, TypedDict
 
 from linkml_runtime.linkml_model.meta import SlotDefinition
 
-from nwb_linkml.adapters.adapter import Adapter, BuildResult, defaults, is_1d
+from nwb_linkml.adapters.adapter import Adapter, BuildResult, defaults, is_1d, Map
 from nwb_linkml.adapters.array import ArrayAdapter
-from nwb_linkml.maps import Map
 from nwb_linkml.maps.dtype import handle_dtype, inlined
 from nwb_schema_language import Attribute
 
@@ -7,10 +7,10 @@ from typing import ClassVar, Optional, Type
 
 from linkml_runtime.linkml_model.meta import ArrayExpression, SlotDefinition
 
-from nwb_linkml.adapters.adapter import BuildResult, defaults, has_attrs, is_1d, is_compound
+from nwb_linkml.adapters.adapter import BuildResult, defaults, has_attrs, is_1d, is_compound, Map
 from nwb_linkml.adapters.array import ArrayAdapter
 from nwb_linkml.adapters.classes import ClassAdapter
-from nwb_linkml.maps import QUANTITY_MAP, Map
+from nwb_linkml.maps import QUANTITY_MAP
 from nwb_linkml.maps.dtype import flat_to_linkml, handle_dtype, inlined
 from nwb_linkml.maps.naming import camel_to_snake
 from nwb_schema_language import Dataset
@@ -3,7 +3,6 @@ Mapping from one domain to another
 """
 
 from nwb_linkml.maps.dtype import flat_to_linkml, flat_to_np, linkml_reprs
-from nwb_linkml.maps.map import Map
 from nwb_linkml.maps.postload import MAP_HDMF_DATATYPE_DEF, MAP_HDMF_DATATYPE_INC
 from nwb_linkml.maps.quantity import QUANTITY_MAP
 
@@ -11,7 +10,6 @@ __all__ = [
     "MAP_HDMF_DATATYPE_DEF",
     "MAP_HDMF_DATATYPE_INC",
     "QUANTITY_MAP",
-    "Map",
     "flat_to_linkml",
     "flat_to_np",
     "linkml_reprs",
@@ -104,19 +104,6 @@ MAP_HDMF_DATATYPE_INC = KeyMap(
 )
 
 
-class MAP_TYPES(StrEnum):
-    """
-    Types of mapping that can exist
-
-    .. todo::
-
-        This is likely deprecated, check usage.
-    """
-
-    key = "key"
-    """Mapping the name of one key to another key"""
-
-
 def apply_postload(ns_dict: dict) -> dict:
     """Apply all post-load maps to a YAML schema"""
     maps = [m for m in KeyMap.instances if m.phase == PHASES.postload]
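For context (not part of the commit): apply_postload iterates every KeyMap registered in KeyMap.instances whose phase is PHASES.postload and applies it to a freshly loaded schema dict. A minimal, hypothetical invocation (the dict contents here are invented for illustration):

    # ns_dict is a namespace schema as loaded from YAML
    ns_dict = {"groups": [{"neurodata_type_def": "MyContainer"}]}

    # every registered postload KeyMap gets a chance to rewrite matching keys
    ns_dict = apply_postload(ns_dict)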
@@ -1,176 +0,0 @@
-"""
-Pydantic models that behave like pandas dataframes
-
-.. note::
-
-    This is currently unused but kept in place as a stub in case it is worth
-    revisiting in the future.
-    It turned out to be too momentarily difficult to make lazy-loading work with
-    dask arrays per column
-    while still keeping pandas-like API intact. In the future we should investigate modifying the
-    :func:`dask.dataframe.read_hdf` function to treat individual hdf5 datasets like columns
-
-    pandas has been removed from dependencies for now, as it not used elsewhere, but it is
-    left in this module since it is necessary for it to make sense.
-"""
-
-#
-# class DataFrame(BaseModel, pd.DataFrame):
-#     """
-#     Pydantic model root class that mimics a pandas dataframe.
-#
-#     Notes:
-#
-#         The synchronization between the underlying lists in the pydantic model
-#         and the derived dataframe is partial, and at the moment unidirectional.
-#         This class is primarily intended for reading from tables stored in
-#         NWB files rather than being able to manipulate them.
-#
-#         The dataframe IS updated when new values are *assigned* to a field.
-#
-#         eg.::
-#
-#             MyModel.fieldval = [1,2,3]
-#
-#         But the dataframe is NOT updated when existing values are updated.
-#
-#         eg.::
-#
-#             MyModel.fieldval.append(4)
-#
-#         In that case you need to call :meth:`.update_df` manually.
-#
-#         Additionally, if the dataframe is modified, the underlying lists are NOT updated,
-#         but when the model is dumped to a dictionary or serialized, the dataframe IS used,
-#         so changes will be reflected then.
-#
-#         Fields that shadow pandas methods WILL prevent them from being usable, except
-#         by directly accessing the dataframe like ``mymodel._df``
-#
-#     """
-#
-#     _df: pd.DataFrame = None
-#     model_config = ConfigDict(validate_assignment=True)
-#
-#     def __init__(self, **kwargs):
-#         # pdb.set_trace()
-#         super().__init__(**kwargs)
-#
-#         self._df = self.__make_df()
-#
-#     def __make_df(self) -> pd.DataFrame:
-#         # make dict that can handle ragged arrays and NoneTypes
-#         items = {k: v for k, v in self.__dict__.items() if k in self.model_fields}
-#
-#         df_dict = {
-#             k: (pd.Series(v) if isinstance(v, list) else pd.Series([v])) for k, v in items.items()
-#         }
-#         df = pd.DataFrame(df_dict)
-#         # replace Nans with None
-#         df = df.fillna(np.nan).replace([np.nan], [None])
-#         return df
-#
-#     def update_df(self) -> None:
-#         """
-#         Update the internal dataframe in the case that the model values are changed
-#         in a way that we can't detect, like appending to one of the lists.
-#
-#         """
-#         self._df = self.__make_df()
-#
-#     def __getattr__(self, item: str):
-#         """
-#         Mimic pandas dataframe and pydantic model behavior
-#         """
-#         if item in ("df", "_df"):
-#             return self.__pydantic_private__["_df"]
-#         elif item in self.model_fields:
-#             return self._df[item]
-#         else:
-#             try:
-#                 return object.__getattribute__(self._df, item)
-#             except AttributeError:
-#                 return object.__getattribute__(self, item)
-#
-#     @model_validator(mode="after")
-#     def recreate_df(self) -> None:
-#         """
-#         Remake DF when validating (eg. when updating values on assignment)
-#         """
-#         self.update_df()
-#
-#     @model_serializer(mode="wrap", when_used="always")
-#     def serialize_model(self, nxt: SerializerFunctionWrapHandler) -> Dict[str, Any]:
-#         """
-#         We don't handle values that are changed on the dataframe by directly
-#         updating the underlying model lists, but we implicitly handle them
-#         by using the dataframe as the source when serializing
-#         """
-#         if self._df is None:
-#             return nxt(self)
-#         else:
-#             out = self._df.to_dict("list")
-#             # remove Nones
-#             out = {k: [inner_v for inner_v in v if inner_v is not None] for k, v in out.items()}
-#             return nxt(self.__class__(**out))
-
-#
-# def dynamictable_to_df(
-#     group: h5py.Group, model: Optional[Type[DataFrame]] = None, base: Optional[BaseModel] = None
-# ) -> DataFrame:
-#     """Generate a dataframe from an NDB DynamicTable"""
-#     if model is None:
-#         model = model_from_dynamictable(group, base)
-#
-#     items = {}
-#     for col, _col_type in model.model_fields.items():
-#         if col not in group:
-#             continue
-#         idxname = col + "_index"
-#         if idxname in group:
-#             idx = group.get(idxname)[:]
-#             data = group.get(col)[idx - 1]
-#         else:
-#             data = group.get(col)[:]
-#
-#         # Handle typing inside of list
-#         if isinstance(data[0], bytes):
-#             data = data.astype("unicode")
-#         if isinstance(data[0], str):
-#             # lists and other compound data types can get flattened out to strings when stored
-#             # so we try and literal eval and recover them
-#             try:
-#                 eval_type = type(ast.literal_eval(data[0]))
-#             except (ValueError, SyntaxError):
-#                 eval_type = str
-#
-#             # if we've found one of those, get the data type within it.
-#             if eval_type is not str:
-#                 eval_list = []
-#                 for item in data.tolist():
-#                     try:
-#                         eval_list.append(ast.literal_eval(item))
-#                     except ValueError:
-#                         eval_list.append(None)
-#                 data = eval_list
-#         elif isinstance(data[0], h5py.h5r.Reference):
-#             data = [HDF5_Path(group[d].name) for d in data]
-#         elif isinstance(data[0], tuple) and any(
-#             [isinstance(d, h5py.h5r.Reference) for d in data[0]]
-#         ):
-#             # references stored inside a tuple, reference + location.
-#             # dereference them!?
-#             dset = group.get(col)
-#             names = dset.dtype.names
-#             if names is not None and names[0] == "idx_start" and names[1] == "count":
-#                 data = dereference_reference_vector(dset, data)
-#
-#         else:
-#             data = data.tolist()
-#
-#         # After list, check if we need to put this thing inside of
-#         # another class, as indicated by the enclosing model
-#
-#         items[col] = data
-#
-#     return model(hdf5_path=group.name, name=group.name.split("/")[-1], **items)