Mirror of https://github.com/p2p-ld/nwb-linkml.git, synced 2024-11-15 03:04:30 +00:00
a bit of tidying

commit 9560b9f839 (parent 77a852913c)
6 changed files with 35 additions and 197 deletions
@@ -4,10 +4,23 @@ Base class for adapters
 import os
 import sys
-from abc import abstractmethod
+from abc import abstractmethod, ABC
 from dataclasses import dataclass, field
 from logging import Logger
-from typing import Any, Generator, List, Literal, Optional, Tuple, Type, TypeVar, Union, overload
+from typing import (
+    Any,
+    Generator,
+    List,
+    Literal,
+    Optional,
+    Tuple,
+    Type,
+    TypeVar,
+    Union,
+    overload,
+    Sequence,
+    Mapping,
+)
 
 from linkml_runtime.dumpers import yaml_dumper
 from linkml_runtime.linkml_model import (
@@ -273,6 +286,23 @@ class Adapter(BaseModel):
             yield item
 
 
+class Map(ABC):
+    """
+    The generic top-level mapping class is just a classmethod for checking if the map applies and a
+    method for applying the check if it does
+    """
+
+    @classmethod
+    @abstractmethod
+    def check(cls, *args: Sequence, **kwargs: Mapping) -> bool:
+        """Check if this map applies to the given item to read"""
+
+    @classmethod
+    @abstractmethod
+    def apply(cls, *args: Sequence, **kwargs: Mapping) -> Any:
+        """Actually apply the map!"""
+
+
 def is_1d(cls: Dataset | Attribute) -> bool:
     """
     Check if the values of a dataset are 1-dimensional.
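For orientation (not part of the commit): a concrete subclass of the relocated Map ABC implements both classmethods. A minimal sketch, with a hypothetical subclass name and key names:

    class RenameKeyMap(Map):
        """Hypothetical map that renames one dict key (illustration only)."""

        @classmethod
        def check(cls, item: dict) -> bool:
            # the map applies only when the old key is present
            return "old_name" in item

        @classmethod
        def apply(cls, item: dict) -> dict:
            # rename the key and hand the item back
            item["new_name"] = item.pop("old_name")
            return item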
@@ -7,9 +7,8 @@ from typing import ClassVar, Optional, Type, TypedDict
 
 from linkml_runtime.linkml_model.meta import SlotDefinition
 
-from nwb_linkml.adapters.adapter import Adapter, BuildResult, defaults, is_1d
+from nwb_linkml.adapters.adapter import Adapter, BuildResult, defaults, is_1d, Map
 from nwb_linkml.adapters.array import ArrayAdapter
-from nwb_linkml.maps import Map
 from nwb_linkml.maps.dtype import handle_dtype, inlined
 from nwb_schema_language import Attribute
 
@@ -7,10 +7,10 @@ from typing import ClassVar, Optional, Type
 
 from linkml_runtime.linkml_model.meta import ArrayExpression, SlotDefinition
 
-from nwb_linkml.adapters.adapter import BuildResult, defaults, has_attrs, is_1d, is_compound
+from nwb_linkml.adapters.adapter import BuildResult, defaults, has_attrs, is_1d, is_compound, Map
 from nwb_linkml.adapters.array import ArrayAdapter
 from nwb_linkml.adapters.classes import ClassAdapter
-from nwb_linkml.maps import QUANTITY_MAP, Map
+from nwb_linkml.maps import QUANTITY_MAP
 from nwb_linkml.maps.dtype import flat_to_linkml, handle_dtype, inlined
 from nwb_linkml.maps.naming import camel_to_snake
 from nwb_schema_language import Dataset
@@ -3,7 +3,6 @@ Mapping from one domain to another
 """
 
 from nwb_linkml.maps.dtype import flat_to_linkml, flat_to_np, linkml_reprs
-from nwb_linkml.maps.map import Map
 from nwb_linkml.maps.postload import MAP_HDMF_DATATYPE_DEF, MAP_HDMF_DATATYPE_INC
 from nwb_linkml.maps.quantity import QUANTITY_MAP
 
@@ -11,7 +10,6 @@ __all__ = [
     "MAP_HDMF_DATATYPE_DEF",
     "MAP_HDMF_DATATYPE_INC",
     "QUANTITY_MAP",
-    "Map",
     "flat_to_linkml",
     "flat_to_np",
     "linkml_reprs",
@@ -104,19 +104,6 @@ MAP_HDMF_DATATYPE_INC = KeyMap(
 )
 
 
-class MAP_TYPES(StrEnum):
-    """
-    Types of mapping that can exist
-
-    .. todo::
-
-        This is likely deprecated, check usage.
-    """
-
-    key = "key"
-    """Mapping the name of one key to another key"""
-
-
 def apply_postload(ns_dict: dict) -> dict:
     """Apply all post-load maps to a YAML schema"""
     maps = [m for m in KeyMap.instances if m.phase == PHASES.postload]
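For context (not part of the commit): apply_postload iterates every KeyMap registered in KeyMap.instances whose phase is PHASES.postload and applies it to a freshly loaded schema dict. A minimal, hypothetical invocation (the dict contents here are invented for illustration):

    # ns_dict is a namespace schema as loaded from YAML
    ns_dict = {"groups": [{"neurodata_type_def": "MyContainer"}]}

    # every registered postload KeyMap gets a chance to rewrite matching keys
    ns_dict = apply_postload(ns_dict)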
@@ -1,176 +0,0 @@
-"""
-Pydantic models that behave like pandas dataframes
-
-.. note::
-
-    This is currently unused but kept in place as a stub in case it is worth
-    revisiting in the future.
-    It turned out to be too momentarily difficult to make lazy-loading work with
-    dask arrays per column
-    while still keeping pandas-like API intact. In the future we should investigate modifying the
-    :func:`dask.dataframe.read_hdf` function to treat individual hdf5 datasets like columns
-
-    pandas has been removed from dependencies for now, as it not used elsewhere, but it is
-    left in this module since it is necessary for it to make sense.
-"""
-
-#
-# class DataFrame(BaseModel, pd.DataFrame):
-#     """
-#     Pydantic model root class that mimics a pandas dataframe.
-#
-#     Notes:
-#
-#         The synchronization between the underlying lists in the pydantic model
-#         and the derived dataframe is partial, and at the moment unidirectional.
-#         This class is primarily intended for reading from tables stored in
-#         NWB files rather than being able to manipulate them.
-#
-#         The dataframe IS updated when new values are *assigned* to a field.
-#
-#         eg.::
-#
-#             MyModel.fieldval = [1,2,3]
-#
-#         But the dataframe is NOT updated when existing values are updated.
-#
-#         eg.::
-#
-#             MyModel.fieldval.append(4)
-#
-#         In that case you need to call :meth:`.update_df` manually.
-#
-#         Additionally, if the dataframe is modified, the underlying lists are NOT updated,
-#         but when the model is dumped to a dictionary or serialized, the dataframe IS used,
-#         so changes will be reflected then.
-#
-#         Fields that shadow pandas methods WILL prevent them from being usable, except
-#         by directly accessing the dataframe like ``mymodel._df``
-#
-#     """
-#
-#     _df: pd.DataFrame = None
-#     model_config = ConfigDict(validate_assignment=True)
-#
-#     def __init__(self, **kwargs):
-#         # pdb.set_trace()
-#         super().__init__(**kwargs)
-#
-#         self._df = self.__make_df()
-#
-#     def __make_df(self) -> pd.DataFrame:
-#         # make dict that can handle ragged arrays and NoneTypes
-#         items = {k: v for k, v in self.__dict__.items() if k in self.model_fields}
-#
-#         df_dict = {
-#             k: (pd.Series(v) if isinstance(v, list) else pd.Series([v])) for k, v in items.items()
-#         }
-#         df = pd.DataFrame(df_dict)
-#         # replace Nans with None
-#         df = df.fillna(np.nan).replace([np.nan], [None])
-#         return df
-#
-#     def update_df(self) -> None:
-#         """
-#         Update the internal dataframe in the case that the model values are changed
-#         in a way that we can't detect, like appending to one of the lists.
-#
-#         """
-#         self._df = self.__make_df()
-#
-#     def __getattr__(self, item: str):
-#         """
-#         Mimic pandas dataframe and pydantic model behavior
-#         """
-#         if item in ("df", "_df"):
-#             return self.__pydantic_private__["_df"]
-#         elif item in self.model_fields:
-#             return self._df[item]
-#         else:
-#             try:
-#                 return object.__getattribute__(self._df, item)
-#             except AttributeError:
-#                 return object.__getattribute__(self, item)
-#
-#     @model_validator(mode="after")
-#     def recreate_df(self) -> None:
-#         """
-#         Remake DF when validating (eg. when updating values on assignment)
-#         """
-#         self.update_df()
-#
-#     @model_serializer(mode="wrap", when_used="always")
-#     def serialize_model(self, nxt: SerializerFunctionWrapHandler) -> Dict[str, Any]:
-#         """
-#         We don't handle values that are changed on the dataframe by directly
-#         updating the underlying model lists, but we implicitly handle them
-#         by using the dataframe as the source when serializing
-#         """
-#         if self._df is None:
-#             return nxt(self)
-#         else:
-#             out = self._df.to_dict("list")
-#             # remove Nones
-#             out = {k: [inner_v for inner_v in v if inner_v is not None] for k, v in out.items()}
-#             return nxt(self.__class__(**out))
-
-#
-# def dynamictable_to_df(
-#     group: h5py.Group, model: Optional[Type[DataFrame]] = None, base: Optional[BaseModel] = None
-# ) -> DataFrame:
-#     """Generate a dataframe from an NDB DynamicTable"""
-#     if model is None:
-#         model = model_from_dynamictable(group, base)
-#
-#     items = {}
-#     for col, _col_type in model.model_fields.items():
-#         if col not in group:
-#             continue
-#         idxname = col + "_index"
-#         if idxname in group:
-#             idx = group.get(idxname)[:]
-#             data = group.get(col)[idx - 1]
-#         else:
-#             data = group.get(col)[:]
-#
-#         # Handle typing inside of list
-#         if isinstance(data[0], bytes):
-#             data = data.astype("unicode")
-#         if isinstance(data[0], str):
-#             # lists and other compound data types can get flattened out to strings when stored
-#             # so we try and literal eval and recover them
-#             try:
-#                 eval_type = type(ast.literal_eval(data[0]))
-#             except (ValueError, SyntaxError):
-#                 eval_type = str
-#
-#             # if we've found one of those, get the data type within it.
-#             if eval_type is not str:
-#                 eval_list = []
-#                 for item in data.tolist():
-#                     try:
-#                         eval_list.append(ast.literal_eval(item))
-#                     except ValueError:
-#                         eval_list.append(None)
-#                 data = eval_list
-#         elif isinstance(data[0], h5py.h5r.Reference):
-#             data = [HDF5_Path(group[d].name) for d in data]
-#         elif isinstance(data[0], tuple) and any(
-#             [isinstance(d, h5py.h5r.Reference) for d in data[0]]
-#         ):
-#             # references stored inside a tuple, reference + location.
-#             # dereference them!?
-#             dset = group.get(col)
-#             names = dset.dtype.names
-#             if names is not None and names[0] == "idx_start" and names[1] == "count":
-#                 data = dereference_reference_vector(dset, data)
-#
-#         else:
-#             data = data.tolist()
-#
-#         # After list, check if we need to put this thing inside of
-#         # another class, as indicated by the enclosing model
-#
-#         items[col] = data
-#
-#     return model(hdf5_path=group.name, name=group.name.split("/")[-1], **items)