diff --git a/nwb_linkml/src/nwb_linkml/adapters/adapter.py b/nwb_linkml/src/nwb_linkml/adapters/adapter.py
index 07c5231..86395f3 100644
--- a/nwb_linkml/src/nwb_linkml/adapters/adapter.py
+++ b/nwb_linkml/src/nwb_linkml/adapters/adapter.py
@@ -4,10 +4,23 @@ Base class for adapters
 
 import os
 import sys
-from abc import abstractmethod
+from abc import abstractmethod, ABC
 from dataclasses import dataclass, field
 from logging import Logger
-from typing import Any, Generator, List, Literal, Optional, Tuple, Type, TypeVar, Union, overload
+from typing import (
+    Any,
+    Generator,
+    List,
+    Literal,
+    Optional,
+    Tuple,
+    Type,
+    TypeVar,
+    Union,
+    overload,
+    Sequence,
+    Mapping,
+)
 
 from linkml_runtime.dumpers import yaml_dumper
 from linkml_runtime.linkml_model import (
@@ -273,6 +286,23 @@ class Adapter(BaseModel):
             yield item
 
 
+class Map(ABC):
+    """
+    The generic top-level mapping class is just a classmethod for checking if the map applies and a
+    method for applying the map if it does
+    """
+
+    @classmethod
+    @abstractmethod
+    def check(cls, *args: Sequence, **kwargs: Mapping) -> bool:
+        """Check if this map applies to the given item to read"""
+
+    @classmethod
+    @abstractmethod
+    def apply(cls, *args: Sequence, **kwargs: Mapping) -> Any:
+        """Actually apply the map!"""
+
+
 def is_1d(cls: Dataset | Attribute) -> bool:
     """
     Check if the values of a dataset are 1-dimensional.
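A minimal sketch of how a subclass might satisfy the relocated `Map` contract. `ScalarDatasetMap` and its `dims`/`shape` check are hypothetical illustrations, not part of this diff:

```python
from typing import Any

from nwb_linkml.adapters.adapter import Map
from nwb_schema_language import Dataset


class ScalarDatasetMap(Map):
    """Hypothetical map for datasets that declare no array structure."""

    @classmethod
    def check(cls, dataset: Dataset) -> bool:
        # Applies only when the dataset has neither dims nor shape
        return dataset.dims is None and dataset.shape is None

    @classmethod
    def apply(cls, dataset: Dataset) -> Any:
        # Build whatever result a scalar dataset should produce
        ...
```
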
diff --git a/nwb_linkml/src/nwb_linkml/adapters/attribute.py b/nwb_linkml/src/nwb_linkml/adapters/attribute.py
index 8326a51..db2999b 100644
--- a/nwb_linkml/src/nwb_linkml/adapters/attribute.py
+++ b/nwb_linkml/src/nwb_linkml/adapters/attribute.py
@@ -7,9 +7,8 @@ from typing import ClassVar, Optional, Type, TypedDict
 
 from linkml_runtime.linkml_model.meta import SlotDefinition
 
-from nwb_linkml.adapters.adapter import Adapter, BuildResult, defaults, is_1d
+from nwb_linkml.adapters.adapter import Adapter, BuildResult, defaults, is_1d, Map
 from nwb_linkml.adapters.array import ArrayAdapter
-from nwb_linkml.maps import Map
 from nwb_linkml.maps.dtype import handle_dtype, inlined
 from nwb_schema_language import Attribute
 
diff --git a/nwb_linkml/src/nwb_linkml/adapters/dataset.py b/nwb_linkml/src/nwb_linkml/adapters/dataset.py
index 6a6e954..44167c5 100644
--- a/nwb_linkml/src/nwb_linkml/adapters/dataset.py
+++ b/nwb_linkml/src/nwb_linkml/adapters/dataset.py
@@ -7,10 +7,10 @@ from typing import ClassVar, Optional, Type
 
 from linkml_runtime.linkml_model.meta import ArrayExpression, SlotDefinition
 
-from nwb_linkml.adapters.adapter import BuildResult, defaults, has_attrs, is_1d, is_compound
+from nwb_linkml.adapters.adapter import BuildResult, defaults, has_attrs, is_1d, is_compound, Map
 from nwb_linkml.adapters.array import ArrayAdapter
 from nwb_linkml.adapters.classes import ClassAdapter
-from nwb_linkml.maps import QUANTITY_MAP, Map
+from nwb_linkml.maps import QUANTITY_MAP
 from nwb_linkml.maps.dtype import flat_to_linkml, handle_dtype, inlined
 from nwb_linkml.maps.naming import camel_to_snake
 from nwb_schema_language import Dataset
diff --git a/nwb_linkml/src/nwb_linkml/maps/__init__.py b/nwb_linkml/src/nwb_linkml/maps/__init__.py
index cdad7d0..a5e66e7 100644
--- a/nwb_linkml/src/nwb_linkml/maps/__init__.py
+++ b/nwb_linkml/src/nwb_linkml/maps/__init__.py
@@ -3,7 +3,6 @@ Mapping from one domain to another
 """
 
 from nwb_linkml.maps.dtype import flat_to_linkml, flat_to_np, linkml_reprs
-from nwb_linkml.maps.map import Map
 from nwb_linkml.maps.postload import MAP_HDMF_DATATYPE_DEF, MAP_HDMF_DATATYPE_INC
 from nwb_linkml.maps.quantity import QUANTITY_MAP
 
@@ -11,7 +10,6 @@ __all__ = [
     "MAP_HDMF_DATATYPE_DEF",
     "MAP_HDMF_DATATYPE_INC",
     "QUANTITY_MAP",
-    "Map",
     "flat_to_linkml",
     "flat_to_np",
     "linkml_reprs",
diff --git a/nwb_linkml/src/nwb_linkml/maps/postload.py b/nwb_linkml/src/nwb_linkml/maps/postload.py
index 15d8dcc..c4ce63f 100644
--- a/nwb_linkml/src/nwb_linkml/maps/postload.py
+++ b/nwb_linkml/src/nwb_linkml/maps/postload.py
@@ -104,19 +104,6 @@ MAP_HDMF_DATATYPE_INC = KeyMap(
 )
 
 
-class MAP_TYPES(StrEnum):
-    """
-    Types of mapping that can exist
-
-    .. todo::
-
-        This is likely deprecated, check usage.
-    """
-
-    key = "key"
-    """Mapping the name of one key to another key"""
-
-
 def apply_postload(ns_dict: dict) -> dict:
     """Apply all post-load maps to a YAML schema"""
     maps = [m for m in KeyMap.instances if m.phase == PHASES.postload]
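For orientation, `apply_postload` (shown as context above) runs every `KeyMap` registered for the postload phase over a loaded namespace dict. A hedged usage sketch; the file name is a placeholder:

```python
import yaml

from nwb_linkml.maps.postload import apply_postload

# "namespace.yaml" stands in for a real NWB namespace file
with open("namespace.yaml") as f:
    ns_dict = yaml.safe_load(f)

# applies every KeyMap whose phase == PHASES.postload
ns_dict = apply_postload(ns_dict)
```
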
diff --git a/nwb_linkml/src/nwb_linkml/types/df.py b/nwb_linkml/src/nwb_linkml/types/df.py
deleted file mode 100644
index 19d36a5..0000000
--- a/nwb_linkml/src/nwb_linkml/types/df.py
+++ /dev/null
@@ -1,176 +0,0 @@
-"""
-Pydantic models that behave like pandas dataframes
-
-.. note::
-
-    This is currently unused but kept in place as a stub in case it is worth
-    revisiting in the future.
-    It turned out to be too momentarily difficult to make lazy-loading work with
-    dask arrays per column
-    while still keeping pandas-like API intact. In the future we should investigate modifying the
-    :func:`dask.dataframe.read_hdf` function to treat individual hdf5 datasets like columns
-
-    pandas has been removed from dependencies for now, as it not used elsewhere, but it is
-    left in this module since it is necessary for it to make sense.
-"""
-
-#
-# class DataFrame(BaseModel, pd.DataFrame):
-#     """
-#     Pydantic model root class that mimics a pandas dataframe.
-#
-#     Notes:
-#
-#         The synchronization between the underlying lists in the pydantic model
-#         and the derived dataframe is partial, and at the moment unidirectional.
-#         This class is primarily intended for reading from tables stored in
-#         NWB files rather than being able to manipulate them.
-#
-#         The dataframe IS updated when new values are *assigned* to a field.
-#
-#         eg.::
-#
-#             MyModel.fieldval = [1,2,3]
-#
-#         But the dataframe is NOT updated when existing values are updated.
-#
-#         eg.::
-#
-#             MyModel.fieldval.append(4)
-#
-#         In that case you need to call :meth:`.update_df` manually.
-#
-#         Additionally, if the dataframe is modified, the underlying lists are NOT updated,
-#         but when the model is dumped to a dictionary or serialized, the dataframe IS used,
-#         so changes will be reflected then.
-#
-#     Fields that shadow pandas methods WILL prevent them from being usable, except
-#     by directly accessing the dataframe like ``mymodel._df``
-#
-#     """
-#
-#     _df: pd.DataFrame = None
-#     model_config = ConfigDict(validate_assignment=True)
-#
-#     def __init__(self, **kwargs):
-#         # pdb.set_trace()
-#         super().__init__(**kwargs)
-#
-#         self._df = self.__make_df()
-#
-#     def __make_df(self) -> pd.DataFrame:
-#         # make dict that can handle ragged arrays and NoneTypes
-#         items = {k: v for k, v in self.__dict__.items() if k in self.model_fields}
-#
-#         df_dict = {
-#             k: (pd.Series(v) if isinstance(v, list) else pd.Series([v])) for k, v in items.items()
-#         }
-#         df = pd.DataFrame(df_dict)
-#         # replace Nans with None
-#         df = df.fillna(np.nan).replace([np.nan], [None])
-#         return df
-#
-#     def update_df(self) -> None:
-#         """
-#         Update the internal dataframe in the case that the model values are changed
-#         in a way that we can't detect, like appending to one of the lists.
-#
-#         """
-#         self._df = self.__make_df()
-#
-#     def __getattr__(self, item: str):
-#         """
-#         Mimic pandas dataframe and pydantic model behavior
-#         """
-#         if item in ("df", "_df"):
-#             return self.__pydantic_private__["_df"]
-#         elif item in self.model_fields:
-#             return self._df[item]
-#         else:
-#             try:
-#                 return object.__getattribute__(self._df, item)
-#             except AttributeError:
-#                 return object.__getattribute__(self, item)
-#
-#     @model_validator(mode="after")
-#     def recreate_df(self) -> None:
-#         """
-#         Remake DF when validating (eg. when updating values on assignment)
-#         """
-#         self.update_df()
-#
-#     @model_serializer(mode="wrap", when_used="always")
-#     def serialize_model(self, nxt: SerializerFunctionWrapHandler) -> Dict[str, Any]:
-#         """
-#         We don't handle values that are changed on the dataframe by directly
-#         updating the underlying model lists, but we implicitly handle them
-#         by using the dataframe as the source when serializing
-#         """
-#         if self._df is None:
-#             return nxt(self)
-#         else:
-#             out = self._df.to_dict("list")
-#             # remove Nones
-#             out = {k: [inner_v for inner_v in v if inner_v is not None] for k, v in out.items()}
-#             return nxt(self.__class__(**out))
-
-#
-# def dynamictable_to_df(
-#     group: h5py.Group, model: Optional[Type[DataFrame]] = None, base: Optional[BaseModel] = None
-# ) -> DataFrame:
-#     """Generate a dataframe from an NDB DynamicTable"""
-#     if model is None:
-#         model = model_from_dynamictable(group, base)
-#
-#     items = {}
-#     for col, _col_type in model.model_fields.items():
-#         if col not in group:
-#             continue
-#         idxname = col + "_index"
-#         if idxname in group:
-#             idx = group.get(idxname)[:]
-#             data = group.get(col)[idx - 1]
-#         else:
-#             data = group.get(col)[:]
-#
-#         # Handle typing inside of list
-#         if isinstance(data[0], bytes):
-#             data = data.astype("unicode")
-#         if isinstance(data[0], str):
-#             # lists and other compound data types can get flattened out to strings when stored
-#             # so we try and literal eval and recover them
-#             try:
-#                 eval_type = type(ast.literal_eval(data[0]))
-#             except (ValueError, SyntaxError):
-#                 eval_type = str
-#
-#             # if we've found one of those, get the data type within it.
-#             if eval_type is not str:
-#                 eval_list = []
-#                 for item in data.tolist():
-#                     try:
-#                         eval_list.append(ast.literal_eval(item))
-#                     except ValueError:
-#                         eval_list.append(None)
-#                 data = eval_list
-#         elif isinstance(data[0], h5py.h5r.Reference):
-#             data = [HDF5_Path(group[d].name) for d in data]
-#         elif isinstance(data[0], tuple) and any(
-#             [isinstance(d, h5py.h5r.Reference) for d in data[0]]
-#         ):
-#             # references stored inside a tuple, reference + location.
-#             # dereference them!?
-#             dset = group.get(col)
-#             names = dset.dtype.names
-#             if names is not None and names[0] == "idx_start" and names[1] == "count":
-#                 data = dereference_reference_vector(dset, data)
-#
-#         else:
-#             data = data.tolist()
-#
-#         # After list, check if we need to put this thing inside of
-#         # another class, as indicated by the enclosing model
-#
-#         items[col] = data
-#
-#     return model(hdf5_path=group.name, name=group.name.split("/")[-1], **items)
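For context on the `_index` handling in the removed `dynamictable_to_df`: in HDMF's on-disk layout, a ragged DynamicTable column `col` is paired with a `col_index` dataset of cumulative end offsets, so row `i` owns `col[index[i-1]:index[i]]`. A minimal sketch of recovering per-row slices under that convention (this is the general pattern, not the removed code); the group and column names are illustrative:

```python
import h5py
import numpy as np


def ragged_column(group: h5py.Group, col: str) -> list:
    """Recover per-row value lists for a ragged DynamicTable column."""
    data = group[col][:]
    # <col>_index stores the *end* offset of each row's slice into <col>
    ends = group[f"{col}_index"][:]
    starts = np.concatenate([[0], ends[:-1]])
    return [data[s:e].tolist() for s, e in zip(starts, ends)]
```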