a bit of tidying

sneakers-the-rat 2024-10-03 00:09:43 -07:00
parent 77a852913c
commit 9560b9f839
Signed by untrusted user who does not match committer: jonny
GPG key ID: 6DCB96EF1E4D232D
6 changed files with 35 additions and 197 deletions

View file

@@ -4,10 +4,23 @@ Base class for adapters
 
 import os
 import sys
-from abc import abstractmethod
+from abc import abstractmethod, ABC
 from dataclasses import dataclass, field
 from logging import Logger
-from typing import Any, Generator, List, Literal, Optional, Tuple, Type, TypeVar, Union, overload
+from typing import (
+    Any,
+    Generator,
+    List,
+    Literal,
+    Optional,
+    Tuple,
+    Type,
+    TypeVar,
+    Union,
+    overload,
+    Sequence,
+    Mapping,
+)
 
 from linkml_runtime.dumpers import yaml_dumper
 from linkml_runtime.linkml_model import (
@@ -273,6 +286,23 @@ class Adapter(BaseModel):
             yield item
 
 
+class Map(ABC):
+    """
+    The generic top-level mapping class is just a classmethod for checking whether the map
+    applies to a given item and a method for applying the map if it does
+    """
+
+    @classmethod
+    @abstractmethod
+    def check(cls, *args: Sequence, **kwargs: Mapping) -> bool:
+        """Check if this map applies to the given item to read"""
+
+    @classmethod
+    @abstractmethod
+    def apply(cls, *args: Sequence, **kwargs: Mapping) -> Any:
+        """Actually apply the map!"""
+
+
 def is_1d(cls: Dataset | Attribute) -> bool:
     """
     Check if the values of a dataset are 1-dimensional.
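
To make the check/apply protocol above concrete, here is a minimal sketch of a hypothetical Map subclass (editor's illustration, not part of this commit; the name and behavior are invented):

class StripNoneMap(Map):
    """Hypothetical map that applies to lists and drops None entries."""

    @classmethod
    def check(cls, *args: Sequence, **kwargs: Mapping) -> bool:
        # this map applies when the first positional argument is a list
        return len(args) > 0 and isinstance(args[0], list)

    @classmethod
    def apply(cls, *args: Sequence, **kwargs: Mapping) -> Any:
        # drop None entries, preserving order
        return [item for item in args[0] if item is not None]

# Callers test applicability before applying:
# if StripNoneMap.check(values):
#     values = StripNoneMap.apply(values)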

View file

@@ -7,9 +7,8 @@ from typing import ClassVar, Optional, Type, TypedDict
 
 from linkml_runtime.linkml_model.meta import SlotDefinition
 
-from nwb_linkml.adapters.adapter import Adapter, BuildResult, defaults, is_1d
+from nwb_linkml.adapters.adapter import Adapter, BuildResult, defaults, is_1d, Map
 from nwb_linkml.adapters.array import ArrayAdapter
-from nwb_linkml.maps import Map
 from nwb_linkml.maps.dtype import handle_dtype, inlined
 from nwb_schema_language import Attribute
 

View file

@@ -7,10 +7,10 @@ from typing import ClassVar, Optional, Type
 
 from linkml_runtime.linkml_model.meta import ArrayExpression, SlotDefinition
 
-from nwb_linkml.adapters.adapter import BuildResult, defaults, has_attrs, is_1d, is_compound
+from nwb_linkml.adapters.adapter import BuildResult, defaults, has_attrs, is_1d, is_compound, Map
 from nwb_linkml.adapters.array import ArrayAdapter
 from nwb_linkml.adapters.classes import ClassAdapter
-from nwb_linkml.maps import QUANTITY_MAP, Map
+from nwb_linkml.maps import QUANTITY_MAP
 from nwb_linkml.maps.dtype import flat_to_linkml, handle_dtype, inlined
 from nwb_linkml.maps.naming import camel_to_snake
 from nwb_schema_language import Dataset

View file

@@ -3,7 +3,6 @@ Mapping from one domain to another
 """
 
 from nwb_linkml.maps.dtype import flat_to_linkml, flat_to_np, linkml_reprs
-from nwb_linkml.maps.map import Map
 from nwb_linkml.maps.postload import MAP_HDMF_DATATYPE_DEF, MAP_HDMF_DATATYPE_INC
 from nwb_linkml.maps.quantity import QUANTITY_MAP
 
@@ -11,7 +10,6 @@ __all__ = [
     "MAP_HDMF_DATATYPE_DEF",
    "MAP_HDMF_DATATYPE_INC",
    "QUANTITY_MAP",
-    "Map",
     "flat_to_linkml",
     "flat_to_np",
     "linkml_reprs",

View file

@@ -104,19 +104,6 @@ MAP_HDMF_DATATYPE_INC = KeyMap(
 )
 
 
-class MAP_TYPES(StrEnum):
-    """
-    Types of mapping that can exist
-
-    .. todo::
-
-        This is likely deprecated, check usage.
-
-    """
-
-    key = "key"
-    """Mapping the name of one key to another key"""
-
 def apply_postload(ns_dict: dict) -> dict:
     """Apply all post-load maps to a YAML schema"""
     maps = [m for m in KeyMap.instances if m.phase == PHASES.postload]

View file

@@ -1,176 +0,0 @@
"""
Pydantic models that behave like pandas dataframes

.. note::

    This is currently unused but kept in place as a stub in case it is worth
    revisiting in the future.

    It turned out to be momentarily too difficult to make lazy-loading work with
    dask arrays per column while still keeping a pandas-like API intact. In the
    future we should investigate modifying the :func:`dask.dataframe.read_hdf`
    function to treat individual hdf5 datasets like columns.

    pandas has been removed from dependencies for now, as it is not used
    elsewhere, but it is left in this module since it is necessary for this
    stub to make sense.
"""
#
# class DataFrame(BaseModel, pd.DataFrame):
#     """
#     Pydantic model root class that mimics a pandas dataframe.
#
#     Notes:
#
#         The synchronization between the underlying lists in the pydantic model
#         and the derived dataframe is partial, and at the moment unidirectional.
#         This class is primarily intended for reading from tables stored in
#         NWB files rather than being able to manipulate them.
#
#         The dataframe IS updated when new values are *assigned* to a field.
#
#         e.g.::
#
#             MyModel.fieldval = [1,2,3]
#
#         But the dataframe is NOT updated when existing values are updated.
#
#         e.g.::
#
#             MyModel.fieldval.append(4)
#
#         In that case you need to call :meth:`.update_df` manually.
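#
#         A hypothetical sketch of these semantics (editor's illustration;
#         ``MyModel`` and ``fieldval`` are invented names)::
#
#             model = MyModel(fieldval=[1, 2, 3])
#             model.fieldval = [4, 5, 6]   # assignment: the dataframe is rebuilt
#             model.fieldval.append(7)     # in-place edit: the dataframe is stale
#             model.update_df()            # manual resync required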
#
#         Additionally, if the dataframe is modified, the underlying lists are NOT updated,
#         but when the model is dumped to a dictionary or serialized, the dataframe IS used,
#         so changes will be reflected then.
#
#         Fields that shadow pandas methods WILL prevent them from being usable, except
#         by directly accessing the dataframe like ``mymodel._df``
#
#     """
#
#     _df: pd.DataFrame = None
#     model_config = ConfigDict(validate_assignment=True)
#
#     def __init__(self, **kwargs):
#         # pdb.set_trace()
#         super().__init__(**kwargs)
#
#         self._df = self.__make_df()
#
#     def __make_df(self) -> pd.DataFrame:
#         # make dict that can handle ragged arrays and NoneTypes
#         items = {k: v for k, v in self.__dict__.items() if k in self.model_fields}
#
#         df_dict = {
#             k: (pd.Series(v) if isinstance(v, list) else pd.Series([v])) for k, v in items.items()
#         }
#         df = pd.DataFrame(df_dict)
#         # replace NaNs with None
#         df = df.fillna(np.nan).replace([np.nan], [None])
#         return df
#
#     def update_df(self) -> None:
#         """
#         Update the internal dataframe in the case that the model values are changed
#         in a way that we can't detect, like appending to one of the lists.
#         """
#         self._df = self.__make_df()
#
#     def __getattr__(self, item: str):
#         """
#         Mimic pandas dataframe and pydantic model behavior
#         """
#         if item in ("df", "_df"):
#             return self.__pydantic_private__["_df"]
#         elif item in self.model_fields:
#             return self._df[item]
#         else:
#             try:
#                 return object.__getattribute__(self._df, item)
#             except AttributeError:
#                 return object.__getattribute__(self, item)
#
#     @model_validator(mode="after")
#     def recreate_df(self) -> None:
#         """
#         Remake DF when validating (e.g. when updating values on assignment)
#         """
#         self.update_df()
#
#     @model_serializer(mode="wrap", when_used="always")
#     def serialize_model(self, nxt: SerializerFunctionWrapHandler) -> Dict[str, Any]:
#         """
#         We don't handle values that are changed on the dataframe by directly
#         updating the underlying model lists, but we implicitly handle them
#         by using the dataframe as the source when serializing
#         """
#         if self._df is None:
#             return nxt(self)
#         else:
#             out = self._df.to_dict("list")
#             # remove Nones
#             out = {k: [inner_v for inner_v in v if inner_v is not None] for k, v in out.items()}
#             return nxt(self.__class__(**out))
#
#
# def dynamictable_to_df(
#     group: h5py.Group, model: Optional[Type[DataFrame]] = None, base: Optional[BaseModel] = None
# ) -> DataFrame:
#     """Generate a dataframe from an NWB DynamicTable"""
#     if model is None:
#         model = model_from_dynamictable(group, base)
#
#     items = {}
#     for col, _col_type in model.model_fields.items():
#         if col not in group:
#             continue
#         idxname = col + "_index"
#         if idxname in group:
#             idx = group.get(idxname)[:]
#             data = group.get(col)[idx - 1]
#         else:
#             data = group.get(col)[:]
#
#         # Handle typing inside of list
#         if isinstance(data[0], bytes):
#             data = data.astype("unicode")
#         if isinstance(data[0], str):
#             # lists and other compound data types can get flattened out to strings when stored
#             # so we try and literal eval and recover them
#             try:
#                 eval_type = type(ast.literal_eval(data[0]))
#             except (ValueError, SyntaxError):
#                 eval_type = str
#
#             # if we've found one of those, get the data type within it.
#             if eval_type is not str:
#                 eval_list = []
#                 for item in data.tolist():
#                     try:
#                         eval_list.append(ast.literal_eval(item))
#                     except ValueError:
#                         eval_list.append(None)
#                 data = eval_list
#         elif isinstance(data[0], h5py.h5r.Reference):
#             data = [HDF5_Path(group[d].name) for d in data]
#         elif isinstance(data[0], tuple) and any(
#             [isinstance(d, h5py.h5r.Reference) for d in data[0]]
#         ):
#             # references stored inside a tuple, reference + location.
#             # dereference them!?
#             dset = group.get(col)
#             names = dset.dtype.names
#             if names is not None and names[0] == "idx_start" and names[1] == "count":
#                 data = dereference_reference_vector(dset, data)
#
#         else:
#             data = data.tolist()
#
#         # After list, check if we need to put this thing inside of
#         # another class, as indicated by the enclosing model
#
#         items[col] = data
#
#     return model(hdf5_path=group.name, name=group.name.split("/")[-1], **items)
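#
# Hypothetical usage sketch (editor's illustration, not from the original file;
# assumes an open h5py file containing a DynamicTable group at this path):
#
#     with h5py.File("example.nwb", "r") as f:
#         trials = dynamictable_to_df(f["/intervals/trials"])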