pandas dataframe mimic

This commit is contained in:
sneakers-the-rat 2023-09-23 00:08:59 -07:00
parent aac0c7abdd
commit 57fa3d34a2
8 changed files with 317 additions and 5 deletions

View file

@ -3,3 +3,4 @@
Stuff to keep track of that might have been manually overrided that needs to be fixed pre-release Stuff to keep track of that might have been manually overrided that needs to be fixed pre-release
- Coerce all listlike things into lists if they are passed as single elements! - Coerce all listlike things into lists if they are passed as single elements!
- Use [fsspec](https://filesystem-spec.readthedocs.io/en/latest/index.html) to interface with DANDI!

134
nwb_linkml/poetry.lock generated
View file

@ -1103,6 +1103,47 @@ files = [
{file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"}, {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"},
] ]
[[package]]
name = "numpy"
version = "1.26.0"
description = "Fundamental package for array computing in Python"
optional = false
python-versions = "<3.13,>=3.9"
files = [
{file = "numpy-1.26.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f8db2f125746e44dce707dd44d4f4efeea8d7e2b43aace3f8d1f235cfa2733dd"},
{file = "numpy-1.26.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0621f7daf973d34d18b4e4bafb210bbaf1ef5e0100b5fa750bd9cde84c7ac292"},
{file = "numpy-1.26.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51be5f8c349fdd1a5568e72713a21f518e7d6707bcf8503b528b88d33b57dc68"},
{file = "numpy-1.26.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:767254ad364991ccfc4d81b8152912e53e103ec192d1bb4ea6b1f5a7117040be"},
{file = "numpy-1.26.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:436c8e9a4bdeeee84e3e59614d38c3dbd3235838a877af8c211cfcac8a80b8d3"},
{file = "numpy-1.26.0-cp310-cp310-win32.whl", hash = "sha256:c2e698cb0c6dda9372ea98a0344245ee65bdc1c9dd939cceed6bb91256837896"},
{file = "numpy-1.26.0-cp310-cp310-win_amd64.whl", hash = "sha256:09aaee96c2cbdea95de76ecb8a586cb687d281c881f5f17bfc0fb7f5890f6b91"},
{file = "numpy-1.26.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:637c58b468a69869258b8ae26f4a4c6ff8abffd4a8334c830ffb63e0feefe99a"},
{file = "numpy-1.26.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:306545e234503a24fe9ae95ebf84d25cba1fdc27db971aa2d9f1ab6bba19a9dd"},
{file = "numpy-1.26.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c6adc33561bd1d46f81131d5352348350fc23df4d742bb246cdfca606ea1208"},
{file = "numpy-1.26.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e062aa24638bb5018b7841977c360d2f5917268d125c833a686b7cbabbec496c"},
{file = "numpy-1.26.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:546b7dd7e22f3c6861463bebb000646fa730e55df5ee4a0224408b5694cc6148"},
{file = "numpy-1.26.0-cp311-cp311-win32.whl", hash = "sha256:c0b45c8b65b79337dee5134d038346d30e109e9e2e9d43464a2970e5c0e93229"},
{file = "numpy-1.26.0-cp311-cp311-win_amd64.whl", hash = "sha256:eae430ecf5794cb7ae7fa3808740b015aa80747e5266153128ef055975a72b99"},
{file = "numpy-1.26.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:166b36197e9debc4e384e9c652ba60c0bacc216d0fc89e78f973a9760b503388"},
{file = "numpy-1.26.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f042f66d0b4ae6d48e70e28d487376204d3cbf43b84c03bac57e28dac6151581"},
{file = "numpy-1.26.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5e18e5b14a7560d8acf1c596688f4dfd19b4f2945b245a71e5af4ddb7422feb"},
{file = "numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f6bad22a791226d0a5c7c27a80a20e11cfe09ad5ef9084d4d3fc4a299cca505"},
{file = "numpy-1.26.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4acc65dd65da28060e206c8f27a573455ed724e6179941edb19f97e58161bb69"},
{file = "numpy-1.26.0-cp312-cp312-win32.whl", hash = "sha256:bb0d9a1aaf5f1cb7967320e80690a1d7ff69f1d47ebc5a9bea013e3a21faec95"},
{file = "numpy-1.26.0-cp312-cp312-win_amd64.whl", hash = "sha256:ee84ca3c58fe48b8ddafdeb1db87388dce2c3c3f701bf447b05e4cfcc3679112"},
{file = "numpy-1.26.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4a873a8180479bc829313e8d9798d5234dfacfc2e8a7ac188418189bb8eafbd2"},
{file = "numpy-1.26.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:914b28d3215e0c721dc75db3ad6d62f51f630cb0c277e6b3bcb39519bed10bd8"},
{file = "numpy-1.26.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c78a22e95182fb2e7874712433eaa610478a3caf86f28c621708d35fa4fd6e7f"},
{file = "numpy-1.26.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86f737708b366c36b76e953c46ba5827d8c27b7a8c9d0f471810728e5a2fe57c"},
{file = "numpy-1.26.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b44e6a09afc12952a7d2a58ca0a2429ee0d49a4f89d83a0a11052da696440e49"},
{file = "numpy-1.26.0-cp39-cp39-win32.whl", hash = "sha256:5671338034b820c8d58c81ad1dafc0ed5a00771a82fccc71d6438df00302094b"},
{file = "numpy-1.26.0-cp39-cp39-win_amd64.whl", hash = "sha256:020cdbee66ed46b671429c7265cf00d8ac91c046901c55684954c3958525dab2"},
{file = "numpy-1.26.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0792824ce2f7ea0c82ed2e4fecc29bb86bee0567a080dacaf2e0a01fe7654369"},
{file = "numpy-1.26.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d484292eaeb3e84a51432a94f53578689ffdea3f90e10c8b203a99be5af57d8"},
{file = "numpy-1.26.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:186ba67fad3c60dbe8a3abff3b67a91351100f2661c8e2a80364ae6279720299"},
{file = "numpy-1.26.0.tar.gz", hash = "sha256:f93fc78fe8bf15afe2b8d6b6499f1c73953169fad1e9a8dd086cdff3190e7fdf"},
]
[[package]] [[package]]
name = "nwb-schema-language" name = "nwb-schema-language"
version = "0.1.1" version = "0.1.1"
@ -1143,6 +1184,73 @@ files = [
{file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"},
] ]
[[package]]
name = "pandas"
version = "2.1.1"
description = "Powerful data structures for data analysis, time series, and statistics"
optional = false
python-versions = ">=3.9"
files = [
{file = "pandas-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:58d997dbee0d4b64f3cb881a24f918b5f25dd64ddf31f467bb9b67ae4c63a1e4"},
{file = "pandas-2.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02304e11582c5d090e5a52aec726f31fe3f42895d6bfc1f28738f9b64b6f0614"},
{file = "pandas-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffa8f0966de2c22de408d0e322db2faed6f6e74265aa0856f3824813cf124363"},
{file = "pandas-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1f84c144dee086fe4f04a472b5cd51e680f061adf75c1ae4fc3a9275560f8f4"},
{file = "pandas-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:75ce97667d06d69396d72be074f0556698c7f662029322027c226fd7a26965cb"},
{file = "pandas-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:4c3f32fd7c4dccd035f71734df39231ac1a6ff95e8bdab8d891167197b7018d2"},
{file = "pandas-2.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9e2959720b70e106bb1d8b6eadd8ecd7c8e99ccdbe03ee03260877184bb2877d"},
{file = "pandas-2.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:25e8474a8eb258e391e30c288eecec565bfed3e026f312b0cbd709a63906b6f8"},
{file = "pandas-2.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8bd1685556f3374520466998929bade3076aeae77c3e67ada5ed2b90b4de7f0"},
{file = "pandas-2.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc3657869c7902810f32bd072f0740487f9e030c1a3ab03e0af093db35a9d14e"},
{file = "pandas-2.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:05674536bd477af36aa2effd4ec8f71b92234ce0cc174de34fd21e2ee99adbc2"},
{file = "pandas-2.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:b407381258a667df49d58a1b637be33e514b07f9285feb27769cedb3ab3d0b3a"},
{file = "pandas-2.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c747793c4e9dcece7bb20156179529898abf505fe32cb40c4052107a3c620b49"},
{file = "pandas-2.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3bcad1e6fb34b727b016775bea407311f7721db87e5b409e6542f4546a4951ea"},
{file = "pandas-2.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f5ec7740f9ccb90aec64edd71434711f58ee0ea7f5ed4ac48be11cfa9abf7317"},
{file = "pandas-2.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29deb61de5a8a93bdd033df328441a79fcf8dd3c12d5ed0b41a395eef9cd76f0"},
{file = "pandas-2.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4f99bebf19b7e03cf80a4e770a3e65eee9dd4e2679039f542d7c1ace7b7b1daa"},
{file = "pandas-2.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:84e7e910096416adec68075dc87b986ff202920fb8704e6d9c8c9897fe7332d6"},
{file = "pandas-2.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:366da7b0e540d1b908886d4feb3d951f2f1e572e655c1160f5fde28ad4abb750"},
{file = "pandas-2.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9e50e72b667415a816ac27dfcfe686dc5a0b02202e06196b943d54c4f9c7693e"},
{file = "pandas-2.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc1ab6a25da197f03ebe6d8fa17273126120874386b4ac11c1d687df288542dd"},
{file = "pandas-2.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0dbfea0dd3901ad4ce2306575c54348d98499c95be01b8d885a2737fe4d7a98"},
{file = "pandas-2.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0489b0e6aa3d907e909aef92975edae89b1ee1654db5eafb9be633b0124abe97"},
{file = "pandas-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:4cdb0fab0400c2cb46dafcf1a0fe084c8bb2480a1fa8d81e19d15e12e6d4ded2"},
{file = "pandas-2.1.1.tar.gz", hash = "sha256:fecb198dc389429be557cde50a2d46da8434a17fe37d7d41ff102e3987fd947b"},
]
[package.dependencies]
numpy = [
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
]
python-dateutil = ">=2.8.2"
pytz = ">=2020.1"
tzdata = ">=2022.1"
[package.extras]
all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"]
aws = ["s3fs (>=2022.05.0)"]
clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"]
compression = ["zstandard (>=0.17.0)"]
computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"]
consortium-standard = ["dataframe-api-compat (>=0.1.7)"]
excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"]
feather = ["pyarrow (>=7.0.0)"]
fss = ["fsspec (>=2022.05.0)"]
gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"]
hdf5 = ["tables (>=3.7.0)"]
html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"]
mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"]
output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"]
parquet = ["pyarrow (>=7.0.0)"]
performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"]
plot = ["matplotlib (>=3.6.1)"]
postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"]
spss = ["pyreadstat (>=1.1.5)"]
sql-other = ["SQLAlchemy (>=1.4.36)"]
test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"]
xml = ["lxml (>=4.8.0)"]
[[package]] [[package]]
name = "parse" name = "parse"
version = "1.19.1" version = "1.19.1"
@ -1626,6 +1734,17 @@ files = [
[package.dependencies] [package.dependencies]
sortedcontainers = "*" sortedcontainers = "*"
[[package]]
name = "pytz"
version = "2023.3.post1"
description = "World timezone definitions, modern and historical"
optional = false
python-versions = "*"
files = [
{file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"},
{file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"},
]
[[package]] [[package]]
name = "pyyaml" name = "pyyaml"
version = "6.0.1" version = "6.0.1"
@ -2179,6 +2298,17 @@ files = [
{file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"}, {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"},
] ]
[[package]]
name = "tzdata"
version = "2023.3"
description = "Provider of IANA time zone data"
optional = false
python-versions = ">=2"
files = [
{file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"},
{file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"},
]
[[package]] [[package]]
name = "uri-template" name = "uri-template"
version = "1.3.0" version = "1.3.0"
@ -2386,5 +2516,5 @@ tests = ["coverage", "coveralls", "pytest", "pytest-cov", "pytest-depends", "pyt
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.11" python-versions = ">=3.11,<3.13"
content-hash = "7a4e1c3b66143e4f4e8392238051241f25274ebd597183ef64168055949074f4" content-hash = "0f2d9fc76cf3788fbdefc6f7b06afb7267c5fe2967970389907a5a9c4864334a"

View file

@ -11,7 +11,7 @@ packages = [
] ]
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = "^3.11" python = ">=3.11,<3.13"
pyyaml = "^6.0" pyyaml = "^6.0"
linkml-runtime = "^1.5.6" linkml-runtime = "^1.5.6"
nwb_schema_language = "^0.1.1" nwb_schema_language = "^0.1.1"
@ -30,6 +30,7 @@ pytest-cov = {version = "^4.1.0", optional = true}
coveralls = {version = "^3.3.1", optional = true} coveralls = {version = "^3.3.1", optional = true}
pytest-profiling = {version = "^1.7.0", optional = true} pytest-profiling = {version = "^1.7.0", optional = true}
pydantic-settings = "^2.0.3" pydantic-settings = "^2.0.3"
pandas = "^2.1.1"
[tool.poetry.extras] [tool.poetry.extras]
tests = [ tests = [

View file

@ -8,6 +8,15 @@ field more so that at each pass i can work through the items whose dependencies
have been solved from the bottom up. have been solved from the bottom up.
""" """
from typing import List
from nwb_linkml.types.df import DataFrame
class MyDf(DataFrame):
ints: List[int]
a = MyDf(ints=[1,2,3])
from nwb_linkml.io.hdf5 import HDF5IO, flatten_hdf from nwb_linkml.io.hdf5 import HDF5IO, flatten_hdf
import h5py import h5py
from typing import NamedTuple, Tuple, Optional from typing import NamedTuple, Tuple, Optional

View file

@ -1 +1,2 @@
from nwb_linkml.types.ndarray import NDArray from nwb_linkml.types.ndarray import NDArray
from nwb_linkml.types.df import DataFrame

View file

@ -0,0 +1,111 @@
"""
Pydantic models that behave like pandas dataframes
"""
import pdb
from typing import List, Any, get_origin, get_args, Union, Optional, Dict
from types import NoneType
import numpy as np
import pandas as pd
from pydantic import (
BaseModel,
model_serializer,
SerializerFunctionWrapHandler,
ConfigDict,
model_validator
)
class DataFrame(BaseModel, pd.DataFrame):
"""
Pydantic model root class that mimics a pandas dataframe.
Notes:
The synchronization between the underlying lists in the pydantic model
and the derived dataframe is partial, and at the moment unidirectional.
This class is primarily intended for reading from tables stored in
NWB files rather than being able to manipulate them.
The dataframe IS updated when new values are *assigned* to a field.
eg.::
MyModel.fieldval = [1,2,3]
But the dataframe is NOT updated when existing values are updated.
eg.::
MyModel.fieldval.append(4)
In that case you need to call :meth:`.update_df` manually.
Additionally, if the dataframe is modified, the underlying lists are NOT updated,
but when the model is dumped to a dictionary or serialized, the dataframe IS used,
so changes will be reflected then.
"""
_df: pd.DataFrame = None
model_config = ConfigDict(validate_assignment=True)
def __init__(self, **kwargs):
# pdb.set_trace()
super().__init__(**kwargs)
self._df = self.__make_df()
def __make_df(self) -> pd.DataFrame:
# make dict that can handle ragged arrays and NoneTypes
items = {k:v for k,v in self.__dict__.items() if k in self.model_fields}
df_dict = {k: (pd.Series(v) if isinstance(v, list) else pd.Series([v]))
for k,v in items.items()}
df = pd.DataFrame(df_dict)
# replace Nans with None
df = df.fillna(np.nan).replace([np.nan], [None])
return df
def update_df(self):
"""
Update the internal dataframe in the case that the model values are changed
in a way that we can't detect, like appending to one of the lists.
"""
self._df = self.__make_df()
def __getattr__(self, item: str):
"""
Mimic pandas dataframe and pydantic model behavior
"""
if item in ('df', '_df'):
return self.__pydantic_private__['_df']
elif item in self.model_fields.keys():
return self._df[item]
else:
try:
return object.__getattribute__(self._df, item)
except AttributeError:
return object.__getattribute__(self, item)
@model_validator(mode='after')
def recreate_df(self):
"""Remake DF when validating (eg. when updating values on assignment)"""
self.update_df()
@model_serializer(mode='wrap', when_used='always')
def serialize_model(self, nxt: SerializerFunctionWrapHandler) -> Dict[str, Any]:
"""
We don't handle values that are changed
"""
if self._df is None:
return nxt(self)
else:
out = self._df.to_dict('list')
# remove Nones
out = {
k: [inner_v for inner_v in v if inner_v is not None]
for k, v in out.items()
}
return nxt(self.__class__(**out))

View file

@ -0,0 +1,59 @@
import pytest
import pandas as pd
from pydantic import BaseModel, ValidationError
from typing import List, Union, Optional
from nwb_linkml.types import DataFrame
def test_df():
"""
Dataframe class should behave like both a pydantic model and a dataframe
"""
class MyDf(DataFrame):
ints: List[int]
strings: List[str]
multi: List[int | str]
opts: Optional[List[int]] = None
good_kwargs = {
'ints': [1,2,3],
'strings': ['a','b','c'],
'multi': [1,2,'a','d'],
'opts': []
}
bad_kwargs = {
'ints': ['a','b','c'],
'strings': [1,2,3],
'multi': 'd'
}
df = MyDf(**good_kwargs)
assert isinstance(df, BaseModel)
assert isinstance(df, pd.DataFrame)
with pytest.raises(ValidationError):
bad_df = MyDf(**bad_kwargs)
# can we do pydantic stuff
assert df.model_dump() == good_kwargs
# these throw when they fail
_ = df.model_dump_json()
_ = df.model_json_schema()
# can we do pandas stuff
assert df['ints'].sum() == 6
assert df.loc[2].to_list() == [3, 'c', 'a', None]
# lmao
# we don't include the model when dumping/doing the schema
assert 'df' not in df.model_json_schema()
assert '_df' not in df.model_json_schema()
# we update our dataframe when we assign
assert df.ints == good_kwargs['ints']
assert df['ints'].tolist()[0:3] == good_kwargs['ints']
df.ints = [1,2,3,4]
assert df.ints == [1,2,3,4]
assert (df['ints'] == pd.Series([1,2,3,4])).all()
df['ints'] = df['ints']._append(pd.Series(5))