mirror of
https://github.com/p2p-ld/nwb-linkml.git
synced 2025-01-09 13:44:27 +00:00
pandas dataframe mimic
This commit is contained in:
parent
aac0c7abdd
commit
57fa3d34a2
8 changed files with 317 additions and 5 deletions
|
@ -2,4 +2,5 @@
|
|||
|
||||
Stuff to keep track of that might have been manually overridden and needs to be fixed pre-release
|
||||
|
||||
- Coerce all listlike things into lists if they are passed as single elements!
|
||||
- Coerce all listlike things into lists if they are passed as single elements!
|
||||
- Use [fsspec](https://filesystem-spec.readthedocs.io/en/latest/index.html) to interface with DANDI!
|
134
nwb_linkml/poetry.lock
generated
134
nwb_linkml/poetry.lock
generated
|
@ -1103,6 +1103,47 @@ files = [
|
|||
{file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "numpy"
|
||||
version = "1.26.0"
|
||||
description = "Fundamental package for array computing in Python"
|
||||
optional = false
|
||||
python-versions = "<3.13,>=3.9"
|
||||
files = [
|
||||
{file = "numpy-1.26.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f8db2f125746e44dce707dd44d4f4efeea8d7e2b43aace3f8d1f235cfa2733dd"},
|
||||
{file = "numpy-1.26.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0621f7daf973d34d18b4e4bafb210bbaf1ef5e0100b5fa750bd9cde84c7ac292"},
|
||||
{file = "numpy-1.26.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51be5f8c349fdd1a5568e72713a21f518e7d6707bcf8503b528b88d33b57dc68"},
|
||||
{file = "numpy-1.26.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:767254ad364991ccfc4d81b8152912e53e103ec192d1bb4ea6b1f5a7117040be"},
|
||||
{file = "numpy-1.26.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:436c8e9a4bdeeee84e3e59614d38c3dbd3235838a877af8c211cfcac8a80b8d3"},
|
||||
{file = "numpy-1.26.0-cp310-cp310-win32.whl", hash = "sha256:c2e698cb0c6dda9372ea98a0344245ee65bdc1c9dd939cceed6bb91256837896"},
|
||||
{file = "numpy-1.26.0-cp310-cp310-win_amd64.whl", hash = "sha256:09aaee96c2cbdea95de76ecb8a586cb687d281c881f5f17bfc0fb7f5890f6b91"},
|
||||
{file = "numpy-1.26.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:637c58b468a69869258b8ae26f4a4c6ff8abffd4a8334c830ffb63e0feefe99a"},
|
||||
{file = "numpy-1.26.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:306545e234503a24fe9ae95ebf84d25cba1fdc27db971aa2d9f1ab6bba19a9dd"},
|
||||
{file = "numpy-1.26.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c6adc33561bd1d46f81131d5352348350fc23df4d742bb246cdfca606ea1208"},
|
||||
{file = "numpy-1.26.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e062aa24638bb5018b7841977c360d2f5917268d125c833a686b7cbabbec496c"},
|
||||
{file = "numpy-1.26.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:546b7dd7e22f3c6861463bebb000646fa730e55df5ee4a0224408b5694cc6148"},
|
||||
{file = "numpy-1.26.0-cp311-cp311-win32.whl", hash = "sha256:c0b45c8b65b79337dee5134d038346d30e109e9e2e9d43464a2970e5c0e93229"},
|
||||
{file = "numpy-1.26.0-cp311-cp311-win_amd64.whl", hash = "sha256:eae430ecf5794cb7ae7fa3808740b015aa80747e5266153128ef055975a72b99"},
|
||||
{file = "numpy-1.26.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:166b36197e9debc4e384e9c652ba60c0bacc216d0fc89e78f973a9760b503388"},
|
||||
{file = "numpy-1.26.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f042f66d0b4ae6d48e70e28d487376204d3cbf43b84c03bac57e28dac6151581"},
|
||||
{file = "numpy-1.26.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5e18e5b14a7560d8acf1c596688f4dfd19b4f2945b245a71e5af4ddb7422feb"},
|
||||
{file = "numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f6bad22a791226d0a5c7c27a80a20e11cfe09ad5ef9084d4d3fc4a299cca505"},
|
||||
{file = "numpy-1.26.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4acc65dd65da28060e206c8f27a573455ed724e6179941edb19f97e58161bb69"},
|
||||
{file = "numpy-1.26.0-cp312-cp312-win32.whl", hash = "sha256:bb0d9a1aaf5f1cb7967320e80690a1d7ff69f1d47ebc5a9bea013e3a21faec95"},
|
||||
{file = "numpy-1.26.0-cp312-cp312-win_amd64.whl", hash = "sha256:ee84ca3c58fe48b8ddafdeb1db87388dce2c3c3f701bf447b05e4cfcc3679112"},
|
||||
{file = "numpy-1.26.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4a873a8180479bc829313e8d9798d5234dfacfc2e8a7ac188418189bb8eafbd2"},
|
||||
{file = "numpy-1.26.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:914b28d3215e0c721dc75db3ad6d62f51f630cb0c277e6b3bcb39519bed10bd8"},
|
||||
{file = "numpy-1.26.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c78a22e95182fb2e7874712433eaa610478a3caf86f28c621708d35fa4fd6e7f"},
|
||||
{file = "numpy-1.26.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86f737708b366c36b76e953c46ba5827d8c27b7a8c9d0f471810728e5a2fe57c"},
|
||||
{file = "numpy-1.26.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b44e6a09afc12952a7d2a58ca0a2429ee0d49a4f89d83a0a11052da696440e49"},
|
||||
{file = "numpy-1.26.0-cp39-cp39-win32.whl", hash = "sha256:5671338034b820c8d58c81ad1dafc0ed5a00771a82fccc71d6438df00302094b"},
|
||||
{file = "numpy-1.26.0-cp39-cp39-win_amd64.whl", hash = "sha256:020cdbee66ed46b671429c7265cf00d8ac91c046901c55684954c3958525dab2"},
|
||||
{file = "numpy-1.26.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0792824ce2f7ea0c82ed2e4fecc29bb86bee0567a080dacaf2e0a01fe7654369"},
|
||||
{file = "numpy-1.26.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d484292eaeb3e84a51432a94f53578689ffdea3f90e10c8b203a99be5af57d8"},
|
||||
{file = "numpy-1.26.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:186ba67fad3c60dbe8a3abff3b67a91351100f2661c8e2a80364ae6279720299"},
|
||||
{file = "numpy-1.26.0.tar.gz", hash = "sha256:f93fc78fe8bf15afe2b8d6b6499f1c73953169fad1e9a8dd086cdff3190e7fdf"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nwb-schema-language"
|
||||
version = "0.1.1"
|
||||
|
@ -1143,6 +1184,73 @@ files = [
|
|||
{file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pandas"
|
||||
version = "2.1.1"
|
||||
description = "Powerful data structures for data analysis, time series, and statistics"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
files = [
|
||||
{file = "pandas-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:58d997dbee0d4b64f3cb881a24f918b5f25dd64ddf31f467bb9b67ae4c63a1e4"},
|
||||
{file = "pandas-2.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02304e11582c5d090e5a52aec726f31fe3f42895d6bfc1f28738f9b64b6f0614"},
|
||||
{file = "pandas-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffa8f0966de2c22de408d0e322db2faed6f6e74265aa0856f3824813cf124363"},
|
||||
{file = "pandas-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1f84c144dee086fe4f04a472b5cd51e680f061adf75c1ae4fc3a9275560f8f4"},
|
||||
{file = "pandas-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:75ce97667d06d69396d72be074f0556698c7f662029322027c226fd7a26965cb"},
|
||||
{file = "pandas-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:4c3f32fd7c4dccd035f71734df39231ac1a6ff95e8bdab8d891167197b7018d2"},
|
||||
{file = "pandas-2.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9e2959720b70e106bb1d8b6eadd8ecd7c8e99ccdbe03ee03260877184bb2877d"},
|
||||
{file = "pandas-2.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:25e8474a8eb258e391e30c288eecec565bfed3e026f312b0cbd709a63906b6f8"},
|
||||
{file = "pandas-2.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8bd1685556f3374520466998929bade3076aeae77c3e67ada5ed2b90b4de7f0"},
|
||||
{file = "pandas-2.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc3657869c7902810f32bd072f0740487f9e030c1a3ab03e0af093db35a9d14e"},
|
||||
{file = "pandas-2.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:05674536bd477af36aa2effd4ec8f71b92234ce0cc174de34fd21e2ee99adbc2"},
|
||||
{file = "pandas-2.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:b407381258a667df49d58a1b637be33e514b07f9285feb27769cedb3ab3d0b3a"},
|
||||
{file = "pandas-2.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c747793c4e9dcece7bb20156179529898abf505fe32cb40c4052107a3c620b49"},
|
||||
{file = "pandas-2.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3bcad1e6fb34b727b016775bea407311f7721db87e5b409e6542f4546a4951ea"},
|
||||
{file = "pandas-2.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f5ec7740f9ccb90aec64edd71434711f58ee0ea7f5ed4ac48be11cfa9abf7317"},
|
||||
{file = "pandas-2.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29deb61de5a8a93bdd033df328441a79fcf8dd3c12d5ed0b41a395eef9cd76f0"},
|
||||
{file = "pandas-2.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4f99bebf19b7e03cf80a4e770a3e65eee9dd4e2679039f542d7c1ace7b7b1daa"},
|
||||
{file = "pandas-2.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:84e7e910096416adec68075dc87b986ff202920fb8704e6d9c8c9897fe7332d6"},
|
||||
{file = "pandas-2.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:366da7b0e540d1b908886d4feb3d951f2f1e572e655c1160f5fde28ad4abb750"},
|
||||
{file = "pandas-2.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9e50e72b667415a816ac27dfcfe686dc5a0b02202e06196b943d54c4f9c7693e"},
|
||||
{file = "pandas-2.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc1ab6a25da197f03ebe6d8fa17273126120874386b4ac11c1d687df288542dd"},
|
||||
{file = "pandas-2.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0dbfea0dd3901ad4ce2306575c54348d98499c95be01b8d885a2737fe4d7a98"},
|
||||
{file = "pandas-2.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0489b0e6aa3d907e909aef92975edae89b1ee1654db5eafb9be633b0124abe97"},
|
||||
{file = "pandas-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:4cdb0fab0400c2cb46dafcf1a0fe084c8bb2480a1fa8d81e19d15e12e6d4ded2"},
|
||||
{file = "pandas-2.1.1.tar.gz", hash = "sha256:fecb198dc389429be557cde50a2d46da8434a17fe37d7d41ff102e3987fd947b"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
numpy = [
|
||||
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
|
||||
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
|
||||
]
|
||||
python-dateutil = ">=2.8.2"
|
||||
pytz = ">=2020.1"
|
||||
tzdata = ">=2022.1"
|
||||
|
||||
[package.extras]
|
||||
all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"]
|
||||
aws = ["s3fs (>=2022.05.0)"]
|
||||
clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"]
|
||||
compression = ["zstandard (>=0.17.0)"]
|
||||
computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"]
|
||||
consortium-standard = ["dataframe-api-compat (>=0.1.7)"]
|
||||
excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"]
|
||||
feather = ["pyarrow (>=7.0.0)"]
|
||||
fss = ["fsspec (>=2022.05.0)"]
|
||||
gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"]
|
||||
hdf5 = ["tables (>=3.7.0)"]
|
||||
html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"]
|
||||
mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"]
|
||||
output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"]
|
||||
parquet = ["pyarrow (>=7.0.0)"]
|
||||
performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"]
|
||||
plot = ["matplotlib (>=3.6.1)"]
|
||||
postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"]
|
||||
spss = ["pyreadstat (>=1.1.5)"]
|
||||
sql-other = ["SQLAlchemy (>=1.4.36)"]
|
||||
test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"]
|
||||
xml = ["lxml (>=4.8.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "parse"
|
||||
version = "1.19.1"
|
||||
|
@ -1626,6 +1734,17 @@ files = [
|
|||
[package.dependencies]
|
||||
sortedcontainers = "*"
|
||||
|
||||
[[package]]
|
||||
name = "pytz"
|
||||
version = "2023.3.post1"
|
||||
description = "World timezone definitions, modern and historical"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"},
|
||||
{file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyyaml"
|
||||
version = "6.0.1"
|
||||
|
@ -2179,6 +2298,17 @@ files = [
|
|||
{file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tzdata"
|
||||
version = "2023.3"
|
||||
description = "Provider of IANA time zone data"
|
||||
optional = false
|
||||
python-versions = ">=2"
|
||||
files = [
|
||||
{file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"},
|
||||
{file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "uri-template"
|
||||
version = "1.3.0"
|
||||
|
@ -2386,5 +2516,5 @@ tests = ["coverage", "coveralls", "pytest", "pytest-cov", "pytest-depends", "pyt
|
|||
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.11"
|
||||
content-hash = "7a4e1c3b66143e4f4e8392238051241f25274ebd597183ef64168055949074f4"
|
||||
python-versions = ">=3.11,<3.13"
|
||||
content-hash = "0f2d9fc76cf3788fbdefc6f7b06afb7267c5fe2967970389907a5a9c4864334a"
|
||||
|
|
|
@ -11,7 +11,7 @@ packages = [
|
|||
]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.11"
|
||||
python = ">=3.11,<3.13"
|
||||
pyyaml = "^6.0"
|
||||
linkml-runtime = "^1.5.6"
|
||||
nwb_schema_language = "^0.1.1"
|
||||
|
@ -30,6 +30,7 @@ pytest-cov = {version = "^4.1.0", optional = true}
|
|||
coveralls = {version = "^3.3.1", optional = true}
|
||||
pytest-profiling = {version = "^1.7.0", optional = true}
|
||||
pydantic-settings = "^2.0.3"
|
||||
pandas = "^2.1.1"
|
||||
|
||||
[tool.poetry.extras]
|
||||
tests = [
|
||||
|
|
|
@ -8,6 +8,15 @@ field more so that at each pass i can work through the items whose dependencies
|
|||
have been solved from the bottom up.
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
from nwb_linkml.types.df import DataFrame
|
||||
|
||||
# Scratch model: a DataFrame subclass with a single list-of-int column.
class MyDf(DataFrame):
    ints: List[int]


# Instantiate it once at import time with three values.
a = MyDf(
    ints=[1, 2, 3],
)
|
||||
|
||||
|
||||
from nwb_linkml.io.hdf5 import HDF5IO, flatten_hdf
|
||||
import h5py
|
||||
from typing import NamedTuple, Tuple, Optional
|
||||
|
|
|
@ -1 +1,2 @@
|
|||
from nwb_linkml.types.ndarray import NDArray
|
||||
from nwb_linkml.types.ndarray import NDArray
|
||||
from nwb_linkml.types.df import DataFrame
|
111
nwb_linkml/src/nwb_linkml/types/df.py
Normal file
111
nwb_linkml/src/nwb_linkml/types/df.py
Normal file
|
@ -0,0 +1,111 @@
|
|||
"""
|
||||
Pydantic models that behave like pandas dataframes
|
||||
"""
|
||||
import pdb
|
||||
from typing import List, Any, get_origin, get_args, Union, Optional, Dict
|
||||
from types import NoneType
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pydantic import (
|
||||
BaseModel,
|
||||
model_serializer,
|
||||
SerializerFunctionWrapHandler,
|
||||
ConfigDict,
|
||||
model_validator
|
||||
)
|
||||
|
||||
class DataFrame(BaseModel, pd.DataFrame):
    """
    Pydantic model root class that mimics a pandas dataframe.

    Notes:

        The synchronization between the underlying lists in the pydantic model
        and the derived dataframe is partial, and at the moment unidirectional.
        This class is primarily intended for reading from tables stored in
        NWB files rather than being able to manipulate them.

        The dataframe IS updated when new values are *assigned* to a field.

        eg.::

            MyModel.fieldval = [1,2,3]

        But the dataframe is NOT updated when existing values are updated.

        eg.::

            MyModel.fieldval.append(4)

        In that case you need to call :meth:`.update_df` manually.

        Additionally, if the dataframe is modified, the underlying lists are NOT updated,
        but when the model is dumped to a dictionary or serialized, the dataframe IS used,
        so changes will be reflected then.

    """

    # Private (non-field) attribute holding the dataframe derived from the
    # model fields; it stays ``None`` until the dataframe is first built.
    _df: Optional[pd.DataFrame] = None
    model_config = ConfigDict(validate_assignment=True)

    def __init__(self, **kwargs):
        """
        Validate ``kwargs`` as model fields, then build the internal dataframe.
        """
        super().__init__(**kwargs)

        self._df = self.__make_df()

    def __make_df(self) -> pd.DataFrame:
        """
        Build a dataframe with one column per model field.

        List values become a :class:`pandas.Series`; any other value is
        wrapped in a one-element Series, so ragged columns and ``None``
        values can share one frame.

        Returns:
            The new dataframe, with NaN entries replaced by ``None``.
        """
        # make dict that can handle ragged arrays and NoneTypes
        items = {k: v for k, v in self.__dict__.items() if k in self.model_fields}

        df_dict = {
            k: (pd.Series(v) if isinstance(v, list) else pd.Series([v]))
            for k, v in items.items()
        }
        df = pd.DataFrame(df_dict)
        # replace Nans with None
        df = df.fillna(np.nan).replace([np.nan], [None])
        return df

    def update_df(self):
        """
        Update the internal dataframe in the case that the model values are changed
        in a way that we can't detect, like appending to one of the lists.
        """
        self._df = self.__make_df()

    def __getattr__(self, item: str):
        """
        Mimic pandas dataframe and pydantic model behavior.

        Only reached when normal attribute lookup fails. ``df`` / ``_df``
        return the internal dataframe, field names return the matching
        dataframe column, and anything else is looked up on the internal
        dataframe first and on this object second.
        """
        if item in ('df', '_df'):
            return self.__pydantic_private__['_df']
        elif item in self.model_fields:
            return self._df[item]
        else:
            try:
                return object.__getattribute__(self._df, item)
            except AttributeError:
                # Re-raises AttributeError from this object, not from the dataframe
                return object.__getattribute__(self, item)

    @model_validator(mode='after')
    def recreate_df(self):
        """Remake DF when validating (eg. when updating values on assignment)"""
        self.update_df()
        # An 'after' model validator must hand back the validated instance;
        # its return value is what validation (eg. ``model_validate``) yields.
        return self

    @model_serializer(mode='wrap', when_used='always')
    def serialize_model(self, nxt: SerializerFunctionWrapHandler) -> Dict[str, Any]:
        """
        Serialize from the internal dataframe rather than the field lists.

        If no dataframe has been built yet, the model is serialized as-is.
        Otherwise the dataframe columns are dumped to lists, ``None`` entries
        are dropped, and a fresh instance built from those lists is passed to
        the wrapped serializer.

        Args:
            nxt: The wrapped serializer handler.

        Returns:
            The serialized model as a dictionary.
        """
        if self._df is None:
            return nxt(self)
        else:
            out = self._df.to_dict('list')
            # remove Nones
            out = {
                k: [inner_v for inner_v in v if inner_v is not None]
                for k, v in out.items()
            }

            # Building a new instance re-runs validation on the dataframe values
            return nxt(self.__class__(**out))
|
59
nwb_linkml/tests/test_types/test_df.py
Normal file
59
nwb_linkml/tests/test_types/test_df.py
Normal file
|
@ -0,0 +1,59 @@
|
|||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pydantic import BaseModel, ValidationError
|
||||
from typing import List, Union, Optional
|
||||
from nwb_linkml.types import DataFrame
|
||||
|
||||
def test_df():
    """
    Dataframe class should behave like both a pydantic model and a dataframe
    """

    class MyDf(DataFrame):
        ints: List[int]
        strings: List[str]
        multi: List[int | str]
        opts: Optional[List[int]] = None

    good_kwargs = {
        'ints': [1,2,3],
        'strings': ['a','b','c'],
        'multi': [1,2,'a','d'],
        'opts': []
    }
    bad_kwargs = {
        'ints': ['a','b','c'],
        'strings': [1,2,3],
        'multi': 'd'
    }
    df = MyDf(**good_kwargs)
    assert isinstance(df, BaseModel)
    assert isinstance(df, pd.DataFrame)
    with pytest.raises(ValidationError):
        MyDf(**bad_kwargs)

    # can we do pydantic stuff
    assert df.model_dump() == good_kwargs
    # these throw when they fail
    _ = df.model_dump_json()
    schema = df.model_json_schema()

    # can we do pandas stuff
    assert df['ints'].sum() == 6
    assert df.loc[2].to_list() == [3, 'c', 'a', None]
    # lmao

    # we don't include the dataframe when dumping/doing the schema.
    # Field names live under 'properties'; the top-level schema keys are
    # things like 'title' and 'type', so checking those proves nothing.
    properties = schema['properties']
    assert 'ints' in properties
    assert 'df' not in properties
    assert '_df' not in properties

    # we update our dataframe when we assign
    assert df.ints == good_kwargs['ints']
    assert df['ints'].tolist()[0:3] == good_kwargs['ints']
    df.ints = [1,2,3,4]
    assert df.ints == [1,2,3,4]
    assert (df['ints'] == pd.Series([1,2,3,4])).all()

    # NOTE(review): uses the private pandas method ``Series._append`` and
    # nothing asserts on the result -- confirm what this is meant to check.
    df['ints'] = df['ints']._append(pd.Series(5))
|
||||
|
Loading…
Reference in a new issue