From 57fa3d34a276114ccf6fb32c2ea1286444599cfa Mon Sep 17 00:00:00 2001
From: sneakers-the-rat <JLSaunders987@gmail.com>
Date: Sat, 23 Sep 2023 00:08:59 -0700
Subject: [PATCH] pandas dataframe mimic

---
 docs/_notes/todo.md                           |   3 +-
 nwb_linkml/poetry.lock                        | 134 +++++++++++++++++-
 nwb_linkml/pyproject.toml                     |   3 +-
 nwb_linkml/src/nwb_linkml/io/hdf5_scratch.py  |   9 ++
 nwb_linkml/src/nwb_linkml/types/__init__.py   |   3 +-
 nwb_linkml/src/nwb_linkml/types/df.py         | 111 +++++++++++++++
 nwb_linkml/tests/test_types/test_df.py        |  59 ++++++++
 .../{ndarray.py => test_ndarray.py}           |   0
 8 files changed, 317 insertions(+), 5 deletions(-)
 create mode 100644 nwb_linkml/src/nwb_linkml/types/df.py
 create mode 100644 nwb_linkml/tests/test_types/test_df.py
 rename nwb_linkml/tests/test_types/{ndarray.py => test_ndarray.py} (100%)

diff --git a/docs/_notes/todo.md b/docs/_notes/todo.md
index fad2cf1..019de15 100644
--- a/docs/_notes/todo.md
+++ b/docs/_notes/todo.md
@@ -2,4 +2,5 @@
 
 Stuff to keep track of that might have been manually overrided that needs to be fixed pre-release
 
-- Coerce all listlike things into lists if they are passed as single elements!
\ No newline at end of file
+- Coerce all listlike things into lists if they are passed as single elements!
+- Use [fsspec](https://filesystem-spec.readthedocs.io/en/latest/index.html) to interface with DANDI!
\ No newline at end of file
diff --git a/nwb_linkml/poetry.lock b/nwb_linkml/poetry.lock
index cbba7b4..9b624fa 100644
--- a/nwb_linkml/poetry.lock
+++ b/nwb_linkml/poetry.lock
@@ -1103,6 +1103,47 @@ files = [
     {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"},
 ]
 
+[[package]]
+name = "numpy"
+version = "1.26.0"
+description = "Fundamental package for array computing in Python"
+optional = false
+python-versions = "<3.13,>=3.9"
+files = [
+    {file = "numpy-1.26.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f8db2f125746e44dce707dd44d4f4efeea8d7e2b43aace3f8d1f235cfa2733dd"},
+    {file = "numpy-1.26.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0621f7daf973d34d18b4e4bafb210bbaf1ef5e0100b5fa750bd9cde84c7ac292"},
+    {file = "numpy-1.26.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51be5f8c349fdd1a5568e72713a21f518e7d6707bcf8503b528b88d33b57dc68"},
+    {file = "numpy-1.26.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:767254ad364991ccfc4d81b8152912e53e103ec192d1bb4ea6b1f5a7117040be"},
+    {file = "numpy-1.26.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:436c8e9a4bdeeee84e3e59614d38c3dbd3235838a877af8c211cfcac8a80b8d3"},
+    {file = "numpy-1.26.0-cp310-cp310-win32.whl", hash = "sha256:c2e698cb0c6dda9372ea98a0344245ee65bdc1c9dd939cceed6bb91256837896"},
+    {file = "numpy-1.26.0-cp310-cp310-win_amd64.whl", hash = "sha256:09aaee96c2cbdea95de76ecb8a586cb687d281c881f5f17bfc0fb7f5890f6b91"},
+    {file = "numpy-1.26.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:637c58b468a69869258b8ae26f4a4c6ff8abffd4a8334c830ffb63e0feefe99a"},
+    {file = "numpy-1.26.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:306545e234503a24fe9ae95ebf84d25cba1fdc27db971aa2d9f1ab6bba19a9dd"},
+    {file = "numpy-1.26.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c6adc33561bd1d46f81131d5352348350fc23df4d742bb246cdfca606ea1208"},
+    {file = "numpy-1.26.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e062aa24638bb5018b7841977c360d2f5917268d125c833a686b7cbabbec496c"},
+    {file = "numpy-1.26.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:546b7dd7e22f3c6861463bebb000646fa730e55df5ee4a0224408b5694cc6148"},
+    {file = "numpy-1.26.0-cp311-cp311-win32.whl", hash = "sha256:c0b45c8b65b79337dee5134d038346d30e109e9e2e9d43464a2970e5c0e93229"},
+    {file = "numpy-1.26.0-cp311-cp311-win_amd64.whl", hash = "sha256:eae430ecf5794cb7ae7fa3808740b015aa80747e5266153128ef055975a72b99"},
+    {file = "numpy-1.26.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:166b36197e9debc4e384e9c652ba60c0bacc216d0fc89e78f973a9760b503388"},
+    {file = "numpy-1.26.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f042f66d0b4ae6d48e70e28d487376204d3cbf43b84c03bac57e28dac6151581"},
+    {file = "numpy-1.26.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5e18e5b14a7560d8acf1c596688f4dfd19b4f2945b245a71e5af4ddb7422feb"},
+    {file = "numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f6bad22a791226d0a5c7c27a80a20e11cfe09ad5ef9084d4d3fc4a299cca505"},
+    {file = "numpy-1.26.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4acc65dd65da28060e206c8f27a573455ed724e6179941edb19f97e58161bb69"},
+    {file = "numpy-1.26.0-cp312-cp312-win32.whl", hash = "sha256:bb0d9a1aaf5f1cb7967320e80690a1d7ff69f1d47ebc5a9bea013e3a21faec95"},
+    {file = "numpy-1.26.0-cp312-cp312-win_amd64.whl", hash = "sha256:ee84ca3c58fe48b8ddafdeb1db87388dce2c3c3f701bf447b05e4cfcc3679112"},
+    {file = "numpy-1.26.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4a873a8180479bc829313e8d9798d5234dfacfc2e8a7ac188418189bb8eafbd2"},
+    {file = "numpy-1.26.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:914b28d3215e0c721dc75db3ad6d62f51f630cb0c277e6b3bcb39519bed10bd8"},
+    {file = "numpy-1.26.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c78a22e95182fb2e7874712433eaa610478a3caf86f28c621708d35fa4fd6e7f"},
+    {file = "numpy-1.26.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86f737708b366c36b76e953c46ba5827d8c27b7a8c9d0f471810728e5a2fe57c"},
+    {file = "numpy-1.26.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b44e6a09afc12952a7d2a58ca0a2429ee0d49a4f89d83a0a11052da696440e49"},
+    {file = "numpy-1.26.0-cp39-cp39-win32.whl", hash = "sha256:5671338034b820c8d58c81ad1dafc0ed5a00771a82fccc71d6438df00302094b"},
+    {file = "numpy-1.26.0-cp39-cp39-win_amd64.whl", hash = "sha256:020cdbee66ed46b671429c7265cf00d8ac91c046901c55684954c3958525dab2"},
+    {file = "numpy-1.26.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0792824ce2f7ea0c82ed2e4fecc29bb86bee0567a080dacaf2e0a01fe7654369"},
+    {file = "numpy-1.26.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d484292eaeb3e84a51432a94f53578689ffdea3f90e10c8b203a99be5af57d8"},
+    {file = "numpy-1.26.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:186ba67fad3c60dbe8a3abff3b67a91351100f2661c8e2a80364ae6279720299"},
+    {file = "numpy-1.26.0.tar.gz", hash = "sha256:f93fc78fe8bf15afe2b8d6b6499f1c73953169fad1e9a8dd086cdff3190e7fdf"},
+]
+
 [[package]]
 name = "nwb-schema-language"
 version = "0.1.1"
@@ -1143,6 +1184,73 @@ files = [
     {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"},
 ]
 
+[[package]]
+name = "pandas"
+version = "2.1.1"
+description = "Powerful data structures for data analysis, time series, and statistics"
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "pandas-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:58d997dbee0d4b64f3cb881a24f918b5f25dd64ddf31f467bb9b67ae4c63a1e4"},
+    {file = "pandas-2.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02304e11582c5d090e5a52aec726f31fe3f42895d6bfc1f28738f9b64b6f0614"},
+    {file = "pandas-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffa8f0966de2c22de408d0e322db2faed6f6e74265aa0856f3824813cf124363"},
+    {file = "pandas-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1f84c144dee086fe4f04a472b5cd51e680f061adf75c1ae4fc3a9275560f8f4"},
+    {file = "pandas-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:75ce97667d06d69396d72be074f0556698c7f662029322027c226fd7a26965cb"},
+    {file = "pandas-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:4c3f32fd7c4dccd035f71734df39231ac1a6ff95e8bdab8d891167197b7018d2"},
+    {file = "pandas-2.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9e2959720b70e106bb1d8b6eadd8ecd7c8e99ccdbe03ee03260877184bb2877d"},
+    {file = "pandas-2.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:25e8474a8eb258e391e30c288eecec565bfed3e026f312b0cbd709a63906b6f8"},
+    {file = "pandas-2.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8bd1685556f3374520466998929bade3076aeae77c3e67ada5ed2b90b4de7f0"},
+    {file = "pandas-2.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc3657869c7902810f32bd072f0740487f9e030c1a3ab03e0af093db35a9d14e"},
+    {file = "pandas-2.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:05674536bd477af36aa2effd4ec8f71b92234ce0cc174de34fd21e2ee99adbc2"},
+    {file = "pandas-2.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:b407381258a667df49d58a1b637be33e514b07f9285feb27769cedb3ab3d0b3a"},
+    {file = "pandas-2.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c747793c4e9dcece7bb20156179529898abf505fe32cb40c4052107a3c620b49"},
+    {file = "pandas-2.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3bcad1e6fb34b727b016775bea407311f7721db87e5b409e6542f4546a4951ea"},
+    {file = "pandas-2.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f5ec7740f9ccb90aec64edd71434711f58ee0ea7f5ed4ac48be11cfa9abf7317"},
+    {file = "pandas-2.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29deb61de5a8a93bdd033df328441a79fcf8dd3c12d5ed0b41a395eef9cd76f0"},
+    {file = "pandas-2.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4f99bebf19b7e03cf80a4e770a3e65eee9dd4e2679039f542d7c1ace7b7b1daa"},
+    {file = "pandas-2.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:84e7e910096416adec68075dc87b986ff202920fb8704e6d9c8c9897fe7332d6"},
+    {file = "pandas-2.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:366da7b0e540d1b908886d4feb3d951f2f1e572e655c1160f5fde28ad4abb750"},
+    {file = "pandas-2.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9e50e72b667415a816ac27dfcfe686dc5a0b02202e06196b943d54c4f9c7693e"},
+    {file = "pandas-2.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc1ab6a25da197f03ebe6d8fa17273126120874386b4ac11c1d687df288542dd"},
+    {file = "pandas-2.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0dbfea0dd3901ad4ce2306575c54348d98499c95be01b8d885a2737fe4d7a98"},
+    {file = "pandas-2.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0489b0e6aa3d907e909aef92975edae89b1ee1654db5eafb9be633b0124abe97"},
+    {file = "pandas-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:4cdb0fab0400c2cb46dafcf1a0fe084c8bb2480a1fa8d81e19d15e12e6d4ded2"},
+    {file = "pandas-2.1.1.tar.gz", hash = "sha256:fecb198dc389429be557cde50a2d46da8434a17fe37d7d41ff102e3987fd947b"},
+]
+
+[package.dependencies]
+numpy = [
+    {version = ">=1.23.2", markers = "python_version == \"3.11\""},
+    {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
+]
+python-dateutil = ">=2.8.2"
+pytz = ">=2020.1"
+tzdata = ">=2022.1"
+
+[package.extras]
+all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"]
+aws = ["s3fs (>=2022.05.0)"]
+clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"]
+compression = ["zstandard (>=0.17.0)"]
+computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"]
+consortium-standard = ["dataframe-api-compat (>=0.1.7)"]
+excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"]
+feather = ["pyarrow (>=7.0.0)"]
+fss = ["fsspec (>=2022.05.0)"]
+gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"]
+hdf5 = ["tables (>=3.7.0)"]
+html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"]
+mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"]
+output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"]
+parquet = ["pyarrow (>=7.0.0)"]
+performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"]
+plot = ["matplotlib (>=3.6.1)"]
+postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"]
+spss = ["pyreadstat (>=1.1.5)"]
+sql-other = ["SQLAlchemy (>=1.4.36)"]
+test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"]
+xml = ["lxml (>=4.8.0)"]
+
 [[package]]
 name = "parse"
 version = "1.19.1"
@@ -1626,6 +1734,17 @@ files = [
 [package.dependencies]
 sortedcontainers = "*"
 
+[[package]]
+name = "pytz"
+version = "2023.3.post1"
+description = "World timezone definitions, modern and historical"
+optional = false
+python-versions = "*"
+files = [
+    {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"},
+    {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"},
+]
+
 [[package]]
 name = "pyyaml"
 version = "6.0.1"
@@ -2179,6 +2298,17 @@ files = [
     {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"},
 ]
 
+[[package]]
+name = "tzdata"
+version = "2023.3"
+description = "Provider of IANA time zone data"
+optional = false
+python-versions = ">=2"
+files = [
+    {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"},
+    {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"},
+]
+
 [[package]]
 name = "uri-template"
 version = "1.3.0"
@@ -2386,5 +2516,5 @@ tests = ["coverage", "coveralls", "pytest", "pytest-cov", "pytest-depends", "pyt
 
 [metadata]
 lock-version = "2.0"
-python-versions = "^3.11"
-content-hash = "7a4e1c3b66143e4f4e8392238051241f25274ebd597183ef64168055949074f4"
+python-versions = ">=3.11,<3.13"
+content-hash = "0f2d9fc76cf3788fbdefc6f7b06afb7267c5fe2967970389907a5a9c4864334a"
diff --git a/nwb_linkml/pyproject.toml b/nwb_linkml/pyproject.toml
index f43701b..1f59cf5 100644
--- a/nwb_linkml/pyproject.toml
+++ b/nwb_linkml/pyproject.toml
@@ -11,7 +11,7 @@ packages = [
 ]
 
 [tool.poetry.dependencies]
-python = "^3.11"
+python = ">=3.11,<3.13"
 pyyaml = "^6.0"
 linkml-runtime = "^1.5.6"
 nwb_schema_language = "^0.1.1"
@@ -30,6 +30,7 @@ pytest-cov = {version = "^4.1.0", optional = true}
 coveralls = {version = "^3.3.1", optional = true}
 pytest-profiling = {version = "^1.7.0", optional = true}
 pydantic-settings = "^2.0.3"
+pandas = "^2.1.1"
 
 [tool.poetry.extras]
 tests = [
diff --git a/nwb_linkml/src/nwb_linkml/io/hdf5_scratch.py b/nwb_linkml/src/nwb_linkml/io/hdf5_scratch.py
index ed6c2d0..5580661 100644
--- a/nwb_linkml/src/nwb_linkml/io/hdf5_scratch.py
+++ b/nwb_linkml/src/nwb_linkml/io/hdf5_scratch.py
@@ -8,6 +8,15 @@ field more so that at each pass i can work through the items whose dependencies
 have been solved from the bottom up.
 """
 
+from typing import List
+from nwb_linkml.types.df import DataFrame
+
+# Scratch example: minimal DataFrame model with one integer-list column, instantiated at import time
+class MyDf(DataFrame):
+    ints: List[int]
+
+a = MyDf(ints=[1,2,3])
+
 from nwb_linkml.io.hdf5 import HDF5IO, flatten_hdf
 import h5py
 from typing import NamedTuple, Tuple, Optional
diff --git a/nwb_linkml/src/nwb_linkml/types/__init__.py b/nwb_linkml/src/nwb_linkml/types/__init__.py
index 801a327..b0523c0 100644
--- a/nwb_linkml/src/nwb_linkml/types/__init__.py
+++ b/nwb_linkml/src/nwb_linkml/types/__init__.py
@@ -1 +1,2 @@
-from nwb_linkml.types.ndarray import NDArray
\ No newline at end of file
+from nwb_linkml.types.ndarray import NDArray
+from nwb_linkml.types.df import DataFrame
\ No newline at end of file
diff --git a/nwb_linkml/src/nwb_linkml/types/df.py b/nwb_linkml/src/nwb_linkml/types/df.py
new file mode 100644
index 0000000..8a5f47a
--- /dev/null
+++ b/nwb_linkml/src/nwb_linkml/types/df.py
@@ -0,0 +1,111 @@
+"""
+Pydantic models that behave like pandas dataframes
+"""
+import pdb
+from typing import List, Any, get_origin, get_args, Union, Optional, Dict
+from types import NoneType
+
+import numpy as np
+import pandas as pd
+from pydantic import (
+    BaseModel,
+    model_serializer,
+    SerializerFunctionWrapHandler,
+    ConfigDict,
+    model_validator
+)
+
+class DataFrame(BaseModel, pd.DataFrame):
+    """
+    Pydantic model root class that mimics a pandas dataframe.
+
+    Notes:
+
+        The synchronization between the underlying lists in the pydantic model
+        and the derived dataframe is partial, and at the moment unidirectional.
+        This class is primarily intended for reading from tables stored in
+        NWB files rather than being able to manipulate them.
+
+        The dataframe IS updated when new values are *assigned* to a field.
+
+        eg.::
+
+            MyModel.fieldval = [1,2,3]
+
+        But the dataframe is NOT updated when existing values are updated.
+
+        eg.::
+
+            MyModel.fieldval.append(4)
+
+        In that case you need to call :meth:`.update_df` manually.
+
+        Additionally, if the dataframe is modified, the underlying lists are NOT updated,
+        but when the model is dumped to a dictionary or serialized, the dataframe IS used,
+        so changes will be reflected then.
+    """
+
+    _df: pd.DataFrame = None
+    model_config = ConfigDict(validate_assignment=True)
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+        self._df = self.__make_df()
+
+    def __make_df(self) -> pd.DataFrame:
+        """Build a dataframe with one column per model field from the current field values."""
+        # make dict that can handle ragged arrays and NoneTypes
+        items = {k:v for k,v in self.__dict__.items() if k in self.model_fields}
+
+        df_dict = {k: (pd.Series(v) if isinstance(v, list) else pd.Series([v]))
+                   for k,v in items.items()}
+        df = pd.DataFrame(df_dict)
+        # replace Nans with None
+        df = df.fillna(np.nan).replace([np.nan], [None])
+        return df
+
+    def update_df(self):
+        """
+        Update the internal dataframe in the case that the model values are changed
+        in a way that we can't detect, like appending to one of the lists.
+        """
+        self._df = self.__make_df()
+
+    def __getattr__(self, item: str):
+        """
+        Mimic pandas dataframe and pydantic model behavior
+        """
+        if item in ('df', '_df'):
+            return self.__pydantic_private__['_df']
+        elif item in self.model_fields.keys():
+            return self._df[item]
+        else:
+            try:
+                return object.__getattribute__(self._df, item)
+            except AttributeError:
+                return object.__getattribute__(self, item)
+
+    @model_validator(mode='after')
+    def recreate_df(self):
+        """Remake DF when validating (eg. when updating values on assignment)"""
+        self.update_df()
+        return self  # 'after' model validators must hand back the validated instance
+
+    @model_serializer(mode='wrap', when_used='always')
+    def serialize_model(self, nxt: SerializerFunctionWrapHandler) -> Dict[str, Any]:
+        """
+        Serialize from the dataframe when it exists, so that edits made to the dataframe
+        are included in the output; ``None`` entries are dropped from every column first.
+        """
+        if self._df is None:
+            return nxt(self)
+        else:
+            out = self._df.to_dict('list')
+            # remove Nones
+            out = {
+                k: [inner_v for inner_v in v if inner_v is not None]
+                for k, v in out.items()
+            }
+
+            return nxt(self.__class__(**out))
diff --git a/nwb_linkml/tests/test_types/test_df.py b/nwb_linkml/tests/test_types/test_df.py
new file mode 100644
index 0000000..d4bef18
--- /dev/null
+++ b/nwb_linkml/tests/test_types/test_df.py
@@ -0,0 +1,59 @@
+import pytest
+
+import pandas as pd
+from pydantic import BaseModel, ValidationError
+from typing import List, Union, Optional
+from nwb_linkml.types import DataFrame
+
+def test_df():
+    """
+    Dataframe class should behave like both a pydantic model and a dataframe
+    """
+
+    class MyDf(DataFrame):
+        ints: List[int]
+        strings: List[str]
+        multi:  List[int | str]
+        opts: Optional[List[int]] = None
+
+    good_kwargs = {
+        'ints': [1,2,3],
+        'strings': ['a','b','c'],
+        'multi': [1,2,'a','d'],
+        'opts': []
+    }
+    bad_kwargs = {
+        'ints': ['a','b','c'],
+        'strings': [1,2,3],
+        'multi': 'd'
+    }
+    df = MyDf(**good_kwargs)
+    assert isinstance(df, BaseModel)
+    assert isinstance(df, pd.DataFrame)
+    with pytest.raises(ValidationError):
+        bad_df = MyDf(**bad_kwargs)
+
+    # can we do pydantic stuff
+    assert df.model_dump() == good_kwargs
+    # these throw when they fail
+    _ = df.model_dump_json()
+    _ = df.model_json_schema()
+
+    # can we do pandas stuff
+    assert df['ints'].sum() == 6
+    assert df.loc[2].to_list() == [3, 'c', 'a', None]
+    # lmao
+
+    # we don't include the dataframe when dumping/doing the schema
+    assert 'df' not in df.model_json_schema()
+    assert '_df' not in df.model_json_schema()
+
+    # we update our dataframe when we assign
+    assert df.ints == good_kwargs['ints']
+    assert df['ints'].tolist()[0:3] == good_kwargs['ints']
+    df.ints = [1,2,3,4]
+    assert df.ints == [1,2,3,4]
+    assert (df['ints'] == pd.Series([1,2,3,4])).all()
+
+    df['ints'] = df['ints']._append(pd.Series(5))
+
diff --git a/nwb_linkml/tests/test_types/ndarray.py b/nwb_linkml/tests/test_types/test_ndarray.py
similarity index 100%
rename from nwb_linkml/tests/test_types/ndarray.py
rename to nwb_linkml/tests/test_types/test_ndarray.py