From 111965aacefc854324b9d7bc7aa4f15e3258ee6e Mon Sep 17 00:00:00 2001
From: sneakers-the-rat <JLSaunders987@gmail.com>
Date: Fri, 18 Aug 2023 22:09:28 -0700
Subject: [PATCH] working schema language import

---
 nwb-schema-language/Makefile                  |   7 +-
 .../datamodel/nwb_schema_pydantic.py          |  14 +--
 .../src/nwb_schema_language/patches.py        |  38 +++++--
 .../schema/nwb_schema_language.yaml           |   6 +-
 nwb_linkml/__init__.py                        |   1 +
 nwb_linkml/io.py                              | 101 ++++++++++++++++--
 nwb_linkml/map.py                             |  67 ++++++++++++
 nwb_linkml/maps/__init__.py                   |   2 +
 nwb_linkml/maps/preload.py                    |  22 ++++
 poetry.lock                                   |  69 +++++++++++-
 pyproject.toml                                |   1 +
 11 files changed, 301 insertions(+), 27 deletions(-)
 create mode 100644 nwb_linkml/map.py
 create mode 100644 nwb_linkml/maps/preload.py

diff --git a/nwb-schema-language/Makefile b/nwb-schema-language/Makefile
index 595e8e2..9fa7103 100644
--- a/nwb-schema-language/Makefile
+++ b/nwb-schema-language/Makefile
@@ -108,7 +108,12 @@ gen-project: $(PYMODEL)
 
 gen-pydantic: $(PYMODEL)
 	$(RUN) gen-pydantic $(SOURCE_SCHEMA_PATH) --pydantic_version 1 > $(PYMODEL)/nwb_schema_pydantic.py
-	$(RUN) run_patches --phase post_generation
+	$(RUN) run_patches --phase post_generation_pydantic
+
+gen-pydantic-test: $(PYMODEL)
+	$(RUN) gen-pydantic src/nwb_schema_language/schema/test_multival.yml --pydantic_version 1 > $(PYMODEL)/test_multival_pydantic.py
+#	$(RUN) run_patches --phase post_generation_pydantic
+
 
 test: test-schema test-python test-examples
 
diff --git a/nwb-schema-language/src/nwb_schema_language/datamodel/nwb_schema_pydantic.py b/nwb-schema-language/src/nwb_schema_language/datamodel/nwb_schema_pydantic.py
index e7c0d2d..c986b47 100644
--- a/nwb-schema-language/src/nwb_schema_language/datamodel/nwb_schema_pydantic.py
+++ b/nwb-schema-language/src/nwb_schema_language/datamodel/nwb_schema_pydantic.py
@@ -168,7 +168,7 @@ class CompoundDtype(ConfiguredBaseModel):
 
 class DtypeMixin(ConfiguredBaseModel):
     
-    dtype: Optional[List[Union[CompoundDtype, FlatDtype, ReferenceDtype]]] = Field(default_factory=list)
+    dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(default_factory=list)
     
 
 class Attribute(DtypeMixin):
@@ -180,7 +180,7 @@ class Attribute(DtypeMixin):
     default_value: Optional[Any] = Field(None, description="""Optional default value for variable-valued attributes.""")
     doc: str = Field(..., description="""Description of corresponding object.""")
     required: Optional[bool] = Field(True, description="""Optional boolean key describing whether the attribute is required. Default value is True.""")
-    dtype: Optional[List[Union[CompoundDtype, FlatDtype, ReferenceDtype]]] = Field(default_factory=list)
+    dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(default_factory=list)
     
 
 class NamingMixin(ConfiguredBaseModel):
@@ -192,8 +192,8 @@ class NamingMixin(ConfiguredBaseModel):
 
 class Group(NamingMixin):
     
-    neurodata_type_def: Optional[str] = Field(None)
-    neurodata_type_inc: Optional[str] = Field(None)
+    neurodata_type_def: Optional[str] = Field(None, description="""Used alongside neurodata_type_inc to indicate inheritance, naming, and mixins""")
+    neurodata_type_inc: Optional[str] = Field(None, description="""Used alongside neurodata_type_def to indicate inheritance, naming, and mixins""")
     name: Optional[str] = Field(None)
     default_name: Optional[str] = Field(None)
     doc: str = Field(..., description="""Description of corresponding object.""")
@@ -207,8 +207,8 @@ class Group(NamingMixin):
 
 class Dataset(NamingMixin, DtypeMixin):
     
-    neurodata_type_def: Optional[str] = Field(None)
-    neurodata_type_inc: Optional[str] = Field(None)
+    neurodata_type_def: Optional[str] = Field(None, description="""Used alongside neurodata_type_inc to indicate inheritance, naming, and mixins""")
+    neurodata_type_inc: Optional[str] = Field(None, description="""Used alongside neurodata_type_def to indicate inheritance, naming, and mixins""")
     name: Optional[str] = Field(None)
     default_name: Optional[str] = Field(None)
     dims: Optional[List[Union[Any, str]]] = Field(default_factory=list)
@@ -219,7 +219,7 @@ class Dataset(NamingMixin, DtypeMixin):
     quantity: Optional[Union[QuantityEnum, int]] = Field(1)
     linkable: Optional[bool] = Field(None)
     attributes: Optional[List[Attribute]] = Field(default_factory=list)
-    dtype: Optional[List[Union[CompoundDtype, FlatDtype, ReferenceDtype]]] = Field(default_factory=list)
+    dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(default_factory=list)
     
 
 
diff --git a/nwb-schema-language/src/nwb_schema_language/patches.py b/nwb-schema-language/src/nwb_schema_language/patches.py
index 4388f50..3fd58df 100644
--- a/nwb-schema-language/src/nwb_schema_language/patches.py
+++ b/nwb-schema-language/src/nwb_schema_language/patches.py
@@ -11,7 +11,9 @@ import argparse
 import pprint
 
 class Phases(StrEnum):
-    post_generation = "post_generation"
+    post_generation_pydantic = "post_generation_pydantic"
+    post_load_yaml = "post_load_yaml"
+    """After the yaml of the nwb schema classes is loaded"""
 
 @dataclass
 class Patch:
@@ -34,18 +36,42 @@ class Patch:
 
 ### Patches
 
+## Patches for the generated pydantic classes
+
 patch_schema_slot = Patch(
-    phase=Phases.post_generation,
+    phase=Phases.post_generation_pydantic,
     path=Path('src/nwb_schema_language/datamodel/nwb_schema_pydantic.py'),
-    match=r"\n\s*(schema)(.*Field\()(.*)",
-    replacement=r'\n    schema_\2alias="schema", \3',
+    match=r"\n\s*(schema:)(.*Field\()(.*)",
+    replacement=r'\n    schema_:\2alias="schema", \3',
+)
+
+# patch_neurodata_type_def_alias = Patch(
+#     phase=Phases.post_generation_pydantic,
+#     path=Path('src/nwb_schema_language/datamodel/nwb_schema_pydantic.py'),
+#     match=r"(\n\s*neurodata_type_def.*Field\(None, )(.*)",
+#     replacement=r'\1alias="data_type_def", \2',
+# )
+#
+# patch_neurodata_type_inc_alias = Patch(
+#     phase=Phases.post_generation_pydantic,
+#     path=Path('src/nwb_schema_language/datamodel/nwb_schema_pydantic.py'),
+#     match=r"(\n\s*neurodata_type_inc.*Field\(None, )(.*)",
+#     replacement=r'\1alias="data_type_inc", \2',
+# )
+
+patch_dtype_single_multiple = Patch(
+    phase=Phases.post_generation_pydantic,
+    path=Path('src/nwb_schema_language/datamodel/nwb_schema_pydantic.py'),
+    match=r"(\n\s*dtype: Optional\[)List\[Union\[CompoundDtype, (FlatDtype, ReferenceDtype\]\])\]",
+    replacement=r'\1Union[List[CompoundDtype], \2',
 )
 
 def run_patches(phase:Phases, verbose:bool=False):
     patches = [p for p in Patch.instances if p.phase == phase]
     for patch in patches:
-        print('Patching:')
-        pprint.pprint(patch)
+        if verbose:
+            print('Patching:')
+            pprint.pprint(patch)
         with open(patch.path, 'r') as pfile:
             string = pfile.read()
         string = re.sub(patch.match, patch.replacement, string)
diff --git a/nwb-schema-language/src/nwb_schema_language/schema/nwb_schema_language.yaml b/nwb-schema-language/src/nwb_schema_language/schema/nwb_schema_language.yaml
index 0848300..a92bff9 100644
--- a/nwb-schema-language/src/nwb_schema_language/schema/nwb_schema_language.yaml
+++ b/nwb-schema-language/src/nwb_schema_language/schema/nwb_schema_language.yaml
@@ -157,11 +157,11 @@ classes:
       - preconditions:
           slot_conditions:
             dtype:
-              range: CompoundDtype
+              range: FlatDtype
         postconditions:
           slot_conditions:
             dtype:
-              multivalued: true
+              multivalued: false
 
   NamingMixin:
     mixin: true
@@ -239,10 +239,12 @@ slots:
 
   # groups
   neurodata_type_def:
+    description: Used alongside neurodata_type_inc to indicate inheritance, naming, and mixins
     structured_pattern:
       syntax: "{protected_string}"
       interpolated: true
   neurodata_type_inc:
+    description: Used alongside neurodata_type_def to indicate inheritance, naming, and mixins
     structured_pattern:
       syntax: "{protected_string}"
       interpolated: true
diff --git a/nwb_linkml/__init__.py b/nwb_linkml/__init__.py
index e69de29..2ea95fc 100644
--- a/nwb_linkml/__init__.py
+++ b/nwb_linkml/__init__.py
@@ -0,0 +1 @@
+from nwb_linkml.maps import preload
\ No newline at end of file
diff --git a/nwb_linkml/io.py b/nwb_linkml/io.py
index cc4fd5a..e7d62cc 100644
--- a/nwb_linkml/io.py
+++ b/nwb_linkml/io.py
@@ -2,31 +2,51 @@
 Loading/saving NWB Schema yaml files
 """
 from pathlib import Path
-from typing import TypedDict, List
+from typing import TypedDict, List, Dict
 from pprint import pprint
+import warnings
 
 from linkml_runtime.loaders import yaml_loader
 import yaml
 
-from nwb_schema_language import Namespaces, Group, Dataset
-from nwb_linkml.namespaces import GitRepo, NamespaceRepo, NWB_CORE_REPO
+from nwb_schema_language import Namespaces, Namespace, Group, Dataset
+from nwb_linkml.namespaces import GitRepo, NamespaceRepo, NWB_CORE_REPO, HDMF_COMMON_REPO
+from nwb_linkml.maps import preload
+from nwb_linkml.map import PHASES, Map
 
+class NamespaceBundle(TypedDict):
+    """
+    A complete namespaces file and all indicated schema files
+    """
+    namespace: Namespaces
+    schema: Dict[str, List[Dataset | Group]]
 
+def load_yaml(path:Path) -> dict:
+    """Read the yaml file at ``path``, apply each postload :class:`.Map`, and return the result"""
+    with open(path, 'r') as file:
+        ns_dict = yaml.safe_load(file)
+
+    # maps are applied in the order they were defined, each one receiving the previous output
+    # (nothing is printed here: this runs for every namespace and schema file that is loaded)
+    maps = [m for m in Map.instances if m.phase == PHASES.postload]
+    for amap in maps:
+        ns_dict = amap.apply(ns_dict)
+    return ns_dict
 
 def load_namespaces(path:Path|NamespaceRepo) -> Namespaces:
     if isinstance(path, NamespaceRepo):
         path = path.provide_from_git()
 
-    namespaces = yaml_loader.load(str(path), target_class=Namespaces)
+    ns_dict = load_yaml(path)
+
+
+    namespaces = yaml_loader.load(ns_dict, target_class=Namespaces)
     return namespaces
 
-class SchemaFile(TypedDict):
-    datasets: List[Dataset]
-    groups: List[Group]
+
 
 def load_schema_file(path:Path) -> List[Dataset | Group]:
-    with open(path, 'r') as yfile:
-        source = yaml.safe_load(yfile)
+    source = load_yaml(path)
 
     schema = []
 
@@ -37,13 +57,74 @@ def load_schema_file(path:Path) -> List[Dataset | Group]:
             pprint(dataset)
             raise e
 
+    for group in source.get('groups', []):
+        try:
+            schema.append(Group(**group))
+        except Exception as e:
+            pprint(group)
+            raise e
+
     #schema.extend([Dataset(**dataset) for dataset in source.get('datasets', [])])
     #schema.extend([Group(**group) for group in source.get('groups', [])])
     return schema
 
-def load_nwb_core():
+def load_namespace_schema(namespace: Namespace | Namespaces, path:Path=Path('.')) -> Dict[str, List[Dataset | Group]]:
+    """
+    Load all schema files referenced by a namespace, or by every namespace in a namespaces file
+
+    Args:
+        namespace (:class:`.Namespace` | :class:`.Namespaces`): namespace(s) whose schema should be loaded - any other type raises ``TypeError``
+        path (:class:`pathlib.Path`): Location of the namespace file (or the directory containing it) - all relative paths are interpreted relative to this
+
+    Returns:
+        Dict[str, List[Dataset | Group]]: the loaded contents of each schema file, keyed by its ``source`` (entries without a ``source`` are skipped with a warning)
+    """
+    if isinstance(namespace, Namespace):
+        ns_iter = [namespace]
+    elif isinstance(namespace, Namespaces):
+        ns_iter = namespace.namespaces
+    else:
+        raise TypeError("Need to pass a namespace or namespaces :)")
+
+    path = Path(path).resolve()
+    if path.is_file():
+        # given the namespace file itself, so find paths relative to its directory
+        path = path.parent
+
+    sch = {}
+    for ns in ns_iter:
+        for schema in ns.schema_:
+            if schema.source is None:
+                warnings.warn(f"No source specified for {schema}")
+                continue
+            yml_file = (path / schema.source).resolve()
+            sch[schema.source] = load_schema_file(yml_file)
+
+    return sch
+
+def load_nwb_core() -> Dict[str, NamespaceBundle]:
+    # First get hdmf-common:
+    hdmf_ns_file = HDMF_COMMON_REPO.provide_from_git()
+    hdmf_ns = load_namespaces(hdmf_ns_file)
+    hdmf_schema = load_namespace_schema(hdmf_ns, hdmf_ns_file)
+
     namespace_file = NWB_CORE_REPO.provide_from_git()
     ns = load_namespaces(namespace_file)
+    schema = load_namespace_schema(ns, namespace_file)
+
+    return {
+        'hdmf-common': NamespaceBundle(
+            namespace=hdmf_ns,
+            schema=hdmf_schema
+        ),
+        'nwb-core': NamespaceBundle(
+            namespace=ns,
+            schema=schema
+        )
+    }
+
+
+
 
 
 
diff --git a/nwb_linkml/map.py b/nwb_linkml/map.py
new file mode 100644
index 0000000..f901a66
--- /dev/null
+++ b/nwb_linkml/map.py
@@ -0,0 +1,67 @@
+from dataclasses import dataclass
+from typing import Callable, ClassVar, List, Optional
+from enum import StrEnum
+import ast
+import re
+
+class MAP_TYPES(StrEnum):
+    key = 'key'
+    """Mapping the name of one key to another key"""
+
+class SCOPE_TYPES(StrEnum):
+    namespace = 'namespace'
+
+class PHASES(StrEnum):
+    postload = "postload"
+    """After the YAML for a model has been loaded"""
+
+
+@dataclass
+class Map:
+    """A transformation applied to loaded schema - every instance registers itself in ``instances``"""
+    scope: str
+    """The namespace that the map is relevant to"""
+    scope_type: SCOPE_TYPES
+
+    source: str
+    """The path within the schema to select the element to transform"""
+    target: str
+    """The path where the element should end"""
+
+    transform: Optional[Callable] = None
+    """Some transformation function, currently not implemented."""
+
+    phase: Optional[PHASES] = None
+
+    instances: ClassVar[List['Map']] = []
+    """Every map that has been created, in creation order - shared with all subclasses"""
+
+    def apply(self, input: dict) -> dict:
+        """Transform ``input`` and return the result - must be implemented by a subclass"""
+        raise NotImplementedError('do this in a subclass')
+
+    def __post_init__(self):
+        # register on creation so that callers can select maps by ``phase``
+        self.instances.append(self)
+
+
+class KeyMap(Map):
+    """Rename dictionary keys - ``source`` is a regex matched against the ``repr`` of each key"""
+    def apply(self, input: dict) -> dict:
+        """
+        Rename every string key that matches ``source``, in dicts and lists at any depth.
+
+        Values are never converted to text, so strings that merely mention a key and
+        values with no literal form (eg. dates parsed from yaml) are returned unchanged.
+        """
+        if isinstance(input, dict):
+            return {self._rename(key): self.apply(val) for key, val in input.items()}
+        if isinstance(input, list):
+            return [self.apply(item) for item in input]
+        return input
+
+    def _rename(self, key):
+        # substitute within ``repr(key)`` so quoted patterns like ``'data_type_def'`` still match
+        if not isinstance(key, str):
+            return key
+        return ast.literal_eval(re.sub(self.source, self.target, repr(key)))
diff --git a/nwb_linkml/maps/__init__.py b/nwb_linkml/maps/__init__.py
index e69de29..6e2a50c 100644
--- a/nwb_linkml/maps/__init__.py
+++ b/nwb_linkml/maps/__init__.py
@@ -0,0 +1,2 @@
+# Import everything so it's defined, but it shouldn't necessarily be used from here
+from nwb_linkml.maps.preload import MAP_HDMF_DATATYPE_DEF, MAP_HDMF_DATATYPE_INC
\ No newline at end of file
diff --git a/nwb_linkml/maps/preload.py b/nwb_linkml/maps/preload.py
new file mode 100644
index 0000000..489c6ce
--- /dev/null
+++ b/nwb_linkml/maps/preload.py
@@ -0,0 +1,22 @@
+"""
+Maps to change the loaded .yaml from nwb schema before it's parsed into schema language models
+"""
+
+from nwb_linkml.map import KeyMap, SCOPE_TYPES, PHASES
+
+MAP_HDMF_DATATYPE_DEF = KeyMap(
+    source="\'data_type_def\'",
+    target="\'neurodata_type_def\'",
+    scope='hdmf-common',
+    scope_type=SCOPE_TYPES.namespace,
+    phase=PHASES.postload
+)
+
+MAP_HDMF_DATATYPE_INC = KeyMap(
+    source="\'data_type_inc\'",
+    target="\'neurodata_type_inc\'",
+    scope='hdmf-common',
+    scope_type=SCOPE_TYPES.namespace,
+    phase=PHASES.postload
+)
+
diff --git a/poetry.lock b/poetry.lock
index fb19e82..c408568 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -408,6 +408,41 @@ pyyaml = "*"
 rdflib = ">=6.0.0"
 requests = "*"
 
+[[package]]
+name = "markdown-it-py"
+version = "3.0.0"
+description = "Python port of markdown-it. Markdown parsing, done right!"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"},
+    {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"},
+]
+
+[package.dependencies]
+mdurl = ">=0.1,<1.0"
+
+[package.extras]
+benchmarking = ["psutil", "pytest", "pytest-benchmark"]
+code-style = ["pre-commit (>=3.0,<4.0)"]
+compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"]
+linkify = ["linkify-it-py (>=1,<3)"]
+plugins = ["mdit-py-plugins"]
+profiling = ["gprof2dot"]
+rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"]
+testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
+
+[[package]]
+name = "mdurl"
+version = "0.1.2"
+description = "Markdown URL utilities"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"},
+    {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
+]
+
 [[package]]
 name = "nwb-schema-language"
 version = "0.1.0"
@@ -544,6 +579,20 @@ typing-extensions = ">=4.2.0"
 dotenv = ["python-dotenv (>=0.10.4)"]
 email = ["email-validator (>=1.0.3)"]
 
+[[package]]
+name = "pygments"
+version = "2.16.1"
+description = "Pygments is a syntax highlighting package written in Python."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "Pygments-2.16.1-py3-none-any.whl", hash = "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692"},
+    {file = "Pygments-2.16.1.tar.gz", hash = "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29"},
+]
+
+[package.extras]
+plugins = ["importlib-metadata"]
+
 [[package]]
 name = "pyparsing"
 version = "3.1.1"
@@ -710,6 +759,24 @@ urllib3 = ">=1.21.1,<3"
 socks = ["PySocks (>=1.5.6,!=1.5.7)"]
 use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
 
+[[package]]
+name = "rich"
+version = "13.5.2"
+description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
+optional = false
+python-versions = ">=3.7.0"
+files = [
+    {file = "rich-13.5.2-py3-none-any.whl", hash = "sha256:146a90b3b6b47cac4a73c12866a499e9817426423f57c5a66949c086191a8808"},
+    {file = "rich-13.5.2.tar.gz", hash = "sha256:fb9d6c0a0f643c99eed3875b5377a184132ba9be4d61516a55273d3554d75a39"},
+]
+
+[package.dependencies]
+markdown-it-py = ">=2.2.0"
+pygments = ">=2.13.0,<3.0.0"
+
+[package.extras]
+jupyter = ["ipywidgets (>=7.5.1,<9)"]
+
 [[package]]
 name = "rpds-py"
 version = "0.9.2"
@@ -968,4 +1035,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "71a9529af92fd8aeca93d42ec2e5bfcc6129787958795a367e145c9ed97002e1"
+content-hash = "32b9026599bc647c7c91a582f2629cd4bf23ca6bbedecd62ac455cfde4c8ed5c"
diff --git a/pyproject.toml b/pyproject.toml
index aac6898..5e84214 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,7 @@ pyyaml = "^6.0"
 linkml-runtime = "^1.5.6"
 nwb_schema_language = { path = './nwb-schema-language', develop = true }
 pydantic = "<2"
+rich = "^13.5.2"
 
 
 [build-system]