From 111965aacefc854324b9d7bc7aa4f15e3258ee6e Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Fri, 18 Aug 2023 22:09:28 -0700 Subject: [PATCH] working schema language import --- nwb-schema-language/Makefile | 7 +- .../datamodel/nwb_schema_pydantic.py | 14 +-- .../src/nwb_schema_language/patches.py | 38 +++++-- .../schema/nwb_schema_language.yaml | 6 +- nwb_linkml/__init__.py | 1 + nwb_linkml/io.py | 101 ++++++++++++++++-- nwb_linkml/map.py | 67 ++++++++++++ nwb_linkml/maps/__init__.py | 2 + nwb_linkml/maps/preload.py | 22 ++++ poetry.lock | 69 +++++++++++- pyproject.toml | 1 + 11 files changed, 301 insertions(+), 27 deletions(-) create mode 100644 nwb_linkml/map.py create mode 100644 nwb_linkml/maps/preload.py diff --git a/nwb-schema-language/Makefile b/nwb-schema-language/Makefile index 595e8e2..9fa7103 100644 --- a/nwb-schema-language/Makefile +++ b/nwb-schema-language/Makefile @@ -108,7 +108,12 @@ gen-project: $(PYMODEL) gen-pydantic: $(PYMODEL) $(RUN) gen-pydantic $(SOURCE_SCHEMA_PATH) --pydantic_version 1 > $(PYMODEL)/nwb_schema_pydantic.py - $(RUN) run_patches --phase post_generation + $(RUN) run_patches --phase post_generation_pydantic + +gen-pydantic-test: $(PYMODEL) + $(RUN) gen-pydantic src/nwb_schema_language/schema/test_multival.yml --pydantic_version 1 > $(PYMODEL)/test_multival_pydantic.py +# $(RUN) run_patches --phase post_generation + test: test-schema test-python test-examples diff --git a/nwb-schema-language/src/nwb_schema_language/datamodel/nwb_schema_pydantic.py b/nwb-schema-language/src/nwb_schema_language/datamodel/nwb_schema_pydantic.py index e7c0d2d..c986b47 100644 --- a/nwb-schema-language/src/nwb_schema_language/datamodel/nwb_schema_pydantic.py +++ b/nwb-schema-language/src/nwb_schema_language/datamodel/nwb_schema_pydantic.py @@ -168,7 +168,7 @@ class CompoundDtype(ConfiguredBaseModel): class DtypeMixin(ConfiguredBaseModel): - dtype: Optional[List[Union[CompoundDtype, FlatDtype, ReferenceDtype]]] = Field(default_factory=list) + dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(default_factory=list) class Attribute(DtypeMixin): @@ -180,7 +180,7 @@ class Attribute(DtypeMixin): default_value: Optional[Any] = Field(None, description="""Optional default value for variable-valued attributes.""") doc: str = Field(..., description="""Description of corresponding object.""") required: Optional[bool] = Field(True, description="""Optional boolean key describing whether the attribute is required. Default value is True.""") - dtype: Optional[List[Union[CompoundDtype, FlatDtype, ReferenceDtype]]] = Field(default_factory=list) + dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(default_factory=list) class NamingMixin(ConfiguredBaseModel): @@ -192,8 +192,8 @@ class NamingMixin(ConfiguredBaseModel): class Group(NamingMixin): - neurodata_type_def: Optional[str] = Field(None) - neurodata_type_inc: Optional[str] = Field(None) + neurodata_type_def: Optional[str] = Field(None, description="""Used alongside neurodata_type_inc to indicate inheritance, naming, and mixins""") + neurodata_type_inc: Optional[str] = Field(None, description="""Used alongside neurodata_type_def to indicate inheritance, naming, and mixins""") name: Optional[str] = Field(None) default_name: Optional[str] = Field(None) doc: str = Field(..., description="""Description of corresponding object.""") @@ -207,8 +207,8 @@ class Group(NamingMixin): class Dataset(NamingMixin, DtypeMixin): - neurodata_type_def: Optional[str] = Field(None) - neurodata_type_inc: Optional[str] = Field(None) + neurodata_type_def: Optional[str] = Field(None, description="""Used alongside neurodata_type_inc to indicate inheritance, naming, and mixins""") + neurodata_type_inc: Optional[str] = Field(None, description="""Used alongside neurodata_type_def to indicate inheritance, naming, and mixins""") name: Optional[str] = Field(None) default_name: Optional[str] = Field(None) dims: Optional[List[Union[Any, str]]] = Field(default_factory=list) @@ -219,7 +219,7 @@ class Dataset(NamingMixin, DtypeMixin): quantity: Optional[Union[QuantityEnum, int]] = Field(1) linkable: Optional[bool] = Field(None) attributes: Optional[List[Attribute]] = Field(default_factory=list) - dtype: Optional[List[Union[CompoundDtype, FlatDtype, ReferenceDtype]]] = Field(default_factory=list) + dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(default_factory=list) diff --git a/nwb-schema-language/src/nwb_schema_language/patches.py b/nwb-schema-language/src/nwb_schema_language/patches.py index 4388f50..3fd58df 100644 --- a/nwb-schema-language/src/nwb_schema_language/patches.py +++ b/nwb-schema-language/src/nwb_schema_language/patches.py @@ -11,7 +11,9 @@ import argparse import pprint class Phases(StrEnum): - post_generation = "post_generation" + post_generation_pydantic = "post_generation_pydantic" + post_load_yaml = "post_load_yaml" + """After the yaml of the nwb schema classes is loaded""" @dataclass class Patch: @@ -34,18 +36,42 @@ class Patch: ### Patches +## Patches for the generated pydantic classes + patch_schema_slot = Patch( - phase=Phases.post_generation, + phase=Phases.post_generation_pydantic, path=Path('src/nwb_schema_language/datamodel/nwb_schema_pydantic.py'), - match=r"\n\s*(schema)(.*Field\()(.*)", - replacement=r'\n schema_\2alias="schema", \3', + match=r"\n\s*(schema:)(.*Field\()(.*)", + replacement=r'\n schema_:\2alias="schema", \3', +) + +# patch_neurodata_type_def_alias = Patch( +# phase=Phases.post_generation_pydantic, +# path=Path('src/nwb_schema_language/datamodel/nwb_schema_pydantic.py'), +# match=r"(\n\s*neurodata_type_def.*Field\(None, )(.*)", +# replacement=r'\1alias="data_type_def", \2', +# ) +# +# patch_neurodata_type_inc_alias = Patch( +# phase=Phases.post_generation_pydantic, +# path=Path('src/nwb_schema_language/datamodel/nwb_schema_pydantic.py'), +# match=r"(\n\s*neurodata_type_inc.*Field\(None, )(.*)", +# replacement=r'\1alias="data_type_inc", \2', +# ) + +patch_dtype_single_multiple = Patch( + phase=Phases.post_generation_pydantic, + path=Path('src/nwb_schema_language/datamodel/nwb_schema_pydantic.py'), + match=r"(\n\s*dtype: Optional\[)List\[Union\[CompoundDtype, (FlatDtype, ReferenceDtype\]\])\]", + replacement=r'\1Union[List[CompoundDtype], \2', ) def run_patches(phase:Phases, verbose:bool=False): patches = [p for p in Patch.instances if p.phase == phase] for patch in patches: - print('Patching:') - pprint.pprint(patch) + if verbose: + print('Patching:') + pprint.pprint(patch) with open(patch.path, 'r') as pfile: string = pfile.read() string = re.sub(patch.match, patch.replacement, string) diff --git a/nwb-schema-language/src/nwb_schema_language/schema/nwb_schema_language.yaml b/nwb-schema-language/src/nwb_schema_language/schema/nwb_schema_language.yaml index 0848300..a92bff9 100644 --- a/nwb-schema-language/src/nwb_schema_language/schema/nwb_schema_language.yaml +++ b/nwb-schema-language/src/nwb_schema_language/schema/nwb_schema_language.yaml @@ -157,11 +157,11 @@ classes: - preconditions: slot_conditions: dtype: - range: CompoundDtype + range: FlatDtype postconditions: slot_conditions: dtype: - multivalued: true + multivalued: false NamingMixin: mixin: true @@ -239,10 +239,12 @@ slots: # groups neurodata_type_def: + description: Used alongside neurodata_type_inc to indicate inheritance, naming, and mixins structured_pattern: syntax: "{protected_string}" interpolated: true neurodata_type_inc: + description: Used alongside neurodata_type_def to indicate inheritance, naming, and mixins structured_pattern: syntax: "{protected_string}" interpolated: true diff --git a/nwb_linkml/__init__.py b/nwb_linkml/__init__.py index e69de29..2ea95fc 100644 --- a/nwb_linkml/__init__.py +++ b/nwb_linkml/__init__.py @@ -0,0 +1 @@ +from nwb_linkml.maps import preload \ No newline at end of file diff --git a/nwb_linkml/io.py b/nwb_linkml/io.py index cc4fd5a..e7d62cc 100644 --- a/nwb_linkml/io.py +++ b/nwb_linkml/io.py @@ -2,31 +2,51 @@ Loading/saving NWB Schema yaml files """ from pathlib import Path -from typing import TypedDict, List +from typing import TypedDict, List, Dict from pprint import pprint +import warnings from linkml_runtime.loaders import yaml_loader import yaml -from nwb_schema_language import Namespaces, Group, Dataset -from nwb_linkml.namespaces import GitRepo, NamespaceRepo, NWB_CORE_REPO +from nwb_schema_language import Namespaces, Namespace, Group, Dataset +from nwb_linkml.namespaces import GitRepo, NamespaceRepo, NWB_CORE_REPO, HDMF_COMMON_REPO +from nwb_linkml.maps import preload +from nwb_linkml.map import PHASES, Map +class NamespaceBundle(TypedDict): + """ + A complete namespaces file and all indicated schema files + """ + namespace: Namespaces + schema: Dict[str, List[Dataset | Group]] +def load_yaml(path:Path) -> dict: + with open(path, 'r') as file: + ns_dict = yaml.safe_load(file) + + # apply maps + maps = [m for m in Map.instances if m.phase == PHASES.postload] + print('got maps') + print(maps) + for amap in maps: + ns_dict = amap.apply(ns_dict) + return ns_dict def load_namespaces(path:Path|NamespaceRepo) -> Namespaces: if isinstance(path, NamespaceRepo): path = path.provide_from_git() - namespaces = yaml_loader.load(str(path), target_class=Namespaces) + ns_dict = load_yaml(path) + + + namespaces = yaml_loader.load(ns_dict, target_class=Namespaces) return namespaces -class SchemaFile(TypedDict): - datasets: List[Dataset] - groups: List[Group] + def load_schema_file(path:Path) -> List[Dataset | Group]: - with open(path, 'r') as yfile: - source = yaml.safe_load(yfile) + source = load_yaml(path) schema = [] @@ -37,13 +57,74 @@ def load_schema_file(path:Path) -> List[Dataset | Group]: pprint(dataset) raise e + for group in source.get('groups', []): + try: + schema.append(Group(**group)) + except Exception as e: + pprint(group) + raise e + #schema.extend([Dataset(**dataset) for dataset in source.get('datasets', [])]) #schema.extend([Group(**group) for group in source.get('groups', [])]) return schema -def load_nwb_core(): +def load_namespace_schema(namespace: Namespace | Namespaces, path:Path=Path('.')) -> Dict[str, List[Dataset | Group]]: + """ + Load all schema referenced by a namespace file + + Args: + namespace (:class:`.Namespace`): + path (:class:`pathlib.Path`): Location of the namespace file - all relative paths are interpreted relative to this + + Returns: + List[Union[Dataset|Group]] + """ + if isinstance(namespace, Namespace): + ns_iter = [namespace] + elif isinstance(namespace, Namespaces): + ns_iter = namespace.namespaces + else: + raise TypeError("Need to pass a namespace or namespaces :)") + + path = Path(path).resolve() + if path.is_file(): + # given the namespace file itself, so find paths relative to its directory + path = path.parent + + sch = {} + for ns in ns_iter: + for schema in ns.schema_: + if schema.source is None: + warnings.warn(f"No source specified for {schema}") + continue + yml_file = (path / schema.source).resolve() + sch[schema.source] = load_schema_file(yml_file) + + return sch + +def load_nwb_core() -> Dict[str, NamespaceBundle]: + # First get hdmf-common: + hdmf_ns_file = HDMF_COMMON_REPO.provide_from_git() + hdmf_ns = load_namespaces(hdmf_ns_file) + hdmf_schema = load_namespace_schema(hdmf_ns, hdmf_ns_file) + namespace_file = NWB_CORE_REPO.provide_from_git() ns = load_namespaces(namespace_file) + schema = load_namespace_schema(ns, namespace_file) + + return { + 'hdmf-common': NamespaceBundle( + namespace=hdmf_ns, + schema=hdmf_schema + ), + 'nwb-core': NamespaceBundle( + namespace=ns, + schema=schema + ) + } + + + diff --git a/nwb_linkml/map.py b/nwb_linkml/map.py new file mode 100644 index 0000000..f901a66 --- /dev/null +++ b/nwb_linkml/map.py @@ -0,0 +1,67 @@ +from dataclasses import dataclass +from typing import ClassVar, List, Optional +from enum import StrEnum +import ast +import re + +class MAP_TYPES(StrEnum): + key = 'key' + """Mapping the name of one key to another key""" + +class SCOPE_TYPES(StrEnum): + namespace = 'namespace' + +class PHASES(StrEnum): + postload = "postload" + """After the YAML for a model has been loaded""" + + +@dataclass +class Map: + scope: str + """The namespace that the map is relevant to""" + scope_type: SCOPE_TYPES + + source: str + """The path within the schema to select the element to transform""" + target: str + """The path where the element should end""" + + transform: Optional[callable] = None + """ + Some transformation function, currently not implemented. + """ + + phase: Optional[PHASES] = None + + + instances: ClassVar[List['Map']] = [] + """ + Maps that get defined!!! + """ + + def apply(self): + raise NotImplementedError('do this in a subclass') + + def __post_init__(self): + self.instances.append(self) + + + +# def replace_keys(input: dict, source: str, target: str) -> dict: +# """Recursively change keys in a dictionary""" + + +class KeyMap(Map): + def apply(self, input: dict) -> dict: + """ + Change all keys from source to target in a super naive way. + + Convert the dictionary to a string. Do regex. parse ast + """ + input_str = str(input) + input_str = re.sub(self.source, self.target, input_str) + out = ast.literal_eval(input_str) + return out + + diff --git a/nwb_linkml/maps/__init__.py b/nwb_linkml/maps/__init__.py index e69de29..6e2a50c 100644 --- a/nwb_linkml/maps/__init__.py +++ b/nwb_linkml/maps/__init__.py @@ -0,0 +1,2 @@ +# Import everything so it's defined, but shoudlnt' necessarily be used from here +from nwb_linkml.maps.preload import MAP_HDMF_DATATYPE_DEF, MAP_HDMF_DATATYPE_INC \ No newline at end of file diff --git a/nwb_linkml/maps/preload.py b/nwb_linkml/maps/preload.py new file mode 100644 index 0000000..489c6ce --- /dev/null +++ b/nwb_linkml/maps/preload.py @@ -0,0 +1,22 @@ +""" +Maps to change the loaded .yaml from nwb schema before it's +""" + +from nwb_linkml.map import KeyMap, SCOPE_TYPES, PHASES + +MAP_HDMF_DATATYPE_DEF = KeyMap( + source="\'data_type_def\'", + target="\'neurodata_type_def\'", + scope='hdmf-common', + scope_type=SCOPE_TYPES.namespace, + phase=PHASES.postload +) + +MAP_HDMF_DATATYPE_INC = KeyMap( + source="\'data_type_inc\'", + target="\'neurodata_type_inc\'", + scope='hdmf-common', + scope_type=SCOPE_TYPES.namespace, + phase=PHASES.postload +) + diff --git a/poetry.lock b/poetry.lock index fb19e82..c408568 100644 --- a/poetry.lock +++ b/poetry.lock @@ -408,6 +408,41 @@ pyyaml = "*" rdflib = ">=6.0.0" requests = "*" +[[package]] +name = "markdown-it-py" +version = "3.0.0" +description = "Python port of markdown-it. Markdown parsing, done right!" +optional = false +python-versions = ">=3.8" +files = [ + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, +] + +[package.dependencies] +mdurl = ">=0.1,<1.0" + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins"] +profiling = ["gprof2dot"] +rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + [[package]] name = "nwb-schema-language" version = "0.1.0" @@ -544,6 +579,20 @@ typing-extensions = ">=4.2.0" dotenv = ["python-dotenv (>=0.10.4)"] email = ["email-validator (>=1.0.3)"] +[[package]] +name = "pygments" +version = "2.16.1" +description = "Pygments is a syntax highlighting package written in Python." +optional = false +python-versions = ">=3.7" +files = [ + {file = "Pygments-2.16.1-py3-none-any.whl", hash = "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692"}, + {file = "Pygments-2.16.1.tar.gz", hash = "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29"}, +] + +[package.extras] +plugins = ["importlib-metadata"] + [[package]] name = "pyparsing" version = "3.1.1" @@ -710,6 +759,24 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "rich" +version = "13.5.2" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "rich-13.5.2-py3-none-any.whl", hash = "sha256:146a90b3b6b47cac4a73c12866a499e9817426423f57c5a66949c086191a8808"}, + {file = "rich-13.5.2.tar.gz", hash = "sha256:fb9d6c0a0f643c99eed3875b5377a184132ba9be4d61516a55273d3554d75a39"}, +] + +[package.dependencies] +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] + [[package]] name = "rpds-py" version = "0.9.2" @@ -968,4 +1035,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "71a9529af92fd8aeca93d42ec2e5bfcc6129787958795a367e145c9ed97002e1" +content-hash = "32b9026599bc647c7c91a582f2629cd4bf23ca6bbedecd62ac455cfde4c8ed5c" diff --git a/pyproject.toml b/pyproject.toml index aac6898..5e84214 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ pyyaml = "^6.0" linkml-runtime = "^1.5.6" nwb_schema_language = { path = './nwb-schema-language', develop = true } pydantic = "<2" +rich = "^13.5.2" [build-system]