working schema language import

This commit is contained in:
sneakers-the-rat 2023-08-18 22:09:28 -07:00
parent fe44c1c657
commit 111965aace
11 changed files with 301 additions and 27 deletions

View file

@ -108,7 +108,12 @@ gen-project: $(PYMODEL)
gen-pydantic: $(PYMODEL)
$(RUN) gen-pydantic $(SOURCE_SCHEMA_PATH) --pydantic_version 1 > $(PYMODEL)/nwb_schema_pydantic.py
$(RUN) run_patches --phase post_generation
$(RUN) run_patches --phase post_generation_pydantic
gen-pydantic-test: $(PYMODEL)
$(RUN) gen-pydantic src/nwb_schema_language/schema/test_multival.yml --pydantic_version 1 > $(PYMODEL)/test_multival_pydantic.py
# $(RUN) run_patches --phase post_generation
test: test-schema test-python test-examples

View file

@ -168,7 +168,7 @@ class CompoundDtype(ConfiguredBaseModel):
class DtypeMixin(ConfiguredBaseModel):
dtype: Optional[List[Union[CompoundDtype, FlatDtype, ReferenceDtype]]] = Field(default_factory=list)
dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(default_factory=list)
class Attribute(DtypeMixin):
@ -180,7 +180,7 @@ class Attribute(DtypeMixin):
default_value: Optional[Any] = Field(None, description="""Optional default value for variable-valued attributes.""")
doc: str = Field(..., description="""Description of corresponding object.""")
required: Optional[bool] = Field(True, description="""Optional boolean key describing whether the attribute is required. Default value is True.""")
dtype: Optional[List[Union[CompoundDtype, FlatDtype, ReferenceDtype]]] = Field(default_factory=list)
dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(default_factory=list)
class NamingMixin(ConfiguredBaseModel):
@ -192,8 +192,8 @@ class NamingMixin(ConfiguredBaseModel):
class Group(NamingMixin):
neurodata_type_def: Optional[str] = Field(None)
neurodata_type_inc: Optional[str] = Field(None)
neurodata_type_def: Optional[str] = Field(None, description="""Used alongside neurodata_type_inc to indicate inheritance, naming, and mixins""")
neurodata_type_inc: Optional[str] = Field(None, description="""Used alongside neurodata_type_def to indicate inheritance, naming, and mixins""")
name: Optional[str] = Field(None)
default_name: Optional[str] = Field(None)
doc: str = Field(..., description="""Description of corresponding object.""")
@ -207,8 +207,8 @@ class Group(NamingMixin):
class Dataset(NamingMixin, DtypeMixin):
neurodata_type_def: Optional[str] = Field(None)
neurodata_type_inc: Optional[str] = Field(None)
neurodata_type_def: Optional[str] = Field(None, description="""Used alongside neurodata_type_inc to indicate inheritance, naming, and mixins""")
neurodata_type_inc: Optional[str] = Field(None, description="""Used alongside neurodata_type_def to indicate inheritance, naming, and mixins""")
name: Optional[str] = Field(None)
default_name: Optional[str] = Field(None)
dims: Optional[List[Union[Any, str]]] = Field(default_factory=list)
@ -219,7 +219,7 @@ class Dataset(NamingMixin, DtypeMixin):
quantity: Optional[Union[QuantityEnum, int]] = Field(1)
linkable: Optional[bool] = Field(None)
attributes: Optional[List[Attribute]] = Field(default_factory=list)
dtype: Optional[List[Union[CompoundDtype, FlatDtype, ReferenceDtype]]] = Field(default_factory=list)
dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(default_factory=list)

View file

@ -11,7 +11,9 @@ import argparse
import pprint
class Phases(StrEnum):
post_generation = "post_generation"
post_generation_pydantic = "post_generation_pydantic"
post_load_yaml = "post_load_yaml"
"""After the yaml of the nwb schema classes is loaded"""
@dataclass
class Patch:
@ -34,18 +36,42 @@ class Patch:
### Patches
## Patches for the generated pydantic classes
patch_schema_slot = Patch(
phase=Phases.post_generation,
phase=Phases.post_generation_pydantic,
path=Path('src/nwb_schema_language/datamodel/nwb_schema_pydantic.py'),
match=r"\n\s*(schema)(.*Field\()(.*)",
replacement=r'\n schema_\2alias="schema", \3',
match=r"\n\s*(schema:)(.*Field\()(.*)",
replacement=r'\n schema_:\2alias="schema", \3',
)
# patch_neurodata_type_def_alias = Patch(
# phase=Phases.post_generation_pydantic,
# path=Path('src/nwb_schema_language/datamodel/nwb_schema_pydantic.py'),
# match=r"(\n\s*neurodata_type_def.*Field\(None, )(.*)",
# replacement=r'\1alias="data_type_def", \2',
# )
#
# patch_neurodata_type_inc_alias = Patch(
# phase=Phases.post_generation_pydantic,
# path=Path('src/nwb_schema_language/datamodel/nwb_schema_pydantic.py'),
# match=r"(\n\s*neurodata_type_inc.*Field\(None, )(.*)",
# replacement=r'\1alias="data_type_inc", \2',
# )
patch_dtype_single_multiple = Patch(
phase=Phases.post_generation_pydantic,
path=Path('src/nwb_schema_language/datamodel/nwb_schema_pydantic.py'),
match=r"(\n\s*dtype: Optional\[)List\[Union\[CompoundDtype, (FlatDtype, ReferenceDtype\]\])\]",
replacement=r'\1Union[List[CompoundDtype], \2',
)
def run_patches(phase:Phases, verbose:bool=False):
patches = [p for p in Patch.instances if p.phase == phase]
for patch in patches:
print('Patching:')
pprint.pprint(patch)
if verbose:
print('Patching:')
pprint.pprint(patch)
with open(patch.path, 'r') as pfile:
string = pfile.read()
string = re.sub(patch.match, patch.replacement, string)

View file

@ -157,11 +157,11 @@ classes:
- preconditions:
slot_conditions:
dtype:
range: CompoundDtype
range: FlatDtype
postconditions:
slot_conditions:
dtype:
multivalued: true
multivalued: false
NamingMixin:
mixin: true
@ -239,10 +239,12 @@ slots:
# groups
neurodata_type_def:
description: Used alongside neurodata_type_inc to indicate inheritance, naming, and mixins
structured_pattern:
syntax: "{protected_string}"
interpolated: true
neurodata_type_inc:
description: Used alongside neurodata_type_def to indicate inheritance, naming, and mixins
structured_pattern:
syntax: "{protected_string}"
interpolated: true

View file

@ -0,0 +1 @@
from nwb_linkml.maps import preload

View file

@ -2,31 +2,51 @@
Loading/saving NWB Schema yaml files
"""
from pathlib import Path
from typing import TypedDict, List
from typing import TypedDict, List, Dict
from pprint import pprint
import warnings
from linkml_runtime.loaders import yaml_loader
import yaml
from nwb_schema_language import Namespaces, Group, Dataset
from nwb_linkml.namespaces import GitRepo, NamespaceRepo, NWB_CORE_REPO
from nwb_schema_language import Namespaces, Namespace, Group, Dataset
from nwb_linkml.namespaces import GitRepo, NamespaceRepo, NWB_CORE_REPO, HDMF_COMMON_REPO
from nwb_linkml.maps import preload
from nwb_linkml.map import PHASES, Map
class NamespaceBundle(TypedDict):
    """
    A complete namespaces file and all indicated schema files
    """
    # Parsed model of the .namespace.yaml file itself
    namespace: Namespaces
    # Maps each schema source filename (as written in the namespace's schema
    # list) to the Dataset/Group models loaded from that file
    schema: Dict[str, List[Dataset | Group]]
def load_yaml(path: Path) -> dict:
    """
    Load a yaml file as a plain dict and apply all ``postload`` maps.

    Args:
        path (:class:`pathlib.Path`): yaml file to load

    Returns:
        dict: the parsed yaml, after every registered :class:`~nwb_linkml.map.Map`
        instance whose phase is ``postload`` has been applied to it
    """
    with open(path, 'r') as file:
        ns_dict = yaml.safe_load(file)

    # apply maps
    # NOTE(review): maps are selected by phase only — scope is not checked here,
    # so every postload map is applied to every file. Confirm that is intended.
    maps = [m for m in Map.instances if m.phase == PHASES.postload]
    for amap in maps:
        ns_dict = amap.apply(ns_dict)

    return ns_dict
def load_namespaces(path: Path | NamespaceRepo) -> Namespaces:
    """
    Load a namespaces file into a :class:`~nwb_schema_language.Namespaces` model.

    Args:
        path: Either a path to a namespaces yaml file, or a
            :class:`~nwb_linkml.namespaces.NamespaceRepo` that can provide one
            from git.

    Returns:
        Namespaces: the validated namespaces model
    """
    # resolve a repo reference to a concrete file on disk first
    if isinstance(path, NamespaceRepo):
        path = path.provide_from_git()

    return yaml_loader.load(load_yaml(path), target_class=Namespaces)
class SchemaFile(TypedDict):
    """
    Expected top-level structure of a single nwb schema yaml file:
    a list of datasets and a list of groups.
    """
    datasets: List[Dataset]
    groups: List[Group]
def load_schema_file(path:Path) -> List[Dataset | Group]:
with open(path, 'r') as yfile:
source = yaml.safe_load(yfile)
source = load_yaml(path)
schema = []
@ -37,13 +57,74 @@ def load_schema_file(path:Path) -> List[Dataset | Group]:
pprint(dataset)
raise e
for group in source.get('groups', []):
try:
schema.append(Group(**group))
except Exception as e:
pprint(group)
raise e
#schema.extend([Dataset(**dataset) for dataset in source.get('datasets', [])])
#schema.extend([Group(**group) for group in source.get('groups', [])])
return schema
def load_nwb_core():
def load_namespace_schema(namespace: Namespace | Namespaces, path: Path = Path('.')) -> Dict[str, List[Dataset | Group]]:
    """
    Load all schema referenced by a namespace file

    Args:
        namespace (:class:`.Namespace` | :class:`.Namespaces`): a single namespace,
            or a namespaces container whose ``namespaces`` are all loaded
        path (:class:`pathlib.Path`): Location of the namespace file - all relative
            paths are interpreted relative to this

    Returns:
        Dict[str, List[Dataset | Group]]: mapping from each schema ``source``
        (as written in the namespace file) to the models loaded from that file

    Raises:
        TypeError: if ``namespace`` is neither a Namespace nor a Namespaces
    """
    if isinstance(namespace, Namespace):
        ns_iter = [namespace]
    elif isinstance(namespace, Namespaces):
        ns_iter = namespace.namespaces
    else:
        raise TypeError("Need to pass a namespace or namespaces :)")

    path = Path(path).resolve()
    if path.is_file():
        # given the namespace file itself, so find paths relative to its directory
        path = path.parent

    sch = {}
    for ns in ns_iter:
        for schema in ns.schema_:
            if schema.source is None:
                # schema entries may reference types rather than files; skip those
                warnings.warn(f"No source specified for {schema}")
                continue
            yml_file = (path / schema.source).resolve()
            sch[schema.source] = load_schema_file(yml_file)

    return sch
def load_nwb_core() -> Dict[str, NamespaceBundle]:
    """
    Load the hdmf-common and nwb-core namespaces and their schema from git.

    Returns:
        Dict[str, NamespaceBundle]: bundles keyed by ``'hdmf-common'`` and
        ``'nwb-core'``
    """
    bundles = {}
    # hdmf-common is fetched and loaded before nwb-core, preserving the
    # original load order
    for key, repo in (('hdmf-common', HDMF_COMMON_REPO), ('nwb-core', NWB_CORE_REPO)):
        ns_file = repo.provide_from_git()
        ns = load_namespaces(ns_file)
        bundles[key] = NamespaceBundle(
            namespace=ns,
            schema=load_namespace_schema(ns, ns_file)
        )
    return bundles

67
nwb_linkml/map.py Normal file
View file

@ -0,0 +1,67 @@
import ast
import re
from dataclasses import dataclass
from enum import StrEnum
from typing import Callable, ClassVar, List, Optional
class MAP_TYPES(StrEnum):
key = 'key'
"""Mapping the name of one key to another key"""
class SCOPE_TYPES(StrEnum):
namespace = 'namespace'
class PHASES(StrEnum):
postload = "postload"
"""After the YAML for a model has been loaded"""
@dataclass
class Map:
    """
    Base class for declarative transformations applied to loaded schema dicts.

    Every instance registers itself in :attr:`Map.instances` on construction
    (see :meth:`__post_init__`), so defined maps can be discovered and applied
    by phase. Subclasses implement :meth:`apply`.
    """
    scope: str
    """The namespace that the map is relevant to"""
    scope_type: "SCOPE_TYPES"
    source: str
    """The path within the schema to select the element to transform"""
    target: str
    """The path where the element should end"""
    transform: Optional[Callable] = None
    """
    Some transformation function, currently not implemented.
    """
    phase: Optional["PHASES"] = None
    instances: ClassVar[List['Map']] = []
    """
    Maps that get defined!!!
    """

    def apply(self, input: dict) -> dict:
        """
        Transform ``input`` and return the result.

        Accepts the dict argument here so the base signature matches the
        subclass overrides and callers (``amap.apply(ns_dict)``) — calling the
        base class still raises :class:`NotImplementedError` rather than a
        misleading ``TypeError``.
        """
        raise NotImplementedError('do this in a subclass')

    def __post_init__(self):
        # register every constructed map so runtime code can discover them
        self.instances.append(self)
# def replace_keys(input: dict, source: str, target: str) -> dict:
# """Recursively change keys in a dictionary"""
class KeyMap(Map):
    """A map that renames dictionary keys matching ``source`` to ``target``."""

    def apply(self, input: dict) -> dict:
        """
        Change all keys from source to target in a super naive way.

        Convert the dictionary to a string, do regex on the serialized form,
        then parse it back with :func:`ast.literal_eval`.
        """
        serialized = str(input)
        renamed = re.sub(self.source, self.target, serialized)
        return ast.literal_eval(renamed)

View file

@ -0,0 +1,2 @@
# Import everything so it's defined, but it shouldn't necessarily be used from here
from nwb_linkml.maps.preload import MAP_HDMF_DATATYPE_DEF, MAP_HDMF_DATATYPE_INC

View file

@ -0,0 +1,22 @@
"""
Maps to change the loaded .yaml from nwb schema before it's loaded into the nwb-schema-language models
"""
from nwb_linkml.map import KeyMap, SCOPE_TYPES, PHASES
# Rename hdmf-common's quoted "data_type_def" keys to "neurodata_type_def"
MAP_HDMF_DATATYPE_DEF = KeyMap(
    scope='hdmf-common',
    scope_type=SCOPE_TYPES.namespace,
    phase=PHASES.postload,
    source=r"'data_type_def'",
    target=r"'neurodata_type_def'",
)

# Rename hdmf-common's quoted "data_type_inc" keys to "neurodata_type_inc"
MAP_HDMF_DATATYPE_INC = KeyMap(
    scope='hdmf-common',
    scope_type=SCOPE_TYPES.namespace,
    phase=PHASES.postload,
    source=r"'data_type_inc'",
    target=r"'neurodata_type_inc'",
)

69
poetry.lock generated
View file

@ -408,6 +408,41 @@ pyyaml = "*"
rdflib = ">=6.0.0"
requests = "*"
[[package]]
name = "markdown-it-py"
version = "3.0.0"
description = "Python port of markdown-it. Markdown parsing, done right!"
optional = false
python-versions = ">=3.8"
files = [
{file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"},
{file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"},
]
[package.dependencies]
mdurl = ">=0.1,<1.0"
[package.extras]
benchmarking = ["psutil", "pytest", "pytest-benchmark"]
code-style = ["pre-commit (>=3.0,<4.0)"]
compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"]
linkify = ["linkify-it-py (>=1,<3)"]
plugins = ["mdit-py-plugins"]
profiling = ["gprof2dot"]
rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"]
testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
[[package]]
name = "mdurl"
version = "0.1.2"
description = "Markdown URL utilities"
optional = false
python-versions = ">=3.7"
files = [
{file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"},
{file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
]
[[package]]
name = "nwb-schema-language"
version = "0.1.0"
@ -544,6 +579,20 @@ typing-extensions = ">=4.2.0"
dotenv = ["python-dotenv (>=0.10.4)"]
email = ["email-validator (>=1.0.3)"]
[[package]]
name = "pygments"
version = "2.16.1"
description = "Pygments is a syntax highlighting package written in Python."
optional = false
python-versions = ">=3.7"
files = [
{file = "Pygments-2.16.1-py3-none-any.whl", hash = "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692"},
{file = "Pygments-2.16.1.tar.gz", hash = "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29"},
]
[package.extras]
plugins = ["importlib-metadata"]
[[package]]
name = "pyparsing"
version = "3.1.1"
@ -710,6 +759,24 @@ urllib3 = ">=1.21.1,<3"
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
[[package]]
name = "rich"
version = "13.5.2"
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
optional = false
python-versions = ">=3.7.0"
files = [
{file = "rich-13.5.2-py3-none-any.whl", hash = "sha256:146a90b3b6b47cac4a73c12866a499e9817426423f57c5a66949c086191a8808"},
{file = "rich-13.5.2.tar.gz", hash = "sha256:fb9d6c0a0f643c99eed3875b5377a184132ba9be4d61516a55273d3554d75a39"},
]
[package.dependencies]
markdown-it-py = ">=2.2.0"
pygments = ">=2.13.0,<3.0.0"
[package.extras]
jupyter = ["ipywidgets (>=7.5.1,<9)"]
[[package]]
name = "rpds-py"
version = "0.9.2"
@ -968,4 +1035,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "71a9529af92fd8aeca93d42ec2e5bfcc6129787958795a367e145c9ed97002e1"
content-hash = "32b9026599bc647c7c91a582f2629cd4bf23ca6bbedecd62ac455cfde4c8ed5c"

View file

@ -16,6 +16,7 @@ pyyaml = "^6.0"
linkml-runtime = "^1.5.6"
nwb_schema_language = { path = './nwb-schema-language', develop = true }
pydantic = "<2"
rich = "^13.5.2"
[build-system]