working schema language import

This commit is contained in:
sneakers-the-rat 2023-08-18 22:09:28 -07:00
parent fe44c1c657
commit 111965aace
11 changed files with 301 additions and 27 deletions

View file

@ -108,7 +108,12 @@ gen-project: $(PYMODEL)
gen-pydantic: $(PYMODEL) gen-pydantic: $(PYMODEL)
$(RUN) gen-pydantic $(SOURCE_SCHEMA_PATH) --pydantic_version 1 > $(PYMODEL)/nwb_schema_pydantic.py $(RUN) gen-pydantic $(SOURCE_SCHEMA_PATH) --pydantic_version 1 > $(PYMODEL)/nwb_schema_pydantic.py
$(RUN) run_patches --phase post_generation $(RUN) run_patches --phase post_generation_pydantic
gen-pydantic-test: $(PYMODEL)
$(RUN) gen-pydantic src/nwb_schema_language/schema/test_multival.yml --pydantic_version 1 > $(PYMODEL)/test_multival_pydantic.py
# $(RUN) run_patches --phase post_generation
test: test-schema test-python test-examples test: test-schema test-python test-examples

View file

@ -168,7 +168,7 @@ class CompoundDtype(ConfiguredBaseModel):
class DtypeMixin(ConfiguredBaseModel): class DtypeMixin(ConfiguredBaseModel):
dtype: Optional[List[Union[CompoundDtype, FlatDtype, ReferenceDtype]]] = Field(default_factory=list) dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(default_factory=list)
class Attribute(DtypeMixin): class Attribute(DtypeMixin):
@ -180,7 +180,7 @@ class Attribute(DtypeMixin):
default_value: Optional[Any] = Field(None, description="""Optional default value for variable-valued attributes.""") default_value: Optional[Any] = Field(None, description="""Optional default value for variable-valued attributes.""")
doc: str = Field(..., description="""Description of corresponding object.""") doc: str = Field(..., description="""Description of corresponding object.""")
required: Optional[bool] = Field(True, description="""Optional boolean key describing whether the attribute is required. Default value is True.""") required: Optional[bool] = Field(True, description="""Optional boolean key describing whether the attribute is required. Default value is True.""")
dtype: Optional[List[Union[CompoundDtype, FlatDtype, ReferenceDtype]]] = Field(default_factory=list) dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(default_factory=list)
class NamingMixin(ConfiguredBaseModel): class NamingMixin(ConfiguredBaseModel):
@ -192,8 +192,8 @@ class NamingMixin(ConfiguredBaseModel):
class Group(NamingMixin): class Group(NamingMixin):
neurodata_type_def: Optional[str] = Field(None) neurodata_type_def: Optional[str] = Field(None, description="""Used alongside neurodata_type_inc to indicate inheritance, naming, and mixins""")
neurodata_type_inc: Optional[str] = Field(None) neurodata_type_inc: Optional[str] = Field(None, description="""Used alongside neurodata_type_def to indicate inheritance, naming, and mixins""")
name: Optional[str] = Field(None) name: Optional[str] = Field(None)
default_name: Optional[str] = Field(None) default_name: Optional[str] = Field(None)
doc: str = Field(..., description="""Description of corresponding object.""") doc: str = Field(..., description="""Description of corresponding object.""")
@ -207,8 +207,8 @@ class Group(NamingMixin):
class Dataset(NamingMixin, DtypeMixin): class Dataset(NamingMixin, DtypeMixin):
neurodata_type_def: Optional[str] = Field(None) neurodata_type_def: Optional[str] = Field(None, description="""Used alongside neurodata_type_inc to indicate inheritance, naming, and mixins""")
neurodata_type_inc: Optional[str] = Field(None) neurodata_type_inc: Optional[str] = Field(None, description="""Used alongside neurodata_type_def to indicate inheritance, naming, and mixins""")
name: Optional[str] = Field(None) name: Optional[str] = Field(None)
default_name: Optional[str] = Field(None) default_name: Optional[str] = Field(None)
dims: Optional[List[Union[Any, str]]] = Field(default_factory=list) dims: Optional[List[Union[Any, str]]] = Field(default_factory=list)
@ -219,7 +219,7 @@ class Dataset(NamingMixin, DtypeMixin):
quantity: Optional[Union[QuantityEnum, int]] = Field(1) quantity: Optional[Union[QuantityEnum, int]] = Field(1)
linkable: Optional[bool] = Field(None) linkable: Optional[bool] = Field(None)
attributes: Optional[List[Attribute]] = Field(default_factory=list) attributes: Optional[List[Attribute]] = Field(default_factory=list)
dtype: Optional[List[Union[CompoundDtype, FlatDtype, ReferenceDtype]]] = Field(default_factory=list) dtype: Optional[Union[List[CompoundDtype], FlatDtype, ReferenceDtype]] = Field(default_factory=list)

View file

@ -11,7 +11,9 @@ import argparse
import pprint import pprint
class Phases(StrEnum): class Phases(StrEnum):
post_generation = "post_generation" post_generation_pydantic = "post_generation_pydantic"
post_load_yaml = "post_load_yaml"
"""After the yaml of the nwb schema classes is loaded"""
@dataclass @dataclass
class Patch: class Patch:
@ -34,18 +36,42 @@ class Patch:
### Patches ### Patches
## Patches for the generated pydantic classes
patch_schema_slot = Patch( patch_schema_slot = Patch(
phase=Phases.post_generation, phase=Phases.post_generation_pydantic,
path=Path('src/nwb_schema_language/datamodel/nwb_schema_pydantic.py'), path=Path('src/nwb_schema_language/datamodel/nwb_schema_pydantic.py'),
match=r"\n\s*(schema)(.*Field\()(.*)", match=r"\n\s*(schema:)(.*Field\()(.*)",
replacement=r'\n schema_\2alias="schema", \3', replacement=r'\n schema_:\2alias="schema", \3',
)
# patch_neurodata_type_def_alias = Patch(
# phase=Phases.post_generation_pydantic,
# path=Path('src/nwb_schema_language/datamodel/nwb_schema_pydantic.py'),
# match=r"(\n\s*neurodata_type_def.*Field\(None, )(.*)",
# replacement=r'\1alias="data_type_def", \2',
# )
#
# patch_neurodata_type_inc_alias = Patch(
# phase=Phases.post_generation_pydantic,
# path=Path('src/nwb_schema_language/datamodel/nwb_schema_pydantic.py'),
# match=r"(\n\s*neurodata_type_inc.*Field\(None, )(.*)",
# replacement=r'\1alias="data_type_inc", \2',
# )
patch_dtype_single_multiple = Patch(
phase=Phases.post_generation_pydantic,
path=Path('src/nwb_schema_language/datamodel/nwb_schema_pydantic.py'),
match=r"(\n\s*dtype: Optional\[)List\[Union\[CompoundDtype, (FlatDtype, ReferenceDtype\]\])\]",
replacement=r'\1Union[List[CompoundDtype], \2',
) )
def run_patches(phase:Phases, verbose:bool=False): def run_patches(phase:Phases, verbose:bool=False):
patches = [p for p in Patch.instances if p.phase == phase] patches = [p for p in Patch.instances if p.phase == phase]
for patch in patches: for patch in patches:
print('Patching:') if verbose:
pprint.pprint(patch) print('Patching:')
pprint.pprint(patch)
with open(patch.path, 'r') as pfile: with open(patch.path, 'r') as pfile:
string = pfile.read() string = pfile.read()
string = re.sub(patch.match, patch.replacement, string) string = re.sub(patch.match, patch.replacement, string)

View file

@ -157,11 +157,11 @@ classes:
- preconditions: - preconditions:
slot_conditions: slot_conditions:
dtype: dtype:
range: CompoundDtype range: FlatDtype
postconditions: postconditions:
slot_conditions: slot_conditions:
dtype: dtype:
multivalued: true multivalued: false
NamingMixin: NamingMixin:
mixin: true mixin: true
@ -239,10 +239,12 @@ slots:
# groups # groups
neurodata_type_def: neurodata_type_def:
description: Used alongside neurodata_type_inc to indicate inheritance, naming, and mixins
structured_pattern: structured_pattern:
syntax: "{protected_string}" syntax: "{protected_string}"
interpolated: true interpolated: true
neurodata_type_inc: neurodata_type_inc:
description: Used alongside neurodata_type_def to indicate inheritance, naming, and mixins
structured_pattern: structured_pattern:
syntax: "{protected_string}" syntax: "{protected_string}"
interpolated: true interpolated: true

View file

@ -0,0 +1 @@
from nwb_linkml.maps import preload

View file

@ -2,31 +2,51 @@
Loading/saving NWB Schema yaml files Loading/saving NWB Schema yaml files
""" """
from pathlib import Path from pathlib import Path
from typing import TypedDict, List from typing import TypedDict, List, Dict
from pprint import pprint from pprint import pprint
import warnings
from linkml_runtime.loaders import yaml_loader from linkml_runtime.loaders import yaml_loader
import yaml import yaml
from nwb_schema_language import Namespaces, Group, Dataset from nwb_schema_language import Namespaces, Namespace, Group, Dataset
from nwb_linkml.namespaces import GitRepo, NamespaceRepo, NWB_CORE_REPO from nwb_linkml.namespaces import GitRepo, NamespaceRepo, NWB_CORE_REPO, HDMF_COMMON_REPO
from nwb_linkml.maps import preload
from nwb_linkml.map import PHASES, Map
class NamespaceBundle(TypedDict):
    """
    One namespaces file together with the schema files it points to
    """
    # the loaded namespaces file
    namespace: Namespaces
    # loaded datasets and groups, keyed by string
    # (load_namespace_schema keys them by each schema's ``source``)
    schema: Dict[str, List[Dataset | Group]]
def load_yaml(path:Path) -> dict:
    """
    Load a YAML file and run every registered post-load map over its contents.

    Args:
        path (:class:`pathlib.Path`): Location of the YAML file to read

    Returns:
        dict: The parsed contents after each ``PHASES.postload`` map has been
            applied. An empty file gives an empty dict.
    """
    # PyYAML assumes UTF-8 when there is no BOM; don't depend on the platform locale
    with open(path, 'r', encoding='utf-8') as file:
        ns_dict = yaml.safe_load(file)

    # safe_load returns None for an empty document; normalise so that callers
    # can rely on the annotated return type (e.g. ``source.get('datasets', [])``)
    if ns_dict is None:
        ns_dict = {}

    # apply maps
    # NOTE(review): maps are selected by phase only - Map.scope is not consulted here
    maps = [m for m in Map.instances if m.phase == PHASES.postload]
    for amap in maps:
        ns_dict = amap.apply(ns_dict)
    return ns_dict
def load_namespaces(path:Path|NamespaceRepo) -> Namespaces: def load_namespaces(path:Path|NamespaceRepo) -> Namespaces:
if isinstance(path, NamespaceRepo): if isinstance(path, NamespaceRepo):
path = path.provide_from_git() path = path.provide_from_git()
namespaces = yaml_loader.load(str(path), target_class=Namespaces) ns_dict = load_yaml(path)
namespaces = yaml_loader.load(ns_dict, target_class=Namespaces)
return namespaces return namespaces
class SchemaFile(TypedDict):
datasets: List[Dataset]
groups: List[Group]
def load_schema_file(path:Path) -> List[Dataset | Group]: def load_schema_file(path:Path) -> List[Dataset | Group]:
with open(path, 'r') as yfile: source = load_yaml(path)
source = yaml.safe_load(yfile)
schema = [] schema = []
@ -37,13 +57,74 @@ def load_schema_file(path:Path) -> List[Dataset | Group]:
pprint(dataset) pprint(dataset)
raise e raise e
for group in source.get('groups', []):
try:
schema.append(Group(**group))
except Exception as e:
pprint(group)
raise e
#schema.extend([Dataset(**dataset) for dataset in source.get('datasets', [])]) #schema.extend([Dataset(**dataset) for dataset in source.get('datasets', [])])
#schema.extend([Group(**group) for group in source.get('groups', [])]) #schema.extend([Group(**group) for group in source.get('groups', [])])
return schema return schema
def load_nwb_core(): def load_namespace_schema(namespace: Namespace | Namespaces, path:Path=Path('.')) -> Dict[str, List[Dataset | Group]]:
"""
Load all schema referenced by a namespace file
Args:
namespace (:class:`.Namespace`):
path (:class:`pathlib.Path`): Location of the namespace file - all relative paths are interpreted relative to this
Returns:
Dict[str, List[Union[Dataset|Group]]]: loaded schema, keyed by each schema's ``source``
"""
if isinstance(namespace, Namespace):
ns_iter = [namespace]
elif isinstance(namespace, Namespaces):
ns_iter = namespace.namespaces
else:
raise TypeError("Need to pass a namespace or namespaces :)")
path = Path(path).resolve()
if path.is_file():
# given the namespace file itself, so find paths relative to its directory
path = path.parent
sch = {}
for ns in ns_iter:
for schema in ns.schema_:
if schema.source is None:
warnings.warn(f"No source specified for {schema}")
continue
yml_file = (path / schema.source).resolve()
sch[schema.source] = load_schema_file(yml_file)
return sch
def load_nwb_core() -> Dict[str, NamespaceBundle]:
# First get hdmf-common:
hdmf_ns_file = HDMF_COMMON_REPO.provide_from_git()
hdmf_ns = load_namespaces(hdmf_ns_file)
hdmf_schema = load_namespace_schema(hdmf_ns, hdmf_ns_file)
namespace_file = NWB_CORE_REPO.provide_from_git() namespace_file = NWB_CORE_REPO.provide_from_git()
ns = load_namespaces(namespace_file) ns = load_namespaces(namespace_file)
schema = load_namespace_schema(ns, namespace_file)
return {
'hdmf-common': NamespaceBundle(
namespace=hdmf_ns,
schema=hdmf_schema
),
'nwb-core': NamespaceBundle(
namespace=ns,
schema=schema
)
}

67
nwb_linkml/map.py Normal file
View file

@ -0,0 +1,67 @@
import ast
import re
from dataclasses import dataclass
from enum import StrEnum
from typing import Callable, ClassVar, List, Optional
class MAP_TYPES(StrEnum):
key = 'key'
"""Mapping the name of one key to another key"""
class SCOPE_TYPES(StrEnum):
namespace = 'namespace'
class PHASES(StrEnum):
postload = "postload"
"""After the YAML for a model has been loaded"""
@dataclass
class Map:
    """
    Base class for a transformation of loaded schema data.

    Every instance appends itself to :attr:`Map.instances` when it is created,
    so maps only need to be defined (imported) to be discoverable.
    Subclasses provide the actual behaviour by overriding :meth:`apply`.
    """
    scope: str
    """The namespace that the map is relevant to"""
    scope_type: SCOPE_TYPES
    source: str
    """The path within the schema to select the element to transform"""
    target: str
    """The path where the element should end"""
    transform: Optional[Callable] = None
    """
    Some transformation function, currently not implemented.
    """
    phase: Optional[PHASES] = None
    instances: ClassVar[List['Map']] = []
    """
    Maps that get defined!!!

    A single list on the base class: subclasses that do not redefine it
    register into the same list.
    """

    def apply(self, input: Optional[dict] = None) -> dict:
        """
        Apply the map to ``input`` and return the result.

        The signature matches the subclass implementations and the call in
        the loader (``amap.apply(ns_dict)``); the argument is optional only so
        that existing zero-argument calls keep raising the same exception.

        Raises:
            NotImplementedError: always - the base class has no behaviour
        """
        raise NotImplementedError('do this in a subclass')

    def __post_init__(self):
        # runs after the dataclass-generated __init__: register this instance
        self.instances.append(self)
# def replace_keys(input: dict, source: str, target: str) -> dict:
# """Recursively change keys in a dictionary"""
class KeyMap(Map):
    """
    Map that renames dictionary keys, at any depth of nesting.
    """

    def apply(self, input: dict) -> dict:
        """
        Change all keys from source to target.

        ``source`` is a regular expression and ``target`` its replacement. Both
        are matched against the ``repr`` of each key, so patterns that include
        the surrounding quotes (e.g. ``"'data_type_def'"``) keep working.

        Unlike converting the whole structure to a string and back, this only
        touches keys: string values that happen to contain the pattern are left
        alone, and values whose ``repr`` is not a Python literal (dates, nan,
        arbitrary objects) no longer make the conversion fail.

        Args:
            input (dict): Loaded data; dicts, lists and tuples are walked recursively

        Returns:
            dict: A new structure with matching keys renamed. ``input`` is not modified.
        """
        pattern = re.compile(self.source)

        def _rename(key):
            # only round-trip through literal_eval when the pattern actually hit,
            # so keys with a non-literal repr pass through untouched
            renamed, n_hits = pattern.subn(self.target, repr(key))
            return ast.literal_eval(renamed) if n_hits else key

        def _walk(node):
            if isinstance(node, dict):
                return {_rename(k): _walk(v) for k, v in node.items()}
            if isinstance(node, list):
                return [_walk(item) for item in node]
            if isinstance(node, tuple):
                return tuple(_walk(item) for item in node)
            return node

        return _walk(input)

View file

@ -0,0 +1,2 @@
# Import everything so it's defined, but it shouldn't necessarily be used from here
from nwb_linkml.maps.preload import MAP_HDMF_DATATYPE_DEF, MAP_HDMF_DATATYPE_INC

View file

@ -0,0 +1,22 @@
"""
Maps to change the loaded .yaml from nwb schema before it's passed to the schema language models
"""
from nwb_linkml.map import KeyMap, SCOPE_TYPES, PHASES
# Post-load key rename declared for the hdmf-common namespace:
# data_type_def -> neurodata_type_def.
# The quotes are part of the pattern because KeyMap matches the quoted (repr) form.
MAP_HDMF_DATATYPE_DEF = KeyMap(
    scope='hdmf-common',
    scope_type=SCOPE_TYPES.namespace,
    phase=PHASES.postload,
    source="\'data_type_def\'",
    target="\'neurodata_type_def\'",
)
# Post-load key rename declared for the hdmf-common namespace:
# data_type_inc -> neurodata_type_inc.
# The quotes are part of the pattern because KeyMap matches the quoted (repr) form.
MAP_HDMF_DATATYPE_INC = KeyMap(
    scope='hdmf-common',
    scope_type=SCOPE_TYPES.namespace,
    phase=PHASES.postload,
    source="\'data_type_inc\'",
    target="\'neurodata_type_inc\'",
)

69
poetry.lock generated
View file

@ -408,6 +408,41 @@ pyyaml = "*"
rdflib = ">=6.0.0" rdflib = ">=6.0.0"
requests = "*" requests = "*"
[[package]]
name = "markdown-it-py"
version = "3.0.0"
description = "Python port of markdown-it. Markdown parsing, done right!"
optional = false
python-versions = ">=3.8"
files = [
{file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"},
{file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"},
]
[package.dependencies]
mdurl = ">=0.1,<1.0"
[package.extras]
benchmarking = ["psutil", "pytest", "pytest-benchmark"]
code-style = ["pre-commit (>=3.0,<4.0)"]
compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"]
linkify = ["linkify-it-py (>=1,<3)"]
plugins = ["mdit-py-plugins"]
profiling = ["gprof2dot"]
rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"]
testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
[[package]]
name = "mdurl"
version = "0.1.2"
description = "Markdown URL utilities"
optional = false
python-versions = ">=3.7"
files = [
{file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"},
{file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
]
[[package]] [[package]]
name = "nwb-schema-language" name = "nwb-schema-language"
version = "0.1.0" version = "0.1.0"
@ -544,6 +579,20 @@ typing-extensions = ">=4.2.0"
dotenv = ["python-dotenv (>=0.10.4)"] dotenv = ["python-dotenv (>=0.10.4)"]
email = ["email-validator (>=1.0.3)"] email = ["email-validator (>=1.0.3)"]
[[package]]
name = "pygments"
version = "2.16.1"
description = "Pygments is a syntax highlighting package written in Python."
optional = false
python-versions = ">=3.7"
files = [
{file = "Pygments-2.16.1-py3-none-any.whl", hash = "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692"},
{file = "Pygments-2.16.1.tar.gz", hash = "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29"},
]
[package.extras]
plugins = ["importlib-metadata"]
[[package]] [[package]]
name = "pyparsing" name = "pyparsing"
version = "3.1.1" version = "3.1.1"
@ -710,6 +759,24 @@ urllib3 = ">=1.21.1,<3"
socks = ["PySocks (>=1.5.6,!=1.5.7)"] socks = ["PySocks (>=1.5.6,!=1.5.7)"]
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
[[package]]
name = "rich"
version = "13.5.2"
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
optional = false
python-versions = ">=3.7.0"
files = [
{file = "rich-13.5.2-py3-none-any.whl", hash = "sha256:146a90b3b6b47cac4a73c12866a499e9817426423f57c5a66949c086191a8808"},
{file = "rich-13.5.2.tar.gz", hash = "sha256:fb9d6c0a0f643c99eed3875b5377a184132ba9be4d61516a55273d3554d75a39"},
]
[package.dependencies]
markdown-it-py = ">=2.2.0"
pygments = ">=2.13.0,<3.0.0"
[package.extras]
jupyter = ["ipywidgets (>=7.5.1,<9)"]
[[package]] [[package]]
name = "rpds-py" name = "rpds-py"
version = "0.9.2" version = "0.9.2"
@ -968,4 +1035,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.11" python-versions = "^3.11"
content-hash = "71a9529af92fd8aeca93d42ec2e5bfcc6129787958795a367e145c9ed97002e1" content-hash = "32b9026599bc647c7c91a582f2629cd4bf23ca6bbedecd62ac455cfde4c8ed5c"

View file

@ -16,6 +16,7 @@ pyyaml = "^6.0"
linkml-runtime = "^1.5.6" linkml-runtime = "^1.5.6"
nwb_schema_language = { path = './nwb-schema-language', develop = true } nwb_schema_language = { path = './nwb-schema-language', develop = true }
pydantic = "<2" pydantic = "<2"
rich = "^13.5.2"
[build-system] [build-system]