From 2e7670a2bdc86a29ceaa4e029a3dc6b69d3e5236 Mon Sep 17 00:00:00 2001
From: sneakers-the-rat
Date: Fri, 19 Jul 2024 20:50:18 -0700
Subject: [PATCH] yaml peek to quickly find the value of a key

---
 nwb_linkml/src/nwb_linkml/io/yaml.py     | 63 +++++++++++++++++++
 .../src/nwb_linkml/providers/pydantic.py | 45 ++++++-------
 nwb_linkml/tests/test_io/test_io_yaml.py | 44 +++++++++++++
 3 files changed, 131 insertions(+), 21 deletions(-)
 create mode 100644 nwb_linkml/src/nwb_linkml/io/yaml.py
 create mode 100644 nwb_linkml/tests/test_io/test_io_yaml.py

diff --git a/nwb_linkml/src/nwb_linkml/io/yaml.py b/nwb_linkml/src/nwb_linkml/io/yaml.py
new file mode 100644
index 0000000..95e79fa
--- /dev/null
+++ b/nwb_linkml/src/nwb_linkml/io/yaml.py
@@ -0,0 +1,63 @@
+"""
+Utility functions for dealing with yaml files.
+
+No we are not going to implement a yaml parser here
+"""
+import re
+from pathlib import Path
+from typing import Literal, List, Union, overload
+
+
+@overload
+def yaml_peek(key: str, path: Union[str, Path], root: bool = True, first: Literal[True] = True) -> str: ...
+
+@overload
+def yaml_peek(key: str, path: Union[str, Path], root: bool = True, first: Literal[False] = False) -> List[str]: ...
+
+@overload
+def yaml_peek(key: str, path: Union[str, Path], root: bool = True, first: bool = True) -> Union[str, List[str]]: ...
+
+def yaml_peek(key: str, path: Union[str, Path], root: bool = True, first: bool = True) -> Union[str, List[str]]:
+    """
+    Peek into a yaml file without parsing the whole file to retrieve the value of a single key.
+
+    This function is _not_ designed for robustness to the yaml spec, it is for simple key: value
+    pairs, not fancy shit like multiline strings, tagged values, etc. If you want it to be,
+    then i'm afraid you'll have to make a PR about it.
+
+    Returns a string no matter what the yaml type is so ya have to do your own casting if you want
+
+    Args:
+        key (str): The key to peek for
+        path (:class:`pathlib.Path` , str): The yaml file to peek into
+        root (bool): Only find keys at the root of the document (default ``True`` ), otherwise
+            find keys at any level of nesting.
+        first (bool): Only return the first appearance of the key (default). Otherwise return a
+            list of values (not implemented lol)
+
+    Returns:
+        str
+    """
+    if root:
+        pattern = re.compile(rf'^(?P<key>{key}):\s*(?P<value>\S.*)', flags=re.MULTILINE)
+    else:
+        pattern = re.compile(rf'^\s*(?P<key>{key}):\s*(?P<value>\S.*)', flags=re.MULTILINE)
+
+    res = None
+    if first:
+        with open(path, 'r') as yfile:
+            for line in yfile:
+                res = pattern.match(line)
+                if res:
+                    break
+        if res:
+            return res.groupdict()['value']
+    else:
+        with open(path, 'r') as yfile:
+            text = yfile.read()
+        res = [match.groupdict()['value'] for match in pattern.finditer(text)]
+        if res:
+            return res
+
+    raise KeyError(f'Key {key} not found in {path}')
+
diff --git a/nwb_linkml/src/nwb_linkml/providers/pydantic.py b/nwb_linkml/src/nwb_linkml/providers/pydantic.py
index ae76f2b..a9307fa 100644
--- a/nwb_linkml/src/nwb_linkml/providers/pydantic.py
+++ b/nwb_linkml/src/nwb_linkml/providers/pydantic.py
@@ -13,6 +13,7 @@
 from typing import List, Optional, Type
 from pydantic import BaseModel
 from nwb_linkml import io
+from nwb_linkml.io.yaml import yaml_peek
 from nwb_linkml.generators.pydantic import NWBPydanticGenerator
 from nwb_linkml.maps.naming import module_case, version_module_case
 from nwb_linkml.providers import LinkMLProvider, Provider
@@ -36,9 +37,6 @@ class PydanticProvider(Provider):
 
     def __init__(self, path: Optional[Path] = None, verbose: bool = True):
         super().__init__(path, verbose)
-        # create a metapathfinder to find module we might create
-        pathfinder = EctopicModelFinder(self.path)
-        sys.meta_path.append(pathfinder)
 
     @property
     def path(self) -> Path:
@@ -50,7 +48,6 @@ class PydanticProvider(Provider):
         namespace: str | Path,
         out_file: Optional[Path] = None,
         version: Optional[str] = None,
-        versions: Optional[dict] = None,
         split: bool = True,
         dump: bool = True,
         force: bool = False,
@@ -75,13 +72,6 @@ class PydanticProvider(Provider):
             version (Optional[str]): The version of the schema to build, if present.
                 Works similarly to ``version`` in :class:`.LinkMLProvider`.
                 Ignored if ``namespace`` is a Path.
-            versions (Optional[dict]): An explicit mapping of namespaces and versions to use when
-                building the combined pydantic `namespace.py` file.
-                Since NWB doesn't have an explicit version dependency system between schema,
-                there is intrinsic ambiguity between which version
-                of which schema should be used when imported from another.
-                This mapping allows those ambiguities to be resolved.
-                See :class:`.NWBPydanticGenerator` 's ``versions`` argument for more information.
             split (bool): If ``False`` (default), generate a single ``namespace.py`` file, otherwise
                 generate a python file for each schema in the namespace in addition to a
                 ``namespace.py`` that imports from them
@@ -107,19 +97,15 @@ class PydanticProvider(Provider):
         if version is None:
             # Get the most recently built version
             version = LinkMLProvider(path=self.config.cache_dir).available_versions[name][-1]
-            fn = path.parts[-1]
+            fn = path.name
         else:
             # given a path to a namespace linkml yaml file
             path = Path(namespace)
-            # FIXME: this is extremely fragile, but get the details from the path.
-            # this is faster than reading yaml for now
-            name = path.parts[-3]
-            version = path.parts[-2]
-            fn = path.parts[-1]
+            name = yaml_peek('name', path)
+            version = yaml_peek('version', path)
+            fn = path.name
 
         version = version_module_case(version)
-        # this is extremely fragile, we should not be inferring version number from paths...
-        # TODO: we need an efficient peek for specific keys within a yaml file
         if out_file is None:
             fn = fn.removesuffix(".yaml")
             fn = module_case(fn) + ".py"
@@ -137,10 +123,14 @@ class PydanticProvider(Provider):
 
         if versions is None:
             versions = self._get_dependent_versions(path)
+
         if split:
-            return self._build_split(path, versions, default_kwargs, dump, out_file, force)
+            result = self._build_split(path, versions, default_kwargs, dump, out_file, force)
         else:
-            return self._build_unsplit(path, versions, default_kwargs, dump, out_file, force)
+            result = self._build_unsplit(path, versions, default_kwargs, dump, out_file, force)
+
+        self.install_pathfinder()
+        return result
 
     def _build_unsplit(
         self,
@@ -406,6 +396,19 @@ class PydanticProvider(Provider):
         mod = self.get(namespace, version)
         return getattr(mod, class_)
 
+    def install_pathfinder(self):
+        """
+        Add a :class:`.EctopicModelFinder` instance that allows us to import from
+        the directory that we are generating models into
+        """
+        # check if one already exists
+        matches = [finder for finder in sys.meta_path if isinstance(finder, EctopicModelFinder) and finder.path == self.path]
+        if len(matches) > 0:
+            return
+
+        pathfinder = EctopicModelFinder(self.path)
+        sys.meta_path.append(pathfinder)
+
 
 class EctopicModelFinder(MetaPathFinder):
     """
diff --git a/nwb_linkml/tests/test_io/test_io_yaml.py b/nwb_linkml/tests/test_io/test_io_yaml.py
new file mode 100644
index 0000000..512aa77
--- /dev/null
+++ b/nwb_linkml/tests/test_io/test_io_yaml.py
@@ -0,0 +1,44 @@
+import pytest
+import yaml
+
+from nwb_linkml.io.yaml import yaml_peek
+
+@pytest.fixture()
+def yaml_file(tmp_path):
+    data = {
+        'key1': 'val1',
+        'key2': 'val2',
+        'key3': {
+            'key1': 'val3',
+            'key4': 'val4'
+        }
+    }
+    out_file = tmp_path / 'test.yaml'
+    with open(out_file, 'w') as yfile:
+        yaml.dump(data, yfile)
+
+    yield out_file
+
+    out_file.unlink()
+
+
+
+@pytest.mark.parametrize(
+    'key,expected,root,first',
+    [
+        ('key1', 'val1', True, True),
+        ('key1', 'val1', False, True),
+        ('key1', ['val1'], True, False),
+        ('key1', ['val1', 'val3'], False, False),
+        ('key2', 'val2', True, True),
+        ('key3', False, True, True),
+        ('key4', False, True, True),
+        ('key4', 'val4', False, True)
+    ]
+)
+def test_peek_yaml(key, expected, root, first, yaml_file):
+    if not expected:
+        with pytest.raises(KeyError):
+            _ = yaml_peek(key, yaml_file, root=root, first=first)
+    else:
+        assert yaml_peek(key, yaml_file, root=root, first=first) == expected
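
A minimal usage sketch of the new helper, mirroring how the provider calls it above; the
namespace filename here is hypothetical and not part of the patch:

    from pathlib import Path
    from nwb_linkml.io.yaml import yaml_peek

    path = Path("core.namespace.yaml")  # hypothetical file with root-level name/version keys

    # value of the first root-level key, returned as a string;
    # raises KeyError if the key is not found
    name = yaml_peek("name", path)
    version = yaml_peek("version", path)

    # all values for a key at any nesting depth, returned as a list of strings
    all_names = yaml_peek("name", path, root=False, first=False)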