mirror of
https://github.com/p2p-ld/nwb-linkml.git
synced 2025-01-10 06:04:28 +00:00
yaml peek to quickly find the value of a key
This commit is contained in:
parent
27d18b69d8
commit
2e7670a2bd
3 changed files with 131 additions and 21 deletions
63
nwb_linkml/src/nwb_linkml/io/yaml.py
Normal file
63
nwb_linkml/src/nwb_linkml/io/yaml.py
Normal file
|
@ -0,0 +1,63 @@
|
||||||
|
"""
|
||||||
|
Utility functions for dealing with yaml files.
|
||||||
|
|
||||||
|
No we are not going to implement a yaml parser here
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Literal, List, Union, overload
|
||||||
|
|
||||||
|
|
||||||
|
@overload
def yaml_peek(
    key: str, path: Union[str, Path], root: bool = True, first: Literal[True] = True
) -> str: ...


@overload
def yaml_peek(
    key: str, path: Union[str, Path], root: bool = True, *, first: Literal[False]
) -> List[str]: ...


@overload
def yaml_peek(
    key: str, path: Union[str, Path], root: bool, first: Literal[False]
) -> List[str]: ...


@overload
def yaml_peek(
    key: str, path: Union[str, Path], root: bool = True, first: bool = True
) -> Union[str, List[str]]: ...


def yaml_peek(
    key: str, path: Union[str, Path], root: bool = True, first: bool = True
) -> Union[str, List[str]]:
    """
    Peek into a yaml file without parsing the whole file to retrieve the value of a single key.

    This function is _not_ designed for robustness to the yaml spec, it is for simple
    ``key: value`` pairs written on a single line, not multiline strings, tagged values,
    flow mappings, etc.

    Returns strings no matter what the yaml type is, exactly as written in the file
    (surrounding quotes are kept, trailing whitespace is dropped), so callers have to do
    their own casting.

    Args:
        key (str): The key to peek for. Matched literally, regex metacharacters in the
            key are escaped.
        path (:class:`pathlib.Path` , str): The yaml file to peek into
        root (bool): Only find keys at the root of the document (default ``True`` ), otherwise
            find keys at any level of indentation.
        first (bool): Only return the first appearance of the key (default). Otherwise return a
            list of the values of every appearance, in file order.

    Returns:
        str: if ``first`` is ``True``
        list[str]: if ``first`` is ``False``

    Raises:
        KeyError: if no line with an inline value for ``key`` is found
    """
    # Horizontal whitespace only: ``\s`` would also match a newline, which must never
    # let a match run from a parent key onto the first line of its nested mapping.
    indent = "" if root else r"[ \t]*"
    pattern = re.compile(rf"^{indent}(?P<key>{re.escape(key)}):[ \t]*(?P<value>\S.*)")

    values: List[str] = []
    # Match line by line in both modes so ``^`` always means "start of a line" and the
    # file is never read further than necessary when only the first hit is wanted.
    with open(path, "r", encoding="utf-8") as yfile:
        for line in yfile:
            match = pattern.match(line)
            if match is None:
                continue
            value = match.group("value").rstrip()
            if first:
                return value
            values.append(value)

    if values:
        return values

    raise KeyError(f'Key {key} not found in {path}')
|
||||||
|
|
|
@ -13,6 +13,7 @@ from typing import List, Optional, Type
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from nwb_linkml import io
|
from nwb_linkml import io
|
||||||
|
from nwb_linkml.io.yaml import yaml_peek
|
||||||
from nwb_linkml.generators.pydantic import NWBPydanticGenerator
|
from nwb_linkml.generators.pydantic import NWBPydanticGenerator
|
||||||
from nwb_linkml.maps.naming import module_case, version_module_case
|
from nwb_linkml.maps.naming import module_case, version_module_case
|
||||||
from nwb_linkml.providers import LinkMLProvider, Provider
|
from nwb_linkml.providers import LinkMLProvider, Provider
|
||||||
|
@ -36,9 +37,6 @@ class PydanticProvider(Provider):
|
||||||
|
|
||||||
def __init__(self, path: Optional[Path] = None, verbose: bool = True):
|
def __init__(self, path: Optional[Path] = None, verbose: bool = True):
|
||||||
super().__init__(path, verbose)
|
super().__init__(path, verbose)
|
||||||
# create a metapathfinder to find module we might create
|
|
||||||
pathfinder = EctopicModelFinder(self.path)
|
|
||||||
sys.meta_path.append(pathfinder)
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def path(self) -> Path:
|
def path(self) -> Path:
|
||||||
|
@ -50,7 +48,6 @@ class PydanticProvider(Provider):
|
||||||
namespace: str | Path,
|
namespace: str | Path,
|
||||||
out_file: Optional[Path] = None,
|
out_file: Optional[Path] = None,
|
||||||
version: Optional[str] = None,
|
version: Optional[str] = None,
|
||||||
versions: Optional[dict] = None,
|
|
||||||
split: bool = True,
|
split: bool = True,
|
||||||
dump: bool = True,
|
dump: bool = True,
|
||||||
force: bool = False,
|
force: bool = False,
|
||||||
|
@ -75,13 +72,6 @@ class PydanticProvider(Provider):
|
||||||
version (Optional[str]): The version of the schema to build, if present.
|
version (Optional[str]): The version of the schema to build, if present.
|
||||||
Works similarly to ``version`` in :class:`.LinkMLProvider`.
|
Works similarly to ``version`` in :class:`.LinkMLProvider`.
|
||||||
Ignored if ``namespace`` is a Path.
|
Ignored if ``namespace`` is a Path.
|
||||||
versions (Optional[dict]): An explicit mapping of namespaces and versions to use when
|
|
||||||
building the combined pydantic `namespace.py` file.
|
|
||||||
Since NWB doesn't have an explicit version dependency system between schema,
|
|
||||||
there is intrinsic ambiguity between which version
|
|
||||||
of which schema should be used when imported from another.
|
|
||||||
This mapping allows those ambiguities to be resolved.
|
|
||||||
See :class:`.NWBPydanticGenerator` 's ``versions`` argument for more information.
|
|
||||||
split (bool): If ``False`` (default), generate a single ``namespace.py`` file,
|
split (bool): If ``False`` (default), generate a single ``namespace.py`` file,
|
||||||
otherwise generate a python file for each schema in the namespace
|
otherwise generate a python file for each schema in the namespace
|
||||||
in addition to a ``namespace.py`` that imports from them
|
in addition to a ``namespace.py`` that imports from them
|
||||||
|
@ -107,19 +97,15 @@ class PydanticProvider(Provider):
|
||||||
if version is None:
|
if version is None:
|
||||||
# Get the most recently built version
|
# Get the most recently built version
|
||||||
version = LinkMLProvider(path=self.config.cache_dir).available_versions[name][-1]
|
version = LinkMLProvider(path=self.config.cache_dir).available_versions[name][-1]
|
||||||
fn = path.parts[-1]
|
fn = path.name
|
||||||
else:
|
else:
|
||||||
# given a path to a namespace linkml yaml file
|
# given a path to a namespace linkml yaml file
|
||||||
path = Path(namespace)
|
path = Path(namespace)
|
||||||
# FIXME: this is extremely fragile, but get the details from the path.
|
name = yaml_peek('name', path)
|
||||||
# this is faster than reading yaml for now
|
version = yaml_peek('version', path)
|
||||||
name = path.parts[-3]
|
fn = path.name
|
||||||
version = path.parts[-2]
|
|
||||||
fn = path.parts[-1]
|
|
||||||
|
|
||||||
version = version_module_case(version)
|
version = version_module_case(version)
|
||||||
# this is extremely fragile, we should not be inferring version number from paths...
|
|
||||||
# TODO: we need an efficient peek for specific keys within a yaml file
|
|
||||||
if out_file is None:
|
if out_file is None:
|
||||||
fn = fn.removesuffix(".yaml")
|
fn = fn.removesuffix(".yaml")
|
||||||
fn = module_case(fn) + ".py"
|
fn = module_case(fn) + ".py"
|
||||||
|
@ -137,10 +123,14 @@ class PydanticProvider(Provider):
|
||||||
if versions is None:
|
if versions is None:
|
||||||
versions = self._get_dependent_versions(path)
|
versions = self._get_dependent_versions(path)
|
||||||
|
|
||||||
|
|
||||||
if split:
|
if split:
|
||||||
return self._build_split(path, versions, default_kwargs, dump, out_file, force)
|
result = self._build_split(path, versions, default_kwargs, dump, out_file, force)
|
||||||
else:
|
else:
|
||||||
return self._build_unsplit(path, versions, default_kwargs, dump, out_file, force)
|
result = self._build_unsplit(path, versions, default_kwargs, dump, out_file, force)
|
||||||
|
|
||||||
|
self.install_pathfinder()
|
||||||
|
return result
|
||||||
|
|
||||||
def _build_unsplit(
|
def _build_unsplit(
|
||||||
self,
|
self,
|
||||||
|
@ -406,6 +396,19 @@ class PydanticProvider(Provider):
|
||||||
mod = self.get(namespace, version)
|
mod = self.get(namespace, version)
|
||||||
return getattr(mod, class_)
|
return getattr(mod, class_)
|
||||||
|
|
||||||
|
def install_pathfinder(self):
    """
    Make sure ``sys.meta_path`` holds an :class:`.EctopicModelFinder` for :attr:`.path`,
    which allows us to import from the directory that we are generating models into.

    Does nothing when a finder for the same path has already been registered,
    so it is safe to call repeatedly.
    """
    already_installed = any(
        isinstance(finder, EctopicModelFinder) and finder.path == self.path
        for finder in sys.meta_path
    )
    if not already_installed:
        sys.meta_path.append(EctopicModelFinder(self.path))
|
||||||
|
|
||||||
|
|
||||||
class EctopicModelFinder(MetaPathFinder):
|
class EctopicModelFinder(MetaPathFinder):
|
||||||
"""
|
"""
|
||||||
|
|
44
nwb_linkml/tests/test_io/test_io_yaml.py
Normal file
44
nwb_linkml/tests/test_io/test_io_yaml.py
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
import pytest
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from nwb_linkml.io.yaml import yaml_peek
|
||||||
|
|
||||||
|
@pytest.fixture()
def yaml_file(tmp_path):
    """
    Write a small yaml document with root-level and nested keys to a temporary file.

    ``key1`` appears both at the root and nested underneath ``key3``,
    ``key4`` only appears nested. The file is removed again after the test.
    """
    nested = {'key1': 'val3', 'key4': 'val4'}
    data = {'key1': 'val1', 'key2': 'val2', 'key3': nested}

    out_file = tmp_path / 'test.yaml'
    with open(out_file, 'w') as yfile:
        yaml.dump(data, yfile)

    yield out_file

    out_file.unlink()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    'key,expected,root,first',
    [
        ('key1', 'val1', True, True),
        ('key1', 'val1', False, True),
        ('key1', ['val1'], True, False),
        ('key1', ['val1', 'val3'], False, False),
        ('key2', 'val2', True, True),
        ('key3', False, True, True),
        ('key4', False, True, True),
        ('key4', 'val4', False, True),
    ],
)
def test_peek_yaml(key, expected, root, first, yaml_file):
    """
    ``yaml_peek`` returns exactly the expected value(s) for a key,
    or raises a ``KeyError`` when the key has no inline value at the requested level.

    ``expected`` is ``False`` for the cases that should raise.
    """
    if expected is False:
        with pytest.raises(KeyError):
            yaml_peek(key, yaml_file, root=root, first=first)
    else:
        # compare against the expected value, a bare truthiness check
        # would pass for any non-empty return value
        assert yaml_peek(key, yaml_file, root=root, first=first) == expected
|
Loading…
Reference in a new issue