mirror of
https://github.com/p2p-ld/nwb-linkml.git
synced 2025-01-10 06:04:28 +00:00
yaml peek to quickly find the value of a key
This commit is contained in:
parent
27d18b69d8
commit
2e7670a2bd
3 changed files with 131 additions and 21 deletions
63
nwb_linkml/src/nwb_linkml/io/yaml.py
Normal file
63
nwb_linkml/src/nwb_linkml/io/yaml.py
Normal file
|
@ -0,0 +1,63 @@
|
|||
"""
|
||||
Utility functions for dealing with yaml files.
|
||||
|
||||
No we are not going to implement a yaml parser here
|
||||
"""
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Literal, List, Union, overload
|
||||
|
||||
|
||||
@overload
def yaml_peek(
    key: str, path: Union[str, Path], root: bool = True, first: Literal[True] = True
) -> str: ...


@overload
def yaml_peek(
    key: str, path: Union[str, Path], root: bool = True, first: Literal[False] = False
) -> List[str]: ...


@overload
def yaml_peek(
    key: str, path: Union[str, Path], root: bool = True, first: bool = True
) -> Union[str, List[str]]: ...


def yaml_peek(
    key: str, path: Union[str, Path], root: bool = True, first: bool = True
) -> Union[str, List[str]]:
    """
    Peek into a yaml file without parsing the whole file to retrieve the value of a single key.

    This function is _not_ designed for robustness to the yaml spec. It handles simple
    single-line ``key: value`` pairs only - no multiline strings, tagged values, etc.
    The file is scanned line by line, so a key whose value starts on the following line
    (e.g. a nested mapping) is treated as not having a value.

    The value is always returned as the raw string found after the colon (quotes and
    trailing comments included), regardless of the yaml type, so callers must do their own
    casting.

    Args:
        key (str): The key to peek for. Matched literally, not as a regular expression.
        path (:class:`pathlib.Path` , str): The yaml file to peek into
        root (bool): Only find keys at the root of the document (default ``True`` ), otherwise
            find keys at any level of nesting.
        first (bool): Only return the first appearance of the key (default). Otherwise return a
            list of the values of every appearance, in file order.

    Returns:
        str: if ``first`` is ``True``, the value of the first matching key
        List[str]: if ``first`` is ``False``, the values of all matching keys

    Raises:
        KeyError: if no line in the file matches the key
    """
    # Root-level keys must start in column 0; nested keys may be preceded by indentation.
    indent = "" if root else r"\s*"
    # Escape the key so regex metacharacters in it (``.``, ``+``, ...) are matched literally.
    pattern = re.compile(rf"^{indent}(?P<key>{re.escape(key)}):\s*(?P<value>\S.*)")

    values: List[str] = []
    # Match one line at a time in both modes so ``^`` anchors at the start of every line
    # and a value can never be picked up from the line after the key.
    with open(path, "r", encoding="utf-8") as yfile:
        for line in yfile:
            match = pattern.match(line)
            if match is None:
                continue
            if first:
                # Stop reading as soon as we have what we came for.
                return match.group("value")
            values.append(match.group("value"))

    if values:
        return values

    raise KeyError(f"Key {key} not found in {path}")
|
||||
|
|
@ -13,6 +13,7 @@ from typing import List, Optional, Type
|
|||
from pydantic import BaseModel
|
||||
|
||||
from nwb_linkml import io
|
||||
from nwb_linkml.io.yaml import yaml_peek
|
||||
from nwb_linkml.generators.pydantic import NWBPydanticGenerator
|
||||
from nwb_linkml.maps.naming import module_case, version_module_case
|
||||
from nwb_linkml.providers import LinkMLProvider, Provider
|
||||
|
@ -36,9 +37,6 @@ class PydanticProvider(Provider):
|
|||
|
||||
def __init__(self, path: Optional[Path] = None, verbose: bool = True):
|
||||
super().__init__(path, verbose)
|
||||
# create a metapathfinder to find module we might create
|
||||
pathfinder = EctopicModelFinder(self.path)
|
||||
sys.meta_path.append(pathfinder)
|
||||
|
||||
@property
|
||||
def path(self) -> Path:
|
||||
|
@ -50,7 +48,6 @@ class PydanticProvider(Provider):
|
|||
namespace: str | Path,
|
||||
out_file: Optional[Path] = None,
|
||||
version: Optional[str] = None,
|
||||
versions: Optional[dict] = None,
|
||||
split: bool = True,
|
||||
dump: bool = True,
|
||||
force: bool = False,
|
||||
|
@ -75,13 +72,6 @@ class PydanticProvider(Provider):
|
|||
version (Optional[str]): The version of the schema to build, if present.
|
||||
Works similarly to ``version`` in :class:`.LinkMLProvider`.
|
||||
Ignored if ``namespace`` is a Path.
|
||||
versions (Optional[dict]): An explicit mapping of namespaces and versions to use when
|
||||
building the combined pydantic `namespace.py` file.
|
||||
Since NWB doesn't have an explicit version dependency system between schema,
|
||||
there is intrinsic ambiguity between which version
|
||||
of which schema should be used when imported from another.
|
||||
This mapping allows those ambiguities to be resolved.
|
||||
See :class:`.NWBPydanticGenerator` 's ``versions`` argument for more information.
|
||||
split (bool): If ``False`` (default), generate a single ``namespace.py`` file,
|
||||
otherwise generate a python file for each schema in the namespace
|
||||
in addition to a ``namespace.py`` that imports from them
|
||||
|
@ -107,19 +97,15 @@ class PydanticProvider(Provider):
|
|||
if version is None:
|
||||
# Get the most recently built version
|
||||
version = LinkMLProvider(path=self.config.cache_dir).available_versions[name][-1]
|
||||
fn = path.parts[-1]
|
||||
fn = path.name
|
||||
else:
|
||||
# given a path to a namespace linkml yaml file
|
||||
path = Path(namespace)
|
||||
# FIXME: this is extremely fragile, but get the details from the path.
|
||||
# this is faster than reading yaml for now
|
||||
name = path.parts[-3]
|
||||
version = path.parts[-2]
|
||||
fn = path.parts[-1]
|
||||
name = yaml_peek('name', path)
|
||||
version = yaml_peek('version', path)
|
||||
fn = path.name
|
||||
|
||||
version = version_module_case(version)
|
||||
# this is extremely fragile, we should not be inferring version number from paths...
|
||||
# TODO: we need an efficient peek for specific keys within a yaml file
|
||||
if out_file is None:
|
||||
fn = fn.removesuffix(".yaml")
|
||||
fn = module_case(fn) + ".py"
|
||||
|
@ -137,10 +123,14 @@ class PydanticProvider(Provider):
|
|||
if versions is None:
|
||||
versions = self._get_dependent_versions(path)
|
||||
|
||||
|
||||
if split:
|
||||
return self._build_split(path, versions, default_kwargs, dump, out_file, force)
|
||||
result = self._build_split(path, versions, default_kwargs, dump, out_file, force)
|
||||
else:
|
||||
return self._build_unsplit(path, versions, default_kwargs, dump, out_file, force)
|
||||
result = self._build_unsplit(path, versions, default_kwargs, dump, out_file, force)
|
||||
|
||||
self.install_pathfinder()
|
||||
return result
|
||||
|
||||
def _build_unsplit(
|
||||
self,
|
||||
|
@ -406,6 +396,19 @@ class PydanticProvider(Provider):
|
|||
mod = self.get(namespace, version)
|
||||
return getattr(mod, class_)
|
||||
|
||||
def install_pathfinder(self):
    """
    Register a :class:`.EctopicModelFinder` for ``self.path`` on ``sys.meta_path``
    so that modules can be imported from the directory that we are generating models into.

    Does nothing if a finder for the same path has already been registered.
    """
    already_installed = any(
        isinstance(finder, EctopicModelFinder) and finder.path == self.path
        for finder in sys.meta_path
    )
    if already_installed:
        return

    sys.meta_path.append(EctopicModelFinder(self.path))
|
||||
|
||||
|
||||
class EctopicModelFinder(MetaPathFinder):
|
||||
"""
|
||||
|
|
44
nwb_linkml/tests/test_io/test_io_yaml.py
Normal file
44
nwb_linkml/tests/test_io/test_io_yaml.py
Normal file
|
@ -0,0 +1,44 @@
|
|||
import pytest
|
||||
import yaml
|
||||
|
||||
from nwb_linkml.io.yaml import yaml_peek
|
||||
|
||||
@pytest.fixture()
def yaml_file(tmp_path):
    """
    Write a small yaml document to a temporary file and yield its path.

    ``key1`` appears both at the root and nested under ``key3`` so that root-only and
    any-depth lookups can be told apart; ``key4`` only exists nested.
    The file is removed again once the test has finished.
    """
    nested = {"key1": "val3", "key4": "val4"}
    data = {"key1": "val1", "key2": "val2", "key3": nested}

    out_file = tmp_path / "test.yaml"
    with open(out_file, "w") as handle:
        yaml.dump(data, handle)

    yield out_file

    out_file.unlink()
|
||||
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "key,expected,root,first",
    [
        ("key1", "val1", True, True),
        ("key1", "val1", False, True),
        ("key1", ["val1"], True, False),
        ("key1", ["val1", "val3"], False, False),
        ("key2", "val2", True, True),
        ("key3", False, True, True),
        ("key4", False, True, True),
        ("key4", "val4", False, True),
    ],
)
def test_peek_yaml(key, expected, root, first, yaml_file):
    """
    ``yaml_peek`` returns the expected value(s) for a key, or raises ``KeyError``.

    An ``expected`` of ``False`` marks cases where the key has no single-line value at the
    requested nesting level and the lookup must fail.
    """
    if not expected:
        with pytest.raises(KeyError):
            _ = yaml_peek(key, yaml_file, root=root, first=first)
    else:
        # Compare against the expected value: a bare truthiness check would pass for
        # any non-empty string or list, including wrong or incomplete results.
        assert yaml_peek(key, yaml_file, root=root, first=first) == expected
|
Loading…
Reference in a new issue