successfully building many versions of nwb schema.

working on hdf5 importing, come back to it when fresh, just sorta poking at it because it's so close.
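
Rough sketch of the versioned build flow this commit is aiming at, condensed from the generation script further down in this diff (the version tag here is just an example; any tag in NWB_CORE_REPO.versions should work, and exact call signatures may still shift):

    from pathlib import Path
    from nwb_linkml.io import schema as io
    from nwb_linkml.providers.git import NWB_CORE_REPO, GitRepo
    from nwb_linkml.providers.schema import LinkMLProvider, PydanticProvider

    cache = Path('__tmp__')            # throwaway cache dir for built schema/models
    repo = GitRepo(NWB_CORE_REPO)
    repo.clone()
    repo.tag = '2.5.0'                 # example tag; checking out a tag now also updates submodules

    # load the core namespace plus hdmf-common, which it imports
    core_ns = io.load_namespace_adapter(repo.namespace_file)
    core_ns.imported.append(io.load_namespace_adapter(
        repo.temp_directory / 'hdmf-common-schema' / 'common' / 'namespace.yaml'))

    # build linkml schema for this version, then pydantic models from the generated namespace files
    linkml = LinkMLProvider(path=cache, verbose=False)
    pydantic = PydanticProvider(path=cache, verbose=False)
    for ns, result in linkml.build(core_ns).items():
        pydantic.build(result['namespace'], versions=core_ns.versions, split=True)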
sneakers-the-rat 2023-09-14 02:45:01 -07:00
parent 32f81fd409
commit e6a41415f5
9 changed files with 550 additions and 127 deletions

View file

@@ -48,7 +48,7 @@ class NamespacesAdapter(Adapter):
         """
         from nwb_linkml.io import schema as schema_io
         ns_adapter = schema_io.load_namespaces(path)
-        ns_adapter = schema_io.load_namespace_schema(ns_adapter, path)
+        ns_adapter = schema_io.load_namespace_adapter(ns_adapter, path)
 
         # try and find imported schema
@@ -73,10 +73,19 @@ class NamespacesAdapter(Adapter):
         sch_result = BuildResult()
         for sch in self.schemas:
             if progress is not None:
-                progress.update(sch.namespace, action=sch.name)
+                try:
+                    progress.update(sch.namespace, action=sch.name)
+                except KeyError:
+                    # happens when we skip builds due to caching
+                    pass
             sch_result += sch.build()
             if progress is not None:
-                progress.update(sch.namespace, advance=1)
+                try:
+                    progress.update(sch.namespace, advance=1)
+                except KeyError:
+                    # happens when we skip builds due to caching
+                    pass
 
         # recursive step
         if not skip_imports:
@@ -145,10 +154,9 @@ class NamespacesAdapter(Adapter):
                 sources = [sch.source for sch in ns.schema_]
                 if sch_name in sources or sch.path.stem in sources:
                     sch.namespace = ns.name
+                    sch.version = ns.version
                     break
 
     def find_type_source(self, name:str) -> SchemaAdapter:
         """
         Given some neurodata_type_inc, find the schema that it's defined in.

View file

@@ -33,6 +33,10 @@ class SchemaAdapter(Adapter):
     namespace: Optional[str] = Field(
         None,
         description="""String of containing namespace. Populated by NamespacesAdapter""")
+    version: Optional[str] = Field(
+        None,
+        description="Version of schema, populated by NamespacesAdapter since individual schema files don't know their version in NWB Schema Lang"
+    )
     split: bool = Field(
         False,
         description="Split anonymous subclasses into a separate schema file"
@@ -67,7 +71,6 @@ class SchemaAdapter(Adapter):
         - `id` (but need to have a placeholder to instantiate)
         - `version`
         """
         res = BuildResult()
         for dset in self.datasets:
@@ -90,7 +93,8 @@ class SchemaAdapter(Adapter):
             imports = [i.name if isinstance(i, SchemaAdapter) else i for i in self.imports ],
             classes=res.classes,
             slots=res.slots,
-            types=res.types
+            types=res.types,
+            version=self.version
         )
         # every schema needs the language elements
         sch.imports.append('.'.join([self.namespace, 'nwb.language']))

View file

@@ -219,10 +219,14 @@ class NWBPydanticGenerator(PydanticGenerator):
             # Don't get classes that are defined in this schema!
             if module_name == self.schema.name:
                 continue
-            if self.versions and module_name in self.versions:
-                version = version_module_case(self.versions[module_name])
-                local_mod_name = '....' + module_case(module_name) + '.' + version + '.' + 'namespace'
+            # pdb.set_trace()
+            schema_name = module_name.split('.')[0]
+            if self.versions and schema_name != self.schema.name.split('.')[0] and schema_name in self.versions:
+                version = version_module_case(self.versions[schema_name])
+                if self.split:
+                    local_mod_name = '...' + module_case(schema_name) + '.' + version + '.' + module_case(module_name)
+                else:
+                    local_mod_name = '...' + module_case(schema_name) + '.' + version + '.' + 'namespace'
             else:
                 local_mod_name = '.' + module_case(module_name)
@@ -372,7 +376,7 @@ class NWBPydanticGenerator(PydanticGenerator):
             try:
                 dtype = flat_to_npytyping[list(attrs.values())[0].range]
             except KeyError as e:
-                warnings.warn(e)
+                warnings.warn(str(e))
                 range = list(attrs.values())[0].range
                 return f'List[{range}] | {range}'
         suffix = "]"

View file

@@ -1,11 +1,14 @@
 """
 This is a sandbox file that should be split out to its own pydantic-hdf5 package, but just experimenting here to get our bearings
 """
+import pdb
+import typing
 from typing import Optional, List, Dict, overload, Literal, Type, Any
 from pathlib import Path
 from types import ModuleType
 from typing import TypeVar, TYPE_CHECKING
 from abc import abstractmethod
+import json
 
 import h5py
 from pydantic import BaseModel
@@ -15,13 +18,14 @@ from nwb_linkml.translate import generate_from_nwbfile
 #from nwb_linkml.models.core_nwb_file import NWBFile
 if TYPE_CHECKING:
     from nwb_linkml.models.core_nwb_file import NWBFile
+from nwb_linkml.providers.schema import SchemaProvider
 
 @dataclass
 class HDF5Element():
     cls: h5py.Dataset | h5py.Group
-    models: Dict[str, ModuleType]
     parent: Type[BaseModel]
+    model: Optional[Any] = None
 
     @abstractmethod
     def read(self) -> BaseModel | List[BaseModel]:
@@ -40,34 +44,69 @@ class HDF5Element():
         """Just the terminal group name"""
         return self.cls.name.split('/')[-1]
 
-    def get_model(self) -> Type[BaseModel | dict]:
+    def get_model(self) -> Type[BaseModel | dict | list]:
         """
         Find our model
         - If we have a neurodata_type in our attrs, use that
        - Otherwise, use our parent to resolve the type
         """
+        if self.model is not None:
+            return self.model
+
         if 'neurodata_type' in self.cls.attrs.keys():
-            return get_model(self.cls.attrs, self.models)
+            return get_model(self.cls)
         else:
-            parent_model = get_model(self.cls.parent.attrs, self.models)
+            parent_model = get_model(self.cls.parent)
             field = parent_model.model_fields.get(self.name)
             if issubclass(type(field.annotation), BaseModel):
                 return field.annotation
-            else:
-                return dict
+            else:
+                try:
+                    if issubclass(field.annotation, BaseModel):
+                        return field.annotation
+                except TypeError:
+                    pass
+                # remove any optionals
+                annotation = field.annotation
+                annotation = unwrap_optional(annotation)
+                if typing.get_origin(annotation) is list:
+                    return list
+                else:
+                    return dict
                 #raise NotImplementedError('Need to unpack at least listlike annotations')
 
+def unwrap_optional(annotation):
+    if typing.get_origin(annotation) == typing.Union:
+        args = typing.get_args(annotation)
+
+        if len(args) == 2 and args[1].__name__ == 'NoneType':
+            annotation = args[0]
+    return annotation
+
+def take_outer_type(annotation):
+    if typing.get_origin(annotation) is list:
+        return list
+    return annotation
+
 @dataclass
 class H5Dataset(HDF5Element):
     cls: h5py.Dataset
 
     def read(self) -> Any:
+        model = self.get_model()
+
+        # TODO: Handle references
+        if self.cls.dtype == h5py.ref_dtype:
+            return None
+
         if self.cls.shape == ():
             return self.cls[()]
-        elif len(self.cls.shape) == 1:
+        elif model is list:
             return self.cls[:].tolist()
         else:
-            raise NotImplementedError('oop')
+            return {'array':self.cls[:], 'name': self.cls.name.split('/')[-1]}
+            #raise NotImplementedError('oop')
 
 @dataclass
 class H5Group(HDF5Element):
@@ -82,14 +121,25 @@ class H5Group(HDF5Element):
         }
         data.update(model_attrs)
         for k, v in self.cls.items():
+            child_model = None
+            if isinstance(model, type) and issubclass(model, BaseModel):
+                child_field = model.model_fields.get(k, None)
+                if child_field is not None:
+                    child_model = unwrap_optional(child_field.annotation)
+                    child_model = take_outer_type(child_model)
             if isinstance(v, h5py.Group):
-                data[k] = H5Group(cls=v, models=self.models, parent=model).read()
+                data[k] = H5Group(cls=v, parent=model, model=child_model).read()
             elif isinstance(v, h5py.Dataset):
-                data[k] = H5Dataset(cls=v, models=self.models, parent=model).read()
+                data[k] = H5Dataset(cls=v, parent=model, model=child_model).read()
 
-        return model(**data)
+        if issubclass(model, BaseModel):
+            data['name'] = self.cls.name.split('/')[-1]
+            return model(**data)
+        elif model is list:
+            return list(data.values())
 
 class HDF5IO():
@@ -112,20 +162,25 @@ class HDF5IO():
     def read(self, path:Optional[str] = None):
         h5f = h5py.File(str(self.path))
+        schema = read_specs(h5f.get('specifications'))
+        # build schema so we have them cached
+        provider = SchemaProvider()
+        res = provider.build_from_dicts(schema)
 
         if path:
             src = h5f.get(path)
-            parent = get_model(src.attrs, self.modules)
+            parent = get_model(src)
         else:
             src = h5f
-            parent = getattr(self.modules['core'], 'NWBFile')
+            parent = provider.get_class('core', 'NWBFile')
 
         data = {}
         for k, v in src.items():
             if isinstance(v, h5py.Group):
-                data[k] = H5Group(cls=v, models=self.modules, parent=parent).read()
+                data[k] = H5Group(cls=v, parent=parent).read()
             elif isinstance(v, h5py.Dataset):
-                data[k] = H5Dataset(cls=v, models=self.modules, parent=parent).read()
+                data[k] = H5Dataset(cls=v, parent=parent).read()
 
         if path is None:
             return parent(**data)
@@ -168,13 +223,36 @@ class HDF5IO():
         if len(data.shape) == 1:
             return list(data[:])
 
-def get_model(attrs: h5py.AttributeManager, models: Dict[str, ModuleType]) -> Type[BaseModel]:
+def read_specs(group: h5py.Group) -> dict:
+    spec_dict = {}
+
+    def _read_spec(name, node):
+        if isinstance(node, h5py.Dataset):
+            # make containing dict if they don't exist
+            pieces = node.name.split('/')
+            if pieces[-3] not in spec_dict.keys():
+                spec_dict[pieces[-3]] = {}
+
+            spec = json.loads(node[()])
+            spec_dict[pieces[-3]][pieces[-1]] = spec
+
+    group.visititems(_read_spec)
+    return spec_dict
+
+def get_model(cls: h5py.Group | h5py.Dataset) -> Type[BaseModel]:
+    attrs = cls.attrs
     ns = attrs.get('namespace')
     model_name = attrs.get('neurodata_type')
-    return getattr(models[ns], model_name)
 
-# if __name__ == "__main__":
-#     NWBFILE = Path('/Users/jonny/Dropbox/lab/p2p_ld/data/nwb/sub-738651046_ses-760693773.nwb')
-#     h5f = HDF5IO(NWBFILE)
+    try:
+        return SchemaProvider().get_class(ns, model_name)
+    except:
+        # try to get parent class
+        mod = get_model(cls.parent)
+        return mod.model_fields[cls.name.split('/')[-1]].annotation
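
For orientation, the read path this file is working toward looks roughly like the sketch below (the .nwb path is a placeholder): it reads the embedded specifications group, builds models through SchemaProvider, and then recursively instantiates them from the h5py groups and datasets.

    from pathlib import Path
    from nwb_linkml.io.hdf5 import HDF5IO

    nwbfile_path = Path('data/example.nwb')   # placeholder path to some NWB file
    io = HDF5IO(path=nwbfile_path)

    acquisition = io.read('acquisition')      # read a single top-level group
    nwbfile = io.read()                       # or the whole file as an NWBFile model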

View file

@@ -25,7 +25,8 @@ def load_yaml(path:Path) -> dict:
         ns_dict = amap.apply(ns_dict)
     return ns_dict
 
-def load_namespaces(path:Path|NamespaceRepo) -> Namespaces:
+def _load_namespaces(path:Path|NamespaceRepo) -> Namespaces:
+    """Loads the NWB SCHEMA LANGUAGE namespaces (not the namespacesadapter)"""
     if isinstance(path, NamespaceRepo):
         path = path.provide_from_git()
@@ -37,6 +38,10 @@ def load_namespaces(path:Path|NamespaceRepo) -> Namespaces:
 def load_schema_file(path:Path, yaml:Optional[dict] = None) -> SchemaAdapter:
     if yaml is not None:
         source = yaml
+        # apply maps
+        maps = [m for m in Map.instances if m.phase == PHASES.postload]
+        for amap in maps:
+            source = amap.apply(source)
     else:
         source = load_yaml(path)
@@ -64,25 +69,39 @@ def load_schema_file(path:Path, yaml:Optional[dict] = None) -> SchemaAdapter:
     )
     return schema
 
-def load_namespace_schema(namespace: Namespaces, path:Path=Path('..')) -> NamespacesAdapter:
+def load_namespace_adapter(namespace: Path | NamespaceRepo | Namespaces, path:Optional[Path]=None) -> NamespacesAdapter:
     """
     Load all schema referenced by a namespace file
 
     Args:
         namespace (:class:`.Namespace`):
         path (:class:`pathlib.Path`): Location of the namespace file - all relative paths are interpreted relative to this
     Returns:
         :class:`.NamespacesAdapter`
     """
+    if path is None:
+        path = Path('..')
+
+    if isinstance(namespace, Path):
+        path = namespace
+        namespaces = _load_namespaces(path)
+    elif isinstance(namespace, NamespaceRepo):
+        path = namespace.provide_from_git()
+        namespaces = _load_namespaces(namespace)
+    elif isinstance(namespace, Namespaces):
+        namespaces = namespace
+    else:
+        raise ValueError(f"Namespace must be a path, namespace repo, or already loaded namespaces")
+
+    path = Path(path).resolve()
     if path.is_file():
         # given the namespace file itself, so find paths relative to its directory
         path = path.parent
 
     sch = []
-    for ns in namespace.namespaces:
+    for ns in namespaces.namespaces:
         for schema in ns.schema_:
             if schema.source is None:
                 # this is normal, we'll resolve later
@@ -91,7 +110,7 @@ def load_namespace_schema(namespace: Namespaces, path:Path=Path('..')) -> NamespacesAdapter:
             sch.append(load_schema_file(yml_file))
 
     adapter = NamespacesAdapter(
-        namespaces=namespace,
+        namespaces=namespaces,
         schemas=sch
     )
@@ -99,13 +118,8 @@ def load_namespace_schema(namespace: Namespaces, path:Path=Path('..')) -> NamespacesAdapter:
 def load_nwb_core() -> NamespacesAdapter:
     # First get hdmf-common:
-    hdmf_ns_file = HDMF_COMMON_REPO.provide_from_git()
-    hdmf_ns = load_namespaces(hdmf_ns_file)
-    hdmf_schema = load_namespace_schema(hdmf_ns, hdmf_ns_file)
-
-    namespace_file = NWB_CORE_REPO.provide_from_git()
-    ns = load_namespaces(namespace_file)
-    schema = load_namespace_schema(ns, namespace_file)
+    hdmf_schema = load_namespace_adapter(HDMF_COMMON_REPO)
+    schema = load_namespace_adapter(NWB_CORE_REPO)
 
     schema.imported.append(hdmf_schema)

View file

@@ -131,6 +131,7 @@ class GitRepo:
             self._git_call('checkout', "HEAD")
         else:
             self._git_call('checkout', commit)
+        self._git_call('submodule', 'update', '--init', '--recursive')
         self._commit = commit
 
     @property
@@ -166,6 +167,7 @@ class GitRepo:
         self._git_call('fetch', '--all', '--tags')
         self._git_call('checkout', f'tags/{tag}')
         # error will be raised by _git_call if tag not found
+        self._git_call('submodule', 'update', '--init', '--recursive')
 
     @property
     def default_branch(self) -> str:

View file

@@ -10,9 +10,37 @@ Relationship to other modules:
 * :mod:`.adapters` manage the conversion from NWB schema language to linkML.
 * :mod:`.generators` create models like pydantic models from the linkML schema
 * :mod:`.providers` then use ``adapters`` and ``generators`` to provide models from generated schema!
+
+Providers create a set of directories with namespaces and versions,
+so eg. for the linkML and pydantic providers:
+
+    cache_dir
+      - linkml
+        - nwb_core
+          - v0_2_0
+            - namespace.yaml
+            - nwb.core.file.yaml
+            - ...
+          - v0_2_1
+            - namespace.yaml
+            - ...
+        - my_schema
+          - v0_1_0
+            - ...
+      - pydantic
+        - nwb_core
+          - v0_2_0
+            - namespace.py
+            - ...
+          - v0_2_1
+            - namespace.py
+            - ...
+
 """
 import pdb
-from typing import Dict, TypedDict, List, Optional, Literal, TypeVar, Any, Dict
+import shutil
+from typing import Dict, TypedDict, List, Optional, Literal, TypeVar, Any, Dict, Type
 from types import ModuleType
 from pathlib import Path
 import os
@@ -83,6 +111,13 @@ class Provider(ABC):
         Whatever needs to be done to build this thing, if applicable
         """
 
+    @abstractmethod
+    def get(self, *args: Any) -> Any:
+        """
+        Get a cached item.
+
+        Optionally, try and build it if it's possible to do so
+        """
+
     def namespace_path(
         self,
@@ -136,12 +171,48 @@ class Provider(ABC):
 
         return version_path
 
+    @property
+    def versions(self) -> Dict[str,List[str]]:
+        """
+        Dictionary mapping a namespace to a list of built versions
+        """
+        versions = {} # type: Dict[str, List[Path]]
+
+        # first get any builtins provided by the package itself
+        # these get overwritten by
+        module_path = Path(importlib.util.find_spec('nwb_linkml').origin).parent
+        builtin_namespaces = []
+        if self.PROVIDES == 'linkml':
+            namespace_path = module_path / 'schema'
+            builtin_namespaces = list(namespace_path.iterdir())
+        elif self.PROVIDES == 'pydantic':
+            namespace_path = module_path / 'models'
+            builtin_namespaces = list(namespace_path.iterdir())
+
+        for ns_dir in builtin_namespaces + list(self.path.iterdir()):
+            if not ns_dir.is_dir():
+                continue
+            if ns_dir.name not in versions.keys():
+                versions[ns_dir.name] = []
+            versions[ns_dir.name].extend([v for v in ns_dir.iterdir() if v.is_dir()])
+
+        # flatten out in case we got duplicates between the builtins and cache
+        res = {
+            k: [v.name for v in sorted(set(v_paths), key=os.path.getmtime)]
+            for k, v_paths in versions.items()
+        }
+        return res
+
 class LinkMLSchemaBuild(TypedDict):
     """Build result from :meth:`.LinkMLProvider.build`"""
-    result: BuildResult
     version: str
     namespace: Path
+    name: str
+    result: Optional[BuildResult]
 
 class LinkMLProvider(Provider):
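
The new ``versions`` property is what lets providers fall back to the most recently built version of a namespace when none is given, roughly like this sketch (the cache path is a placeholder):

    from pathlib import Path
    from nwb_linkml.providers.schema import LinkMLProvider

    provider = LinkMLProvider(path=Path('/tmp/nwb_linkml_cache'))   # placeholder cache dir
    latest = provider.versions['core'][-1]                          # newest built version of 'core'
    namespace_yaml = provider.namespace_path('core', latest) / 'namespace.yaml'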
@@ -219,19 +290,35 @@ class LinkMLProvider(Provider):
         to infer version and schema name. Post-load maps should have already
         been applied
         """
-        ns = Namespaces(**schemas['namespace'])
-        typed_schemas = [
-            io.schema.load_schema_file(
-                path=Path(key + ".yaml"),
-                yaml=val)
-            for key, val in schemas.items()
-            if key != 'namespace'
-        ]
-        ns_adapter = adapters.NamespacesAdapter(
-            namespaces=ns,
-            schemas=typed_schemas
-        )
-        return self.build(ns_adapter, **kwargs)
+        ns_adapters = {}
+        for ns_name, ns_schemas in schemas.items():
+            ns = Namespaces(**ns_schemas['namespace'])
+            typed_schemas = [
+                io.schema.load_schema_file(
+                    path=Path(key + ".yaml"),
+                    yaml=val)
+                for key, val in ns_schemas.items()
+                if key != 'namespace'
+            ]
+            ns_adapter = adapters.NamespacesAdapter(
+                namespaces=ns,
+                schemas=typed_schemas
+            )
+            ns_adapters[ns_name] = ns_adapter
+
+        # get the correct imports
+        for ns_name, adapter in ns_adapters.items():
+            for schema_needs in adapter.needed_imports.values():
+                for needed in schema_needs:
+                    adapter.imported.append(ns_adapters[needed])
+
+        # then do the build
+        res = {}
+        for ns_name, adapter in ns_adapters.items():
+            res.update(self.build(adapter, **kwargs))
+        return res
 
     def build(
@@ -239,6 +326,7 @@ class LinkMLProvider(Provider):
             ns_adapter: adapters.NamespacesAdapter,
             versions: Optional[dict] = None,
             dump: bool = True,
+            force: bool = False
     ) -> Dict[str | SchemaDefinitionName, LinkMLSchemaBuild]:
         """
         Arguments:
@@ -249,8 +337,24 @@ class LinkMLProvider(Provider):
                 If none is provided, use the most recent version
                 available.
             dump (bool): If ``True`` (default), dump generated schema to YAML. otherwise just return
+            force (bool): If ``False`` (default), don't build schema that already exist. If ``True`` , clear directory and rebuild
+
+        Returns:
+            Dict[str, LinkMLSchemaBuild]. For normal builds, :attr:`.LinkMLSchemaBuild.result` will be populated with results
+            of the build. If ``force == False`` and the schema already exist, it will be ``None``
         """
+        if not force:
+            if all([(self.namespace_path(ns, version) / 'namespace.yaml').exists() for ns, version in ns_adapter.versions.items()]):
+                return {
+                    k: LinkMLSchemaBuild(
+                        name=k,
+                        result=None,
+                        namespace=self.namespace_path(k, v) / 'namespace.yaml',
+                        version=v
+                    ) for k,v in ns_adapter.versions.items()
+                }
+
         #self._find_imports(ns_adapter, versions, populate=True)
         if self.verbose:
             progress = AdapterProgress(ns_adapter)
@@ -265,27 +369,32 @@ class LinkMLProvider(Provider):
         build_result = {}
         namespace_sch = [sch for sch in built.schemas if 'namespace' in sch.annotations.keys()]
+        namespace_names = [sch.name for sch in namespace_sch]
         for ns_linkml in namespace_sch:
             version = ns_adapter.versions[ns_linkml.name]
             version_path = self.namespace_path(ns_linkml.name, version, allow_repo=False)
+            if version_path.exists() and force:
+                shutil.rmtree(str(version_path))
             version_path.mkdir(exist_ok=True, parents=True)
             ns_file = version_path / 'namespace.yaml'
-            ns_linkml = self._fix_schema_imports(ns_linkml, ns_adapter, ns_file)
-            yaml_dumper.dump(ns_linkml, ns_file)
-
-            # write the schemas for this namespace
-            other_schema = [sch for sch in built.schemas if sch.name.split('.')[0] == ns_linkml.name and sch not in namespace_sch]
-            for sch in other_schema:
-                output_file = version_path / (sch.name + '.yaml')
-                # fix the paths for intra-schema imports
-                sch = self._fix_schema_imports(sch, ns_adapter, output_file)
-                yaml_dumper.dump(sch, output_file)
+            # schema built as part of this namespace that aren't the namespace file
+            other_schema = [sch for sch in built.schemas if
+                            sch.name.split('.')[0] == ns_linkml.name and sch not in namespace_sch]
+            if force or (not force and not ns_file.exists()):
+                ns_linkml = self._fix_schema_imports(ns_linkml, ns_adapter, ns_file)
+                yaml_dumper.dump(ns_linkml, ns_file)
+
+                # write the schemas for this namespace
+                for sch in other_schema:
+                    output_file = version_path / (sch.name + '.yaml')
+                    # fix the paths for intra-schema imports
+                    sch = self._fix_schema_imports(sch, ns_adapter, output_file)
+                    yaml_dumper.dump(sch, output_file)
 
             # make return result for just this namespace
             build_result[ns_linkml.name] = LinkMLSchemaBuild(
                 namespace=ns_file,
+                name=ns_linkml.name,
                 result= BuildResult(schemas=[ns_linkml, *other_schema]),
                 version=version
             )
@@ -350,27 +459,40 @@ class PydanticProvider(Provider):
     def build(
         self,
         namespace: str | Path,
+        out_file: Optional[Path] = None,
         version: Optional[str] = None,
         versions: Optional[dict] = None,
+        split: bool = False,
         dump: bool = True,
+        force: bool = False,
         **kwargs
     ) -> str:
         """
+        Notes:
+            We currently infer namespace and version from the path when ``namespace`` is a Path,
+            which is a patently Bad Thing To Do. This is a temporary measure until we decide on
+            a permanent means by which we want to cache built artifacts <3. Hierarchies of folders
+            is not the target design.
+
         Args:
             namespace (Union[str, :class:`pathlib.Path`]): If a string, use a
                 :class:`.LinkMLProvider` to get the converted schema. If a path,
                 assume we have been given an explicit ``namespace.yaml`` from a converted
                 NWB -> LinkML schema to load from.
+            out_file (Optional[Path]): Optionally override the output file. If ``None``, generate from namespace and version
             version (Optional[str]): The version of the schema to build, if present.
-                Works similarly to ``version`` in :class:`.LinkMLProvider`
+                Works similarly to ``version`` in :class:`.LinkMLProvider`. Ignored if ``namespace`` is a Path.
             versions (Optional[dict]): An explicit mapping of namespaces and versions to use when
                 building the combined pydantic `namespace.py` file. Since NWB doesn't have an explicit
                 version dependency system between schema, there is intrinsic ambiguity between which version
                 of which schema should be used when imported from another. This mapping allows those ambiguities to be resolved.
                 See :class:`.NWBPydanticGenerator` 's ``versions`` argument for more information.
+            split (bool): If ``False`` (default), generate a single ``namespace.py`` file, otherwise generate a python file for each schema in the namespace
+                in addition to a ``namespace.py`` that imports from them
             dump (bool): If ``True`` (default), dump the model to the cache, otherwise just return the serialized string of built pydantic model
+            force (bool): If ``False`` (default), don't build the model if it already exists, if ``True`` , delete and rebuild any model
             **kwargs: Passed to :class:`.NWBPydanticGenerator`
 
         Returns:
@@ -379,13 +501,36 @@ class PydanticProvider(Provider):
         if isinstance(namespace, str) and not (namespace.endswith('.yaml') or namespace.endswith('.yml')):
             # we're given a name of a namespace to build
+            name = namespace
             path = LinkMLProvider(path=self.config.cache_dir).namespace_path(namespace, version) / 'namespace.yaml'
+            if version is None:
+                # Get the most recently built version
+                version = LinkMLProvider(path=self.config.cache_dir).versions[name][-1]
+            fn = path.parts[-1]
         else:
             # given a path to a namespace linkml yaml file
             path = Path(namespace)
+            # FIXME: this is extremely fragile, but get the details from the path. this is faster than reading yaml for now
+            name = path.parts[-3]
+            version = path.parts[-2]
+            fn = path.parts[-1]
+
+        version = version_module_case(version)
+        # this is extremely fragile, we should not be inferring version number from paths...
+        if out_file is None:
+            fn = fn.strip('.yaml')
+            fn = module_case(fn) + '.py'
+            out_file = self.path / name / version / fn
+
+        if out_file.exists() and not force:
+            with open(out_file, 'r') as ofile:
+                serialized = ofile.read()
+            return serialized
 
         default_kwargs = {
-            'split': False,
+            'split': split,
             'emit_metadata': True,
             'gen_slots': True,
             'pydantic_version': '2'
@@ -399,10 +544,16 @@ class PydanticProvider(Provider):
         )
         serialized = generator.serialize()
         if dump:
-            out_file = self.path / path.parts[-3] / path.parts[-2] / 'namespace.py'
             out_file.parent.mkdir(parents=True,exist_ok=True)
             with open(out_file, 'w') as ofile:
                 ofile.write(serialized)
+            with open(out_file.parent / '__init__.py', 'w') as initfile:
+                initfile.write(' ')
+            # make parent file, being a bit more careful because it could be for another module
+            parent_init = out_file.parent.parent / '__init__.py'
+            if not parent_init.exists():
+                with open(parent_init, 'w') as initfile:
+                    initfile.write(' ')
 
         return serialized
@@ -487,7 +638,7 @@ class PydanticProvider(Provider):
             module = self.import_module(namespace, version)
         return module
 
-    def get_class(self, namespace: str, class_: str, version: Optional[str] = None) -> BaseModel:
+    def get_class(self, namespace: str, class_: str, version: Optional[str] = None) -> Type[BaseModel]:
         """
         Get a class from a given namespace and version!
 
@@ -507,10 +658,10 @@ class PydanticProvider(Provider):
 
-class SchemaProvider:
+class SchemaProvider(Provider):
     """
     Class to manage building and caching linkml and pydantic models generated
-    from nwb schema language
+    from nwb schema language. Combines :class:`.LinkMLProvider` and :class:`.PydanticProvider`
 
     Behaves like a singleton without needing to be one - since we're working off
     caches on disk that are indexed by hash in most "normal" conditions you should
@@ -519,52 +670,81 @@ class SchemaProvider:
     Store each generated schema in a directory structure indexed by
     schema namespace name and version
+    """
 
-    eg:
-
-        cache_dir
-          - linkml
-            - nwb_core
-              - v0_2_0
-                - namespace.yaml
-                - nwb.core.file.yaml
-                - ...
-              - v0_2_1
-                - namespace.yaml
-                - ...
-            - my_schema
-              - v0_1_0
-                - ...
-          - pydantic
-            - nwb_core
-              - v0_2_0
-                - namespace.py
-                - ...
-              - v0_2_1
-                - namespace.py
-                - ...
-    """
-
-    def __init__(self,
-                 path: Optional[Path] = None,
-                 verbose: bool = True):
-        """
-        Arguments:
-            path (bool): If provided, output to an explicit base directory.
-                Otherwise use that provided in ``NWB_LINKML_CACHE_DIR``
-            verbose (bool): If ``True`` (default), show progress bars and other messages
-                useful for interactive use
-        """
-        if path is not None:
-            config = Config(cache_dir=path)
-        else:
-            config = Config()
-        self.cache_dir = config.cache_dir
-        self.pydantic_dir = config.pydantic_dir
-        self.linkml_dir = config.linkml_dir
-        self.verbose = verbose
+    build_from_yaml = LinkMLProvider.build_from_yaml
+    """
+    Alias for :meth:`.LinkMLProvider.build_from_yaml` that also builds a pydantic model
+    """
+    build_from_dicts = LinkMLProvider.build_from_dicts
+    """
+    Alias for :meth:`.LinkMLProvider.build_from_dicts` that also builds a pydantic model
+    """
+
+    @property
+    def path(self) -> Path:
+        return self.config.cache_dir
+
+    def build(
+        self,
+        ns_adapter: adapters.NamespacesAdapter,
+        verbose: bool = True,
+        linkml_kwargs: Optional[dict] = None,
+        pydantic_kwargs: Optional[dict] = None,
+        **kwargs
+    ) -> Dict[str, str]:
+        """
+        Build a namespace, storing its linkML and pydantic models.
+
+        Args:
+            ns_adapter:
+            verbose (bool): If ``True`` (default), show progress bars
+            linkml_kwargs (Optional[dict]): Dictionary of kwargs optionally passed to :meth:`.LinkMLProvider.build`
+            pydantic_kwargs (Optional[dict]): Dictionary of kwargs optionally passed to :meth:`.PydanticProvider.build`
+            **kwargs: Common options added to both ``linkml_kwargs`` and ``pydantic_kwargs``
+
+        Returns:
+            Dict[str,str] mapping namespaces to built pydantic sources
+        """
+        if linkml_kwargs is None:
+            linkml_kwargs = {}
+        if pydantic_kwargs is None:
+            pydantic_kwargs = {}
+        linkml_kwargs.update(kwargs)
+        pydantic_kwargs.update(kwargs)
+
+        linkml_provider = LinkMLProvider(path=self.path, verbose=verbose)
+        pydantic_provider = PydanticProvider(path=self.path, verbose=verbose)
+
+        linkml_res = linkml_provider.build(ns_adapter=ns_adapter, **linkml_kwargs)
+        results = {}
+        for ns, ns_result in linkml_res.items():
+            results[ns] = pydantic_provider.build(ns_result['namespace'], **pydantic_kwargs)
+        return results
+
+    def get(self, namespace: str, version: Optional[str] = None) -> ModuleType:
+        """
+        Get a built pydantic model for a given namespace and version.
+
+        Wrapper around :meth:`.PydanticProvider.get`
+        """
+        return PydanticProvider(path=self.path).get(namespace, version)
+
+    def get_class(self, namespace: str, class_: str, version: Optional[str] = None) -> Type[BaseModel]:
+        """
+        Get a pydantic model class from a given namespace and version!
+
+        Wrapper around :meth:`.PydanticProvider.get_class`
+        """
+        return PydanticProvider(path=self.path).get_class(namespace, class_, version)
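
Taken together, the new SchemaProvider surface is meant to be used as a one-stop shop, roughly like this sketch:

    from nwb_linkml.providers.schema import SchemaProvider

    provider = SchemaProvider()

    # look up a generated pydantic class (most recently built version by default)
    NWBFile = provider.get_class('core', 'NWBFile')

    # or grab the whole generated module for a namespace
    core = provider.get('core')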

View file

@@ -1,6 +1,10 @@
+import pdb
 import pytest
 from pathlib import Path
+from ..fixtures import tmp_output_dir, set_config_vars
 from nwb_linkml.io.hdf5 import HDF5IO
 
 @pytest.mark.skip()
 def test_hdf_read():
@@ -8,4 +12,6 @@ def test_hdf_read():
     if not NWBFILE.exists():
         return
     io = HDF5IO(path=NWBFILE)
-    model = io.read('/general')
+    model = io.read('acquisition')
+    pdb.set_trace()

View file

@@ -1,19 +1,34 @@
+import pdb
+import shutil
+import os
+import traceback
 from argparse import ArgumentParser
 from pathlib import Path
 
 from linkml_runtime.dumpers import yaml_dumper
+from rich.live import Live
+from rich.panel import Panel
+from rich.console import Group
+from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, Column
+from rich import print
 
 from nwb_linkml.generators.pydantic import NWBPydanticGenerator
-from nwb_linkml.src.nwb_linkml import io
+from nwb_linkml.providers.schema import LinkMLProvider, PydanticProvider
+from nwb_linkml.providers.git import NWB_CORE_REPO, GitRepo
+from nwb_linkml.io import schema as io
 
-def generate_core_yaml(output_path:Path):
+def generate_core_yaml(output_path:Path, dry_run:bool=False):
+    """Just build the latest version of the core schema"""
     core = io.load_nwb_core()
     built_schemas = core.build().schemas
     for schema in built_schemas:
         output_file = output_path / (schema.name + '.yaml')
-        yaml_dumper.dump(schema, output_file)
+        if not dry_run:
+            yaml_dumper.dump(schema, output_file)
 
-def generate_core_pydantic(yaml_path:Path, output_path:Path):
+def generate_core_pydantic(yaml_path:Path, output_path:Path, dry_run:bool=False):
+    """Just generate the latest version of the core schema"""
     for schema in yaml_path.glob('*.yaml'):
         python_name = schema.stem.replace('.', '_').replace('-', '_')
         pydantic_file = (output_path / python_name).with_suffix('.py')
@@ -26,11 +41,109 @@ def generate_core_pydantic(yaml_path:Path, output_path:Path):
             gen_slots=True
         )
         gen_pydantic = generator.serialize()
-        with open(pydantic_file, 'w') as pfile:
-            pfile.write(gen_pydantic)
+        if not dry_run:
+            with open(pydantic_file, 'w') as pfile:
+                pfile.write(gen_pydantic)
+
+def generate_versions(yaml_path:Path, pydantic_path:Path, dry_run:bool=False):
+    """
+    Generate linkml models for all versions
+    """
+    repo = GitRepo(NWB_CORE_REPO)
+    #repo.clone(force=True)
+    repo.clone()
+
+    # use a directory underneath this one as the temporary directory rather than
+    # the default hidden one
+    tmp_dir = Path(__file__).parent / '__tmp__'
+    if tmp_dir.exists():
+        shutil.rmtree(tmp_dir)
+    tmp_dir.mkdir()
+
+    linkml_provider = LinkMLProvider(path=tmp_dir, verbose=False)
+    pydantic_provider = PydanticProvider(path=tmp_dir, verbose=False)
+
+    failed_versions = {}
+
+    overall_progress = Progress()
+    overall_task = overall_progress.add_task('All Versions', total=len(NWB_CORE_REPO.versions))
+
+    build_progress = Progress(
+        TextColumn("[bold blue]{task.fields[name]} - [bold green]{task.fields[action]}",
+                   table_column=Column(ratio=1)),
+        BarColumn(table_column=Column(ratio=1), bar_width=None)
+    )
+    panel = Panel(Group(build_progress, overall_progress))
+
+    with Live(panel) as live:
+        # make pbar tasks
+        linkml_task = None
+        pydantic_task = None
+
+        for version in NWB_CORE_REPO.versions:
+            # build linkml
+            try:
+                # check out the version (this should also refresh the hdmf-common schema)
+                linkml_task = build_progress.add_task('', name=version, action='Checkout Version', total=3)
+                repo.tag = version
+                build_progress.update(linkml_task, advance=1, action="Load Namespaces")
+
+                # first load the core namespace
+                core_ns = io.load_namespace_adapter(repo.namespace_file)
+                # then the hdmf-common namespace
+                hdmf_common_ns = io.load_namespace_adapter(repo.temp_directory / 'hdmf-common-schema' / 'common' / 'namespace.yaml')
+                core_ns.imported.append(hdmf_common_ns)
+                build_progress.update(linkml_task, advance=1, action="Build LinkML")
+
+                linkml_res = linkml_provider.build(core_ns)
+                build_progress.update(linkml_task, advance=1, action="Built LinkML")
+
+                # build pydantic
+                ns_files = [res['namespace'] for res in linkml_res.values()]
+                all_schema = []
+                for ns_file in ns_files:
+                    all_schema.extend(list(ns_file.parent.glob('*.yaml')))
+
+                pydantic_task = build_progress.add_task('', name=version, action='', total=len(all_schema))
+                for schema in all_schema:
+                    pbar_string = ' - '.join([schema.parts[-3], schema.parts[-2], schema.parts[-1]])
+                    build_progress.update(pydantic_task, action=pbar_string)
+                    pydantic_provider.build(schema, versions=core_ns.versions, split=True)
+                    build_progress.update(pydantic_task, advance=1)
+                build_progress.update(pydantic_task, action='Built Pydantic')
+
+            except Exception as e:
+                build_progress.stop_task(linkml_task)
+                if linkml_task is not None:
+                    build_progress.update(linkml_task, action='[bold red]LinkML Build Failed')
+                    build_progress.stop_task(linkml_task)
+                if pydantic_task is not None:
+                    build_progress.update(pydantic_task, action='[bold red]LinkML Build Failed')
+                    build_progress.stop_task(pydantic_task)
+                failed_versions[version] = traceback.format_exception(e)
+
+            finally:
+                overall_progress.update(overall_task, advance=1)
+                linkml_task = None
+                pydantic_task = None
+
+    if not dry_run:
+        shutil.move(tmp_dir / 'linkml', yaml_path)
+        shutil.move(tmp_dir / 'pydantic', pydantic_path)
+
+    if len(failed_versions) > 0:
+        print('Failed Building Versions:')
+        print(failed_versions)
+
 
 def parser() -> ArgumentParser:
-    parser = ArgumentParser('Generate NWB core schema')
+    parser = ArgumentParser('Generate all available versions of NWB core schema')
     parser.add_argument(
         '--yaml',
         help="directory to export linkML schema to",
@@ -43,15 +156,29 @@ def parser() -> ArgumentParser:
         type=Path,
         default=Path(__file__).parent.parent / 'nwb_linkml' / 'src' / 'nwb_linkml' / 'models'
     )
+    parser.add_argument(
+        '--latest',
+        help="Only generate the latest version of the core schemas.",
+        action="store_true"
+    )
+    parser.add_argument(
+        '--dry-run',
+        help="Generate schema and pydantic models without moving them into the target directories, for testing purposes",
+        action='store_true'
+    )
     return parser
 
 def main():
     args = parser().parse_args()
-    args.yaml.mkdir(exist_ok=True)
-    args.pydantic.mkdir(exist_ok=True)
-    generate_core_yaml(args.yaml)
-    generate_core_pydantic(args.yaml, args.pydantic)
+    if not args.dry_run:
+        args.yaml.mkdir(exist_ok=True)
+        args.pydantic.mkdir(exist_ok=True)
+    if args.latest:
+        generate_core_yaml(args.yaml, args.dry_run)
+        generate_core_pydantic(args.yaml, args.pydantic, args.dry_run)
+    else:
+        generate_versions(args.yaml, args.pydantic, args.dry_run)
 
 if __name__ == "__main__":
     main()