From 170a424fb1c8e45db5ff7f1dccae0d82fb137c27 Mon Sep 17 00:00:00 2001
From: sneakers-the-rat
Date: Mon, 21 Aug 2023 21:43:02 -0700
Subject: [PATCH] Actually generating some translations at this point

---
 .gitignore                        |  1 +
 nwb_linkml/adapters/__init__.py   |  0
 nwb_linkml/adapters/adapter.py    | 71 +++++++++++++++++++++++
 nwb_linkml/adapters/classes.py    | 36 ++++++++++++
 nwb_linkml/adapters/namespaces.py | 81 ++++++++++++++++++++++++++
 nwb_linkml/adapters/schema.py     | 95 +++++++++++++++++++++++++++++++
 nwb_linkml/io.py                  | 66 ++++++++++-----------
 nwb_linkml/lang_elements.py       | 76 +++++++++++++++++++++++++
 nwb_linkml/maps/dtype.py          | 31 ++++++++++
 tests/fixtures.py                 |  4 +-
 tests/test_adapter.py             | 36 ++++++++++++
 tests/test_generate.py            | 17 ++++++
 12 files changed, 476 insertions(+), 38 deletions(-)
 create mode 100644 nwb_linkml/adapters/__init__.py
 create mode 100644 nwb_linkml/adapters/adapter.py
 create mode 100644 nwb_linkml/adapters/classes.py
 create mode 100644 nwb_linkml/adapters/namespaces.py
 create mode 100644 nwb_linkml/adapters/schema.py
 create mode 100644 nwb_linkml/lang_elements.py
 create mode 100644 nwb_linkml/maps/dtype.py
 create mode 100644 tests/test_adapter.py
 create mode 100644 tests/test_generate.py

diff --git a/.gitignore b/.gitignore
index 5200413..644d9b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -160,3 +160,4 @@ cython_debug/
 #.idea/
 
 nwb.schema.json
+__tmp__
diff --git a/nwb_linkml/adapters/__init__.py b/nwb_linkml/adapters/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/nwb_linkml/adapters/adapter.py b/nwb_linkml/adapters/adapter.py
new file mode 100644
index 0000000..5f656b0
--- /dev/null
+++ b/nwb_linkml/adapters/adapter.py
@@ -0,0 +1,71 @@
+"""
+Base class for adapters
+"""
+from typing import List, Type, Tuple
+from pydantic import BaseModel
+
+
+class Adapter(BaseModel):
+
+    def walk(self, input: BaseModel | dict | list):
+        """
+        Recursively yield every node beneath ``input``: (key, value) tuples
+        for model fields and dict entries, and bare values for list items
+        """
+        yield input
+        if isinstance(input, BaseModel):
+            for key in input.__fields__.keys():
+                val = getattr(input, key)
+                yield (key, val)
+                if isinstance(val, (BaseModel, dict, list)):
+                    yield from self.walk(val)
+
+        elif isinstance(input, dict):
+            for key, val in input.items():
+                yield (key, val)
+                if isinstance(val, (BaseModel, dict, list)):
+                    yield from self.walk(val)
+
+        elif isinstance(input, (list, tuple)):
+            # ``input`` itself was already yielded above, so just recurse into items
+            for val in input:
+                yield from self.walk(val)
+
+        else:
+            # do nothing, is a string or whatever
+            pass
+
+    def walk_fields(self, input: BaseModel | dict | list, field: str):
+        """
+        Yield the value of every occurrence of ``field`` beneath ``input``
+        """
+        for item in self.walk(input):
+            if isinstance(item, tuple) and item[0] == field and item[1] is not None:
+                yield item[1]
+
+    def walk_types(self, input: BaseModel | dict | list, get_type: Type | List[Type] | Tuple[Type]):
+        """
+        Yield every object beneath ``input`` whose type is one of ``get_type``
+        """
+        if not isinstance(get_type, (list, tuple)):
+            get_type = [get_type]
+
+        for item in self.walk(input):
+            if any(type(item) is atype for atype in get_type):
+                yield item
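Not part of the patch: a minimal sketch of how the `Adapter.walk*` helpers above are meant to be used. The `Group`/`Attribute` kwargs are illustrative placeholders, and assume the `nwb_schema_language` pydantic models accept them.

```python
# Sketch only: exercise Adapter.walk_fields / walk_types on a toy model tree.
from nwb_linkml.adapters.adapter import Adapter
from nwb_schema_language import Group, Attribute

group = Group(
    neurodata_type_def="MyContainer",     # hypothetical type
    neurodata_type_inc="Container",
    doc="An example group",
    attributes=[Attribute(name="help", doc="help text", dtype="text")],
)

adapter = Adapter()
# every value stored under a 'neurodata_type_inc' key, anywhere in the tree
print(list(adapter.walk_fields(group, "neurodata_type_inc")))  # ['Container']
# every Attribute instance, anywhere in the tree
print(list(adapter.walk_types(group, Attribute)))
```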
diff --git a/nwb_linkml/adapters/classes.py b/nwb_linkml/adapters/classes.py
new file mode 100644
index 0000000..45fa5ca
--- /dev/null
+++ b/nwb_linkml/adapters/classes.py
@@ -0,0 +1,36 @@
+"""
+Adapters to LinkML classes
+"""
+
+from nwb_schema_language import Dataset, Group
+from nwb_linkml.adapters.adapter import Adapter
+from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
+
+
+class ClassAdapter(Adapter):
+    """
+    Adapter to class-like things in LinkML, including datasets and groups
+    """
+    cls: Dataset | Group
+
+    def build(self) -> ClassDefinition:
+        # a neurodata_type_def means this object defines a new type;
+        # otherwise fall back to the object's name within its parent
+        if self.cls.neurodata_type_def:
+            name = self.cls.neurodata_type_def
+        else:
+            name = self.cls.name
+
+        attrs = [
+            SlotDefinition(
+                name=attr.name,
+                description=attr.doc,
+            ) for attr in self.cls.attributes
+        ]
+
+        cls = ClassDefinition(
+            name=name,
+            is_a=self.cls.neurodata_type_inc,
+            description=self.cls.doc,
+            attributes=attrs
+        )
+        return cls
\ No newline at end of file
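Not part of the patch: what `ClassAdapter.build()` produces for a minimal `Dataset`, sketched under the assumption that the `nwb_schema_language` models accept these kwargs. The dumped YAML shape is approximate.

```python
# Sketch only: build a LinkML ClassDefinition from a toy Dataset.
from nwb_schema_language import Dataset, Attribute
from nwb_linkml.adapters.classes import ClassAdapter
from linkml_runtime.dumpers import yaml_dumper

dset = Dataset(
    neurodata_type_def="Image",           # hypothetical type
    neurodata_type_inc="NWBData",
    doc="An abstract image",
    attributes=[Attribute(name="resolution", doc="pixels/cm", dtype="float32")],
)
# roughly: name: Image / is_a: NWBData / description: ... / attributes: resolution: ...
print(yaml_dumper.dumps(ClassAdapter(cls=dset).build()))
```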
+ """ + # First check within the main schema + internal_matches = [] + for schema in self.schemas: + class_names = [cls.neurodata_type_def for cls in schema.created_classes] + if name in class_names: + internal_matches.append(schema) + + import_matches = [] + for imported_ns in self.imported: + for schema in imported_ns.schemas: + class_names = [cls.neurodata_type_def for cls in schema.created_classes] + if name in class_names: + import_matches.append(schema) + + all_matches = [*internal_matches, *import_matches] + + if len(all_matches)>1: + pdb.set_trace() + raise KeyError(f"Found multiple schemas in namespace that define {name}:\ninternal: {pformat(internal_matches)}\nimported:{pformat(import_matches)}") + elif len(all_matches) == 1: + return all_matches[0] + else: + raise KeyError(f"No schema found that define {name}") + + def populate_imports(self): + """ + Populate the imports that are needed for each schema file + + """ + for sch in self.schemas: + for needs in sch.needed_imports: + # shouldn't be recursive references, since imports should form a tree + depends_on = self.find_type_source(needs) + if depends_on not in sch.imports: + sch.imports.append(depends_on) + + diff --git a/nwb_linkml/adapters/schema.py b/nwb_linkml/adapters/schema.py new file mode 100644 index 0000000..26980ec --- /dev/null +++ b/nwb_linkml/adapters/schema.py @@ -0,0 +1,95 @@ +""" +Since NWB doesn't necessarily have a term for a single nwb schema file, we're going +to call them "schema" objects +""" + +from typing import Optional, List, TYPE_CHECKING +from pathlib import Path +from pydantic import Field + +from nwb_linkml.adapters.adapter import Adapter +from nwb_linkml.adapters.classes import ClassAdapter +if TYPE_CHECKING: + from nwb_linkml.adapters.namespaces import NamespacesAdapter + +from nwb_schema_language import Group, Dataset + +from linkml_runtime.linkml_model import SchemaDefinition + +class SchemaAdapter(Adapter): + """ + An individual schema file in nwb_schema_language + """ + path: Path + groups: List[Group] = Field(default_factory=list) + datasets: List[Dataset] = Field(default_factory=list) + imports: List['SchemaAdapter'] = Field(default_factory=list) + namespace: Optional[str] = None + """Populated by NamespacesAdapter""" + + @property + def name(self) -> str: + return '.'.join([self.namespace, self.path.with_suffix('').name]) + + def __repr__(self): + out_str = '\n' + self.name + '\n' + out_str += '-'*len(self.name) + '\n' + if len(self.imports) > 0: + out_str += "Imports:\n" + out_str += " " + ', '.join([i.name for i in self.imports]) + '\n' + + out_str += 'Groups:\n' + out_str += ' ' + ', '.join([g.neurodata_type_def for g in self.groups]) + out_str += '\n' + out_str += 'Datasets:\n' + out_str += ' ' + ', '.join([d.neurodata_type_def for d in self.datasets]) + out_str += "\n" + + return out_str + + def build(self) -> SchemaDefinition: + """ + Make the LinkML representation for this schema file + + Things that will be populated later + - `id` (but need to have a placeholder to instantiate) + - `version` + + + """ + classes = [ClassAdapter(cls=dset) for dset in self.datasets] + classes.extend(ClassAdapter(cls=group) for group in self.groups) + built_classes = [c.build() for c in classes] + + + sch = SchemaDefinition( + name = self.name, + id = self.name, + imports = [i.name for i in self.imports], + classes=built_classes + ) + return sch + + + @property + def created_classes(self) -> List[Group|Dataset]: + classes = [t for t in self.walk_types([self.groups, self.datasets], (Group, Dataset)) 
diff --git a/nwb_linkml/io.py b/nwb_linkml/io.py
index 5768551..65e2203 100644
--- a/nwb_linkml/io.py
+++ b/nwb_linkml/io.py
@@ -9,17 +9,13 @@
 import warnings
 from linkml_runtime.loaders import yaml_loader
 import yaml
-from nwb_schema_language import Namespaces, Namespace, Group, Dataset
-from nwb_linkml.namespaces import GitRepo, NamespaceRepo, NWB_CORE_REPO, HDMF_COMMON_REPO
+from nwb_schema_language import Namespaces, Group, Dataset
+from nwb_linkml.namespaces import NamespaceRepo, NWB_CORE_REPO, HDMF_COMMON_REPO
 from nwb_linkml.maps import preload
 from nwb_linkml.map import PHASES, Map
+from nwb_linkml.adapters.namespaces import NamespacesAdapter
+from nwb_linkml.adapters.schema import SchemaAdapter
 
-class NamespaceBundle(TypedDict):
-    """
-    A complete namespaces file and all indicated schema files
-    """
-    namespace: Namespaces
-    schema: Dict[str, List[Dataset | Group]]
 
 def load_yaml(path:Path) -> dict:
     with open(path, 'r') as file:
@@ -43,30 +39,34 @@ def load_namespaces(path:Path|NamespaceRepo) -> Namespaces:
 
 
 
-def load_schema_file(path:Path) -> List[Dataset | Group]:
+def load_schema_file(path:Path) -> SchemaAdapter:
     source = load_yaml(path)
-    schema = []
 
+    datasets = []
     for dataset in source.get('datasets', []):
         try:
-            schema.append(Dataset(**dataset))
+            datasets.append(Dataset(**dataset))
         except Exception as e:
             pprint(dataset)
             raise e
 
+    groups = []
     for group in source.get('groups', []):
         try:
-            schema.append(Group(**group))
+            groups.append(Group(**group))
         except Exception as e:
             pprint(group)
             raise e
 
-    #schema.extend([Dataset(**dataset) for dataset in source.get('datasets', [])])
-    #schema.extend([Group(**group) for group in source.get('groups', [])])
+    schema = SchemaAdapter(
+        path=path,
+        datasets=datasets,
+        groups=groups
+    )
 
     return schema
 
-def load_namespace_schema(namespace: Namespace | Namespaces, path:Path=Path('.')) -> Dict[str, List[Dataset | Group]]:
+def load_namespace_schema(namespace: Namespaces, path:Path=Path('.')) -> NamespacesAdapter:
     """
     Load all schema referenced by a namespace file
 
@@ -75,32 +75,31 @@ def load_namespace_schema(namespace: Namespace | Namespaces, path:Path=Path('.'
         path (:class:`pathlib.Path`): Location of the namespace file - all relative paths are interpreted relative to this
 
     Returns:
-        List[Union[Dataset|Group]]
+        :class:`.NamespacesAdapter`
     """
-    if isinstance(namespace, Namespace):
-        ns_iter = [namespace]
-    elif isinstance(namespace, Namespaces):
-        ns_iter = namespace.namespaces
-    else:
-        raise TypeError("Need to pass a namespace or namespaces :)")
-
     path = Path(path).resolve()
     if path.is_file():
         # given the namespace file itself, so find paths relative to its directory
         path = path.parent
 
-    sch = {}
-    for ns in ns_iter:
+    sch = []
+    for ns in namespace.namespaces:
         for schema in ns.schema_:
             if schema.source is None:
                 warnings.warn(f"No source specified for {schema}")
                 continue
             yml_file = (path / schema.source).resolve()
-            sch[schema.source] = load_schema_file(yml_file)
+            sch.append(load_schema_file(yml_file))
 
-    return sch
+    adapter = NamespacesAdapter(
+        namespaces=namespace,
+        schemas=sch
+    )
 
-def load_nwb_core() -> Dict[str, NamespaceBundle]:
+    return adapter
+
+def load_nwb_core() -> NamespacesAdapter:
     # First get hdmf-common:
     hdmf_ns_file = HDMF_COMMON_REPO.provide_from_git()
     hdmf_ns = load_namespaces(hdmf_ns_file)
@@ -110,16 +109,9 @@
     ns = load_namespaces(namespace_file)
     schema = load_namespace_schema(ns, namespace_file)
 
-    return {
-        'hdmf-common': NamespaceBundle(
-            namespace=hdmf_ns,
-            schema=hdmf_schema
-        ),
-        'nwb-core': NamespaceBundle(
-            namespace=ns,
-            schema=schema
-        )
-    }
+    schema.imported.append(hdmf_schema)
+
+    return schema
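Not part of the patch: the end-to-end flow these io changes enable at this commit. `load_nwb_core()` fetches the core and hdmf-common namespaces from git, so this sketch assumes network access.

```python
# Sketch only: load everything, wire up cross-schema imports, inspect.
from nwb_linkml.io import load_nwb_core

core = load_nwb_core()    # NamespacesAdapter; hdmf-common sits in core.imported
core.populate_imports()   # resolve each schema's neurodata_type_incs

for sch in core.schemas:
    print(sch.name, '->', [i.name for i in sch.imports])
```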
diff --git a/nwb_linkml/lang_elements.py b/nwb_linkml/lang_elements.py
new file mode 100644
index 0000000..e7c440b
--- /dev/null
+++ b/nwb_linkml/lang_elements.py
@@ -0,0 +1,76 @@
+"""
+Language elements in the nwb schema language that have a fixed, alternative representation
+in LinkML. These are exported as an nwb.language.yml file along with every generated namespace
+"""
+
+from nwb_schema_language.datamodel.nwb_schema_pydantic import FlatDtype as FlatDtype_source
+from linkml_runtime.linkml_model import (
+    ClassDefinition,
+    EnumDefinition,
+    SchemaDefinition,
+    SlotDefinition,
+    TypeDefinition,
+    Prefix,
+    PermissibleValue
+)
+from nwb_linkml.maps.dtype import flat_to_linkml
+
+
+FlatDType = EnumDefinition(
+    name="FlatDType",
+    permissible_values=[PermissibleValue(p) for p in FlatDtype_source.__members__.keys()],
+)
+
+DimNameSlot = SlotDefinition(
+    name="dim_name",
+    range="string",
+    description="The name of a dimension"
+)
+DimShapeSlot = SlotDefinition(
+    name="dim_shape",
+    range="integer",
+    required=False
+)
+DimClass = ClassDefinition(
+    name="Dimension",
+    slots=[DimNameSlot.name, DimShapeSlot.name],
+    description="A single dimension within a shape"
+)
+DimSlot = SlotDefinition(
+    name="dim",
+    range=DimClass.name,
+    multivalued=True,
+    description="Slot representing the dimensions that a Shape can have"
+)
+
+# ShapeClass = ClassDefinition(
+#     name="Shape",
+#     description="A possible shape for an array-like dataset",
+#     slots=[DimSlot.name]
+# )
+
+DTypeTypes = []
+for nwbtype, linkmltype in flat_to_linkml.items():
+    # unsigned types get an explicit lower bound, since linkml has no uint
+    amin = None
+    if nwbtype.startswith('uint'):
+        amin = 0
+
+    atype = TypeDefinition(
+        name=nwbtype,
+        minimum_value=amin,
+        typeof=linkmltype
+    )
+    DTypeTypes.append(atype)
+
+
+NwbLangSchema = SchemaDefinition(
+    name="nwb.language",
+    id='nwb.language',
+    description="Adapter objects to mimic the behavior of elements in the nwb-schema-language",
+    enums=[FlatDType],
+    slots=[DimNameSlot, DimShapeSlot, DimSlot],
+    classes=[DimClass],
+    types=DTypeTypes,
+    imports=['linkml:types'],
+    prefixes={'linkml': Prefix('linkml', 'https://w3id.org/linkml')}
+)
diff --git a/nwb_linkml/maps/dtype.py b/nwb_linkml/maps/dtype.py
new file mode 100644
index 0000000..85b8dd8
--- /dev/null
+++ b/nwb_linkml/maps/dtype.py
@@ -0,0 +1,31 @@
+
+
+flat_to_linkml = {
+    "float" : "float",
+    "float32" : "float",
+    "double" : "double",
+    "float64" : "double",
+    "long" : "integer",
+    "int64" : "integer",
+    "int" : "integer",
+    "int32" : "integer",
+    "int16" : "integer",
+    "short" : "integer",
+    "int8" : "integer",
+    "uint" : "integer",
+    "uint32" : "integer",
+    "uint16" : "integer",
+    "uint8" : "integer",
+    "uint64" : "integer",
+    "numeric" : "float",
+    "text" : "string",
+    "utf" : "string",
+    "utf8" : "string",
+    "utf_8" : "string",
+    "ascii" : "string",
+    "bool" : "boolean",
+    "isodatetime" : "datetime"  # full timestamps, so linkml datetime rather than date
+}
+"""
+Map between the flat data types and the simpler linkml base types
+"""
\ No newline at end of file
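Not part of the patch: a quick check of the dtype map and the generated language schema above.

```python
# Sketch only: flat_to_linkml backs the TypeDefinitions in NwbLangSchema.
from linkml_runtime.dumpers import yaml_dumper
from nwb_linkml.maps.dtype import flat_to_linkml
from nwb_linkml.lang_elements import NwbLangSchema

print(flat_to_linkml["uint32"])  # 'integer'; its TypeDefinition adds minimum_value=0
print(yaml_dumper.dumps(NwbLangSchema)[:300])  # head of the nwb.language YAML
```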
diff --git a/tests/fixtures.py b/tests/fixtures.py
index 872193a..363a71f 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -3,6 +3,7 @@
 from typing import Dict
 
 from nwb_linkml import io
+from nwb_linkml.adapters.namespaces import NamespacesAdapter
 
 import shutil
 from pathlib import Path
@@ -17,6 +18,7 @@ def tmp_output_dir() -> Path:
 
 
 @pytest.fixture(scope="session")
-def nwb_core_fixture() -> Dict[str, io.NamespaceBundle]:
+def nwb_core_fixture() -> NamespacesAdapter:
     nwb_core = io.load_nwb_core()
+    nwb_core.populate_imports()
     return nwb_core
\ No newline at end of file
diff --git a/tests/test_adapter.py b/tests/test_adapter.py
new file mode 100644
index 0000000..eab6fd5
--- /dev/null
+++ b/tests/test_adapter.py
@@ -0,0 +1,36 @@
+import pytest
+
+from .fixtures import nwb_core_fixture
+from nwb_schema_language import Attribute
+
+
+def test_walk_adapter(nwb_core_fixture):
+    base = nwb_core_fixture.schemas[0]
+    assert base.path.name == "nwb.base.yaml"
+
+    type_incs = [inc for inc in base.walk_fields(base, 'neurodata_type_inc')]
+    assert len(type_incs) > 0
+
+    attributes = [a for a in base.walk_types(base, Attribute)]
+    assert len(attributes) > 0
+    assert all(isinstance(a, Attribute) for a in attributes)
+
+
+@pytest.mark.parametrize(
+    ['class_name', 'schema_file', 'namespace_name'],
+    [
+        ('DynamicTable', 'table.yaml', 'hdmf-common'),
+        ('Container', 'base.yaml', 'hdmf-common'),
+        ('TimeSeries', 'nwb.base.yaml', 'core'),
+        ('ImageSeries', 'nwb.image.yaml', 'core')
+    ]
+)
+def test_find_type_source(nwb_core_fixture, class_name, schema_file, namespace_name):
+    defining_sch = nwb_core_fixture.find_type_source(class_name)
+    assert defining_sch.path.name == schema_file
+    assert namespace_name == defining_sch.namespace
+
+
+def test_populate_imports(nwb_core_fixture):
+    nwb_core_fixture.populate_imports()
+    # every class a schema needs should now be defined by one of its imports
+    for sch in nwb_core_fixture.schemas:
+        for needed in sch.needed_imports:
+            assert any(
+                needed in [cls.neurodata_type_def for cls in imp.created_classes]
+                for imp in sch.imports
+            )
diff --git a/tests/test_generate.py b/tests/test_generate.py
new file mode 100644
index 0000000..f4579f0
--- /dev/null
+++ b/tests/test_generate.py
@@ -0,0 +1,17 @@
+import warnings
+
+from .fixtures import nwb_core_fixture, tmp_output_dir
+from linkml_runtime.dumpers import yaml_dumper
+
+from nwb_linkml.lang_elements import NwbLangSchema
+
+
+def test_generate_nwblang(tmp_output_dir):
+    output_file = (tmp_output_dir / NwbLangSchema.name).with_suffix('.yml')
+    yaml_dumper.dump(NwbLangSchema, output_file)
+
+
+def test_generate_base(nwb_core_fixture, tmp_output_dir):
+    schema = nwb_core_fixture.schemas[0].build()
+    output_file = (tmp_output_dir / schema.name).with_suffix('.yml')
+    warnings.warn(str(output_file))
+    yaml_dumper.dump(schema, output_file)
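Not part of the patch: once the tests above have dumped YAML, the output can be sanity-checked with linkml_runtime's SchemaView. The `__tmp__` path is an assumption based on the .gitignore entry added in this commit.

```python
# Sketch only: load a dumped schema back and list what it defines.
from linkml_runtime import SchemaView

sv = SchemaView("__tmp__/nwb.language.yml")  # hypothetical output location
print(list(sv.all_classes()))  # ['Dimension']
print(list(sv.all_types())[:5])
```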