Actually generating some translations at this point

sneakers-the-rat 2023-08-21 21:43:02 -07:00
parent 3996f319e2
commit 170a424fb1
12 changed files with 476 additions and 38 deletions

1
.gitignore vendored

@@ -160,3 +160,4 @@ cython_debug/
 #.idea/
 nwb.schema.json
+__tmp__

71
nwb_linkml/adapters/adapter.py Normal file

@@ -0,0 +1,71 @@
"""
Base class for adapters
"""
from typing import List, Dict, Type, Generator, Any, Tuple
from pydantic import BaseModel


class Adapter(BaseModel):
    pass

    def walk(self, input: BaseModel | list | dict):
        yield input
        if isinstance(input, BaseModel):
            for key in input.__fields__.keys():
                val = getattr(input, key)
                yield (key, val)
                if isinstance(val, (BaseModel, dict, list)):
                    yield from self.walk(val)

        elif isinstance(input, dict):
            for key, val in input.items():
                yield (key, val)
                if isinstance(val, (BaseModel, dict, list)):
                    yield from self.walk(val)

        elif isinstance(input, (list, tuple)):
            yield input
            for val in input:
                yield from self.walk(val)

        else:
            # do nothing, is a string or whatever
            pass

    def walk_fields(self, input: BaseModel | list | dict, field: str):
        for item in self.walk(input):
            if isinstance(item, tuple) and item[0] == field and item[1] is not None:
                yield item[1]

    def walk_types(self, input: BaseModel | list | dict, get_type: Type | List[Type] | Tuple[Type]):
        if not isinstance(get_type, (list, tuple)):
            get_type = [get_type]

        for item in self.walk(input):
            if any([type(item) == atype for atype in get_type]):
                yield item

    #
    #
    # if isinstance(input, BaseModel):
    #     for key in input.__fields__.keys():
    #         val = getattr(input, key)
    #         if key == field:
    #             yield val
    #         if isinstance(val, (BaseModel, dict, list)):
    #             yield from self.walk(val, field)
    #
    # elif isinstance(input, dict):
    #     for key, val in input.items():
    #         if key == field:
    #             yield val
    #         if isinstance(val, (BaseModel, dict, list)):
    #             yield from self.walk(val, field)
    #
    # elif isinstance(input, (list, tuple)):
    #     for val in input:
    #         yield from self.walk(val, field)
    #
    # else:
    #     # do nothing, is a string or whatever
    #     pass
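For orientation, a minimal sketch (not part of the commit) of how the walk helpers are meant to be driven. Attr and Node are hypothetical stand-ins for nwb-schema-language models, assuming pydantic v1 as used elsewhere in this commit:

from typing import List, Optional
from pydantic import BaseModel

from nwb_linkml.adapters.adapter import Adapter

class Attr(BaseModel):
    # hypothetical stand-in for an nwb-schema-language Attribute
    name: str
    doc: str

class Node(BaseModel):
    # hypothetical stand-in for a Group/Dataset-like element
    name: str
    neurodata_type_inc: Optional[str] = None
    attributes: List[Attr] = []

tree = Node(
    name="root",
    neurodata_type_inc="Container",
    attributes=[Attr(name="help", doc="help text")],
)

adapter = Adapter()
# walk() yields objects and (key, value) pairs; walk_fields() keeps values for one key
assert list(adapter.walk_fields(tree, 'neurodata_type_inc')) == ['Container']
# walk_types() keeps any walked object whose type matches exactly
assert [a.name for a in adapter.walk_types(tree, Attr)] == ['help']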

36
nwb_linkml/adapters/classes.py Normal file

@@ -0,0 +1,36 @@
"""
Adapters to linkML classes
"""
from nwb_schema_language import Dataset, Group
from nwb_linkml.adapters.adapter import Adapter
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition


class ClassAdapter(Adapter):
    """
    Adapter to class-like things in linkml, including datasets and groups
    """
    cls: Dataset | Group

    def build(self) -> ClassDefinition:
        if self.cls.neurodata_type_def:
            name = self.cls.neurodata_type_def
        else:
            name = self.cls.name

        attrs = [
            SlotDefinition(
                name=attr.name,
                description=attr.doc,
            ) for attr in self.cls.attributes
        ]

        cls = ClassDefinition(
            name = name,
            is_a = self.cls.neurodata_type_inc,
            description=self.cls.doc,
            attributes=attrs
        )
        return cls

81
nwb_linkml/adapters/namespaces.py Normal file

@@ -0,0 +1,81 @@
"""
Namespaces adapter

Wraps the :class:`nwb_schema_language.Namespaces` and other objects with convenience methods
for extracting information and generating translated schema
"""
import pdb
from typing import List, Optional
from pydantic import BaseModel, Field, validator
from pprint import pformat

from nwb_schema_language import Namespaces

from nwb_linkml.adapters.adapter import Adapter
from nwb_linkml.adapters.schema import SchemaAdapter


class NamespacesAdapter(Adapter):
    namespaces: Namespaces
    schemas: List[SchemaAdapter]
    imported: List['NamespacesAdapter'] = Field(default_factory=list)

    def __init__(self, **kwargs):
        super(NamespacesAdapter, self).__init__(**kwargs)
        self._populate_schema_namespaces()

    def _populate_schema_namespaces(self):
        # annotate for each schema which namespace imports it
        for sch in self.schemas:
            # imports seem to always be from same folder, so we can just use name part
            sch_name = sch.path.name
            # find which namespace imports this schema file
            for ns in self.namespaces.namespaces:
                sources = [sch.source for sch in ns.schema_]
                if sch_name in sources:
                    sch.namespace = ns.name
                    break

    def find_type_source(self, name:str) -> SchemaAdapter:
        """
        Given some neurodata_type_inc, find the schema that it's defined in.
        """
        # First check within the main schema
        internal_matches = []
        for schema in self.schemas:
            class_names = [cls.neurodata_type_def for cls in schema.created_classes]
            if name in class_names:
                internal_matches.append(schema)

        import_matches = []
        for imported_ns in self.imported:
            for schema in imported_ns.schemas:
                class_names = [cls.neurodata_type_def for cls in schema.created_classes]
                if name in class_names:
                    import_matches.append(schema)

        all_matches = [*internal_matches, *import_matches]

        if len(all_matches)>1:
            pdb.set_trace()
            raise KeyError(f"Found multiple schemas in namespace that define {name}:\ninternal: {pformat(internal_matches)}\nimported:{pformat(import_matches)}")
        elif len(all_matches) == 1:
            return all_matches[0]
        else:
            raise KeyError(f"No schema found that define {name}")

    def populate_imports(self):
        """
        Populate the imports that are needed for each schema file
        """
        for sch in self.schemas:
            for needs in sch.needed_imports:
                # shouldn't be recursive references, since imports should form a tree
                depends_on = self.find_type_source(needs)
                if depends_on not in sch.imports:
                    sch.imports.append(depends_on)
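A rough usage sketch (not part of the commit), mirroring tests/test_adapter.py below; it assumes io.load_nwb_core() can fetch the core and hdmf-common namespaces over git:

from nwb_linkml import io

core = io.load_nwb_core()                        # NamespacesAdapter, hdmf-common in .imported
table_sch = core.find_type_source('DynamicTable')
# table_sch.path.name == 'table.yaml', table_sch.namespace == 'hdmf-common'

core.populate_imports()                          # each SchemaAdapter.imports now lists the
                                                 # schemas its neurodata_type_incs come from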

95
nwb_linkml/adapters/schema.py Normal file

@@ -0,0 +1,95 @@
"""
Since NWB doesn't necessarily have a term for a single nwb schema file, we're going
to call them "schema" objects
"""
from typing import Optional, List, TYPE_CHECKING
from pathlib import Path
from pydantic import Field

from nwb_linkml.adapters.adapter import Adapter
from nwb_linkml.adapters.classes import ClassAdapter
if TYPE_CHECKING:
    from nwb_linkml.adapters.namespaces import NamespacesAdapter

from nwb_schema_language import Group, Dataset
from linkml_runtime.linkml_model import SchemaDefinition


class SchemaAdapter(Adapter):
    """
    An individual schema file in nwb_schema_language
    """
    path: Path
    groups: List[Group] = Field(default_factory=list)
    datasets: List[Dataset] = Field(default_factory=list)
    imports: List['SchemaAdapter'] = Field(default_factory=list)
    namespace: Optional[str] = None
    """Populated by NamespacesAdapter"""

    @property
    def name(self) -> str:
        return '.'.join([self.namespace, self.path.with_suffix('').name])

    def __repr__(self):
        out_str = '\n' + self.name + '\n'
        out_str += '-'*len(self.name) + '\n'
        if len(self.imports) > 0:
            out_str += "Imports:\n"
            out_str += " " + ', '.join([i.name for i in self.imports]) + '\n'
        out_str += 'Groups:\n'
        out_str += ' ' + ', '.join([g.neurodata_type_def for g in self.groups])
        out_str += '\n'
        out_str += 'Datasets:\n'
        out_str += ' ' + ', '.join([d.neurodata_type_def for d in self.datasets])
        out_str += "\n"
        return out_str

    def build(self) -> SchemaDefinition:
        """
        Make the LinkML representation for this schema file

        Things that will be populated later
        - `id` (but need to have a placeholder to instantiate)
        - `version`
        """
        classes = [ClassAdapter(cls=dset) for dset in self.datasets]
        classes.extend(ClassAdapter(cls=group) for group in self.groups)
        built_classes = [c.build() for c in classes]

        sch = SchemaDefinition(
            name = self.name,
            id = self.name,
            imports = [i.name for i in self.imports],
            classes=built_classes
        )
        return sch

    @property
    def created_classes(self) -> List[Group|Dataset]:
        classes = [t for t in self.walk_types([self.groups, self.datasets], (Group, Dataset)) if t.neurodata_type_def is not None]
        return classes

    @property
    def needed_imports(self) -> List[str]:
        """
        Classes that need to be imported from other namespaces

        TODO:
        - Need to also check classes used in links/references
        """
        type_incs = self.walk_fields(self, 'neurodata_type_inc')
        definitions = [c.neurodata_type_def for c in self.created_classes]
        need = [inc for inc in type_incs if inc not in definitions]
        return need

nwb_linkml/io.py

@@ -9,17 +9,13 @@ import warnings
 from linkml_runtime.loaders import yaml_loader
 import yaml
 
-from nwb_schema_language import Namespaces, Namespace, Group, Dataset
-from nwb_linkml.namespaces import GitRepo, NamespaceRepo, NWB_CORE_REPO, HDMF_COMMON_REPO
+from nwb_schema_language import Namespaces, Group, Dataset
+from nwb_linkml.namespaces import NamespaceRepo, NWB_CORE_REPO, HDMF_COMMON_REPO
 from nwb_linkml.maps import preload
 from nwb_linkml.map import PHASES, Map
+from nwb_linkml.adapters.namespaces import NamespacesAdapter
+from nwb_linkml.adapters.schema import SchemaAdapter
 
-class NamespaceBundle(TypedDict):
-    """
-    A complete namespaces file and all indicated schema files
-    """
-    namespace: Namespaces
-    schema: Dict[str, List[Dataset | Group]]
 
 def load_yaml(path:Path) -> dict:
     with open(path, 'r') as file:
@@ -43,30 +39,34 @@ def load_namespaces(path:Path|NamespaceRepo) -> Namespaces:
 
-def load_schema_file(path:Path) -> List[Dataset | Group]:
+def load_schema_file(path:Path) -> SchemaAdapter:
     source = load_yaml(path)
-    schema = []
 
+    datasets = []
     for dataset in source.get('datasets', []):
         try:
-            schema.append(Dataset(**dataset))
+            datasets.append(Dataset(**dataset))
         except Exception as e:
             pprint(dataset)
             raise e
 
+    groups = []
     for group in source.get('groups', []):
         try:
-            schema.append(Group(**group))
+            groups.append(Group(**group))
         except Exception as e:
             pprint(group)
             raise e
 
-    #schema.extend([Dataset(**dataset) for dataset in source.get('datasets', [])])
-    #schema.extend([Group(**group) for group in source.get('groups', [])])
+    schema = SchemaAdapter(
+        path=path,
+        datasets=datasets,
+        groups=groups
+    )
 
     return schema
 
-def load_namespace_schema(namespace: Namespace | Namespaces, path:Path=Path('.')) -> Dict[str, List[Dataset | Group]]:
+def load_namespace_schema(namespace: Namespaces, path:Path=Path('.')) -> NamespacesAdapter:
     """
     Load all schema referenced by a namespace file
@@ -75,32 +75,31 @@ def load_namespace_schema(namespace: Namespace | Namespaces, path:Path=Path('.')
         path (:class:`pathlib.Path`): Location of the namespace file - all relative paths are interpreted relative to this
 
     Returns:
-        List[Union[Dataset|Group]]
+        :class:`.NamespacesAdapter`
     """
-    if isinstance(namespace, Namespace):
-        ns_iter = [namespace]
-    elif isinstance(namespace, Namespaces):
-        ns_iter = namespace.namespaces
-    else:
-        raise TypeError("Need to pass a namespace or namespaces :)")
-
     path = Path(path).resolve()
     if path.is_file():
        # given the namespace file itself, so find paths relative to its directory
        path = path.parent
 
-    sch = {}
-    for ns in ns_iter:
+    sch = []
+    for ns in namespace.namespaces:
        for schema in ns.schema_:
            if schema.source is None:
                warnings.warn(f"No source specified for {schema}")
                continue
            yml_file = (path / schema.source).resolve()
-            sch[schema.source] = load_schema_file(yml_file)
+            sch.append(load_schema_file(yml_file))
 
-    return sch
+    adapter = NamespacesAdapter(
+        namespaces=namespace,
+        schemas=sch
+    )
 
-def load_nwb_core() -> Dict[str, NamespaceBundle]:
+    return adapter
+
+def load_nwb_core() -> NamespacesAdapter:
    # First get hdmf-common:
    hdmf_ns_file = HDMF_COMMON_REPO.provide_from_git()
    hdmf_ns = load_namespaces(hdmf_ns_file)
@@ -110,16 +109,9 @@ def load_nwb_core() -> Dict[str, NamespaceBundle]:
     ns = load_namespaces(namespace_file)
     schema = load_namespace_schema(ns, namespace_file)
 
-    return {
-        'hdmf-common': NamespaceBundle(
-            namespace=hdmf_ns,
-            schema=hdmf_schema
-        ),
-        'nwb-core': NamespaceBundle(
-            namespace=ns,
-            schema=schema
-        )
-    }
+    schema.imported.append(hdmf_schema)
+
+    return schema
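Sketch of how the new return value is meant to be consumed end to end (this is essentially what tests/fixtures.py and tests/test_generate.py below do); not part of the commit:

from nwb_linkml import io

nwb_core = io.load_nwb_core()         # NamespacesAdapter wrapping core + hdmf-common
nwb_core.populate_imports()           # resolve cross-schema references

base = nwb_core.schemas[0]            # SchemaAdapter for nwb.base.yaml
linkml_schema = base.build()          # linkml_runtime SchemaDefinition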

76
nwb_linkml/lang_elements.py Normal file

@@ -0,0 +1,76 @@
"""
Language elements in nwb schema language that have a fixed, alternative representation
in LinkML. These are exported as an nwb.language.yml file along with every generated namespace
"""
from nwb_schema_language.datamodel.nwb_schema_pydantic import FlatDtype as FlatDtype_source
from linkml_runtime.linkml_model import \
    ClassDefinition, \
    EnumDefinition, \
    SchemaDefinition, \
    SlotDefinition, \
    TypeDefinition,\
    Prefix,\
    PermissibleValue
from nwb_linkml.maps.dtype import flat_to_linkml

FlatDType = EnumDefinition(
    name="FlatDType",
    permissible_values=[PermissibleValue(p) for p in FlatDtype_source.__members__.keys()],
)

DimNameSlot = SlotDefinition(
    name="dim_name",
    range="string",
    description="The name of a dimension"
)
DimShapeSlot = SlotDefinition(
    name="dim_shape",
    range="integer",
    required=False
)
DimClass = ClassDefinition(
    name="Dimension",
    slots=[DimNameSlot.name, DimShapeSlot.name],
    description="A single dimension within a shape"
)
DimSlot = SlotDefinition(
    name="dim",
    range=DimClass.name,
    multivalued=True,
    description="Slot representing the dimensions that a Shape can have"
)

# ShapeClass = ClassDefinition(
#     name="Shape",
#     description="A possible shape for an array-like dataset",
#     slots=[DimSlot.name]
# )

DTypeTypes = []
for nwbtype, linkmltype in flat_to_linkml.items():
    amin = None
    if nwbtype.startswith('uint'):
        amin = 0
    atype = TypeDefinition(
        name=nwbtype,
        minimum_value=amin,
        typeof=linkmltype
    )
    DTypeTypes.append(atype)

NwbLangSchema = SchemaDefinition(
    name="nwb.language",
    id='nwb.language',
    description="Adapter objects to mimic the behavior of elements in the nwb-schema-language",
    enums=[FlatDType],
    slots=[DimNameSlot, DimShapeSlot, DimSlot],
    classes=[DimClass],
    types=DTypeTypes,
    imports=['linkml:types'],
    prefixes={'linkml': Prefix('linkml','https://w3id.org/linkml')}
)
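A quick way to inspect what this hand-built schema serializes to, using the same dumper the new test_generate.py below relies on; a sketch, not part of the commit:

from linkml_runtime.dumpers import yaml_dumper

from nwb_linkml.lang_elements import NwbLangSchema

print(yaml_dumper.dumps(NwbLangSchema))   # the nwb.language schema as LinkML YAML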

31
nwb_linkml/maps/dtype.py Normal file

@@ -0,0 +1,31 @@
flat_to_linkml = {
    "float" : "float",
    "float32" : "float",
    "double" : "double",
    "float64" : "double",
    "long" : "integer",
    "int64" : "integer",
    "int" : "integer",
    "int32" : "integer",
    "int16" : "integer",
    "short" : "integer",
    "int8" : "integer",
    "uint" : "integer",
    "uint32" : "integer",
    "uint16" : "integer",
    "uint8" : "integer",
    "uint64" : "integer",
    "numeric" : "float",
    "text" : "string",
    "utf" : "string",
    "utf8" : "string",
    "utf_8" : "string",
    "ascii" : "string",
    "bool" : "boolean",
    "isodatetime" : "date"
}
"""
Map between the flat data types and the simpler linkml base types
"""

tests/fixtures.py

@@ -3,6 +3,7 @@ from typing import Dict
 
 from nwb_linkml import io
+from nwb_linkml.adapters.namespaces import NamespacesAdapter
 
 import shutil
 from pathlib import Path
@@ -17,6 +18,7 @@ def tmp_output_dir() -> Path:
 
 @pytest.fixture(scope="session")
-def nwb_core_fixture() -> Dict[str, io.NamespaceBundle]:
+def nwb_core_fixture() -> NamespacesAdapter:
     nwb_core = io.load_nwb_core()
+    nwb_core.populate_imports()
     return nwb_core

36
tests/test_adapter.py Normal file

@@ -0,0 +1,36 @@
import pytest
from rich import print
import pdb

from .fixtures import nwb_core_fixture

from nwb_schema_language import Attribute


def test_walk_adapter(nwb_core_fixture):
    base = nwb_core_fixture.schemas[0]
    assert base.path.name == "nwb.base.yaml"

    # type_incs = [inc for inc in base.walk(base)]
    type_incs = [inc for inc in base.walk_fields(base, 'neurodata_type_inc')]
    attributes = [a for a in base.walk_types(base, Attribute)]
    # pdb.set_trace()


@pytest.mark.parametrize(
    ['class_name','schema_file','namespace_name'],
    [
        ('DynamicTable', 'table.yaml', 'hdmf-common'),
        ('Container', 'base.yaml', 'hdmf-common'),
        ('TimeSeries', 'nwb.base.yaml', 'core'),
        ('ImageSeries', 'nwb.image.yaml', 'core')
    ]
)
def test_find_type_source(nwb_core_fixture, class_name, schema_file, namespace_name):
    defining_sch = nwb_core_fixture.find_type_source(class_name)
    assert defining_sch.path.name == schema_file
    assert namespace_name == defining_sch.namespace


def test_populate_imports(nwb_core_fixture):
    nwb_core_fixture.populate_imports()
    pdb.set_trace()

17
tests/test_generate.py Normal file

@@ -0,0 +1,17 @@
import pytest
import warnings

from .fixtures import nwb_core_fixture, tmp_output_dir

from linkml_runtime.dumpers import yaml_dumper
from nwb_linkml.lang_elements import NwbLangSchema


def test_generate_nwblang(tmp_output_dir):
    output_file = (tmp_output_dir / NwbLangSchema.name).with_suffix('.yml')
    yaml_dumper.dump(NwbLangSchema, output_file)


def test_generate_base(nwb_core_fixture, tmp_output_dir):
    schema = nwb_core_fixture.schemas[0].build()
    output_file = (tmp_output_dir / schema.name).with_suffix('.yml')
    warnings.warn(output_file)
    yaml_dumper.dump(schema, output_file)