progress with using pydantic models

This commit is contained in:
sneakers-the-rat 2023-08-18 01:11:14 -07:00
parent e8adb4f88c
commit 63ccd800b3
59 changed files with 1041 additions and 3575 deletions

View file

@ -106,6 +106,8 @@ gen-examples:
gen-project: $(PYMODEL)
$(RUN) gen-project ${GEN_PARGS} -d $(DEST) $(SOURCE_SCHEMA_PATH) && mv $(DEST)/*.py $(PYMODEL)
gen-pydantic: $(PYMODEL)
$(RUN) gen-pydantic $(SOURCE_SCHEMA_PATH) --pydantic_version 1 > $(PYMODEL)/nwb_schema_pydantic.py
test: test-schema test-python test-examples

View file

@ -3,5 +3,5 @@ name: nwb_schema_language
author: Jonny Saunders <j@nny.fyi>
description: Translation of the nwb-schema-language to LinkML
source_schema_path: src/nwb_schema_language/schema/nwb_schema_language.yaml
google_sheet_id: 1wVoaiFg47aT9YWNeRfTZ8tYHN8s8PAuDx5i2HUcDpvQ
google_sheet_tabs: personinfo enums
#google_sheet_id: 1wVoaiFg47aT9YWNeRfTZ8tYHN8s8PAuDx5i2HUcDpvQ
#google_sheet_tabs: personinfo enums

View file

@ -24,6 +24,8 @@ generator_args:
mergeimports: true
python:
mergeimports: true
pydantic:
mergeimports: true
prefixmap:
mergeimports: true
proto:

View file

@ -38,6 +38,11 @@ type Dataset implements DtypeMixin, NamingMixin
dtype: String
}
type Datasets
{
datasets: [Dataset]
}
interface DtypeMixin
{
dtype: String
@ -58,6 +63,11 @@ type Group implements NamingMixin
links: [Link]
}
type Groups
{
groups: [Group]
}
type Link
{
name: String
@ -97,8 +107,8 @@ type Schema
{
source: String
namespace: String
doc: String!
title: String
neurodataTypes: [String]
doc: String
}

View file

@ -1,7 +1,7 @@
{
"comments": {
"description": "Auto generated by LinkML jsonld context generator",
"generation_date": "2023-08-16T23:21:36",
"generation_date": "2023-08-18T00:36:53",
"source": "nwb_schema_language.yaml"
},
"@context": {

View file

@ -457,7 +457,6 @@
"owner": "CompoundDtype",
"domain_of": [
"Namespace",
"Schema",
"Group",
"Attribute",
"Link",
@ -635,6 +634,7 @@
],
"range": "Namespace",
"inlined": true,
"inlined_as_list": true,
"@type": "SlotDefinition"
},
{
@ -791,12 +791,14 @@
"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language",
"slot_uri": "https://w3id.org/p2p_ld/nwb-schema-language/datasets",
"multivalued": true,
"owner": "Group",
"owner": "Datasets",
"domain_of": [
"Group"
"Group",
"Datasets"
],
"range": "Dataset",
"inlined": true,
"inlined_as_list": true,
"@type": "SlotDefinition"
},
{
@ -805,12 +807,14 @@
"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language",
"slot_uri": "https://w3id.org/p2p_ld/nwb-schema-language/groups",
"multivalued": true,
"owner": "Group",
"owner": "Groups",
"domain_of": [
"Group"
"Group",
"Groups"
],
"range": "Group",
"inlined": true,
"inlined_as_list": true,
"@type": "SlotDefinition"
},
{
@ -838,7 +842,7 @@
"DtypeMixin"
],
"range": "string",
"exactly_one_of": [
"any_of": [
{
"range": "FlatDtype",
"@type": "AnonymousSlotExpression"
@ -981,6 +985,18 @@
"range": "reftype_options",
"@type": "SlotDefinition"
},
{
"name": "schema__doc",
"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language",
"slot_uri": "https://w3id.org/p2p_ld/nwb-schema-language/doc",
"alias": "doc",
"owner": "Schema",
"domain_of": [
"Schema"
],
"range": "string",
"@type": "SlotDefinition"
},
{
"name": "Namespace_name",
"definition_uri": "https://w3id.org/p2p_ld/nwb-schema-language/name",
@ -1004,25 +1020,6 @@
},
"@type": "SlotDefinition"
},
{
"name": "Schema_doc",
"definition_uri": "https://w3id.org/p2p_ld/nwb-schema-language/doc",
"description": "Description of corresponding object.",
"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language",
"is_a": "doc",
"domain": "Schema",
"slot_uri": "https://w3id.org/p2p_ld/nwb-schema-language/doc",
"alias": "doc",
"owner": "Schema",
"domain_of": [
"Schema"
],
"is_usage_slot": true,
"usage_slot_name": "doc",
"range": "string",
"required": true,
"@type": "SlotDefinition"
},
{
"name": "Attribute_name",
"definition_uri": "https://w3id.org/p2p_ld/nwb-schema-language/name",
@ -1085,7 +1082,7 @@
"usage_slot_name": "dtype",
"range": "FlatDtype",
"required": true,
"exactly_one_of": [
"any_of": [
{
"range": "FlatDtype",
"@type": "AnonymousSlotExpression"
@ -1139,11 +1136,17 @@
"slots": [
"source",
"namespace",
"Schema_doc",
"title",
"neurodata_types"
"neurodata_types",
"schema__doc"
],
"slot_usage": {},
"attributes": [
{
"name": "doc",
"@type": "SlotDefinition"
}
],
"class_uri": "https://w3id.org/p2p_ld/nwb-schema-language/Schema",
"rules": [
{
@ -1287,6 +1290,17 @@
"class_uri": "https://w3id.org/p2p_ld/nwb-schema-language/Group",
"@type": "ClassDefinition"
},
{
"name": "Groups",
"definition_uri": "https://w3id.org/p2p_ld/nwb-schema-language/Groups",
"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language",
"slots": [
"groups"
],
"slot_usage": {},
"class_uri": "https://w3id.org/p2p_ld/nwb-schema-language/Groups",
"@type": "ClassDefinition"
},
{
"name": "Attribute",
"definition_uri": "https://w3id.org/p2p_ld/nwb-schema-language/Attribute",
@ -1349,6 +1363,17 @@
"class_uri": "https://w3id.org/p2p_ld/nwb-schema-language/Dataset",
"@type": "ClassDefinition"
},
{
"name": "Datasets",
"definition_uri": "https://w3id.org/p2p_ld/nwb-schema-language/Datasets",
"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language",
"slots": [
"datasets"
],
"slot_usage": {},
"class_uri": "https://w3id.org/p2p_ld/nwb-schema-language/Datasets",
"@type": "ClassDefinition"
},
{
"name": "ReferenceDtype",
"definition_uri": "https://w3id.org/p2p_ld/nwb-schema-language/ReferenceDtype",
@ -1491,9 +1516,9 @@
],
"metamodel_version": "1.7.0",
"source_file": "nwb_schema_language.yaml",
"source_file_date": "2023-08-16T23:21:07",
"source_file_size": 10816,
"generation_date": "2023-08-16T23:21:36",
"source_file_date": "2023-08-18T00:28:45",
"source_file_size": 10952,
"generation_date": "2023-08-18T00:36:53",
"settings": [
{
"setting_key": "email",

View file

@ -25,7 +25,7 @@
"type": "string"
},
"dtype": {
"oneOf": [
"anyOf": [
{
"$ref": "#/$defs/FlatDtype"
},
@ -123,7 +123,7 @@
"type": "string"
},
"dtype": {
"oneOf": [
"anyOf": [
{
"$ref": "#/$defs/FlatDtype"
},
@ -186,6 +186,20 @@
"title": "Dataset",
"type": "object"
},
"Datasets": {
"additionalProperties": false,
"description": "",
"properties": {
"datasets": {
"items": {
"$ref": "#/$defs/Dataset"
},
"type": "array"
}
},
"title": "Datasets",
"type": "object"
},
"FlatDtype": {
"description": "",
"enum": [
@ -283,6 +297,20 @@
"title": "Group",
"type": "object"
},
"Groups": {
"additionalProperties": false,
"description": "",
"properties": {
"groups": {
"items": {
"$ref": "#/$defs/Group"
},
"type": "array"
}
},
"title": "Groups",
"type": "object"
},
"Link": {
"additionalProperties": false,
"description": "",
@ -526,7 +554,6 @@
"description": "",
"properties": {
"doc": {
"description": "Description of corresponding object.",
"type": "string"
},
"namespace": {

View file

@ -31,6 +31,10 @@ message Dataset
repeated attribute attributes = 0
string dtype = 0
}
message Datasets
{
repeated dataset datasets = 0
}
message Group
{
string neurodataTypeDef = 0
@ -45,6 +49,10 @@ message Group
repeated group groups = 0
repeated link links = 0
}
message Groups
{
repeated group groups = 0
}
message Link
{
string name = 0
@ -76,7 +84,7 @@ message Schema
{
string source = 0
string namespace = 0
string doc = 0
string title = 0
repeated string neurodataTypes = 0
string doc = 0
}

View file

@ -97,6 +97,12 @@ linkml:Sparqlpath xsd:string
)
}
<Datasets> CLOSED {
( $<Datasets_tes> <datasets> @<Dataset> * ;
rdf:type [ <Datasets> ] ?
)
}
<DtypeMixin> {
( $<DtypeMixin_tes> <dtype> @linkml:String ? ;
rdf:type [ <DtypeMixin> ] ?
@ -122,6 +128,12 @@ linkml:Sparqlpath xsd:string
)
}
<Groups> CLOSED {
( $<Groups_tes> <groups> @<Group> * ;
rdf:type [ <Groups> ] ?
)
}
<Link> CLOSED {
( $<Link_tes> ( <name> @linkml:String ? ;
<doc> @linkml:String ;
@ -169,9 +181,9 @@ linkml:Sparqlpath xsd:string
<Schema> CLOSED {
( $<Schema_tes> ( <source> @linkml:String ? ;
<namespace> @linkml:String ? ;
<doc> @linkml:String ;
<title> @linkml:String ? ;
<neurodata_types> @linkml:String *
<neurodata_types> @linkml:String * ;
<doc> @linkml:String ?
) ;
rdf:type [ <Schema> ] ?
)

View file

@ -36,6 +36,11 @@ CREATE TABLE "Dataset" (
PRIMARY KEY (neurodata_type_def, neurodata_type_inc, name, default_name, dims, shape, value, default_value, doc, quantity, linkable, attributes, dtype)
);
CREATE TABLE "Datasets" (
datasets TEXT,
PRIMARY KEY (datasets)
);
CREATE TABLE "Group" (
neurodata_type_def TEXT,
neurodata_type_inc TEXT,
@ -51,6 +56,11 @@ CREATE TABLE "Group" (
PRIMARY KEY (neurodata_type_def, neurodata_type_inc, name, default_name, doc, quantity, linkable, attributes, datasets, groups, links)
);
CREATE TABLE "Groups" (
groups TEXT,
PRIMARY KEY (groups)
);
CREATE TABLE "Link" (
name TEXT,
doc TEXT NOT NULL,
@ -85,8 +95,8 @@ CREATE TABLE "ReferenceDtype" (
CREATE TABLE "Schema" (
source TEXT,
namespace TEXT,
doc TEXT NOT NULL,
title TEXT,
neurodata_types TEXT,
PRIMARY KEY (source, namespace, doc, title, neurodata_types)
doc TEXT,
PRIMARY KEY (source, namespace, title, neurodata_types, doc)
);

View file

@ -1,5 +1,5 @@
[tool.poetry]
name = "nwb_schema_linkml"
name = "nwb_schema_language"
version = "0.1.0"
description = "Translation of the nwb-schema-language to LinkML"
authors = ["Jonny Saunders <j@nny.fyi>"]

View file

@ -0,0 +1,9 @@
from .datamodel.nwb_schema_pydantic import Namespace, \
Namespaces, \
Schema, \
Group, \
Attribute, \
Link, \
Dataset, \
ReferenceDtype, \
CompoundDtype

View file

@ -1,5 +1,5 @@
# Auto generated from nwb_schema_language.yaml by pythongen.py version: 0.0.1
# Generation date: 2023-08-16T23:21:37
# Generation date: 2023-08-18T00:36:55
# Schema: nwb-schema-language
#
# id: https://w3id.org/p2p_ld/nwb-schema-language
@ -95,7 +95,6 @@ class Namespace(YAMLRoot):
if self.date is not None and not isinstance(self.date, XSDDate):
self.date = XSDDate(self.date)
print(self.schema)
if not isinstance(self.schema, list):
self.schema = [self.schema] if self.schema is not None else []
self.schema = [v if isinstance(v, Schema) else Schema(**as_dict(v)) for v in self.schema]
@ -115,7 +114,9 @@ class Namespaces(YAMLRoot):
namespaces: Optional[Union[Union[dict, Namespace], List[Union[dict, Namespace]]]] = empty_list()
def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
self._normalize_inlined_as_dict(slot_name="namespaces", slot_type=Namespace, key_name="doc", keyed=False)
if not isinstance(self.namespaces, list):
self.namespaces = [self.namespaces] if self.namespaces is not None else []
self.namespaces = [v if isinstance(v, Namespace) else Namespace(**as_dict(v)) for v in self.namespaces]
super().__post_init__(**kwargs)
@ -129,18 +130,13 @@ class Schema(YAMLRoot):
class_name: ClassVar[str] = "Schema"
class_model_uri: ClassVar[URIRef] = NWB_SCHEMA_LANGUAGE.Schema
doc: str = None
source: Optional[str] = None
namespace: Optional[str] = None
title: Optional[str] = None
neurodata_types: Optional[Union[str, List[str]]] = empty_list()
doc: Optional[str] = None
def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
if self._is_empty(self.doc):
self.MissingRequiredField("doc")
if not isinstance(self.doc, str):
self.doc = str(self.doc)
if self.source is not None and not isinstance(self.source, str):
self.source = str(self.source)
@ -154,6 +150,9 @@ class Schema(YAMLRoot):
self.neurodata_types = [self.neurodata_types] if self.neurodata_types is not None else []
self.neurodata_types = [v if isinstance(v, str) else str(v) for v in self.neurodata_types]
if self.doc is not None and not isinstance(self.doc, str):
self.doc = str(self.doc)
super().__post_init__(**kwargs)
@ -204,15 +203,38 @@ class Group(YAMLRoot):
self._normalize_inlined_as_dict(slot_name="attributes", slot_type=Attribute, key_name="name", keyed=False)
self._normalize_inlined_as_dict(slot_name="datasets", slot_type=Dataset, key_name="doc", keyed=False)
if not isinstance(self.datasets, list):
self.datasets = [self.datasets] if self.datasets is not None else []
self.datasets = [v if isinstance(v, Dataset) else Dataset(**as_dict(v)) for v in self.datasets]
self._normalize_inlined_as_dict(slot_name="groups", slot_type=Group, key_name="doc", keyed=False)
if not isinstance(self.groups, list):
self.groups = [self.groups] if self.groups is not None else []
self.groups = [v if isinstance(v, Group) else Group(**as_dict(v)) for v in self.groups]
self._normalize_inlined_as_dict(slot_name="links", slot_type=Link, key_name="doc", keyed=False)
super().__post_init__(**kwargs)
@dataclass
class Groups(YAMLRoot):
_inherited_slots: ClassVar[List[str]] = []
class_class_uri: ClassVar[URIRef] = NWB_SCHEMA_LANGUAGE.Groups
class_class_curie: ClassVar[str] = "nwb_schema_language:Groups"
class_name: ClassVar[str] = "Groups"
class_model_uri: ClassVar[URIRef] = NWB_SCHEMA_LANGUAGE.Groups
groups: Optional[Union[Union[dict, Group], List[Union[dict, Group]]]] = empty_list()
def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
if not isinstance(self.groups, list):
self.groups = [self.groups] if self.groups is not None else []
self.groups = [v if isinstance(v, Group) else Group(**as_dict(v)) for v in self.groups]
super().__post_init__(**kwargs)
@dataclass
class Attribute(YAMLRoot):
_inherited_slots: ClassVar[List[str]] = []
@ -356,6 +378,25 @@ class Dataset(YAMLRoot):
super().__post_init__(**kwargs)
@dataclass
class Datasets(YAMLRoot):
_inherited_slots: ClassVar[List[str]] = []
class_class_uri: ClassVar[URIRef] = NWB_SCHEMA_LANGUAGE.Datasets
class_class_curie: ClassVar[str] = "nwb_schema_language:Datasets"
class_name: ClassVar[str] = "Datasets"
class_model_uri: ClassVar[URIRef] = NWB_SCHEMA_LANGUAGE.Datasets
datasets: Optional[Union[Union[dict, Dataset], List[Union[dict, Dataset]]]] = empty_list()
def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
if not isinstance(self.datasets, list):
self.datasets = [self.datasets] if self.datasets is not None else []
self.datasets = [v if isinstance(v, Dataset) else Dataset(**as_dict(v)) for v in self.datasets]
super().__post_init__(**kwargs)
@dataclass
class ReferenceDtype(YAMLRoot):
_inherited_slots: ClassVar[List[str]] = []
@ -674,12 +715,12 @@ slots.target_type = Slot(uri=NWB_SCHEMA_LANGUAGE.target_type, name="target_type"
slots.reftype = Slot(uri=NWB_SCHEMA_LANGUAGE.reftype, name="reftype", curie=NWB_SCHEMA_LANGUAGE.curie('reftype'),
model_uri=NWB_SCHEMA_LANGUAGE.reftype, domain=None, range=Optional[Union[str, "ReftypeOptions"]])
slots.schema__doc = Slot(uri=NWB_SCHEMA_LANGUAGE.doc, name="schema__doc", curie=NWB_SCHEMA_LANGUAGE.curie('doc'),
model_uri=NWB_SCHEMA_LANGUAGE.schema__doc, domain=None, range=Optional[str])
slots.Namespace_name = Slot(uri=NWB_SCHEMA_LANGUAGE.name, name="Namespace_name", curie=NWB_SCHEMA_LANGUAGE.curie('name'),
model_uri=NWB_SCHEMA_LANGUAGE.Namespace_name, domain=Namespace, range=str)
slots.Schema_doc = Slot(uri=NWB_SCHEMA_LANGUAGE.doc, name="Schema_doc", curie=NWB_SCHEMA_LANGUAGE.curie('doc'),
model_uri=NWB_SCHEMA_LANGUAGE.Schema_doc, domain=Schema, range=str)
slots.Attribute_name = Slot(uri=NWB_SCHEMA_LANGUAGE.name, name="Attribute_name", curie=NWB_SCHEMA_LANGUAGE.curie('name'),
model_uri=NWB_SCHEMA_LANGUAGE.Attribute_name, domain=Attribute, range=str)

View file

@ -0,0 +1,241 @@
from __future__ import annotations
from datetime import datetime, date
from enum import Enum
from typing import List, Dict, Optional, Any, Union
from pydantic import BaseModel as BaseModel, Field
import sys
if sys.version_info >= (3, 8):
from typing import Literal
else:
from typing_extensions import Literal
metamodel_version = "None"
version = "None"
class WeakRefShimBaseModel(BaseModel):
__slots__ = '__weakref__'
class ConfiguredBaseModel(WeakRefShimBaseModel,
validate_assignment = True,
validate_all = True,
underscore_attrs_are_private = True,
extra = 'forbid',
arbitrary_types_allowed = True,
use_enum_values = True):
pass
class ReftypeOptions(str, Enum):
# Reference to another group or dataset of the given target_type
ref = "ref"
# Reference to another group or dataset of the given target_type
reference = "reference"
# Reference to another group or dataset of the given target_type
object = "object"
# Reference to a region (i.e. subset) of another dataset of the given target_type
region = "region"
class QuantityEnum(str, Enum):
# Zero or more instances, equivalent to zero_or_many
ASTERISK = "*"
# Zero or one instances, equivalent to zero_or_one
QUESTION_MARK = "?"
# One or more instances, equivalent to one_or_many
PLUS_SIGN = "+"
# Zero or more instances, equivalent to *
zero_or_many = "zero_or_many"
# One or more instances, equivalent to +
one_or_many = "one_or_many"
# Zero or one instances, equivalent to ?
zero_or_one = "zero_or_one"
class FlatDtype(str, Enum):
# single precision floating point (32 bit)
float = "float"
# single precision floating point (32 bit)
float32 = "float32"
# double precision floating point (64 bit)
double = "double"
# double precision floating point (64 bit)
float64 = "float64"
# signed 64 bit integer
long = "long"
# signed 64 bit integer
int64 = "int64"
# signed 32 bit integer
int = "int"
# signed 32 bit integer
int32 = "int32"
# signed 16 bit integer
int16 = "int16"
# signed 16 bit integer
short = "short"
# signed 8 bit integer
int8 = "int8"
# unsigned 32 bit integer
uint = "uint"
# unsigned 32 bit integer
uint32 = "uint32"
# unsigned 16 bit integer
uint16 = "uint16"
# unsigned 8 bit integer
uint8 = "uint8"
# unsigned 64 bit integer
uint64 = "uint64"
# any numeric type (i.e., any int, uint, float)
numeric = "numeric"
# 8-bit Unicode
text = "text"
# 8-bit Unicode
utf = "utf"
# 8-bit Unicode
utf8 = "utf8"
# 8-bit Unicode
utf_8 = "utf-8"
# ASCII text
ascii = "ascii"
# 8 bit integer with valid values 0 or 1
bool = "bool"
# ISO 8601 datetime string
isodatetime = "isodatetime"
class Namespace(ConfiguredBaseModel):
doc: str = Field(..., description="""Description of corresponding object.""")
name: str = Field(...)
full_name: Optional[str] = Field(None, description="""Optional string with extended full name for the namespace.""")
version: str = Field(...)
date: Optional[date] = Field(None, description="""Date that a namespace was last modified or released""")
author: List[str] = Field(default_factory=list, description="""List of strings with the names of the authors of the namespace.""")
contact: List[str] = Field(default_factory=list, description="""List of strings with the contact information for the authors. Ordering of the contacts should match the ordering of the authors.""")
schema_: Optional[List[Schema]] = Field(alias="schema", default_factory=list, description="""List of the schema to be included in this namespace.""")
class Namespaces(ConfiguredBaseModel):
namespaces: Optional[List[Namespace]] = Field(default_factory=list)
class Schema(ConfiguredBaseModel):
source: Optional[str] = Field(None, description="""describes the name of the YAML (or JSON) file with the schema specification. The schema files should be located in the same folder as the namespace file.""")
namespace: Optional[str] = Field(None, description="""describes a named reference to another namespace. In contrast to source, this is a reference by name to a known namespace (i.e., the namespace is resolved during the build and must point to an already existing namespace). This mechanism is used to allow, e.g., extension of a core namespace (here the NWB core namespace) without requiring hard paths to the files describing the core namespace. Either source or namespace must be specified, but not both.""")
title: Optional[str] = Field(None, description="""a descriptive title for a file for documentation purposes.""")
neurodata_types: Optional[List[Union[Dataset, Group]]] = Field(default_factory=list, description="""an optional list of strings indicating which data types should be included from the given specification source or namespace. The default is null indicating that all data types should be included.""")
doc: Optional[str] = Field(None)
class Groups(ConfiguredBaseModel):
groups: Optional[List[Group]] = Field(default_factory=list)
class Link(ConfiguredBaseModel):
name: Optional[str] = Field(None)
doc: str = Field(..., description="""Description of corresponding object.""")
target_type: str = Field(..., description="""Describes the neurodata_type of the target that the reference points to""")
quantity: Optional[Union[QuantityEnum, int]] = Field(1)
class Datasets(ConfiguredBaseModel):
datasets: Optional[List[Dataset]] = Field(default_factory=list)
class ReferenceDtype(ConfiguredBaseModel):
target_type: str = Field(..., description="""Describes the neurodata_type of the target that the reference points to""")
reftype: Optional[ReftypeOptions] = Field(None, description="""describes the kind of reference""")
class CompoundDtype(ConfiguredBaseModel):
name: str = Field(...)
doc: str = Field(..., description="""Description of corresponding object.""")
dtype: Union[FlatDtype, ReferenceDtype] = Field(...)
class DtypeMixin(ConfiguredBaseModel):
dtype: Optional[List[Union[CompoundDtype, FlatDtype, ReferenceDtype]]] = Field(default_factory=list)
class Attribute(DtypeMixin):
name: str = Field(...)
dims: Optional[List[str]] = Field(default_factory=list)
shape: Optional[List[str]] = Field(default_factory=list)
value: Optional[Any] = Field(None, description="""Optional constant, fixed value for the attribute.""")
default_value: Optional[Any] = Field(None, description="""Optional default value for variable-valued attributes.""")
doc: str = Field(..., description="""Description of corresponding object.""")
required: Optional[bool] = Field(True, description="""Optional boolean key describing whether the attribute is required. Default value is True.""")
dtype: Optional[List[Union[CompoundDtype, FlatDtype, ReferenceDtype]]] = Field(default_factory=list)
class NamingMixin(ConfiguredBaseModel):
"""
require either neurodata_type_def or name to be present
"""
None
class Group(NamingMixin):
neurodata_type_def: Optional[str] = Field(None)
neurodata_type_inc: Optional[str] = Field(None)
name: Optional[str] = Field(None)
default_name: Optional[str] = Field(None)
doc: str = Field(..., description="""Description of corresponding object.""")
quantity: Optional[Union[QuantityEnum, int]] = Field(1)
linkable: Optional[bool] = Field(None)
attributes: Optional[List[Attribute]] = Field(default_factory=list)
datasets: Optional[List[Dataset]] = Field(default_factory=list)
groups: Optional[List[Group]] = Field(default_factory=list)
links: Optional[List[Link]] = Field(default_factory=list)
class Dataset(NamingMixin, DtypeMixin):
neurodata_type_def: Optional[str] = Field(None)
neurodata_type_inc: Optional[str] = Field(None)
name: Optional[str] = Field(None)
default_name: Optional[str] = Field(None)
dims: Optional[List[str]] = Field(default_factory=list)
shape: Optional[List[str]] = Field(default_factory=list)
value: Optional[Any] = Field(None, description="""Optional constant, fixed value for the attribute.""")
default_value: Optional[Any] = Field(None, description="""Optional default value for variable-valued attributes.""")
doc: str = Field(..., description="""Description of corresponding object.""")
quantity: Optional[Union[QuantityEnum, int]] = Field(1)
linkable: Optional[bool] = Field(None)
attributes: Optional[List[Attribute]] = Field(default_factory=list)
dtype: Optional[List[Union[CompoundDtype, FlatDtype, ReferenceDtype]]] = Field(default_factory=list)
# Update forward refs
# see https://pydantic-docs.helpmanual.io/usage/postponed_annotations/
Namespace.update_forward_refs()
Namespaces.update_forward_refs()
Schema.update_forward_refs()
Groups.update_forward_refs()
Link.update_forward_refs()
Datasets.update_forward_refs()
ReferenceDtype.update_forward_refs()
CompoundDtype.update_forward_refs()
DtypeMixin.update_forward_refs()
Attribute.update_forward_refs()
NamingMixin.update_forward_refs()
Group.update_forward_refs()
Dataset.update_forward_refs()

View file

@ -46,10 +46,9 @@ classes:
slots:
- source
- namespace
- doc
- title
- neurodata_types
slot_usage:
attributes:
doc:
required: false
rules:
@ -82,6 +81,10 @@ classes:
- groups
- links
Groups:
slots:
- groups
Attribute:
mixins:
- DtypeMixin
@ -122,6 +125,10 @@ classes:
- linkable
- attributes
Datasets:
slots:
- datasets
ReferenceDtype:
slots:
- target_type
@ -137,7 +144,10 @@ classes:
required: true
dtype:
required: true
range: FlatDtype
any_of:
- range: ReferenceDtype
- range: FlatDtype
multivalued: false
DtypeMixin:
mixin: true
@ -216,6 +226,7 @@ slots:
namespaces:
multivalued: true
range: Namespace
inlined_as_list: true
neurodata_types:
multivalued: true
any_of:
@ -256,19 +267,22 @@ slots:
datasets:
range: Dataset
multivalued: true
inlined_as_list: true
groups:
range: Group
multivalued: true
inlined_as_list: true
links:
range: Link
multivalued: true
# attributes
dtype:
exactly_one_of:
any_of:
- range: FlatDtype
- range: CompoundDtype
- range: ReferenceDtype
multivalued: true
dims:
multivalued: true
range: string
@ -298,9 +312,9 @@ slots:
description: Describes the neurodata_type of the target that the reference points
to
required: true
any_of:
- range: Dataset
- range: Group
# any_of:
# - range: Dataset
# - range: Group
reftype:
description: describes the kind of reference
range: reftype_options

49
nwb_linkml/io.py Normal file
View file

@ -0,0 +1,49 @@
"""
Loading/saving NWB Schema yaml files
"""
from pathlib import Path
from typing import TypedDict, List
from pprint import pprint
from linkml_runtime.loaders import yaml_loader
import yaml
from nwb_schema_language import Namespaces, Group, Dataset
from nwb_linkml.namespaces import GitRepo, NamespaceRepo
def load_namespaces(path:Path|NamespaceRepo) -> Namespaces:
    """
    Load a Namespaces model from a namespace yaml file.

    Accepts either a local file path or a :class:`.NamespaceRepo`, in which
    case the file is first provided by cloning the namespace's git repository.
    """
    source_file = path.provide_from_git() if isinstance(path, NamespaceRepo) else path
    return yaml_loader.load(str(source_file), target_class=Namespaces)
class SchemaFile(TypedDict):
    # Expected top-level shape of a single NWB schema yaml file:
    # a list of dataset specs and a list of group specs.
    datasets: List[Dataset]
    groups: List[Group]
def load_schema_file(path:Path) -> List[Dataset | Group]:
    # Load one NWB schema yaml file and parse its entries into
    # nwb_schema_language models.
    #
    # NOTE(review): despite the return annotation, only 'datasets' entries
    # are currently parsed — 'groups' parsing is commented out below,
    # presumably because Group validation still fails on real schema files.
    # Confirm before relying on groups being present in the result.
    with open(path, 'r') as yfile:
        source = yaml.safe_load(yfile)
    schema = []
    for dataset in source.get('datasets', []):
        try:
            schema.append(Dataset(**dataset))
        except Exception as e:
            # Debugging aid: dump the offending source dict before re-raising
            pprint(dataset)
            raise e
    #schema.extend([Dataset(**dataset) for dataset in source.get('datasets', [])])
    #schema.extend([Group(**group) for group in source.get('groups', [])])
    return schema

172
nwb_linkml/namespaces.py Normal file
View file

@ -0,0 +1,172 @@
"""
Define and manage NWB namespaces in external repositories
"""
import warnings
from pathlib import Path
import tempfile
import subprocess
import shutil
from pydantic import BaseModel, HttpUrl, FilePath, DirectoryPath, Field
class NamespaceRepo(BaseModel):
    """
    Definition of one NWB namespaces file to import from a git repository
    """
    name: str = Field(description="Short name used to refer to this namespace (usually equivalent to the name field within a namespaces NWB list)")
    repository: HttpUrl | DirectoryPath = Field(description="URL or local absolute path to the root repository")
    path: Path = Field(description="Relative path from the repository root to the namespace file")

    def provide_from_git(self, commit:str|None=None) -> Path:
        # Clone (or reuse) this namespace's repository into a temp
        # directory and return the local path to the namespace file.
        git = GitRepo(self, commit)
        git.clone()
        return git.namespace_file
# Constant namespaces
# Canonical upstream locations of the core NWB and hdmf-common schema.
NWB_CORE_REPO = NamespaceRepo(
    name="core",
    repository="https://github.com/NeurodataWithoutBorders/nwb-schema",
    path=Path("core/nwb.namespace.yaml")
)

HDMF_COMMON_REPO = NamespaceRepo(
    name="hdmf-common",
    repository="https://github.com/hdmf-dev/hdmf-common-schema",
    path=Path("common/namespace.yaml")
)

# Registry of built-in namespace repos, keyed by short namespace name
DEFAULT_REPOS = {
    repo.name: repo for repo in [NWB_CORE_REPO, HDMF_COMMON_REPO]
}
class GitError(OSError):
    # Raised when a git subprocess invocation exits nonzero
    pass
class GitRepo:
"""
Manage a temporary git repository that provides the NWB yaml files
"""
def __init__(self, namespace:NamespaceRepo, commit:str|None=None):
self._temp_directory = None
self.namespace = namespace
self.commit = commit
def _git_call(self, *args) -> subprocess.CompletedProcess:
res = subprocess.run(
['git', '-C', self.temp_directory, *args],
capture_output=True
)
if res.returncode != 0:
raise GitError(f'Git call did not complete successfully.\n---\nCall: {args}\nResult: {res.stderr}')
return res
@property
def temp_directory(self) -> Path:
"""
Temporary directory where this repository will be cloned to
"""
if self._temp_directory is None:
self._temp_directory = Path(tempfile.gettempdir()) / f'nwb_linkml__{self.namespace.name}'
if self._temp_directory.exists():
warnings.warn(f'Temporary directory already exists! {self._temp_directory}')
else:
self._temp_directory.mkdir(parents=True)
return self._temp_directory
@property
def remote(self) -> str:
"""
URL for "origin" remote
"""
res = self._git_call('remote', 'get-url', 'origin')
return res.stdout.decode('utf-8').strip()
@property
def active_commit(self) -> str:
"""
Currently checked out commit
"""
res = self._git_call('rev-parse', 'HEAD')
commit = res.stdout.decode('utf-8').strip()
return commit
@property
def namespace_file(self) -> Path:
"""
Local path to the indicated namespace file.
"""
return self.temp_directory / self.namespace.path
def check(self) -> bool:
"""
Check if the repository is already cloned and checked out
Returns:
(bool) True if present, False if not
"""
if not any(self.temp_directory.iterdir()):
# directory is empty
return False
try:
# check our commit, this also checks if we're a git repo
if self.active_commit != self.commit and self.commit is not None:
warnings.warn('At wrong commit')
return False
except GitError:
return False
# Check that the remote matches
if self.remote.strip('.git') != self.namespace.repository:
warnings.warn('Repository exists, but has the wrong remote URL')
return False
# otherwise we're good
return True
def cleanup(self):
"""
Delete contents of temporary directory
"""
if not str(self.temp_directory).startswith(tempfile.gettempdir()):
warnings.warn('Temp directory is outside of the system temp dir, not deleting in case this has been changed by mistake')
self._temp_directory = None
return
shutil.rmtree(str(self.temp_directory))
self._temp_directory = None
def clone(self, force:bool=False):
"""
Clone the repository into the temporary directory
Args:
force (bool): If files are present in the temp directory, delete them
Raises:
:class:`.GitError` - if the repository can't be cloned
"""
if any(self.temp_directory.iterdir()):
if force:
self.cleanup()
else:
if not self.check():
warnings.warn('Destination directory is not empty and does not pass checks for correctness! pass force to overwrite')
return
res = subprocess.run(['git', 'clone', str(self.namespace.repository), str(self.temp_directory)])
if res.returncode != 0:
raise GitError(f'Could not clone repository:\n{res.stderr}')

3850
poetry.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,17 +1,21 @@
[tool.poetry]
name = "translate-nwb"
name = "nwb_linkml"
version = "0.1.0"
description = "Translating NWB schema language to LinkML"
authors = ["sneakers-the-rat <JLSaunders987@gmail.com>"]
license = "GPL-3.0"
readme = "README.md"
packages = [{include = "translate_nwb"}]
packages = [
{include = "nwb_linkml"},
# {include = "nwb_schema_language", from="nwb-schema-language/src"}
]
[tool.poetry.dependencies]
python = "^3.9"
schema-automator = "^0.3.0"
pydantic = "<2.0"
python = "^3.11"
pyyaml = "^6.0"
linkml-runtime = "^1.5.6"
nwb_schema_language = { path = './nwb-schema-language', develop = true }
pydantic = "<2"
[build-system]