progress with using pydantic models

This commit is contained in:
sneakers-the-rat 2023-08-18 01:11:14 -07:00
parent e8adb4f88c
commit 63ccd800b3
59 changed files with 1041 additions and 3575 deletions

View file

@ -106,6 +106,8 @@ gen-examples:
gen-project: $(PYMODEL)
$(RUN) gen-project ${GEN_PARGS} -d $(DEST) $(SOURCE_SCHEMA_PATH) && mv $(DEST)/*.py $(PYMODEL)
gen-pydantic: $(PYMODEL)
$(RUN) gen-pydantic $(SOURCE_SCHEMA_PATH) --pydantic_version 1 > $(PYMODEL)/nwb_schema_pydantic.py
test: test-schema test-python test-examples

View file

@ -3,5 +3,5 @@ name: nwb_schema_language
author: Jonny Saunders <j@nny.fyi>
description: Translation of the nwb-schema-language to LinkML
source_schema_path: src/nwb_schema_language/schema/nwb_schema_language.yaml
google_sheet_id: 1wVoaiFg47aT9YWNeRfTZ8tYHN8s8PAuDx5i2HUcDpvQ
google_sheet_tabs: personinfo enums
#google_sheet_id: 1wVoaiFg47aT9YWNeRfTZ8tYHN8s8PAuDx5i2HUcDpvQ
#google_sheet_tabs: personinfo enums

View file

@ -24,6 +24,8 @@ generator_args:
mergeimports: true
python:
mergeimports: true
pydantic:
mergeimports: true
prefixmap:
mergeimports: true
proto:

View file

@ -38,6 +38,11 @@ type Dataset implements DtypeMixin, NamingMixin
dtype: String
}
type Datasets
{
datasets: [Dataset]
}
interface DtypeMixin
{
dtype: String
@ -58,6 +63,11 @@ type Group implements NamingMixin
links: [Link]
}
type Groups
{
groups: [Group]
}
type Link
{
name: String
@ -97,8 +107,8 @@ type Schema
{
source: String
namespace: String
doc: String!
title: String
neurodataTypes: [String]
doc: String
}

View file

@ -1,7 +1,7 @@
{
"comments": {
"description": "Auto generated by LinkML jsonld context generator",
"generation_date": "2023-08-16T23:21:36",
"generation_date": "2023-08-18T00:36:53",
"source": "nwb_schema_language.yaml"
},
"@context": {

View file

@ -457,7 +457,6 @@
"owner": "CompoundDtype",
"domain_of": [
"Namespace",
"Schema",
"Group",
"Attribute",
"Link",
@ -635,6 +634,7 @@
],
"range": "Namespace",
"inlined": true,
"inlined_as_list": true,
"@type": "SlotDefinition"
},
{
@ -791,12 +791,14 @@
"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language",
"slot_uri": "https://w3id.org/p2p_ld/nwb-schema-language/datasets",
"multivalued": true,
"owner": "Group",
"owner": "Datasets",
"domain_of": [
"Group"
"Group",
"Datasets"
],
"range": "Dataset",
"inlined": true,
"inlined_as_list": true,
"@type": "SlotDefinition"
},
{
@ -805,12 +807,14 @@
"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language",
"slot_uri": "https://w3id.org/p2p_ld/nwb-schema-language/groups",
"multivalued": true,
"owner": "Group",
"owner": "Groups",
"domain_of": [
"Group"
"Group",
"Groups"
],
"range": "Group",
"inlined": true,
"inlined_as_list": true,
"@type": "SlotDefinition"
},
{
@ -838,7 +842,7 @@
"DtypeMixin"
],
"range": "string",
"exactly_one_of": [
"any_of": [
{
"range": "FlatDtype",
"@type": "AnonymousSlotExpression"
@ -981,6 +985,18 @@
"range": "reftype_options",
"@type": "SlotDefinition"
},
{
"name": "schema__doc",
"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language",
"slot_uri": "https://w3id.org/p2p_ld/nwb-schema-language/doc",
"alias": "doc",
"owner": "Schema",
"domain_of": [
"Schema"
],
"range": "string",
"@type": "SlotDefinition"
},
{
"name": "Namespace_name",
"definition_uri": "https://w3id.org/p2p_ld/nwb-schema-language/name",
@ -1004,25 +1020,6 @@
},
"@type": "SlotDefinition"
},
{
"name": "Schema_doc",
"definition_uri": "https://w3id.org/p2p_ld/nwb-schema-language/doc",
"description": "Description of corresponding object.",
"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language",
"is_a": "doc",
"domain": "Schema",
"slot_uri": "https://w3id.org/p2p_ld/nwb-schema-language/doc",
"alias": "doc",
"owner": "Schema",
"domain_of": [
"Schema"
],
"is_usage_slot": true,
"usage_slot_name": "doc",
"range": "string",
"required": true,
"@type": "SlotDefinition"
},
{
"name": "Attribute_name",
"definition_uri": "https://w3id.org/p2p_ld/nwb-schema-language/name",
@ -1085,7 +1082,7 @@
"usage_slot_name": "dtype",
"range": "FlatDtype",
"required": true,
"exactly_one_of": [
"any_of": [
{
"range": "FlatDtype",
"@type": "AnonymousSlotExpression"
@ -1139,11 +1136,17 @@
"slots": [
"source",
"namespace",
"Schema_doc",
"title",
"neurodata_types"
"neurodata_types",
"schema__doc"
],
"slot_usage": {},
"attributes": [
{
"name": "doc",
"@type": "SlotDefinition"
}
],
"class_uri": "https://w3id.org/p2p_ld/nwb-schema-language/Schema",
"rules": [
{
@ -1287,6 +1290,17 @@
"class_uri": "https://w3id.org/p2p_ld/nwb-schema-language/Group",
"@type": "ClassDefinition"
},
{
"name": "Groups",
"definition_uri": "https://w3id.org/p2p_ld/nwb-schema-language/Groups",
"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language",
"slots": [
"groups"
],
"slot_usage": {},
"class_uri": "https://w3id.org/p2p_ld/nwb-schema-language/Groups",
"@type": "ClassDefinition"
},
{
"name": "Attribute",
"definition_uri": "https://w3id.org/p2p_ld/nwb-schema-language/Attribute",
@ -1349,6 +1363,17 @@
"class_uri": "https://w3id.org/p2p_ld/nwb-schema-language/Dataset",
"@type": "ClassDefinition"
},
{
"name": "Datasets",
"definition_uri": "https://w3id.org/p2p_ld/nwb-schema-language/Datasets",
"from_schema": "https://w3id.org/p2p_ld/nwb-schema-language",
"slots": [
"datasets"
],
"slot_usage": {},
"class_uri": "https://w3id.org/p2p_ld/nwb-schema-language/Datasets",
"@type": "ClassDefinition"
},
{
"name": "ReferenceDtype",
"definition_uri": "https://w3id.org/p2p_ld/nwb-schema-language/ReferenceDtype",
@ -1491,9 +1516,9 @@
],
"metamodel_version": "1.7.0",
"source_file": "nwb_schema_language.yaml",
"source_file_date": "2023-08-16T23:21:07",
"source_file_size": 10816,
"generation_date": "2023-08-16T23:21:36",
"source_file_date": "2023-08-18T00:28:45",
"source_file_size": 10952,
"generation_date": "2023-08-18T00:36:53",
"settings": [
{
"setting_key": "email",

View file

@ -25,7 +25,7 @@
"type": "string"
},
"dtype": {
"oneOf": [
"anyOf": [
{
"$ref": "#/$defs/FlatDtype"
},
@ -123,7 +123,7 @@
"type": "string"
},
"dtype": {
"oneOf": [
"anyOf": [
{
"$ref": "#/$defs/FlatDtype"
},
@ -186,6 +186,20 @@
"title": "Dataset",
"type": "object"
},
"Datasets": {
"additionalProperties": false,
"description": "",
"properties": {
"datasets": {
"items": {
"$ref": "#/$defs/Dataset"
},
"type": "array"
}
},
"title": "Datasets",
"type": "object"
},
"FlatDtype": {
"description": "",
"enum": [
@ -283,6 +297,20 @@
"title": "Group",
"type": "object"
},
"Groups": {
"additionalProperties": false,
"description": "",
"properties": {
"groups": {
"items": {
"$ref": "#/$defs/Group"
},
"type": "array"
}
},
"title": "Groups",
"type": "object"
},
"Link": {
"additionalProperties": false,
"description": "",
@ -526,7 +554,6 @@
"description": "",
"properties": {
"doc": {
"description": "Description of corresponding object.",
"type": "string"
},
"namespace": {

View file

@ -31,6 +31,10 @@ message Dataset
repeated attribute attributes = 0
string dtype = 0
}
message Datasets
{
repeated dataset datasets = 0
}
message Group
{
string neurodataTypeDef = 0
@ -45,6 +49,10 @@ message Group
repeated group groups = 0
repeated link links = 0
}
message Groups
{
repeated group groups = 0
}
message Link
{
string name = 0
@ -76,7 +84,7 @@ message Schema
{
string source = 0
string namespace = 0
string doc = 0
string title = 0
repeated string neurodataTypes = 0
string doc = 0
}

View file

@ -97,6 +97,12 @@ linkml:Sparqlpath xsd:string
)
}
<Datasets> CLOSED {
( $<Datasets_tes> <datasets> @<Dataset> * ;
rdf:type [ <Datasets> ] ?
)
}
<DtypeMixin> {
( $<DtypeMixin_tes> <dtype> @linkml:String ? ;
rdf:type [ <DtypeMixin> ] ?
@ -122,6 +128,12 @@ linkml:Sparqlpath xsd:string
)
}
<Groups> CLOSED {
( $<Groups_tes> <groups> @<Group> * ;
rdf:type [ <Groups> ] ?
)
}
<Link> CLOSED {
( $<Link_tes> ( <name> @linkml:String ? ;
<doc> @linkml:String ;
@ -169,9 +181,9 @@ linkml:Sparqlpath xsd:string
<Schema> CLOSED {
( $<Schema_tes> ( <source> @linkml:String ? ;
<namespace> @linkml:String ? ;
<doc> @linkml:String ;
<title> @linkml:String ? ;
<neurodata_types> @linkml:String *
<neurodata_types> @linkml:String * ;
<doc> @linkml:String ?
) ;
rdf:type [ <Schema> ] ?
)

View file

@ -36,6 +36,11 @@ CREATE TABLE "Dataset" (
PRIMARY KEY (neurodata_type_def, neurodata_type_inc, name, default_name, dims, shape, value, default_value, doc, quantity, linkable, attributes, dtype)
);
CREATE TABLE "Datasets" (
datasets TEXT,
PRIMARY KEY (datasets)
);
CREATE TABLE "Group" (
neurodata_type_def TEXT,
neurodata_type_inc TEXT,
@ -51,6 +56,11 @@ CREATE TABLE "Group" (
PRIMARY KEY (neurodata_type_def, neurodata_type_inc, name, default_name, doc, quantity, linkable, attributes, datasets, groups, links)
);
CREATE TABLE "Groups" (
groups TEXT,
PRIMARY KEY (groups)
);
CREATE TABLE "Link" (
name TEXT,
doc TEXT NOT NULL,
@ -85,8 +95,8 @@ CREATE TABLE "ReferenceDtype" (
CREATE TABLE "Schema" (
source TEXT,
namespace TEXT,
doc TEXT NOT NULL,
title TEXT,
neurodata_types TEXT,
PRIMARY KEY (source, namespace, doc, title, neurodata_types)
doc TEXT,
PRIMARY KEY (source, namespace, title, neurodata_types, doc)
);

View file

@ -1,5 +1,5 @@
[tool.poetry]
name = "nwb_schema_linkml"
name = "nwb_schema_language"
version = "0.1.0"
description = "Translation of the nwb-schema-language to LinkML"
authors = ["Jonny Saunders <j@nny.fyi>"]

View file

@ -0,0 +1,9 @@
from .datamodel.nwb_schema_pydantic import Namespace, \
Namespaces, \
Schema, \
Group, \
Attribute, \
Link, \
Dataset, \
ReferenceDtype, \
CompoundDtype

View file

@ -1,5 +1,5 @@
# Auto generated from nwb_schema_language.yaml by pythongen.py version: 0.0.1
# Generation date: 2023-08-16T23:21:37
# Generation date: 2023-08-18T00:36:55
# Schema: nwb-schema-language
#
# id: https://w3id.org/p2p_ld/nwb-schema-language
@ -95,7 +95,6 @@ class Namespace(YAMLRoot):
if self.date is not None and not isinstance(self.date, XSDDate):
self.date = XSDDate(self.date)
print(self.schema)
if not isinstance(self.schema, list):
self.schema = [self.schema] if self.schema is not None else []
self.schema = [v if isinstance(v, Schema) else Schema(**as_dict(v)) for v in self.schema]
@ -115,7 +114,9 @@ class Namespaces(YAMLRoot):
namespaces: Optional[Union[Union[dict, Namespace], List[Union[dict, Namespace]]]] = empty_list()
def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
self._normalize_inlined_as_dict(slot_name="namespaces", slot_type=Namespace, key_name="doc", keyed=False)
if not isinstance(self.namespaces, list):
self.namespaces = [self.namespaces] if self.namespaces is not None else []
self.namespaces = [v if isinstance(v, Namespace) else Namespace(**as_dict(v)) for v in self.namespaces]
super().__post_init__(**kwargs)
@ -129,18 +130,13 @@ class Schema(YAMLRoot):
class_name: ClassVar[str] = "Schema"
class_model_uri: ClassVar[URIRef] = NWB_SCHEMA_LANGUAGE.Schema
doc: str = None
source: Optional[str] = None
namespace: Optional[str] = None
title: Optional[str] = None
neurodata_types: Optional[Union[str, List[str]]] = empty_list()
doc: Optional[str] = None
def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
if self._is_empty(self.doc):
self.MissingRequiredField("doc")
if not isinstance(self.doc, str):
self.doc = str(self.doc)
if self.source is not None and not isinstance(self.source, str):
self.source = str(self.source)
@ -154,6 +150,9 @@ class Schema(YAMLRoot):
self.neurodata_types = [self.neurodata_types] if self.neurodata_types is not None else []
self.neurodata_types = [v if isinstance(v, str) else str(v) for v in self.neurodata_types]
if self.doc is not None and not isinstance(self.doc, str):
self.doc = str(self.doc)
super().__post_init__(**kwargs)
@ -204,15 +203,38 @@ class Group(YAMLRoot):
self._normalize_inlined_as_dict(slot_name="attributes", slot_type=Attribute, key_name="name", keyed=False)
self._normalize_inlined_as_dict(slot_name="datasets", slot_type=Dataset, key_name="doc", keyed=False)
if not isinstance(self.datasets, list):
self.datasets = [self.datasets] if self.datasets is not None else []
self.datasets = [v if isinstance(v, Dataset) else Dataset(**as_dict(v)) for v in self.datasets]
self._normalize_inlined_as_dict(slot_name="groups", slot_type=Group, key_name="doc", keyed=False)
if not isinstance(self.groups, list):
self.groups = [self.groups] if self.groups is not None else []
self.groups = [v if isinstance(v, Group) else Group(**as_dict(v)) for v in self.groups]
self._normalize_inlined_as_dict(slot_name="links", slot_type=Link, key_name="doc", keyed=False)
super().__post_init__(**kwargs)
@dataclass
class Groups(YAMLRoot):
_inherited_slots: ClassVar[List[str]] = []
class_class_uri: ClassVar[URIRef] = NWB_SCHEMA_LANGUAGE.Groups
class_class_curie: ClassVar[str] = "nwb_schema_language:Groups"
class_name: ClassVar[str] = "Groups"
class_model_uri: ClassVar[URIRef] = NWB_SCHEMA_LANGUAGE.Groups
groups: Optional[Union[Union[dict, Group], List[Union[dict, Group]]]] = empty_list()
def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
if not isinstance(self.groups, list):
self.groups = [self.groups] if self.groups is not None else []
self.groups = [v if isinstance(v, Group) else Group(**as_dict(v)) for v in self.groups]
super().__post_init__(**kwargs)
@dataclass
class Attribute(YAMLRoot):
_inherited_slots: ClassVar[List[str]] = []
@ -356,6 +378,25 @@ class Dataset(YAMLRoot):
super().__post_init__(**kwargs)
@dataclass
class Datasets(YAMLRoot):
_inherited_slots: ClassVar[List[str]] = []
class_class_uri: ClassVar[URIRef] = NWB_SCHEMA_LANGUAGE.Datasets
class_class_curie: ClassVar[str] = "nwb_schema_language:Datasets"
class_name: ClassVar[str] = "Datasets"
class_model_uri: ClassVar[URIRef] = NWB_SCHEMA_LANGUAGE.Datasets
datasets: Optional[Union[Union[dict, Dataset], List[Union[dict, Dataset]]]] = empty_list()
def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
if not isinstance(self.datasets, list):
self.datasets = [self.datasets] if self.datasets is not None else []
self.datasets = [v if isinstance(v, Dataset) else Dataset(**as_dict(v)) for v in self.datasets]
super().__post_init__(**kwargs)
@dataclass
class ReferenceDtype(YAMLRoot):
_inherited_slots: ClassVar[List[str]] = []
@ -674,12 +715,12 @@ slots.target_type = Slot(uri=NWB_SCHEMA_LANGUAGE.target_type, name="target_type"
slots.reftype = Slot(uri=NWB_SCHEMA_LANGUAGE.reftype, name="reftype", curie=NWB_SCHEMA_LANGUAGE.curie('reftype'),
model_uri=NWB_SCHEMA_LANGUAGE.reftype, domain=None, range=Optional[Union[str, "ReftypeOptions"]])
slots.schema__doc = Slot(uri=NWB_SCHEMA_LANGUAGE.doc, name="schema__doc", curie=NWB_SCHEMA_LANGUAGE.curie('doc'),
model_uri=NWB_SCHEMA_LANGUAGE.schema__doc, domain=None, range=Optional[str])
slots.Namespace_name = Slot(uri=NWB_SCHEMA_LANGUAGE.name, name="Namespace_name", curie=NWB_SCHEMA_LANGUAGE.curie('name'),
model_uri=NWB_SCHEMA_LANGUAGE.Namespace_name, domain=Namespace, range=str)
slots.Schema_doc = Slot(uri=NWB_SCHEMA_LANGUAGE.doc, name="Schema_doc", curie=NWB_SCHEMA_LANGUAGE.curie('doc'),
model_uri=NWB_SCHEMA_LANGUAGE.Schema_doc, domain=Schema, range=str)
slots.Attribute_name = Slot(uri=NWB_SCHEMA_LANGUAGE.name, name="Attribute_name", curie=NWB_SCHEMA_LANGUAGE.curie('name'),
model_uri=NWB_SCHEMA_LANGUAGE.Attribute_name, domain=Attribute, range=str)

View file

@ -0,0 +1,241 @@
from __future__ import annotations
from datetime import datetime, date
from enum import Enum
from typing import List, Dict, Optional, Any, Union
from pydantic import BaseModel as BaseModel, Field
import sys
if sys.version_info >= (3, 8):
from typing import Literal
else:
from typing_extensions import Literal
metamodel_version = "None"
version = "None"
class WeakRefShimBaseModel(BaseModel):
__slots__ = '__weakref__'
class ConfiguredBaseModel(WeakRefShimBaseModel,
validate_assignment = True,
validate_all = True,
underscore_attrs_are_private = True,
extra = 'forbid',
arbitrary_types_allowed = True,
use_enum_values = True):
pass
class ReftypeOptions(str, Enum):
# Reference to another group or dataset of the given target_type
ref = "ref"
# Reference to another group or dataset of the given target_type
reference = "reference"
# Reference to another group or dataset of the given target_type
object = "object"
# Reference to a region (i.e. subset) of another dataset of the given target_type
region = "region"
class QuantityEnum(str, Enum):
# Zero or more instances, equivalent to zero_or_many
ASTERISK = "*"
# Zero or one instances, equivalent to zero_or_one
QUESTION_MARK = "?"
# One or more instances, equivalent to one_or_many
PLUS_SIGN = "+"
# Zero or more instances, equivalent to *
zero_or_many = "zero_or_many"
# One or more instances, equivalent to +
one_or_many = "one_or_many"
# Zero or one instances, equivalent to ?
zero_or_one = "zero_or_one"
class FlatDtype(str, Enum):
# single precision floating point (32 bit)
float = "float"
# single precision floating point (32 bit)
float32 = "float32"
# double precision floating point (64 bit)
double = "double"
# double precision floating point (64 bit)
float64 = "float64"
# signed 64 bit integer
long = "long"
# signed 64 bit integer
int64 = "int64"
# signed 32 bit integer
int = "int"
# signed 32 bit integer
int32 = "int32"
# signed 16 bit integer
int16 = "int16"
# signed 16 bit integer
short = "short"
# signed 8 bit integer
int8 = "int8"
# unsigned 32 bit integer
uint = "uint"
# unsigned 32 bit integer
uint32 = "uint32"
# unsigned 16 bit integer
uint16 = "uint16"
# unsigned 8 bit integer
uint8 = "uint8"
# unsigned 64 bit integer
uint64 = "uint64"
# any numeric type (i.e., any int, uint, float)
numeric = "numeric"
# 8-bit Unicode
text = "text"
# 8-bit Unicode
utf = "utf"
# 8-bit Unicode
utf8 = "utf8"
# 8-bit Unicode
utf_8 = "utf-8"
# ASCII text
ascii = "ascii"
# 8 bit integer with valid values 0 or 1
bool = "bool"
# ISO 8601 datetime string
isodatetime = "isodatetime"
class Namespace(ConfiguredBaseModel):
doc: str = Field(..., description="""Description of corresponding object.""")
name: str = Field(...)
full_name: Optional[str] = Field(None, description="""Optional string with extended full name for the namespace.""")
version: str = Field(...)
date: Optional[date] = Field(None, description="""Date that a namespace was last modified or released""")
author: List[str] = Field(default_factory=list, description="""List of strings with the names of the authors of the namespace.""")
contact: List[str] = Field(default_factory=list, description="""List of strings with the contact information for the authors. Ordering of the contacts should match the ordering of the authors.""")
schema_: Optional[List[Schema]] = Field(alias="schema", default_factory=list, description="""List of the schema to be included in this namespace.""")
class Namespaces(ConfiguredBaseModel):
namespaces: Optional[List[Namespace]] = Field(default_factory=list)
class Schema(ConfiguredBaseModel):
source: Optional[str] = Field(None, description="""describes the name of the YAML (or JSON) file with the schema specification. The schema files should be located in the same folder as the namespace file.""")
namespace: Optional[str] = Field(None, description="""describes a named reference to another namespace. In contrast to source, this is a reference by name to a known namespace (i.e., the namespace is resolved during the build and must point to an already existing namespace). This mechanism is used to allow, e.g., extension of a core namespace (here the NWB core namespace) without requiring hard paths to the files describing the core namespace. Either source or namespace must be specified, but not both.""")
title: Optional[str] = Field(None, description="""a descriptive title for a file for documentation purposes.""")
neurodata_types: Optional[List[Union[Dataset, Group]]] = Field(default_factory=list, description="""an optional list of strings indicating which data types should be included from the given specification source or namespace. The default is null indicating that all data types should be included.""")
doc: Optional[str] = Field(None)
class Groups(ConfiguredBaseModel):
groups: Optional[List[Group]] = Field(default_factory=list)
class Link(ConfiguredBaseModel):
name: Optional[str] = Field(None)
doc: str = Field(..., description="""Description of corresponding object.""")
target_type: str = Field(..., description="""Describes the neurodata_type of the target that the reference points to""")
quantity: Optional[Union[QuantityEnum, int]] = Field(1)
class Datasets(ConfiguredBaseModel):
datasets: Optional[List[Dataset]] = Field(default_factory=list)
class ReferenceDtype(ConfiguredBaseModel):
target_type: str = Field(..., description="""Describes the neurodata_type of the target that the reference points to""")
reftype: Optional[ReftypeOptions] = Field(None, description="""describes the kind of reference""")
class CompoundDtype(ConfiguredBaseModel):
name: str = Field(...)
doc: str = Field(..., description="""Description of corresponding object.""")
dtype: Union[FlatDtype, ReferenceDtype] = Field(...)
class DtypeMixin(ConfiguredBaseModel):
dtype: Optional[List[Union[CompoundDtype, FlatDtype, ReferenceDtype]]] = Field(default_factory=list)
class Attribute(DtypeMixin):
name: str = Field(...)
dims: Optional[List[str]] = Field(default_factory=list)
shape: Optional[List[str]] = Field(default_factory=list)
value: Optional[Any] = Field(None, description="""Optional constant, fixed value for the attribute.""")
default_value: Optional[Any] = Field(None, description="""Optional default value for variable-valued attributes.""")
doc: str = Field(..., description="""Description of corresponding object.""")
required: Optional[bool] = Field(True, description="""Optional boolean key describing whether the attribute is required. Default value is True.""")
dtype: Optional[List[Union[CompoundDtype, FlatDtype, ReferenceDtype]]] = Field(default_factory=list)
class NamingMixin(ConfiguredBaseModel):
"""
require either neurodata_type_def or name to be present
"""
None
class Group(NamingMixin):
neurodata_type_def: Optional[str] = Field(None)
neurodata_type_inc: Optional[str] = Field(None)
name: Optional[str] = Field(None)
default_name: Optional[str] = Field(None)
doc: str = Field(..., description="""Description of corresponding object.""")
quantity: Optional[Union[QuantityEnum, int]] = Field(1)
linkable: Optional[bool] = Field(None)
attributes: Optional[List[Attribute]] = Field(default_factory=list)
datasets: Optional[List[Dataset]] = Field(default_factory=list)
groups: Optional[List[Group]] = Field(default_factory=list)
links: Optional[List[Link]] = Field(default_factory=list)
class Dataset(NamingMixin, DtypeMixin):
neurodata_type_def: Optional[str] = Field(None)
neurodata_type_inc: Optional[str] = Field(None)
name: Optional[str] = Field(None)
default_name: Optional[str] = Field(None)
dims: Optional[List[str]] = Field(default_factory=list)
shape: Optional[List[str]] = Field(default_factory=list)
value: Optional[Any] = Field(None, description="""Optional constant, fixed value for the attribute.""")
default_value: Optional[Any] = Field(None, description="""Optional default value for variable-valued attributes.""")
doc: str = Field(..., description="""Description of corresponding object.""")
quantity: Optional[Union[QuantityEnum, int]] = Field(1)
linkable: Optional[bool] = Field(None)
attributes: Optional[List[Attribute]] = Field(default_factory=list)
dtype: Optional[List[Union[CompoundDtype, FlatDtype, ReferenceDtype]]] = Field(default_factory=list)
# Update forward refs
# see https://pydantic-docs.helpmanual.io/usage/postponed_annotations/
Namespace.update_forward_refs()
Namespaces.update_forward_refs()
Schema.update_forward_refs()
Groups.update_forward_refs()
Link.update_forward_refs()
Datasets.update_forward_refs()
ReferenceDtype.update_forward_refs()
CompoundDtype.update_forward_refs()
DtypeMixin.update_forward_refs()
Attribute.update_forward_refs()
NamingMixin.update_forward_refs()
Group.update_forward_refs()
Dataset.update_forward_refs()

View file

@ -46,10 +46,9 @@ classes:
slots:
- source
- namespace
- doc
- title
- neurodata_types
slot_usage:
attributes:
doc:
required: false
rules:
@ -82,6 +81,10 @@ classes:
- groups
- links
Groups:
slots:
- groups
Attribute:
mixins:
- DtypeMixin
@ -122,6 +125,10 @@ classes:
- linkable
- attributes
Datasets:
slots:
- datasets
ReferenceDtype:
slots:
- target_type
@ -137,7 +144,10 @@ classes:
required: true
dtype:
required: true
range: FlatDtype
any_of:
- range: ReferenceDtype
- range: FlatDtype
multivalued: false
DtypeMixin:
mixin: true
@ -216,6 +226,7 @@ slots:
namespaces:
multivalued: true
range: Namespace
inlined_as_list: true
neurodata_types:
multivalued: true
any_of:
@ -256,19 +267,22 @@ slots:
datasets:
range: Dataset
multivalued: true
inlined_as_list: true
groups:
range: Group
multivalued: true
inlined_as_list: true
links:
range: Link
multivalued: true
# attributes
dtype:
exactly_one_of:
any_of:
- range: FlatDtype
- range: CompoundDtype
- range: ReferenceDtype
multivalued: true
dims:
multivalued: true
range: string
@ -298,9 +312,9 @@ slots:
description: Describes the neurodata_type of the target that the reference points
to
required: true
any_of:
- range: Dataset
- range: Group
# any_of:
# - range: Dataset
# - range: Group
reftype:
description: describes the kind of reference
range: reftype_options

49
nwb_linkml/io.py Normal file
View file

@ -0,0 +1,49 @@
"""
Loading/saving NWB Schema yaml files
"""
from pathlib import Path
from typing import TypedDict, List
from pprint import pprint
from linkml_runtime.loaders import yaml_loader
import yaml
from nwb_schema_language import Namespaces, Group, Dataset
from nwb_linkml.namespaces import GitRepo, NamespaceRepo
def load_namespaces(path:Path|NamespaceRepo) -> Namespaces:
    """
    Load a Namespaces model from a namespace yaml file.

    Accepts either a local file path or a :class:`.NamespaceRepo`, in which
    case the file is first provided by cloning the namespace's git repository.
    """
    source_file = path.provide_from_git() if isinstance(path, NamespaceRepo) else path
    return yaml_loader.load(str(source_file), target_class=Namespaces)
class SchemaFile(TypedDict):
    # Expected top-level shape of a single NWB schema yaml file:
    # a list of dataset specs and a list of group specs.
    datasets: List[Dataset]
    groups: List[Group]
def load_schema_file(path:Path) -> List[Dataset | Group]:
    # Load one NWB schema yaml file and parse its entries into
    # nwb_schema_language models.
    #
    # NOTE(review): despite the return annotation, only 'datasets' entries
    # are currently parsed — 'groups' parsing is commented out below,
    # presumably because Group validation still fails on real schema files.
    # Confirm before relying on groups being present in the result.
    with open(path, 'r') as yfile:
        source = yaml.safe_load(yfile)
    schema = []
    for dataset in source.get('datasets', []):
        try:
            schema.append(Dataset(**dataset))
        except Exception as e:
            # Debugging aid: dump the offending source dict before re-raising
            pprint(dataset)
            raise e
    #schema.extend([Dataset(**dataset) for dataset in source.get('datasets', [])])
    #schema.extend([Group(**group) for group in source.get('groups', [])])
    return schema

172
nwb_linkml/namespaces.py Normal file
View file

@ -0,0 +1,172 @@
"""
Define and manage NWB namespaces in external repositories
"""
import warnings
from pathlib import Path
import tempfile
import subprocess
import shutil
from pydantic import BaseModel, HttpUrl, FilePath, DirectoryPath, Field
class NamespaceRepo(BaseModel):
    """
    Definition of one NWB namespaces file to import from a git repository
    """
    name: str = Field(description="Short name used to refer to this namespace (usually equivalent to the name field within a namespaces NWB list)")
    repository: HttpUrl | DirectoryPath = Field(description="URL or local absolute path to the root repository")
    path: Path = Field(description="Relative path from the repository root to the namespace file")

    def provide_from_git(self, commit:str|None=None) -> Path:
        # Clone (or reuse) this namespace's repository into a temp
        # directory and return the local path to the namespace file.
        git = GitRepo(self, commit)
        git.clone()
        return git.namespace_file
# Constant namespaces
# Canonical upstream locations of the core NWB and hdmf-common schema.
NWB_CORE_REPO = NamespaceRepo(
    name="core",
    repository="https://github.com/NeurodataWithoutBorders/nwb-schema",
    path=Path("core/nwb.namespace.yaml")
)

HDMF_COMMON_REPO = NamespaceRepo(
    name="hdmf-common",
    repository="https://github.com/hdmf-dev/hdmf-common-schema",
    path=Path("common/namespace.yaml")
)

# Registry of built-in namespace repos, keyed by short namespace name
DEFAULT_REPOS = {
    repo.name: repo for repo in [NWB_CORE_REPO, HDMF_COMMON_REPO]
}
class GitError(OSError):
    # Raised when a git subprocess invocation exits nonzero
    pass
class GitRepo:
"""
Manage a temporary git repository that provides the NWB yaml files
"""
def __init__(self, namespace:NamespaceRepo, commit:str|None=None):
self._temp_directory = None
self.namespace = namespace
self.commit = commit
def _git_call(self, *args) -> subprocess.CompletedProcess:
res = subprocess.run(
['git', '-C', self.temp_directory, *args],
capture_output=True
)
if res.returncode != 0:
raise GitError(f'Git call did not complete successfully.\n---\nCall: {args}\nResult: {res.stderr}')
return res
@property
def temp_directory(self) -> Path:
"""
Temporary directory where this repository will be cloned to
"""
if self._temp_directory is None:
self._temp_directory = Path(tempfile.gettempdir()) / f'nwb_linkml__{self.namespace.name}'
if self._temp_directory.exists():
warnings.warn(f'Temporary directory already exists! {self._temp_directory}')
else:
self._temp_directory.mkdir(parents=True)
return self._temp_directory
@property
def remote(self) -> str:
"""
URL for "origin" remote
"""
res = self._git_call('remote', 'get-url', 'origin')
return res.stdout.decode('utf-8').strip()
@property
def active_commit(self) -> str:
"""
Currently checked out commit
"""
res = self._git_call('rev-parse', 'HEAD')
commit = res.stdout.decode('utf-8').strip()
return commit
@property
def namespace_file(self) -> Path:
"""
Local path to the indicated namespace file.
"""
return self.temp_directory / self.namespace.path
def check(self) -> bool:
"""
Check if the repository is already cloned and checked out
Returns:
(bool) True if present, False if not
"""
if not any(self.temp_directory.iterdir()):
# directory is empty
return False
try:
# check our commit, this also checks if we're a git repo
if self.active_commit != self.commit and self.commit is not None:
warnings.warn('At wrong commit')
return False
except GitError:
return False
# Check that the remote matches
if self.remote.strip('.git') != self.namespace.repository:
warnings.warn('Repository exists, but has the wrong remote URL')
return False
# otherwise we're good
return True
def cleanup(self):
"""
Delete contents of temporary directory
"""
if not str(self.temp_directory).startswith(tempfile.gettempdir()):
warnings.warn('Temp directory is outside of the system temp dir, not deleting in case this has been changed by mistake')
self._temp_directory = None
return
shutil.rmtree(str(self.temp_directory))
self._temp_directory = None
def clone(self, force:bool=False):
"""
Clone the repository into the temporary directory
Args:
force (bool): If files are present in the temp directory, delete them
Raises:
:class:`.GitError` - if the repository can't be cloned
"""
if any(self.temp_directory.iterdir()):
if force:
self.cleanup()
else:
if not self.check():
warnings.warn('Destination directory is not empty and does not pass checks for correctness! pass force to overwrite')
return
res = subprocess.run(['git', 'clone', str(self.namespace.repository), str(self.temp_directory)])
if res.returncode != 0:
raise GitError(f'Could not clone repository:\n{res.stderr}')

3850
poetry.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,17 +1,21 @@
[tool.poetry]
name = "translate-nwb"
name = "nwb_linkml"
version = "0.1.0"
description = "Translating NWB schema language to LinkML"
authors = ["sneakers-the-rat <JLSaunders987@gmail.com>"]
license = "GPL-3.0"
readme = "README.md"
packages = [{include = "translate_nwb"}]
packages = [
{include = "nwb_linkml"},
# {include = "nwb_schema_language", from="nwb-schema-language/src"}
]
[tool.poetry.dependencies]
python = "^3.9"
schema-automator = "^0.3.0"
pydantic = "<2.0"
python = "^3.11"
pyyaml = "^6.0"
linkml-runtime = "^1.5.6"
nwb_schema_language = { path = './nwb-schema-language', develop = true }
pydantic = "<2"
[build-system]