{
	"$schema": "https://json-schema.org/draft/2019-09/schema",
	"$id": "https://github.com/kit-data-manager/ML-Basic-Schema/edit/main/MLdata-Basic-Schema.json",
	"title": "ML Data Basic Schema",
	"description": "Information about a set of scientific training data for applying it to machine-learning. This schema servers as a basic for structuring this information and can be extended.",
	"definitions": {
		"checksum": {
			"type": "string"
		},
		"termDefinition": {
			"description": "Provides information about the human-readable term for a feature or a label, which is optimally defined in a vocabulary. This would support an automated re-featuring or -labeling in case the data needs to be harmonized with other training data. Additional properties could be implemneted to specify for example pre-prpcessing steps for the features and labels.",
			"type": "object",
			"required": [
				"term",
				"termValueDataType"
			],
			"properties": {
				"term": {
					"type": "string"
				},
				"definition": {
					"type": "string",
					"format": "uri"
				},
				"termValueDataType": {
					"description": "Defines the data type of the feature or label value, on which the data preprocessing steps depend.",
					"type": [
						"integer",
						"number",
						"string"
					]
				}
			},
			"additionalProperties": true
		},
		"annotation": {
			"type": "object",
			"properties": {
				"annotationFileRef": {
					"type": "string",
					"format": "uri"
				},
				"annotationChecksum": {
					"$ref": "#/definitions/checksum"
				}
			}
		},
		"tag": {
			"type": "string"
		},
		"externalLink": {
			"type": "string",
			"format": "uri"
		}
	},
	"featureProperties": {
		"description": "The input features of the data for the machine-learning algorithm. Properties will depend on the particular data.",
		"type": "array",
		"items": {
			"type": "object",
			"required": [
				"featureTerm",
				"correspondingResource"
			],
			"properties": {
				"featureTerm": {
					"$ref": "#/definitions/termDefinition"
				},
				"correspondingResource": {
					"type": "string",
					"enum": [
						{
							"$ref": "#/definitions/externalLink"
						},
						{
							"$ref": "#/definitions/checksum"
						}
					]
				}
			},
			"additionalProperties": true
		}
	},
	"labelProperties": {
		"description": "The features that represent the output variables (i.e. labels) and will be predicted by the input features.",
		"type": "array",
		"items": {
			"type": "object",
			"required": [
				"labelDescription",
				"labelTerm",
				"correspondingResource"
			],
			"properties": {
				"labelDescription": {
					"type": "string"
				},
				"labelTerm": {
					"$ref": "#/definitions/termDefinition"
				},
				"dataType": {
					"type": "string",
					"enum": ["image", "text", "audio"]
				},
				"labelType": {
					"type": [
						"string",
						"object"
					],
					"enum": [
						{
							"$ref": "#/definitions/annotation"
						},
						{
							"$ref": "#/definitions/tag"
						}
					]
				},
				"correspondingResource": {
					"type": "string",
					"enum": [
						{
							"$ref": "#/definitions/externalLink"
						},
						{
							"$ref": "#/definitions/checksum"
						}
					]
				},
				"additionalProperties": true
			}
		}
	}
}