I am trying to create a JSON schema that accepts inputs from three sources (file, table, or Kafka message); at any given time, only one of them will receive data from the external source.
Issue :
I am using the "anyOf" keyword in the JSON schema, but when I test the negative scenario (removing one of the required fields from the input properties), it does not give any validation error, although ideally it should report that a required field is missing.
**Expectation:**
I need to accept input from any one of the sources (file/table/message). If the external party sends data in file format, the file portion of the JSON schema should be applied; similarly, if the data comes from a table or a Kafka message, the relevant portion of the JSON schema should be applied.
JSON SCHEMA CODE:
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"type": "object",
"title": "Job",
"properties": {
"unique_name": {
  "type": "string",
  "pattern": "^[a-zA-Z0-9_]+$"
},
"processing_instructions_file": {
"type": "string",
"examples": [
"/path/to/some/file/processing123.yaml"
]
},
"variables": {
"type": "array",
"additionalItems": true,
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"examples": [
"environment_prefix"
]
},
"value": {
"type": "string",
"examples": [
"DEV"
]
}
},
"additionalProperties": true,
"required": [
"name",
"value"
]
}
},
"inputs": {
"type": "array",
"additionalItems": true,
"items": {
"type": "object",
"properties": {
"is_stream": {
"type": "boolean",
"examples": [
true
],
"default": false
}
},
"additionalProperties": true,
"anyOf": [
{
"type": "object",
"properties": {
"file": {
"type": "object",
"properties": {
"folder_structure": {
"type": "string",
"examples": [
"$environment_prefix\\$$yyyy-MMM-dd"
]
},
"naming_convention": {
"type": "string",
"examples": [
"STATIC_FILE_NAME:$$yyyy_MMM-dd"
]
},
"format type": {
"type": "string",
"enum": [
"CSV",
"EBCDIC"
],
"examples": [
"CSV"
]
},
"format_options": {
"type": "array",
"additionalItems": true,
"items": {
"type": "object",
"properties": {
"option_name": {
"type": "string",
"examples": [
"generate_record_id"
]
},
"option_value": {
"type": [
"string",
"boolean"
],
"examples": [
true
]
}
},
"additionalProperties": true,
"required": [
"option_name",
"option_value"
]
}
}
},
"additionalProperties": true,
"if": {
"properties": {
"format type": {
"type": "string",
"enum": [
"CSV",
"EBCDIC"
],
"examples": [
"CSV"
],
"const": "EBCDIC"
}
},
"required": [
"format type"
]
},
"then": {
"properties": {
"schema_location": {
"type": "string",
"examples": [
"some_path"
]
}
},
"required": [
"schema_location"
]
},
"else": {
"properties": {
"schema_location": {
"type": "string",
"examples": [
"some_path"
]
}
}
},
"required": [
"folder_structure",
"naming_convention",
"format type",
"format_options"
]
}
},
"additionalProperties": true
},
{
"type": "object",
"properties": {
"table": {
"type": "object",
"properties": {
"name": {
"type": "string",
"examples": [
"tbl_employees"
]
},
"jdbc_url": {
"type": "string",
"examples": [
"orcl#thinXXX"
]
},
"jdbc_library_jar": {
"type": "string",
"examples": [
"orcl.jar"
]
},
"schema": {
"type": "string",
"examples": [
"xxxxx"
]
},
"info_date_from_column_name": {
"type": "string",
"examples": [
"ASOF_DATE"
]
},
"info_date_to_column_name": {
"type": "string",
"examples": [
"END_DATE"
]
},
"primary_key_column_name": {
"type": "array",
"additionalItems": true,
"items": {
"type": "string"
}
}
},
"additionalProperties": true,
"required": [
"name",
"jdbc_url",
"schema"
]
}
},
"additionalProperties": true
},
{
"type": "object",
"properties": {
"message": {
"type": "object",
"properties": {
"topic_name": {
"type": "string",
"examples": [
"XXXXXX"
]
},
"group_id": {
"type": "string",
"examples": [
"XXXXX"
]
},
"port": {
"type": "integer",
"examples": [
8081
]
},
"kafka_cluster_ip": {
"type": "string",
"examples": [
"1.1.1.1"
]
},
"kafka_cluster_schema_registry": {
"type": "string",
"examples": [
"1.1.1.1"
]
}
},
"additionalProperties": true,
"required": [
"topic_name",
"group_id",
"port",
"kafka_cluster_ip",
"kafka_cluster_schema_registry"
]
}
},
"additionalProperties": true
}
]
}
},
"outputs": {
"type": "array",
"additionalItems": true,
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"examples": [
"output_3"
]
},
"is_stream": {
"type": "boolean",
"examples": [
true
]
},
"spark_save_mode": {
"type": "string",
"examples": [
"append"
]
},
"file": {
"type": "object",
"properties": {
"root_path": {
"type": "string",
"examples": [
"some_path"
]
},
"folder_structure": {
"type": "string",
"examples": [
"$environment_prefix\\$$yyyy-MMM-dd"
]
},
"naming_convention": {
"type": "string",
"examples": [
"STATIC_FILE_NAME:$$yyyy_MMM-dd"
]
},
"partition_by": {
"type": "array",
"additionalItems": true,
"items": {
"type": "string"
}
},
"format": {
"type": "object",
"properties": {
"format_type": {
"type": "string",
"examples": [
"CSV"
]
},
"format_options": {
"type": "array",
"additionalItems": true,
"items": {
"type": "object",
"properties": {
"option_name": {
"type": "string",
"examples": [
"include_header"
]
},
"option_value": {
"type": [
"string",
"boolean"
],
"examples": [
true
]
}
},
"additionalProperties": true,
"required": [
"option_name",
"option_value"
]
}
}
},
"additionalProperties": true,
"required": [
"format_type",
"format_options"
]
}
},
"additionalProperties": true,
"required": [
"root_path",
"folder_structure",
"naming_convention",
"partition_by",
"format"
]
},
"mongo": {
"type": "object",
"properties": {
"mongo_spark_connector_options": {
"type": "array",
"additionalItems": true,
"items": {
"type": "object",
"properties": {
"option_name": {
"type": "string",
"examples": [
"format"
]
},
"option_value": {
"type": "string"
}
},
"additionalProperties": true,
"required": [
"option_name",
"option_value"
]
}
}
},
"additionalProperties": true,
"required": [
"mongo_spark_connector_options"
]
},
"table": {
"type": "object",
"properties": {
"name": {
"type": "string",
"examples": [
"tbl_employees"
]
},
"jdbc_url": {
"type": "string",
"examples": [
"orcl#thin:XXX"
]
},
"jdbc_library_jar": {
"type": "string",
"examples": [
"orcl.jar"
]
},
"schema": {
"type": "string",
"examples": [
"xxxxx"
]
},
"info_date_from_column_name": {
"type": "string",
"examples": [
"ASOF_DATE"
]
},
"info_date_to_column_name": {
"type": "string",
"examples": [
"END_DATE"
]
},
"primary_key_column_name": {
"type": "array",
"additionalItems": true,
"items": {
"type": "string"
}
}
},
"additionalProperties": true,
"required": [
"name",
"jdbc_url",
"jdbc_library_jar",
"schema",
"info_date_from_column_name",
"info_date_to_column_name",
"primary_key_column_name"
]
},
"message": {
"type": "object",
"properties": {
"topic_name": {
"type": "string",
"examples": [
"XXXXXX"
]
},
"group_id": {
"type": "string",
"examples": [
"XXXXX"
]
},
"port": {
"type": "integer",
"examples": [
8081
]
},
"kafka_cluster_ip": {
"type": "string",
"examples": [
"1.1.1.1"
]
},
"kafka_cluster_schema_registry": {
"type": "string",
"examples": [
"1.1.1.1"
]
}
},
"additionalProperties": true,
"required": [
"topic_name",
"group_id",
"port",
"kafka_cluster_ip",
"kafka_cluster_schema_registry"
]
}
},
"additionalProperties": true,
"required": [
"name",
"is_stream"
]
}
}
},
"additionalProperties": true,
"required": [
"unique_name",
"processing_instructions_file",
"inputs",
"outputs"
]
}
JSON DATA
{
"unique_name": "aXeTGImcM6PrCalRpGjLigbj1puXXvK",
"processing_instructions_file": "/path/to/some/file/processing123.yaml",
"variables": [
{
"name": "environment_prefix",
"value": "DEV"
}
],
"inputs": [
{
"is_stream": false,
"file": {
"folder_structure": "$environment_prefix\\$$yyyy-MMM-dd",
"naming_convention": "STATIC_FILE_NAME:$$yyyy_MMM-dd",
"format type": "CSV",
"format_options": [
{
"option_name": "generate_record_id",
"option_value": "Lorem"
}
],
"schema_location": "some_path"
}
}
],
"outputs": [
{
"name": "output_3",
"is_stream": true,
"spark_save_mode": "append",
"file": {
"root_path": "some_path",
"folder_structure": "$environment_prefix\\$$yyyy-MMM-dd",
"naming_convention": "STATIC_FILE_NAME:$$yyyy_MMM-dd",
"partition_by": [
"Lorem"
],
"format": {
"format_type": "CSV",
"format_options": [
{
"option_name": "include_header",
"option_value": "Lorem"
}
]
}
},
"mongo": {
"mongo_spark_connector_options": [
{
"option_name": "format",
"option_value": "Lorem"
}
]
},
"table": {
"name": "tbl_employees",
"jdbc_url": "orcl#thin:XXX",
"jdbc_library_jar": "orcl.jar",
"schema": "xxxxx",
"info_date_from_column_name": "ASOF_DATE",
"info_date_to_column_name": "END_DATE",
"primary_key_column_name": [
"Lorem"
]
},
"message": {
"topic_name": "XXXXXX",
"group_id": "XXXXX",
"port": 8081,
"kafka_cluster_ip": "1.1.1.1",
"kafka_cluster_schema_registry": "1.1.1.1"
}
}
]
}
I can't build validation for a simple case:
if the "sources" field contains "OTHER" among its values, then "sourceOtherDescription" must be required.
Shall pass validation
{
"sources": ["RENTS"]
}
{
"sources": ["RENTS", "OTHER"],
"sourceOtherDescription": "other income"
}
This should not pass validation since sources contains "OTHER"
{
"sources": ["RENTS", "OTHER"]
}
This is the schema that I was able to produce, but it does not really work:
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "money-sources",
"title": "Money Sources",
"description": "Money Sources definitions",
"type": "object",
"required": ["sources"],
"properties": {
"sources": {
"type": "array",
"items": {
"type": "string",
"enum": [
"RENTS",
"MEMBER_FEES",
"PROFIT",
"SALES_SECURITIES",
"INTERNAL_GROUP_TRANSFERS",
"OTHER"
]
},
"uniqueItems": true
},
"sourceOtherDescription": { "type": "string", "minLength": 3}
},
"additionalProperties": false,
"oneOf": [
{
"properties": {
"sources": {
"type": "array",
"contains": {"const": "OTHER"}
}
},
"required": ["sourceOtherDescription"]
},
{
"properties": {
"sources": {
"type": "array",
"contains": {
"enum": [
"RENTS",
"MEMBER_FEES",
"PROFIT",
"SALES_SECURITIES"
]
}
}
}
}
, false
]
}
Using if-then it works for me this way:
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "$id": "money-sources",
  "title": "Money Sources",
  "description": "Money Sources definitions",
  "type": "object",
  "required": [ "sources" ],
  "properties": {
    "sources": {
      "type": "array",
      "items": {
        "type": "string",
        "enum": [
          "RENTS",
          "MEMBER_FEES",
          "PROFIT",
          "SALES_SECURITIES",
          "INTERNAL_GROUP_TRANSFERS",
          "OTHER"
        ]
      },
      "uniqueItems": true
    },
    "sourceOtherDescription": {
      "type": "string",
      "minLength": 3
    }
  },
  "additionalProperties": false,
  "if": {
    "properties": {
      "sources": {
        "type": "array",
        "contains": {
          "const": "OTHER"
        }
      }
    },
    "required": [ "sources" ]
  },
  "then": {
    "required": [ "sourceOtherDescription" ]
  }
}
I have following jsonschema:
{
"$schema": "http://json-schema.org/schema#",
"type": "object",
"properties": {
"abc": {
"type": "array",
"item": {
"type": "object",
"minItems": 1,
"properties": {
"a" : {"type": "string"},
"b" : {"type": "string"}
},
"required": [ "a", "b" ]
}
}
},
"required": [ "abc" ]
}
If I pass to validator following data:
{
"abc": [
{
},
{
}
]
}
the validator outputs no error, even though this data is incorrect.
You used "item" rather than "items".
Additionally, "minItems": 1 applies to arrays, so it needs to be moved up to the parent array schema ("abc").
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "type": "object",
  "required": ["abc"],
  "properties": {
    "abc": {
      "type": "array",
      "minItems": 1,
      "items": {
        "type": "object",
        "required": ["a", "b"],
        "properties": {
          "a": { "type": "string" },
          "b": { "type": "string" }
        }
      }
    }
  }
}
Checked and validated using https://jsonschema.dev
I have created a JSON schema following the draft v3 specifications. Schema looks like this:
{
"$schema": "http://json-schema.org/draft-03/schema#",
"additionalProperties": false,
"type": "object",
"properties": {
"ExecutionPlanList": {
"type": "array",
"items": [{
"type": "object",
"properties": {
"model": {
"required": true,
"properties": {
"featureList": {
"required": true,
"items": {
"properties": {
"featureName": {
"type": ["string", "null"]
},
"featureType": {
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"modelId": {
"required": true,
"type": "string"
}
},
"type": "object"
},
"cascadeSteps": {
"required": false,
"items": {
"properties": {
"binaryModel": {
"$ref": "#/properties/ExecutionPlanList/items/properties/model",
"required": true
},
"threshold": {
"required": true,
"default": "0.0",
"maximum": 100.0,
"type": "number"
},
"backupModel": {
"$ref": "#/properties/ExecutionPlanList/items/properties/model",
"required": true
}
}
},
"type": "array"
},
"marketplaceId": {
"required": true,
"type": "integer"
}
}
}]
}
},
"required": true
}
Essentially, ExecutionPlanList contains a list of entries, each with a model and cascadeSteps, and each cascade step contains two models and a number. So I'm trying to re-use the schema for model in cascadeSteps, but validation (https://www.jsonschemavalidator.net/) is failing with: Could not resolve schema reference '#/properties/ExecutionPlanList/items/properties/model'.
Would appreciate any pointers on what's wrong with this schema.
What about adding a "definitions" section and referring to it like this:
{
  "$schema": "http://json-schema.org/draft-03/schema#",
  "additionalProperties": false,
  "type": "object",
  "definitions": {
    "model": {
      "required": true,
      "properties": {
        "featureList": {
          "required": true,
          "items": {
            "properties": {
              "featureName": {
                "type": ["string", "null"]
              },
              "featureType": {
                "type": "string"
              }
            },
            "type": "object"
          },
          "type": "array"
        },
        "modelId": {
          "required": true,
          "type": "string"
        }
      },
      "type": "object"
    }
  },
  "properties": {
    "ExecutionPlanList": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "model": {
            "$ref": "#/definitions/model"
          },
          "cascadeSteps": {
            "required": false,
            "items": {
              "properties": {
                "binaryModel": {
                  "$ref": "#/definitions/model",
                  "required": true
                },
                "threshold": {
                  "required": true,
                  "default": 0.0,
                  "maximum": 100.0,
                  "type": "number"
                },
                "backupModel": {
                  "$ref": "#/definitions/model",
                  "required": true
                }
              }
            },
            "type": "array"
          },
          "marketplaceId": {
            "required": true,
            "type": "integer"
          }
        }
      }
    }
  },
  "required": true
}
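It should be '#/properties/ExecutionPlanList/items/0/properties/model', because "items" is written as an array there, so the path needs the index 0.
For the same reason ("items" as an array means positional validation), the schema validates only the first item, by the way.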
I have two possible JSON objects for one request:
{
"from": "string",
"to": "string",
"text": "string"
}
or
{
"number": "integer",
"text": "string"
}
In both cases the "text" property is optional. The other properties are required (either "number", or both "from" and "to").
What will be the correct JSON schema to validate this?
Here is another solution that I think is a bit more clear. The dependencies clause ensures that "from" and "to" always come as a pair. Then the oneOf clause can be really simple and avoid the not-required boilerplate.
{
  "type": "object",
  "properties": {
    "number": {
      "type": "integer"
    },
    "from": {
      "type": "string"
    },
    "to": {
      "type": "string"
    },
    "text": {
      "type": "string"
    }
  },
  "dependencies": {
    "to": [
      "from"
    ],
    "from": [
      "to"
    ]
  },
  "oneOf": [
    {
      "required": [
        "from"
      ]
    },
    {
      "required": [
        "number"
      ]
    }
  ]
}
I finally managed to build the correct schema.
{
  "type": "object",
  "oneOf": [
    { "$ref": "#/definitions/interval" },
    { "$ref": "#/definitions/top" }
  ],
  "definitions": {
    "interval": {
      "type": "object",
      "required": ["from", "to"],
      "properties": {
        "from": { "type": "string" },
        "to": { "type": "string" },
        "text": { "type": "string" }
      },
      "not": { "required": ["number"] }
    },
    "top": {
      "type": "object",
      "required": ["number"],
      "properties": {
        "number": { "type": "integer" },
        "text": { "type": "string" }
      },
      "not": {
        "anyOf": [
          { "required": ["from"] },
          { "required": ["to"] }
        ]
      }
    }
  }
}