Export BigQuery table schema to JSON Schema - google-bigquery

It is possible to export a BigQuery table schema to a JSON file, but the resulting JSON file is a BigQuery table schema and not a JSON Schema.
I am looking for a way to generate a JSON schema using a bigquery table based on the standard available here: https://json-schema.org/
This looks something like this:
{
"definitions": {},
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "http://example.com/root.json",
"type": "object",
"title": "The Root Schema",
"required": [
"glossary"
],
"properties": {
"glossary": {
"$id": "#/properties/glossary",
"type": "object",
"title": "The Glossary Schema",
"required": [
"title",
"GlossDiv"
],
"properties": {
"title": {
"$id": "#/properties/glossary/properties/title",
"type": "string",
"title": "The Title Schema",
"default": "",
"examples": [
"example glossary"
],
"pattern": "^(.*)$"
},
"GlossDiv": {
"$id": "#/properties/glossary/properties/GlossDiv",
"type": "object",
"title": "The Glossdiv Schema",
"required": [
"title",
"GlossList"
],
"properties": {
"title": {
"$id": "#/properties/glossary/properties/GlossDiv/properties/title",
"type": "string",
"title": "The Title Schema",
"default": "",
"examples": [
"S"
],
"pattern": "^(.*)$"
},
"GlossList": {
"$id": "#/properties/glossary/properties/GlossDiv/properties/GlossList",
"type": "object",
"title": "The Glosslist Schema",
"required": [
"GlossEntry"
],
"properties": {
"GlossEntry": {
"$id": "#/properties/glossary/properties/GlossDiv/properties/GlossList/properties/GlossEntry",
"type": "object",
"title": "The Glossentry Schema",
"required": [
"ID",
"SortAs",
"GlossTerm",
"Acronym",
"Abbrev",
"GlossDef",
"GlossSee"
],
"properties": {
"ID": {
"$id": "#/properties/glossary/properties/GlossDiv/properties/GlossList/properties/GlossEntry/properties/ID",
"type": "string",
"title": "The Id Schema",
"default": "",
"examples": [
"SGML"
],
"pattern": "^(.*)$"
},
"SortAs": {
"$id": "#/properties/glossary/properties/GlossDiv/properties/GlossList/properties/GlossEntry/properties/SortAs",
"type": "string",
"title": "The Sortas Schema",
"default": "",
"examples": [
"SGML"
],
"pattern": "^(.*)$"
},
"GlossTerm": {
"$id": "#/properties/glossary/properties/GlossDiv/properties/GlossList/properties/GlossEntry/properties/GlossTerm",
"type": "string",
"title": "The Glossterm Schema",
"default": "",
"examples": [
"Standard Generalized Markup Language"
],
"pattern": "^(.*)$"
},
"Acronym": {
"$id": "#/properties/glossary/properties/GlossDiv/properties/GlossList/properties/GlossEntry/properties/Acronym",
"type": "string",
"title": "The Acronym Schema",
"default": "",
"examples": [
"SGML"
],
"pattern": "^(.*)$"
},
"Abbrev": {
"$id": "#/properties/glossary/properties/GlossDiv/properties/GlossList/properties/GlossEntry/properties/Abbrev",
"type": "string",
"title": "The Abbrev Schema",
"default": "",
"examples": [
"ISO 8879:1986"
],
"pattern": "^(.*)$"
},
"GlossDef": {
"$id": "#/properties/glossary/properties/GlossDiv/properties/GlossList/properties/GlossEntry/properties/GlossDef",
"type": "object",
"title": "The Glossdef Schema",
"required": [
"para",
"GlossSeeAlso"
],
"properties": {
"para": {
"$id": "#/properties/glossary/properties/GlossDiv/properties/GlossList/properties/GlossEntry/properties/GlossDef/properties/para",
"type": "string",
"title": "The Para Schema",
"default": "",
"examples": [
"A meta-markup language, used to create markup languages such as DocBook."
],
"pattern": "^(.*)$"
},
"GlossSeeAlso": {
"$id": "#/properties/glossary/properties/GlossDiv/properties/GlossList/properties/GlossEntry/properties/GlossDef/properties/GlossSeeAlso",
"type": "array",
"title": "The Glossseealso Schema",
"items": {
"$id": "#/properties/glossary/properties/GlossDiv/properties/GlossList/properties/GlossEntry/properties/GlossDef/properties/GlossSeeAlso/items",
"type": "string",
"title": "The Items Schema",
"default": "",
"examples": [
"GML",
"XML"
],
"pattern": "^(.*)$"
}
}
}
},
"GlossSee": {
"$id": "#/properties/glossary/properties/GlossDiv/properties/GlossList/properties/GlossEntry/properties/GlossSee",
"type": "string",
"title": "The Glosssee Schema",
"default": "",
"examples": [
"markup"
],
"pattern": "^(.*)$"
}
}
}
}
}
}
}
}
}
}
}

BigQuery does not use the json-schema standard for the tables schema. I found two projects that have the code available to go from json-schema to BigQuery schema:
jsonschema-bigquery
jsonschema-transpiler
You could try using those projects as reference to create the opposite transformation. Also, you could create a feature request to the BigQuery team, asking to include the json-schema standard as an output format option.

No, this is not possible without writing a program to do it for you.
There is a feature request made by me that requests this functionality.
https://issuetracker.google.com/issues/145308573

Related

Concatenate/build JSON objects from a PostgreSQL database

I'm trying to build a JSON array from data existing in a database.
Using PostgreSQL, I need to build a JSON file that matches the following JSON schema file.
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Student iformation",
"type": "object",
"required": [
"student",
"name",
"login",
"program",
"branch",
"finished",
],
"properties": {
"student": {
"type": "string",
"minLength": 10,
"maxLength": 10,
"title": "A national identification number, 10 digits"
},
"name": {
"type": "string",
"title": "The name of the student"
},
"login": {
"type": "string",
"title": "The univerity issued computer login"
},
"program": {
"type": "string",
},
"branch": {
"anyOf":[{"type": "string"},{"type": "null"}],
},
"finished": {
"type": "array",
"title": "A list of read courses",
"items": {
"type": "object",
"required": [
"course",
"code",
"credits",
"grade"
],
"properties": {
"course": {
"type": "string",
"title": "Course name"
},
"code": {
"type": "string",
"minLength": 6,
"maxLength": 6,
"title": "Course code"
},
"credits": {
"type": "number",
"title": "Academic credits"
},
"grade": {
"enum" : ["U", "3", "4", "5"]
}
}
}
}
I have tried to do the following to get a better understanding of how to concatenate, build and arrange data that exists in the database:
SELECT array_to_json(array_agg(row_to_json(t))) FROM (
SELECT idnr, name, login, program from students) t;
and
select json_build_object('properties',
json_build_object('student',
json_build_object('idnr',idnr),'name',
json_build_object('name',name),'login',
json_build_object('login',login),'program',
json_build_object('program',program),'branch',
json_build_object('branch',branch)))
from Basicinformation;
How do I build and concatenate JSON objects with PostgreSQL?

How to display json schema using Dataweave in Mule 4?

In Mule 4 I just want to display the JSON schema using DataWeave, but I get an error for reference ids or any field starting with '$' in the JSON schema. The MIME type is application/json. The goal is to display the schema; I'd appreciate any suggestions. Thanks!
SAMPLE SCHEMA
{
"definitions": {},
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "http://example.com/example.json",
"type": "object",
"title": "The Root Schema",
"properties": {
"checked": {
"$id": "/properties/checked",
"type": "boolean",
"title": "The Checked Schema",
"default": false,
"examples": [
false
]
},
"dimensions": {
"$id": "/properties/dimensions",
"type": "object",
"title": "The Dimensions Schema",
"required": [
"width",
"height"
],
"properties": {
"width": {
"$id": "/properties/dimensions/properties/width",
"type": "integer",
"title": "The Width Schema",
"default": 0,
"examples": [
5
]
},
"height": {
"$id": "/properties/dimensions/properties/height",
"type": "integer",
"title": "The Height Schema",
"default": 0,
"examples": [
10
]
}
}
},
"id": {
"$id": "/properties/id",
"type": "integer",
"title": "The Id Schema",
"default": 0,
"examples": [
1
]
}
}
}
Use a backslash \ before $; this will print your schema with $ in column names. Unfortunately, $ as a prefix is not treated as part of a string in Mule.
Example
"\$id": "/properties/dimensions/properties/width"
will print
"$id": "/properties/dimensions/properties/width"

Content of a property dependent on another property value

I have the following schema, which "works", but does not enforce all the rules required of it.
I get JSON with a series of questions that have a templateType and data properties. There are different templates for each type, and the type must fit the template (or the client doesn't know how to layout the data).
The schema validates the templateType as an enum, and that the data fits one of the templates, but there's no correlation between the type and data structure (e.g. I could get templateType yesNo and data structure for multiSelect).
I'd like it to validate that the templateType matches the data structure. I can't change the format of the generated JSON, only the schema that validates it. None of the questions I've looked at seem to provide a solution.
For help, the schema can be pasted into the editor at http://jeremydorn.com/json-editor/, which generates a form from the schema and JSON data based on selections and data entered into the form.
{
"definitions": {
"question": {
"type": "array",
"title": "Question",
"items": {
"$ref": "#/definitions/template"
}
},
"template": {
"type": "object",
"title": "Question template",
"required": ["templateType","data"],
"properties": {
"templateType": {
"type": "string",
"enum": ["yesNo","multiSelect"]
},
"data": {
"oneOf": [
{"$ref": "#/definitions/yesNo"},
{"$ref": "#/definitions/multiSelect"}
]
}
}
},
"yesNo": {
"type": "object",
"title": "Yes/No question",
"additionalProperties": false,
"properties": {
"label": {
"type": "string"
}
}
},
"multiSelect": {
"type": "array",
"title": "Multi-select question",
"items": {
"type": "string",
"title": "Label for option",
"additionalProperties": false
}
}
},
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "array",
"items": {
"$ref": "#/definitions/question"
}
}
Have you considered using if, then, else keywords in your schema? They're part of JSON Schema draft-07
It would look like this:
{
"definitions": {
"question": {
"type": "array",
"title": "Question",
"items": {
"$ref": "#/definitions/template"
}
},
"template": {
"type": "object",
"title": "Question template",
"required": ["templateType","data"],
"properties": {
"templateType": {
"type": "string",
"enum": ["yesNo","multiSelect"]
},
"data": {
"if": { "properties": { "templateType": { "pattern": "^yesNo$" } } },
"then": { "$ref": "#/definitions/yesNo" },
"else": { "$ref": "#/definitions/multiSelect" }
}
}
},
"yesNo": {
"type": "object",
"title": "Yes/No question",
"additionalProperties": false,
"properties": {
"label": {
"type": "string"
}
}
},
"multiSelect": {
"type": "array",
"title": "Multi-select question",
"items": {
"type": "string",
"title": "Label for option",
"additionalProperties": false
}
}
},
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "array",
"items": {
"$ref": "#/definitions/question"
}
}
If if/then/else isn't supported by your validator, an alternative could be:
{
"definitions": {
"question": {
"type": "array",
"title": "Question",
"items": {
"$ref": "#/definitions/template"
}
},
"template": {
"type": "object",
"title": "Question template",
"required": ["templateType","data"],
"anyOf": [
{
"properties": {
"templateType": { "type": "string", "pattern": "yesNo" },
"data": { "$ref": "#/definitions/yesNo" }
}
},
{
"properties": {
"templateType": { "type": "string", "pattern": "multiSelect" },
"data": { "$ref": "#/definitions/multiSelect" }
}
}
]
},
"yesNo": {
"type": "object",
"title": "Yes/No question",
"additionalProperties": false,
"properties": {
"label": {
"type": "string"
}
}
},
"multiSelect": {
"type": "array",
"title": "Multi-select question",
"items": {
"type": "string",
"title": "Label for option",
"additionalProperties": false
}
}
},
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "array",
"items": {
"$ref": "#/definitions/question"
}
}

Resolving error: returned "Output field used as input"

I'm trying to create a BigQuery table using Python. Other operations (queries, retrieving table bodies etc.) are working fine, but when trying to create a table I'm stuck with an error:
apiclient.errors.HttpError: https://www.googleapis.com/bigquery/v2/projects/marechal-consolidation/datasets/marechal_results/tables?alt=json
returned "Output field used as input">
Here's the command I'm executing:
projectId = 'xxxx'
dataSet = 'marechal_results'
with open(filePath+'tableStructure.json') as data_file:
structure = json.load(data_file)
table_result = tables.insert(projectId=projectId, datasetId=dataSet, body=structure).execute()
JSON table:
{
"kind": "bigquery#table",
"tableReference": {
"projectId": "xxxx",
"tableId": "xxxx",
"datasetId": "xxxx"
},
"type": "table",
"schema": {
"fields": [
{
"mode": "REQUIRED",
"type": "STRING",
"description": "Company",
"name": "COMPANY"
},
{
"mode": "REQUIRED",
"type": "STRING",
"description": "Currency",
"name": "CURRENCY"
}
// bunch of other fields follow...
]
}
}
Why am I receiving this error?
EDIT: Here's the JSON object I'm passing as parameter:
{
"kind": "bigquery#table",
"type": "TABLE",
"tableReference": {
"projectId": "xxxx",
"tableId": "xxxx",
"datasetId": "xxxx"
},
"schema": {
"fields": [
{
"type": "STRING",
"name": "COMPANY"
},
{
"type": "STRING",
"name": "YEAR"
},
{
"type": "STRING",
"name": "COUNTRY_ISO"
},
{
"type": "STRING",
"name": "COUNTRY"
},
{
"type": "STRING",
"name": "COUNTRY_GROUP"
},
{
"type": "STRING",
"name": "REGION"
},
{
"type": "STRING",
"name": "AREA"
},
{
"type": "STRING",
"name": "BU"
},
{
"type": "STRING",
"name": "REFERENCE"
},
{
"type": "FLOAT",
"name": "QUANTITY"
},
{
"type": "FLOAT",
"name": "NET_SALES"
},
{
"type": "FLOAT",
"name": "GROSS_SALES"
},
{
"type": "STRING",
"name": "FAM_GRP"
},
{
"type": "STRING",
"name": "FAMILY"
},
{
"type": "STRING",
"name": "PRESENTATION"
},
{
"type": "STRING",
"name": "ORIG_FAMILY"
},
{
"type": "FLOAT",
"name": "REF_PRICE"
},
{
"type": "STRING",
"name": "CODE1"
},
{
"type": "STRING",
"name": "CODE4"
}
]
}
}
This is probably too late to help you, but hopefully it helps the next poor soul like me. It took me a while to figure out what "Output field used as input" meant.
Though the API specifies the same object for the request (input) and response (output), some fields are only allowed in the response. In the docs you will see their descriptions prefixed with "Output only". From looking at your table definition I see that you have "type": "TABLE", and "type" is listed as an "Output only" property. So I would guess that if you remove it, that error will go away. Here is the link to the docs: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables
It would help if they told you what field the violation was on.

Json schema dynamic key validation

Facing an issue with schema validation.
schema :
{
"type": "object",
"$schema": "http://json-schema.org/draft-03/schema",
"id": "#",
"required": true,
"patternProperties": {
"^[A-Z0-9._%+-]+#[A-Z0-9.-]+\.[A-Z]{2,6}$": {
"type": "object",
"required": true,
"properties": {
"_from": {
"id": "_from",
"type": "string",
"required": true
},
"message": {
"type": "object",
"id": "message",
"properties": {
"detail": {
"type": "string",
"id": "detail",
"required": true
},
"from": {
"type": "string",
"id": "from",
"required": true
}
}
}
}
}
}
}
json :
{
"tom#example.com": {
"_from": "giles#gmail.com",
"message": {
"from": "Giles#gmail.com",
"detail": "AnyonewanttomeetmeinParis"
}
},
"harry#example.com": {
"_from": "giles#gmail.com",
"message": {
"from": "Giles#gmail.com",
"detail": "AnyonewanttomeetmeinParis"
}
}
}
Here the key email address is dynamic, somehow it doesn't validate regex for email validation.
Can you please advise me on how to correct the schema?
I am validating using : http://json-schema-validator.herokuapp.com/index.jsp
I see in your pattern that you seem to have forgotten to escape some characters or didn't do it correctly:
"^[A-Z0-9._%+-]+#[A-Z0-9.-]+\.[A-Z]{2,6}$"
and it causes the error that you can see when you hover the mouse over the link at the top of the validator:
it should be:
"^[A-Z0-9\\._%\\+-]+#[A-Z0-9\\.-]+\\.[A-Z]{2,6}$"
or without escaping the inner/class characters but I'd use the first pattern because I think its intention is clearer:
"^[A-Z0-9._%+-]+#[A-Z0-9.-]+\\.[A-Z]{2,6}$"
You need to have two \ because the first \ is an escape for the second \. With a single one it wouldn't work because there is no escape sequence like \. or \+ in JavaScript. You want to have a \ in the pattern itself.
However json schema patternProperties are case sensitive by default so you need to extend your email pattern by adding a-z to it:
"^[A-Za-z0-9\\._%\\+-]+#[A-Za-z0-9\\.-]+\\.[A-Za-z]{2,6}$"
(I didn't find any other way to make it case insensitive)
You also need to exclude any other property names by adding "additionalProperties": false next to the patternProperties or otherwise it catches everything else that does not match the pattern.
The working schema should then look like this:
{
"type": "object",
"$schema": "http://json-schema.org/draft-03/schema",
"id": "#",
"required": true,
"patternProperties": {
"^[A-Za-z0-9\\._%\\+-]+#[A-Za-z0-9\\.-]+\\.[A-Za-z]{2,6}$": {
"type": "object",
"required": true,
"properties": {
"_from": {
"id": "_from",
"type": "string",
"required": true
},
"message": {
"type": "object",
"id": "message",
"properties": {
"detail": {
"type": "string",
"id": "detail",
"required": true
},
"from": {
"type": "string",
"id": "from",
"required": true
}
}
}
}
}
},
"additionalProperties": false
}
I've tested it on: http://jsonschemalint.com/
Changed the schema as per draft 04 :
{
"type": "object",
"$schema": "http://json-schema.org/draft-04/schema",
"patternProperties": {
"^[A-Za-z0-9\\._%\\+-]+#[A-Za-z0-9\\.-]+\\.[A-Za-z]{2,6}$": {
"type": "object",
"properties": {
"__from": {
"type": "string"
},
"message": {
"type": "object",
"properties": {
"from": {
"type": "string"
},
"detail": {
"type": "string"
}
},
"required": [ "from","detail"]
}
},
"required": [ "__from","message"]
}
},
"additionalProperties": false
}