Add two JSON values dynamically in Azure Data Factory

I need to combine two JSON values in Data Factory: one coming dynamically from an activity's output and one from a pipeline variable. I am doing it like this:
@union(activity('Get Order Events Data').output, json('{"orig_orderID" : "variables('orderid')"}'))
But it is showing an error:
Missing comma between arguments
What am I doing wrong here?

"But it is showing error. Missing comma between arguments"

This is because the expression variables('orderid') contains single quotes, which split your expression string. You should use the concat() function to build the JSON instead: @union(activity('Get Order Events Data').output, json(concat('{"orig_orderID" : "', variables('orderid'), '"}'))). But this expression can't give you your expected result, because it doesn't merge into your data array. It would produce this:
{
    "data": [
        {
            "id": 145,
            "order_id": 256,
            "created_at": "2021-06-20T11:48:20Z",
            "type": 10,
            "sender": -1,
            "message": null,
            "previous_status": 4,
            "fas_user_id": null,
            "event_data": "5",
            "shopkeeper_timestamp": null,
            "store_id": 123
        }
    ],
    "orig_orderID": "860"
}
You can try the following expression instead: @union(activity('Get Order Events Data').output.data[0], json(concat('{"orig_orderID" : "', variables('orderid'), '"}')))
It gives this result:
{
    "id": 145,
    "order_id": 256,
    "created_at": "2021-06-20T11:48:20Z",
    "type": 10,
    "sender": -1,
    "message": null,
    "previous_status": 4,
    "fas_user_id": null,
    "event_data": "5",
    "shopkeeper_timestamp": null,
    "store_id": 123,
    "orig_orderID": "860"
}
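If you need the merged object later in the pipeline, here is a minimal sketch of a Set Variable activity using this expression. The activity and variable names from the question are reused; mergedEvent is a hypothetical String variable, and the union result is wrapped in string() because pipeline variables can't hold objects directly:
{
    "name": "Set merged event",
    "type": "SetVariable",
    "dependsOn": [
        { "activity": "Get Order Events Data", "dependencyConditions": [ "Succeeded" ] }
    ],
    "typeProperties": {
        "variableName": "mergedEvent",
        "value": {
            "value": "@string(union(activity('Get Order Events Data').output.data[0], json(concat('{\"orig_orderID\" : \"', variables('orderid'), '\"}'))))",
            "type": "Expression"
        }
    }
}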

Azure Data Factory JSON syntax

In Azure Data Factory, I have a copy activity. The data source is the response body from a REST API POST request.
The sink is a SQL table. The problem is that, even though my JSON data contains multiple rows, only the first row is getting copied.
The source data looks like the following:
{
    "offset": 0,
    "limit": 1000,
    "total": 65,
    "loaded": 34,
    "unloaded": 31,
    "cubeCaches": [
        {
            "id": "MxMUVDN0Q1MzAk5MDg6RDkxREQxMUU5RDBDNzR2NMTk6YWNsZGxwMTJtc3QuY2952aXppZW50aW5==",
            "projectId": "15D91DD11E9D0C74B3319",
            "source": {
                "name": "12302021",
                "id": "07EF95111EC7F954158",
                "type": "cube"
            },
            "state": {
                "active": true,
                "dirty": false,
                "infoDirty": false,
                "persisted": true,
                "processing": false,
                "loadedState": "loaded"
            },
            "lastUpdateTime": "2022-01-24T14:22:30Z",
            "lastHitTime": "2022-02-14T20:02:02Z",
            "hitCount": 1,
            "size": 798720,
            "creatorId": "D4E8BFD56085",
            "lastUpdateJob": 18937,
            "openViewCount": 0,
            "creationTime": "2022-01-24T15:07:24Z",
            "historicHitCount": 22,
            "dataLanguages": [],
            "rowCount": 2726,
            "columnCount": 9
        },
        {
            "id": "UYwMTIxMUFNjkxMUU5RDBDMTRCNkMwMDgwRUYzNUQ0MUI6YWNsZjLmNvbQ==",
            "projectId": "120D0C1480EF35D41B",
            "source": {
                "name": "All Clients (YTD)",
                "id": "49E5B13466251CD0B54E8F",
                "type": "cube"
            },
            "state": {
                "active": true,
                "dirty": false,
                "infoDirty": false,
                "persisted": true,
                "processing": false,
                "loadedState": "loaded"
            },
            "lastUpdateTime": "2022-01-03T01:00:01Z",
            "hitCount": 0,
            "size": 82488152,
            "creatorId": "1E2AFB011E80EF35FF14",
            "lastUpdateJob": 364091,
            "openViewCount": 0,
            "creationTime": "2022-02-14T01:04:55Z",
            "historicHitCount": 0,
            "dataLanguages": [],
            "rowCount": 8146903,
            "columnCount": 13
        }
    ]
}
I want to add a row in the Sink table (SQL) for every "id" in the JSON. However, when I run the activity, only the first record gets copied. It's mapped correctly, but I want it to copy all rows in the JSON, not just 1.
In my Mapping tab in Azure Data Factory, each column is mapped from a $.cubeCaches[0]... path (mapping screenshot omitted).
What am I doing wrong here? I'm thinking there is something wrong with my "Source" syntax for each of the columns...
In $.cubeCaches[0][...] you're explicitly mapping the first element of that array into columns, and that's why only one row lands in the Sink.
I don't know a way to achieve what you intend with the Copy activity alone. I would use a Mapping Data Flow here, and inside it I would flatten your data (with the Flatten transformation) to get one row per object in the array.
Then from this flattened stream you could use a Derived Column to map the JSON fields into the columns of your target, a Select to remove the unwanted original fields, and a Sink to write into your target location.
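For illustration, the Flatten step in the underlying data flow script could look roughly like this; the stream names restSource and FlattenCubeCaches and the chosen columns are invented for the sketch (the Flatten transformation can also take over part of the column mapping):
restSource foldDown(unroll = cubeCaches,
    mapColumn(
        each(cubeCaches, id = .id, projectId = .projectId, rowCount = .rowCount),
        each(cubeCaches.source, sourceName = .name)
    ),
    skipDuplicateMapInputs: false,
    skipDuplicateMapOutputs: false) ~> FlattenCubeCaches
Each object in cubeCaches becomes its own row, so the Sink receives every record instead of just the first.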

How can I parse through an array of JSON objects with a SQL query?

I am trying to create a SQL query to gather information from a column in a database called "CarOptions". This column holds an array that contains one or more JSON objects. Below is an example of the array.
I want to grab only the values of the name and the price. Could anyone provide a query that can produce a column with the name and price, so that it would look like the example below or any other readable format?
"Clear Guard 89500, Tint 0"
[
    {
        "id": 5,
        "name": "Clear Guard",
        "type": "ANY",
        "grouping": "PREFER",
        "price": 89500,
        "oemOffering": false,
        "learnMoreUrl": null,
        "pricePercent": null,
        "optionGroupId": 2,
        "percentSource": null
    },
    {
        "id": 119600,
        "name": "Tint (Lifetime Warranty)",
        "type": "NEW",
        "grouping": "PREFER",
        "price": 0,
        "oemOffering": false,
        "learnMoreUrl": null,
        "pricePercent": null,
        "optionGroupId": 18,
        "percentSource": null
    }
]
You can use OPENJSON to pull the data out. Note you don't state your database; this is for SQL Server.
A very quick hacky example:
declare @json varchar(max)='[ { "id": 5, "name": "Clear Guard", "type": "ANY", "grouping": "PREFER", "price": 89500, "oemOffering": false, "learnMoreUrl": null, "pricePercent": null, "optionGroupId": 2, "percentSource": null }, { "id": 119600, "name": "Tint (Lifetime Warranty)", "type": "NEW", "grouping": "PREFER", "price": 0, "oemOffering": false, "learnMoreUrl": null, "pricePercent": null, "optionGroupId": 18, "percentSource": null } ]'
select j.[key] Id, x.[key], x.[value]
from openjson(@json) j
outer apply (
    select [key], [value]
    from openjson(j.[value])
    where [key] in ('name', 'price')
) x
Id key value
---- ---------- -------------------------
0 name Clear Guard
0 price 89500
1 name Tint (Lifetime Warranty)
1 price 0
(4 rows affected)
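If you'd rather have one row per option with name and price as typed columns, or the single string from the question, OPENJSON with a WITH clause is a tidier variant; a sketch (STRING_AGG assumes SQL Server 2017 or later):
-- one row per option, name and price as real columns
select o.[name], o.price
from openjson(@json)
with (
    [name] nvarchar(100) '$.name',
    price  int           '$.price'
) o;

-- one readable string, e.g. "Clear Guard 89500, Tint (Lifetime Warranty) 0"
select string_agg(concat(o.[name], ' ', o.price), ', ')
from openjson(@json)
with (
    [name] nvarchar(100) '$.name',
    price  int           '$.price'
) o;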

How to insert an Array that contains Objects into one column

I have an existing database with an important column called InDays, of datatype nvarchar(150).
In the existing data there's an array that has objects inside and looks like this:
InDays
----------------------------------------------------------------------------------------------
[{ "day": 1, "from": "12:00am", "to": "2:00am"},{ "day": 4, "from": "2:00am", "to": "4:00am"}]
There can be more than one object inside.
I tried inserting it as it is, but I get [object Object] instead of the value.
EDIT:
The insert code:
DECLARE @InDays nvarchar(150) = [{ "day": 1, "from": "12:00am", "to": "2:00am"},{ "day": 4, "from": "2:00am", "to": "4:00am"}]
INSERT INTO Course (InDays)
VALUES (@InDays)
I have ... an important column that's ... nvarchar(150) datatype.
So use that type with your insert:
DECLARE @InDays nvarchar(150) = '[{ "day": 1, "from": "12:00am", "to": "2:00am"},{ "day": 4, "from": "2:00am", "to": "4:00am"}]'
Though I have my doubts that 150 characters will be large enough if you can end up with many of these. Stepping through just the first object, assuming it's typical (roughly 47 characters per element), three elements barely fit and a fourth will already run out of space.
For future explorers:
I simply had to convert the JSON into a string, which is done with JSON.stringify(value), and then store that string directly in the SQL database. JSON.parse(value) later converts it back to JSON.
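A minimal Node.js sketch of that round trip, assuming the mssql package with an already-opened connection pool and code running inside an async function; the Course table is from the question, everything else is illustrative:
const sql = require('mssql');

// hypothetical application value matching the question's shape
const inDays = [
    { day: 1, from: '12:00am', to: '2:00am' },
    { day: 4, from: '2:00am', to: '4:00am' },
];

// serialize before inserting, so the driver doesn't coerce the
// object to "[object Object]"
await new sql.Request()
    .input('InDays', sql.NVarChar(150), JSON.stringify(inDays))
    .query('INSERT INTO Course (InDays) VALUES (@InDays)');

// read it back and revive the array
const result = await new sql.Request().query('SELECT TOP 1 InDays FROM Course');
const revived = JSON.parse(result.recordset[0].InDays);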

BigQuery nested object: No such field

I have a table whose schema includes RECORD fields such as passenger_origin, driver_origin, and passenger_destination (schema screenshot omitted).
I'm trying to upload some data from Google Cloud Storage using the Python client. The file is newline-delimited JSON. Most of my lines don't have the field "passenger_origin.accuracy", but when the field is present I get the following error:
Error while reading data, error message: JSON parsing error in row starting at position 2122510: No such field: driver_origin.accuracy. (error code: invalid)
Error while reading data, error message: JSON parsing error in row starting at position 2126317: No such field: passenger_origin.accuracy. (error code: invalid)
Example of an invalid row :
{
    "id": 1479443,
    "is_obsolete": 0,
    "seat_count": 1,
    "is_ticket_checked": 0,
    "score": 0.3709318902,
    "is_multimodal": 0,
    "fake_paths": 0,
    "passenger_origin": {
        "id": 2204,
        "poi_uuid": "15b4e52c-7c58-442c-98df-1eb06079f6bb",
        "user_id": 1987,
        "accuracy": 250.0,
        "disabled": 0,
        "last_update": "2017-03-10T15:15:39",
        "created": "2016-02-05T17:06:26",
        "modified_by_user": 1,
        "is_recurrent": 0,
        "source": 1,
        "hidden_by_user": 0,
        "kind": 2
    },
    "driver_origin": {
        "id": 412491,
        "poi_uuid": "47e90b6d-e178-4e02-9f02-f4ea5f8beaa1",
        "user_id": 71471,
        "disabled": 0,
        "last_update": "2017-11-02T10:09:09",
        "created": "2017-11-02T10:09:09",
        "modified_by_user": 0,
        "is_recurrent": 0,
        "source": 1,
        "hidden_by_user": 0,
        "kind": 2
    },
    "passenger_destination": {
        "id": 2203,
        "poi_uuid": "c531c3ca-47f0-4003-8098-1272fee8d018",
        "user_id": 1987,
        "accuracy": 250.0,
        "disabled": 0,
        "last_update": "2017-03-10T15:12:42",
        "created": "2016-02-05T17:06:19",
        "modified_by_user": 1,
        "is_recurrent": 0,
        "source": 1,
        "hidden_by_user": 0,
        "kind": 1
    }
}
The table is created before the data is uploaded and is not modified afterwards. I don't understand why the upload is failing on these fields. Do the RECORD fields have to be REPEATED?
To ignore the fields that aren't present in the schema, use a combination of:
configuration.load.ignoreUnknownValues
configuration.load.maxBadRecords
Setting the first to true and the second to some arbitrarily-high number, e.g. 100000, will enable the load to succeed even if there are extra fields.
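With the Python client those settings map onto LoadJobConfig roughly like this; the bucket, dataset, and table names are made up for the sketch:
from google.cloud import bigquery

client = bigquery.Client()

job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
    ignore_unknown_values=True,   # skip fields that aren't in the schema
    max_bad_records=100000,       # tolerate rows that still fail to parse
    autodetect=False,             # keep the schema the table was created with
)

load_job = client.load_table_from_uri(
    "gs://my-bucket/rides.ndjson",    # assumed source file
    "my-project.my_dataset.rides",    # assumed destination table
    job_config=job_config,
)
load_job.result()  # wait for the job; raises if the load failed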
The problem was that configuration.load.autodetect was set to True. I set it to False and the problem was fixed.

Freebase search_api and excluding results by specified type

Does anyone know how to exclude some topics with specified type(s) using the Search API and MQL?
For example, I'm trying to find all topics named "Voodoo People", exclude only those that have the composition and release types, and sort the results by score descending: http://tinyurl.com/3tjkb7y.
Sorting works perfectly, but I can't find any functionality for excluding.
I tried using mql_filter: http://tinyurl.com/644xkow, but the releases are still there.
And one more question: I see the type_strict parameter has the possible values "all", "any", and "should", but there is no "not" or "not in". Can the desired result be obtained in some other way?
The syntax that you're looking for is "optional" : "forbidden". In your query that would look like this:
[{
    "search": {
        "query": "Voodoo People",
        "score": null,
        "mql_filter": [{
            "type": {
                "id": "/music/release",
                "optional": "forbidden"
            }
        }]
    },
    "name": null,
    "id": null,
    "type": [],
    "/common/topic/notable_for": {},
    "limit": 15,
    "sort": "-search.score"
}]