Background:I have a script that makes a daily API call for financial data, returns the data as a JSON object, saves it into a pandas df before doing some manipulation on the df and finally saving as a .csv before being uploaded into a system.
Issue:My script worked fine until recently when presumably new data entered the JSON object, and I now receive the following ValueError -
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
C:\Users\JONNY~1.FOR\AppData\Local\Temp/ipykernel_17600/3701323957.py in <module>
13
14 if __name__ == "__main__":
---> 15 main()
C:\Users\JONNY~1.FOR\AppData\Local\Temp/ipykernel_17600/3701323957.py in main()
1 # Function that writes Exceptions Report and API Response as a consolidated .xlsx file.
2 def main():
----> 3 financial_accounts_df = dataframe_transformation()
4
5 # Writing dataframe to .csv
C:\Users\JONNY~1.FOR\AppData\Local\Temp/ipykernel_17600/203167952.py in dataframe_transformation()
19 # Step 3 - remove the parent rows, leaving only children
20 rows_to_remove = financial_accounts_df['FinServ__SourceSystemId__c'].isin(financial_accounts_df['Addepar_Direct_Owner_ID__c'])
---> 21 financial_accounts_df.loc[financial_accounts_df['Addepar_Direct_Owner_ID__c'].isin(financial_accounts_df['FinServ__SourceSystemId__c'].values),
22 'Addepar_Direct_Owner_ID__c'] = financial_accounts_df.loc[rows_to_remove, 'Addepar_Direct_Owner_ID__c'].to_numpy()
23 financial_accounts_df = financial_accounts_df[~rows_to_remove]
~\.conda\envs\JPDevelopment\lib\site-packages\pandas\core\indexing.py in __setitem__(self, key, value)
714
715 iloc = self if self.name == "iloc" else self.obj.iloc
--> 716 iloc._setitem_with_indexer(indexer, value, self.name)
717
718 def _validate_key(self, key, axis: int):
~\.conda\envs\JPDevelopment\lib\site-packages\pandas\core\indexing.py in _setitem_with_indexer(self, indexer, value, name)
1686 if take_split_path:
1687 # We have to operate column-wise
-> 1688 self._setitem_with_indexer_split_path(indexer, value, name)
1689 else:
1690 self._setitem_single_block(indexer, value, name)
~\.conda\envs\JPDevelopment\lib\site-packages\pandas\core\indexing.py in _setitem_with_indexer_split_path(self, indexer, value, name)
1741 return self._setitem_with_indexer((pi, info_axis[0]), value[0])
1742
-> 1743 raise ValueError(
1744 "Must have equal len keys and value "
1745 "when setting with an iterable"
ValueError: Must have equal len keys and value when setting with an iterable
The script: I have removed the functions that call the API and instead wrote a function that will load the Sample Datasets offered below and simulate the ValueError I am receiving. Note: not all the libraries dependencies are being used:
# Importing depedencies
from configparser import ConfigParser
import datetime as date
import datetime as dt
import datetime
from datetime import timedelta
from datetime import date
import itertools
import pandas as pd
from pandas import json_normalize
import requests as requests
from requests.auth import HTTPBasicAuth
import time
import json
import jsonpath_ng as jp
import enlighten
import numpy as np
# Function to read API response / JSON Object
def response_writer():
with open('test_not_working.json') as f:
api_response = json.load(f)
return api_response
# api_response = response_writer()
api_response = response_writer()
# Set an auto_id_field which appears later as "json_path" in pandas dataframe columns
jp.jsonpath.auto_id_field = 'json_path'
def unpack_response():
# Create a dataframe from JSON response
expr = jp.parse('$..children.[*].json_path')
data = [{'json_path': m.value, **m.datum.value} for m in expr.find(api_response)]
df = pd.json_normalize(data).sort_values('json_path', ignore_index=True)
# Append a portfolio column
df['portfolio'] = df.loc[df.json_path.str.contains(r'total\.children\.\[\d+]$'), 'name']
df['portfolio'].fillna(method='ffill', inplace=True)
# Mapping column headers appropriately from the JSON
trans = {'columns.' + c['key']: c['display_name'] for c in api_response['meta']['columns']}
cols = ['json_path', 'name', 'FinServ__SourceSystemId__c', 'Addepar_Direct_Owner_ID__c', 'FinServ__FinancialAccountNumber__c', 'FinServ__OpenDate__c', 'FinServ__CloseDate__c', 'Display_Name__c',
'JP_Custodian__c', 'Online_Status__c', 'Custodian_Account_Name__c', 'Management_Style__c', 'Portfolio_Type__c', 'Advisory_Firm__c', 'FinServ__Balance__c', 'Target_Cash__c', 'Target_Cash_Notes__c']
df = df.rename(columns=trans)[cols]
# Then renaming those that could be phrased better.
df.rename(columns={'name': 'Name'}, inplace=True)
return df
# Function that takes df and performs varios manipulation, before saving in dataframe
def dataframe_transformation():
# Calling function that returns both dataframes
financial_accounts_df = unpack_response()
# Limiting character length of Name column
financial_accounts_df['Name'] = financial_accounts_df['Name'].str[:80]
# Removing Directly Owned Rows
financial_accounts_df = financial_accounts_df[financial_accounts_df['Name'].str.contains("Directly Owned")==False]
# Changing 'Holding Account' name to 'Name'
financial_accounts_df = financial_accounts_df.rename(columns={'Holding Account': 'Name'})
# Creating RecordTypeID Column and setting initial value of '0123h000000FPFjAAO'
financial_accounts_df['RecordTypeID'] = '0123h000000FPFjAAO'
# Step 1 - Search for 'Addepar_Direct_Owner_ID__c' values in 'FinServ__SourceSystemId__c' column.
# Step 2 - for rows where Step 1 is true, mirror 'Addepar_Direct_Owner_ID__c' value to match.
# Step 3 - remove the parent rows, leaving only children
rows_to_remove = financial_accounts_df['FinServ__SourceSystemId__c'].isin(financial_accounts_df['Addepar_Direct_Owner_ID__c'])
financial_accounts_df.loc[financial_accounts_df['Addepar_Direct_Owner_ID__c'].isin(financial_accounts_df['FinServ__SourceSystemId__c'].values),
'Addepar_Direct_Owner_ID__c'] = financial_accounts_df.loc[rows_to_remove, 'Addepar_Direct_Owner_ID__c'].to_numpy()
financial_accounts_df = financial_accounts_df[~rows_to_remove]
# Duplicating the 'Addepar_Direct_Owner_ID__c' column and renaming duplication 'FinServ__PrimaryOwner__c'
financial_accounts_df['FinServ__PrimaryOwner__c'] = financial_accounts_df['Addepar_Direct_Owner_ID__c']
# Moving position of newly created column to index position 3 for ease of comparison
first_column = financial_accounts_df.pop('FinServ__PrimaryOwner__c')
financial_accounts_df.insert(3, 'FinServ__PrimaryOwner__c', first_column)
# Dropping the first df column / json_path
financial_accounts_df = financial_accounts_df.iloc[: , 1:]
return financial_accounts_df
# Function that writes dataframe to csv file
def main():
financial_accounts_df = dataframe_transformation()
# Writing dataframe to .csv
timestr = datetime.datetime.now().strftime("%Y-%m-%d")
filename = 'financial_accounts_'+timestr+'.csv'
# financial_accounts_df.to_csv(filename, encoding='utf-8')
financial_accounts_df.to_csv(filename, index=False)
print(f' Filename:',filename)
if __name__ == "__main__":
main()
Challenges identifying issue:In theory the returned data will be similar and the only change will be additional rows of data entering the JSON object.What concerns me is that I can't think of a solution that would allow me to identify what row of data (7000 rows in total) is causing this issue / what row of data shows my code for what it is (flawed in someway).I appreciate the information I am providing is light, however does anyone have suggestions about how I can either isolate the offending row of data in the pandas df and/or have any idea how my code might be flawed and causing this issue?
Sample Data:Here are 2x samples, one that works with the function and the other that does not. I hope this helps assist me triangulate the issue -
Sample 1 (Working) - this runs through the function without issue:
{
"meta": {
"columns": [
{
"key": "node_id",
"display_name": "FinServ__SourceSystemId__c",
"output_type": "Word"
},
{
"key": "direct_owner_id",
"display_name": "Addepar_Direct_Owner_ID__c",
"output_type": "Word"
},
{
"key": "bottom_level_holding_account_number",
"display_name": "FinServ__FinancialAccountNumber__c",
"output_type": "Word"
},
{
"key": "_custom_account_open_date_425913",
"display_name": "FinServ__OpenDate__c",
"output_type": "Date"
},
{
"key": "_custom_close_date_411160",
"display_name": "FinServ__CloseDate__c",
"output_type": "Date"
},
{
"key": "display_name",
"display_name": "Display_Name__c",
"output_type": "Word"
},
{
"key": "_custom_jp_custodian_305769",
"display_name": "JP_Custodian__c",
"output_type": "Word"
},
{
"key": "online_status",
"display_name": "Online_Status__c",
"output_type": "Word"
},
{
"key": "_custom_custodian_account_name_487351",
"display_name": "Custodian_Account_Name__c",
"output_type": "Word"
},
{
"key": "_custom_management_style_295599",
"display_name": "Management_Style__c",
"output_type": "Word"
},
{
"key": "_custom_portfolio_type_295600",
"display_name": "Portfolio_Type__c",
"output_type": "Word"
},
{
"key": "_custom_advisor_302684",
"display_name": "Advisory_Firm__c",
"output_type": "Word"
},
{
"key": "_custom_test1_679151",
"display_name": "FinServ__Balance__c",
"output_type": "Number"
},
{
"key": "_custom_new_target_cash_balance_gwl_453547",
"display_name": "Target_Cash__c",
"output_type": "Number"
},
{
"key": "_custom_target_cash_notes_341522",
"display_name": "Target_Cash_Notes__c",
"output_type": "Word"
}
],
"groupings": [
{
"key": "holding_account",
"display_name": "Holding Account"
}
]
},
"data": {
"type": "portfolio_views",
"attributes": {
"total": {
"name": "Total",
"columns": {
"online_status": null,
"_custom_test1_679151": null,
"direct_owner_id": null,
"_custom_account_open_date_425913": null,
"display_name": null,
"_custom_custodian_account_name_487351": null,
"_custom_portfolio_type_295600": null,
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": null,
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": null,
"_custom_jp_custodian_305769": null,
"_custom_management_style_295599": null,
"_custom_target_cash_notes_341522": null,
"node_id": null
},
"children": [
{
"entity_id": 10663945,
"name": "10 Laverockbank LLC Hold (668168788)",
"grouping": "holding_account",
"columns": {
"online_status": "Online",
"_custom_test1_679151": 5045.08,
"direct_owner_id": "10710095",
"_custom_account_open_date_425913": "2021-05-14",
"display_name": "10 Madison LLC Hold",
"_custom_custodian_account_name_487351": "10 MADISON LLC | &HOLDING | LLC",
"_custom_portfolio_type_295600": "Cash Management",
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": "668168788",
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": "Advisory Name",
"_custom_jp_custodian_305769": "Custodian Name",
"_custom_management_style_295599": "Holding",
"_custom_target_cash_notes_341522": null,
"node_id": "10663945"
},
"children": []
}
]
}
}
},
"included": []
}
Sample 2 (ValueError) - this creates a ValueError and is an example of 1/2 dozen children that throw up the ValueError:
{
"meta": {
"columns": [
{
"key": "node_id",
"display_name": "FinServ__SourceSystemId__c",
"output_type": "Word"
},
{
"key": "direct_owner_id",
"display_name": "Addepar_Direct_Owner_ID__c",
"output_type": "Word"
},
{
"key": "bottom_level_holding_account_number",
"display_name": "FinServ__FinancialAccountNumber__c",
"output_type": "Word"
},
{
"key": "_custom_account_open_date_425913",
"display_name": "FinServ__OpenDate__c",
"output_type": "Date"
},
{
"key": "_custom_close_date_411160",
"display_name": "FinServ__CloseDate__c",
"output_type": "Date"
},
{
"key": "display_name",
"display_name": "Display_Name__c",
"output_type": "Word"
},
{
"key": "_custom_jp_custodian_305769",
"display_name": "JP_Custodian__c",
"output_type": "Word"
},
{
"key": "online_status",
"display_name": "Online_Status__c",
"output_type": "Word"
},
{
"key": "_custom_custodian_account_name_487351",
"display_name": "Custodian_Account_Name__c",
"output_type": "Word"
},
{
"key": "_custom_management_style_295599",
"display_name": "Management_Style__c",
"output_type": "Word"
},
{
"key": "_custom_portfolio_type_295600",
"display_name": "Portfolio_Type__c",
"output_type": "Word"
},
{
"key": "_custom_advisor_302684",
"display_name": "Advisory_Firm__c",
"output_type": "Word"
},
{
"key": "_custom_test1_679151",
"display_name": "FinServ__Balance__c",
"output_type": "Number"
},
{
"key": "_custom_new_target_cash_balance_gwl_453547",
"display_name": "Target_Cash__c",
"output_type": "Number"
},
{
"key": "_custom_target_cash_notes_341522",
"display_name": "Target_Cash_Notes__c",
"output_type": "Word"
}
],
"groupings": [
{
"key": "holding_account",
"display_name": "Holding Account"
}
]
},
"data": {
"type": "portfolio_views",
"attributes": {
"total": {
"name": "Total",
"columns": {
"online_status": null,
"_custom_test1_679151": null,
"direct_owner_id": null,
"_custom_account_open_date_425913": null,
"display_name": null,
"_custom_custodian_account_name_487351": null,
"_custom_portfolio_type_295600": null,
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": null,
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": null,
"_custom_jp_custodian_305769": null,
"_custom_management_style_295599": null,
"_custom_target_cash_notes_341522": null,
"node_id": null
},
"children": [
{
"entity_id": 15425904,
"name": " WF Rev Tr US Eq 2 Wrapper (E79508009)",
"grouping": "holding_account",
"columns": {
"online_status": "Offline",
"_custom_test1_679151": 99.86,
"direct_owner_id": "7400472",
"_custom_account_open_date_425913": null,
"display_name": null,
"_custom_custodian_account_name_487351": null,
"_custom_portfolio_type_295600": null,
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": "E79508009",
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": null,
"_custom_jp_custodian_305769": null,
"_custom_management_style_295599": null,
"_custom_target_cash_notes_341522": null,
"node_id": "15425904"
},
"children": [
{
"entity_id": 13845019,
"name": "WF Rev Tr US Eq 2 (E79508009)",
"grouping": "holding_account",
"columns": {
"online_status": "Online",
"_custom_test1_679151": 99.86,
"direct_owner_id": "15425904",
"_custom_account_open_date_425913": null,
"display_name": "WF Rev Tr US Eq 2",
"_custom_custodian_account_name_487351": "RYAN HUDSON REVOCABLE TRUST",
"_custom_portfolio_type_295600": "Core Portfolio: Liquid",
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": "E79508009",
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": "Advisory Name",
"_custom_jp_custodian_305769": "Custodian Name",
"_custom_management_style_295599": "US Core Tax-Loss Harvesting",
"_custom_target_cash_notes_341522": null,
"node_id": "13845019"
},
"children": []
},
{
"entity_id": 15425937,
"name": "WF Rev Tr US Eq 2 Non-Discretionary (E79508009)",
"grouping": "holding_account",
"columns": {
"online_status": "Offline",
"_custom_test1_679151": 0,
"direct_owner_id": "15425904",
"_custom_account_open_date_425913": null,
"display_name": null,
"_custom_custodian_account_name_487351": "E79508009",
"_custom_portfolio_type_295600": "L.I.F.E. Assets",
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": "E79508009",
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": "N/A - Client Directed",
"_custom_jp_custodian_305769": "",
"_custom_management_style_295599": "External",
"_custom_target_cash_notes_341522": null,
"node_id": "15425937"
},
"children": []
}
]
},
{
"entity_id": 10663945,
"name": "10 Laverockbank LLC Hold (668168788)",
"grouping": "holding_account",
"columns": {
"online_status": "Online",
"_custom_test1_679151": 5045.08,
"direct_owner_id": "10710095",
"_custom_account_open_date_425913": "2021-05-14",
"display_name": "10 Madison LLC Hold",
"_custom_custodian_account_name_487351": "10 MADISON LLC | &HOLDING | LLC",
"_custom_portfolio_type_295600": "Cash Management",
"_custom_close_date_411160": null,
"bottom_level_holding_account_number": "668168788",
"_custom_new_target_cash_balance_gwl_453547": null,
"_custom_advisor_302684": "Advisory Name",
"_custom_jp_custodian_305769": "Custodian Name",
"_custom_management_style_295599": "Holding",
"_custom_target_cash_notes_341522": null,
"node_id": "10663945"
},
"children": []
}
]
}
}
},
"included": []
}
After reading your code for about half-hour, it is too complex for the structure and the syntax, which is not a good coding habit for you.
I'll try to understand as much as possible what you want to do.
Here is some advice for you.
If the json data you have downloaded and it super large, json path is not a good way to parse it, this will consume a lot of memory, or, If it is from requests like an HTTP response, json path is ok, cause, usually the data returned is not so large.
Fixing the order of columns and renaming it is not a suitable way. Cause you can not make sure the order is not the same every time, especially for the key of a dict object. Not to mention the content returned from the server, which means you can not control the content.
For your question
financial_accounts_df.loc[financial_accounts_df['Addepar_Direct_Owner_ID__c'].isin(financial_accounts_df['FinServ__SourceSystemId__c'].values),
'Addepar_Direct_Owner_ID__c'] = financial_accounts_df.loc[rows_to_remove, 'Addepar_Direct_Owner_ID__c'].to_numpy()
The first index financial_accounts_df['Addepar_Direct_Owner_ID__c'].isin(financial_accounts_df['FinServ__SourceSystemId__c'].values) is [True, False, False, False]
And the second index, rows_to_remove, is [False, True, True, False]
It is the main reason for your value error exception.
And, I don't know how you will handle the nest children element, but you have to make sure the number of Ture for each index is equal.
I am trying to parse data from the following JSON. I know there are differences between JSON_VALUE and JSON_QUERY but I am trying to just get the path structure correct. I am using the STRICT option to validate my path's and so far other then string$ for JSON Query everything fails by not finding the path. As soon as I add the .data.taskData the path seems to blow up. Any help would be greatly appreciated.
I am setting the following JSON to #json
declare #json nvarchar(max)
SELECT JSON_VALUE(#json, 'strict$.data.taskData.startedLocation') as json
select JSON_QUERY(#json, 'strict$.data.taskData.startedLocation') as json
Below is the JSON I am trying to parse
{"data.taskData":{"startedAtUtc":"2019-08-28T20:21:29.025Z","startedLocation":{"lat":60.7348366,"lon":-124.9856841},"additionalData":[],"bols":[{"number":"1234","product":{"id":"COFFEE","description":"GROUND COFFE 5LB CAN","plannedQuantity":1352,"uom":"PCS","supplier":"WALMART ","accountOf":"","class":"UNKNOWN","loadedQuantity":6600,"netQuantity":9993},"net":"9993"}],"compartments":[{"id":"1","capacity":3400,"commodity":null,"consignee":"KSUAC","plannedQuantity":0,"tankID":"1","additionalData":[],"allLoadsValid":true,"complete":true,"error":false,"locked":false,"loads":[{"isFirst":true,"quantity":"1000","bol":"1234"}],"loadedQuantity":1000,"productID":"COFFEE"},{"id":"2","capacity":2000,"commodity":null,"consignee":"KSUAC","plannedQuantity":0,"tankID":"2","additionalData":[],"allLoadsValid":true,"complete":true,"error":false,"locked":false,"loads":[{"isFirst":true,"quantity":"2000","bol":"1234"}],"loadedQuantity":2000,"productID":"COFFEE"},{"id":"3","capacity":1100,"commodity":null,"consignee":"KSUAC","plannedQuantity":0,"tankID":"3","additionalData":[],"allLoadsValid":true,"complete":true,"error":false,"locked":false,"loads":[{"isFirst":true,"quantity":"1100","bol":"1234"}],"loadedQuantity":1100,"productID":"COFFEE"},{"id":"4","capacity":2700,"commodity":null,"consignee":null,"plannedQuantity":0,"tankID":"4","additionalData":[],"allLoadsValid":true,"complete":true,"error":false,"locked":false,"loads":[{"isFirst":true,"quantity":"2500","bol":"1234"}],"loadedQuantity":2500,"productID":"COFFEE"}],"detention":{"minutes":null,"reasonCode":null,"notes":null},"initialCompartments":[{"id":"1","capacity":3400,"commodity":null,"consignee":null,"plannedQuantity":null,"tankID":"1"},{"id":"2","capacity":2000,"commodity":null,"consignee":null,"plannedQuantity":null,"tankID":"2"},{"id":"3","capacity":1100,"commodity":null,"consignee":null,"plannedQuantity":null,"tankID":"3"},{"id":"4","capacity":2700,"commodity":null,"consignee":null,"plannedQuantity":null,"tankID":"4"}],"loadingComplete":"yes","loadingCompleteTime":"2019-08-28T20:23:05.453Z","uom":{"key":"PCS","category":"volume","shortDisplay":"p","longDisplay":"Pieces","conversionFactors":{"gal":0.0625,"L":0.2365882365,"c":4.2267528377}},"variances":[],"completedAtUtc":"2019-08-28T20:23:06.703Z","completedLocation":{"lat":61.7348308,"lon":-124.9856879},"finalCompartments":[{"id":"1","capacity":3400,"productID":"COFFEE","loadedQuantity":1000,"consignee":"KSUAC","tankID":"1"},{"id":"2","capacity":2000,"productID":"COFFEE","loadedQuantity":2000,"consignee":"KSUAC","tankID":"2"},{"id":"3","capacity":1100,"productID":"COFFEE","loadedQuantity":1100,"consignee":"KSUAC","tankID":"3"},{"id":"4","capacity":2700,"productID":"COFFEE","loadedQuantity":2500,"consignee":null,"tankID":"4"}]}}
Try this out. After the code I'll comment about the specialities:
DECLARE #json NVARCHAR(MAX)=N'{
"data.taskData": {
"startedAtUtc": "2019-08-28T20:21:29.025Z",
"startedLocation": {
"lat": 60.7348366,
"lon": -124.9856841
},
"additionalData": [],
"bols": [
{
"number": "1234",
"product": {
"id": "COFFEE",
"description": "GROUND COFFE 5LB CAN",
"plannedQuantity": 1352,
"uom": "PCS",
"supplier": "WALMART ",
"accountOf": "",
"class": "UNKNOWN",
"loadedQuantity": 6600,
"netQuantity": 9993
},
"net": "9993"
}
],
"compartments": [
{
"id": "1",
"capacity": 3400,
"commodity": null,
"consignee": "KSUAC",
"plannedQuantity": 0,
"tankID": "1",
"additionalData": [],
"allLoadsValid": true,
"complete": true,
"error": false,
"locked": false,
"loads": [
{
"isFirst": true,
"quantity": "1000",
"bol": "1234"
}
],
"loadedQuantity": 1000,
"productID": "COFFEE"
},
{
"id": "2",
"capacity": 2000,
"commodity": null,
"consignee": "KSUAC",
"plannedQuantity": 0,
"tankID": "2",
"additionalData": [],
"allLoadsValid": true,
"complete": true,
"error": false,
"locked": false,
"loads": [
{
"isFirst": true,
"quantity": "2000",
"bol": "1234"
}
],
"loadedQuantity": 2000,
"productID": "COFFEE"
},
{
"id": "3",
"capacity": 1100,
"commodity": null,
"consignee": "KSUAC",
"plannedQuantity": 0,
"tankID": "3",
"additionalData": [],
"allLoadsValid": true,
"complete": true,
"error": false,
"locked": false,
"loads": [
{
"isFirst": true,
"quantity": "1100",
"bol": "1234"
}
],
"loadedQuantity": 1100,
"productID": "COFFEE"
},
{
"id": "4",
"capacity": 2700,
"commodity": null,
"consignee": null,
"plannedQuantity": 0,
"tankID": "4",
"additionalData": [],
"allLoadsValid": true,
"complete": true,
"error": false,
"locked": false,
"loads": [
{
"isFirst": true,
"quantity": "2500",
"bol": "1234"
}
],
"loadedQuantity": 2500,
"productID": "COFFEE"
}
],
"detention": {
"minutes": null,
"reasonCode": null,
"notes": null
},
"initialCompartments": [
{
"id": "1",
"capacity": 3400,
"commodity": null,
"consignee": null,
"plannedQuantity": null,
"tankID": "1"
},
{
"id": "2",
"capacity": 2000,
"commodity": null,
"consignee": null,
"plannedQuantity": null,
"tankID": "2"
},
{
"id": "3",
"capacity": 1100,
"commodity": null,
"consignee": null,
"plannedQuantity": null,
"tankID": "3"
},
{
"id": "4",
"capacity": 2700,
"commodity": null,
"consignee": null,
"plannedQuantity": null,
"tankID": "4"
}
],
"loadingComplete": "yes",
"loadingCompleteTime": "2019-08-28T20:23:05.453Z",
"uom": {
"key": "PCS",
"category": "volume",
"shortDisplay": "p",
"longDisplay": "Pieces",
"conversionFactors": {
"gal": 0.0625,
"L": 0.2365882365,
"c": 4.2267528377
}
},
"variances": [],
"completedAtUtc": "2019-08-28T20:23:06.703Z",
"completedLocation": {
"lat": 61.7348308,
"lon": -124.9856879
},
"finalCompartments": [
{
"id": "1",
"capacity": 3400,
"productID": "COFFEE",
"loadedQuantity": 1000,
"consignee": "KSUAC",
"tankID": "1"
},
{
"id": "2",
"capacity": 2000,
"productID": "COFFEE",
"loadedQuantity": 2000,
"consignee": "KSUAC",
"tankID": "2"
},
{
"id": "3",
"capacity": 1100,
"productID": "COFFEE",
"loadedQuantity": 1100,
"consignee": "KSUAC",
"tankID": "3"
},
{
"id": "4",
"capacity": 2700,
"productID": "COFFEE",
"loadedQuantity": 2500,
"consignee": null,
"tankID": "4"
}
]
}
}';
--The query
SELECT FirstLevel.StartedAtUtc
,JSON_VALUE(FirstLevel.startedLocation,'$.lat') AS startedLocation_Lat
,JSON_VALUE(FirstLevel.startedLocation,'$.lon') AS startedLocation_Lon
,FirstLevel.additionalData
,FirstLevel.bols
,Sub_Compartments.id
,Sub_Compartments.capacity
,FirstLevel.loadingComplete
FROM OPENJSON(#json,'$."data.taskData"')
WITH(startedAtUtc DATETIME2
,startedLocation NVARCHAR(MAX) AS JSON
,additionalData NVARCHAR(MAX) AS JSON
,bols NVARCHAR(MAX) AS JSON
--compartments seems to be a 1:n related node
,compartments NVARCHAR(MAX) AS JSON
,loadingComplete NVARCHAR(10)
--and more
) FirstLevel
OUTER APPLY OPENJSON(FirstLevel.compartments)
WITH (id INT
,capacity INT
--more columns
) Sub_Compartments;
The idea in short:
Your JSON is a fully blown deeply nested structure with various data. Returnin the whole and everything would lead to a very redundant flat table. It will be a good idea to query this JSON with a question as narrow as possible.
As you were told already we can use one of these:
JSON_VALUE() to retrieve a scalar value from a given path. Nested JSON will be returned as a string type.
JSON_QUERY() to extract a fragment of the JSON and proceed with it as JSON
OPENJSON is needed to dive into a JSON with repeated elements in order to return the fragments as a derived set row-by-row. Very important is the WITH-clause, which allows to change OPENJSON's default output to a side-by-side of columns (similar to PIVOT).
In this case we can use OPENJSON to dive into the first level and return the objects found there. Some of them are scalar values and can be returned as a typed value, others are JSON objects. In this case we have to use NVARCHAR(MAX) as data type and we must sepcify AS JSON in order to proceed with the return values.
In your JSON the compartments are a 1:n related set. We can use a cascade of OPENJSON calls, using the fragment returned by the first as input and use another WITH-clause to extract the compartement's columns.
I hope, that this exampe will give you enough hints, that you can query any place of your JSON. Good luck!
This works:
select JSON_QUERY(#json, 'strict$."data.taskData".startedLocation') as json
SELECT JSON_VALUE(#json, 'strict$."data.taskData".startedLocation.lat') as json
You need to consider the following:
When you want to extract JSON object or scalar value and your path begins with a dollar sign $ or has special characters in the keys, you need to surround it with quotes ""
Function JSON_QUERY extracts an object or an array from a JSON string. If the value is not an object or an array, the result is NULL in lax mode and an error in strict mode.
Function JSON_VALUE extracts a scalar value from a JSON string. If the path points to not a scalar value, the result is NULL in lax mode and an error in strict mode
When you want to parse JSON string and get results as table, use OPENJSON table-valued function.
With your sample data, you may try the following example:
DECLARE #json nvarchar(max) = N'{"data.taskData":{"startedAtUtc":"2019-08-28T20:21:29.025Z","startedLocation":{"lat":60.7348366,"lon":-124.9856841},"additionalData":[],"bols":[{"number":"1234","product":{"id":"COFFEE","description":"GROUND COFFE 5LB CAN","plannedQuantity":1352,"uom":"PCS","supplier":"WALMART ","accountOf":"","class":"UNKNOWN","loadedQuantity":6600,"netQuantity":9993},"net":"9993"}],"compartments":[{"id":"1","capacity":3400,"commodity":null,"consignee":"KSUAC","plannedQuantity":0,"tankID":"1","additionalData":[],"allLoadsValid":true,"complete":true,"error":false,"locked":false,"loads":[{"isFirst":true,"quantity":"1000","bol":"1234"}],"loadedQuantity":1000,"productID":"COFFEE"},{"id":"2","capacity":2000,"commodity":null,"consignee":"KSUAC","plannedQuantity":0,"tankID":"2","additionalData":[],"allLoadsValid":true,"complete":true,"error":false,"locked":false,"loads":[{"isFirst":true,"quantity":"2000","bol":"1234"}],"loadedQuantity":2000,"productID":"COFFEE"},{"id":"3","capacity":1100,"commodity":null,"consignee":"KSUAC","plannedQuantity":0,"tankID":"3","additionalData":[],"allLoadsValid":true,"complete":true,"error":false,"locked":false,"loads":[{"isFirst":true,"quantity":"1100","bol":"1234"}],"loadedQuantity":1100,"productID":"COFFEE"},{"id":"4","capacity":2700,"commodity":null,"consignee":null,"plannedQuantity":0,"tankID":"4","additionalData":[],"allLoadsValid":true,"complete":true,"error":false,"locked":false,"loads":[{"isFirst":true,"quantity":"2500","bol":"1234"}],"loadedQuantity":2500,"productID":"COFFEE"}],"detention":{"minutes":null,"reasonCode":null,"notes":null},"initialCompartments":[{"id":"1","capacity":3400,"commodity":null,"consignee":null,"plannedQuantity":null,"tankID":"1"},{"id":"2","capacity":2000,"commodity":null,"consignee":null,"plannedQuantity":null,"tankID":"2"},{"id":"3","capacity":1100,"commodity":null,"consignee":null,"plannedQuantity":null,"tankID":"3"},{"id":"4","capacity":2700,"commodity":null,"consignee":null,"plannedQuantity":null,"tankID":"4"}],"loadingComplete":"yes","loadingCompleteTime":"2019-08-28T20:23:05.453Z","uom":{"key":"PCS","category":"volume","shortDisplay":"p","longDisplay":"Pieces","conversionFactors":{"gal":0.0625,"L":0.2365882365,"c":4.2267528377}},"variances":[],"completedAtUtc":"2019-08-28T20:23:06.703Z","completedLocation":{"lat":61.7348308,"lon":-124.9856879},"finalCompartments":[{"id":"1","capacity":3400,"productID":"COFFEE","loadedQuantity":1000,"consignee":"KSUAC","tankID":"1"},{"id":"2","capacity":2000,"productID":"COFFEE","loadedQuantity":2000,"consignee":"KSUAC","tankID":"2"},{"id":"3","capacity":1100,"productID":"COFFEE","loadedQuantity":1100,"consignee":"KSUAC","tankID":"3"},{"id":"4","capacity":2700,"productID":"COFFEE","loadedQuantity":2500,"consignee":null,"tankID":"4"}]}}'
SELECT
JSON_QUERY(#json, 'strict $."data.taskData".startedLocation') AS StartedLocation,
JSON_VALUE(#json, 'strict $."data.taskData".startedLocation.lat') as Lat,
JSON_VALUE(#json, 'strict $."data.taskData".startedLocation.lon') as Lon
SELECT *
FROM OPENJSON(#json, 'strict $."data.taskData".compartments') AS Compartments
Output:
----------------------------------------------------------------
StartedLocation Lat Lon
----------------------------------------------------------------
{"lat":60.7348366,"lon":-124.9856841} 60.7348366 -124.9856841
----------------
key value type
----------------
0 {"id":"1","capacity":3400,"commodity":null,"consignee":"KSUAC","plannedQuantity":0,"tankID":"1","additionalData":[],"allLoadsValid":true,"complete":true,"error":false,"locked":false,"loads":[{"isFirst":true,"quantity":"1000","bol":"1234"}],"loadedQuantity":1000,"productID":"COFFEE"} 5
1 {"id":"2","capacity":2000,"commodity":null,"consignee":"KSUAC","plannedQuantity":0,"tankID":"2","additionalData":[],"allLoadsValid":true,"complete":true,"error":false,"locked":false,"loads":[{"isFirst":true,"quantity":"2000","bol":"1234"}],"loadedQuantity":2000,"productID":"COFFEE"} 5
2 {"id":"3","capacity":1100,"commodity":null,"consignee":"KSUAC","plannedQuantity":0,"tankID":"3","additionalData":[],"allLoadsValid":true,"complete":true,"error":false,"locked":false,"loads":[{"isFirst":true,"quantity":"1100","bol":"1234"}],"loadedQuantity":1100,"productID":"COFFEE"} 5
3 {"id":"4","capacity":2700,"commodity":null,"consignee":null,"plannedQuantity":0,"tankID":"4","additionalData":[],"allLoadsValid":true,"complete":true,"error":false,"locked":false,"loads":[{"isFirst":true,"quantity":"2500","bol":"1234"}],"loadedQuantity":2500,"productID":"COFFEE"} 5