I am working with a BQ table that has a format of a STRUCT of STRUCTs.
It looks like the table created by the script in the Edit below.
I would like to have a table which looks as follows:

property_hs_email_last_click_date_value | currentlyinworkflow_value | hs_first_engagement_object_id_value | hs_first_engagement_object_id_value__st
5/5/2022 23:00:00 | Y | 1 | 'Hey'
The challenge is that there are 500 fields and I would like to make this efficient instead of writing out every single line as follows:
SELECT property_hs_email_last_click_date.value as property_hs_email_last_click_date_value,
properties.currentlyinworkflow.value as currentlyinworkflow_value,
properties.hs_first_engagement_object_id.value as hs_first_engagement_object_id_value,
properties.hs_first_engagement_object_id.value__st as hs_first_engagement_object_id_value__st
Any suggestions on how to make this more efficient?
Edit:
Here's a query that creates a table such as this:
create or replace table `project.database.TestTable` (
property_hs_email_last_click_date STRUCT < value string >,
properties struct < currentlyinworkflow struct < value string > ,
hs_first_engagement_object_id struct < value numeric , value__st string >,
first_conversion_event_name struct < value string >
>
);
insert into `project.database.TestTable`
values (struct('12/2/2022 23:00:02'), struct(struct('Yes'), struct(1, 'Thursday'), struct('Festival')) );
insert into `project.database.TestTable`
values (struct('14/2/2021 12:00:02'), struct(struct('No'), struct(5, 'Friday'), struct('Phone')));
Below is a fairly generic script that extracts all leaves of the JSON representation of each row and then presents them as columns.
create temp function extract_keys(input string) returns array<string> language js as """
return Object.keys(JSON.parse(input));
""";
create temp function extract_values(input string) returns array<string> language js as """
return Object.values(JSON.parse(input));
""";
create temp function extract_all_leaves(input string) returns string language js as '''
function flattenObj(obj, parent = '', res = {}){
for(let key in obj){
let propName = parent ? parent + '.' + key : key;
if(typeof obj[key] == 'object'){
flattenObj(obj[key], propName, res);
} else {
res[propName] = obj[key];
}
}
return JSON.stringify(res);
}
return flattenObj(JSON.parse(input));
''';
create temp table temp_table as (
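-- serialize each row to JSON, flatten it to its leaf key/value pairs,
-- then explode keys and values pairwise by offset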
select offset, key, value, format('%t', t) row_id
from your_table t,
unnest([struct(to_json_string(t) as json)]),
unnest([struct(extract_all_leaves(json) as leaves)]),
unnest(extract_keys(leaves)) key with offset
join unnest(extract_values(leaves)) value with offset
using(offset)
);
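-- dynamically build and run a PIVOT over all distinct leaf keys
-- (dots in key paths become '__' so they form valid column names)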
execute immediate (select '''
select * except(row_id) from (select * except(offset) from temp_table)
pivot (any_value(value) for replace(key, '.', '__') in (''' || keys_list || '''
))'''
from (select string_agg('"' || replace(key, '.', '__') || '"', ',' order by offset) keys_list from (
select key, min(offset) as offset from temp_table group by key
))
);
If applied to the sample data in your question:
create temp table your_table as (
select struct('12/2/2022 23:00:02' as value) as property_hs_email_last_click_date ,
struct(
struct('Yes' as value) as currentlyinworkflow ,
struct(1 as value, 'Thursday' as value__st) as hs_first_engagement_object_id ,
struct('Festival' as value) as first_conversion_event_name
) as properties
union all
select struct('14/2/2021 12:00:02'), struct(struct('No'), struct(5, 'Friday'), struct('Phone'))
);
the output is (tracing the pivot logic through the two sample rows):
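property_hs_email_last_click_date__value | properties__currentlyinworkflow__value | properties__hs_first_engagement_object_id__value | properties__hs_first_engagement_object_id__value__st | properties__first_conversion_event_name__value
12/2/2022 23:00:02 | Yes | 1 | Thursday | Festival
14/2/2021 12:00:02 | No | 5 | Friday | Phone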
Related
I have a jsonb field (data) in PostgreSQL with a structure like:
{ "id" => { "some_key" => [1, 2, 3] } }
I need to migrate the value to a different field.
t.jsonb "data"
t.integer "portals", default: [], array: true
When I try to do this:
UPDATE table_name
SET portals = ARRAY[data -> '1' ->> 'portals']
WHERE id = 287766
It raises an error:
Caused by PG::DatatypeMismatch: ERROR: column "portals" is of type integer[] but expression is of type text[]
Here is one way to do it. But if you search the site, as you should have done, you will find more.
Schema
create table t (
data jsonb
);
insert into t values ('{"1" : { "k1" : [1,2,3,5]} }');
insert into t values ('{"2" : { "k2" : [4,5,6,7]} }');
create table i (
id int,
v int[]
);
Some tests
select data -> '1' -> 'k1'
from t
where data ? '1'
;
insert into i values(1,ARRAY[1,2,3]);
update i
set v = (select replace(replace(data -> '1' ->> 'k1', '[', '{'), ']', '}')::int[] from t where data ? '1')
where id = 1;
select * from i;
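which returns:

id | v
1 | {1,2,3,5}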
The above gets the array as text, as you did. After that, a couple of text replacements turn it into a literal that can be cast to an integer array.
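If you prefer, translate() does both substitutions in one call; a more compact variant of the same update (same tables as above):

update i
set v = (select translate(data -> '1' ->> 'k1', '[]', '{}')::int[] from t where data ? '1')
where id = 1;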
DB Fiddle
I have a table that looks like this:

col | data
keyA | {"value":false}
keyB | {"value":3}
keyC | {"value":{"paid":10,"unpaid":20}}
For keyA and keyB I can easily extract a single value with JSON_EXTRACT_SCALAR, but for keyC I would like to return multiple values and change the key name, so the final output looks like this:
keyA | false
keyB | 3
keyC-paid | 10
keyC-unpaid | 20
I know I can use UNNEST and JSON_EXTRACT to get multiple values and create additional rows, but I am unsure how to combine them and adjust the key column name as well.
An even more generic approach:
create temp function extract_keys(input string) returns array<string> language js as """
return Object.keys(JSON.parse(input));
""";
create temp function extract_values(input string) returns array<string> language js as """
return Object.values(JSON.parse(input));
""";
create temp function extract_all_leaves(input string) returns string language js as '''
function flattenObj(obj, parent = '', res = {}){
for(let key in obj){
let propName = parent ? parent + '.' + key : key;
if(typeof obj[key] == 'object'){
flattenObj(obj[key], propName, res);
} else {
res[propName] = obj[key];
}
}
return JSON.stringify(res);
}
return flattenObj(JSON.parse(input));
''';
select col || replace(replace(key, 'value', ''), '.', '-') as col, value
from your_table,
unnest([struct(extract_all_leaves(data) as json)]),
unnest(extract_keys(json)) key with offset
join unnest(extract_values(json)) value with offset
using(offset)
If applied to the sample data in your question, the output is:
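col | value
keyA | false
keyB | 3
keyC-paid | 10
keyC-unpaid | 20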
The benefit of this approach is that it is quite generic and can handle any level of nesting in the JSON.
For example, it handles data with one more level of nesting just as well.
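A minimal sketch with hypothetical sample data (reusing the extract_* functions defined above):

with your_table as (
  select 'keyC' as col, '{"value":{"paid":{"q1":10,"q2":15},"unpaid":20}}' as data
)
select col || replace(replace(key, 'value', ''), '.', '-') as col, value
from your_table,
unnest([struct(extract_all_leaves(data) as json)]),
unnest(extract_keys(json)) key with offset
join unnest(extract_values(json)) value with offset
using(offset)

which flattens every leaf, however deep:

col | value
keyC-paid-q1 | 10
keyC-paid-q2 | 15
keyC-unpaid | 20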
Try this one:
WITH sample AS (
SELECT 'keyA' AS col, '{"value":false}' AS data
UNION ALL
SELECT 'keyB' AS col, '{"value":3}' AS data
UNION ALL
SELECT 'keyC' AS col, '{"value":{"paid":10,"unpaid":20}}' AS data
)
SELECT col || IFNULL('-' || k, '') AS col,
IFNULL(v, JSON_VALUE(data, '$.value')) AS data
FROM (
SELECT col, data,
`bqutil.fn.json_extract_keys`(JSON_QUERY(data, '$.value')) AS keys,
`bqutil.fn.json_extract_values`(JSON_QUERY(data, '$.value')) AS vals
FROM sample
) LEFT JOIN UNNEST(keys) k WITH OFFSET ki
LEFT JOIN UNNEST(vals) v WITH OFFSET vi ON ki = vi;
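(bqutil.fn.json_extract_keys and bqutil.fn.json_extract_values are community UDFs from the public bigquery-utils dataset; they return the top-level keys and values of a JSON object as string arrays.)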
How can I replace values in a string with values that are in a table?
For example:
select *
into #t
from
(
select 'bla'c1,'' c2 union all
select 'table'c1,'TABLE' c2 union all
select 'value'c1,'000' c2 union all
select '...'c1,'' c2
)t1
declare @s nvarchar(max) = 'this my string and i want to replace all values that are in table #t'
I have some values in my table and I want to replace c1 with c2 in my string.
The result should be:
this my string and i want to replace all 000 that are in TABLE #t
UPDATE:
I solved it with a CLR function:
using System;
using Microsoft.SqlServer.Server;
using System.Data.SqlTypes;
using System.Data.Linq;
namespace ReplaceValues
{
public partial class Functions
{
[SqlFunction
(
//DataAccess = DataAccessKind.Read,
SystemDataAccess = SystemDataAccessKind.Read
)
]
public static string ReplaceValues(string row, string delimitator, string values, string replace/*, bool CaseSensitive*/)
{
//return row;
string[] tmp_values = values.Split(new string[] { delimitator }, StringSplitOptions.None);
string[] tmp_replace = replace.Split(new string[] { delimitator }, StringSplitOptions.None);
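// upper-case the row so matching is effectively case-insensitive
// (note this also upper-cases the parts of the string that are never replaced)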
row = row.ToUpper();
for (int i = 0; i < Math.Min(tmp_values.Length, tmp_replace.Length); i++)
{
row = row.Replace(tmp_values[i].ToUpper(), tmp_replace[i]);
}
return row;
}
}
}
And then:
select *
into #t
from
(
select 'value1'OldValue,'one'NewValue union all
select 'value2'OldValue,'two'NewValue union all
select 'value3'OldValue,'three'NewValue union all
select 'value4'OldValue,'four'NewValue
)t1
select dbo.ReplaceValues(t1.column,'|',t2.v,t2.r)
from MyTable t1
cross apply
(
select dbo.inlineaggr(i1.OldValue,'|',1,1)v,
dbo.inlineaggr(i1.NewValue,'|',1,1)r
from #t i1
)t2
I have to improve it to handle case sensitivity better, but performance is not bad.
(Also, 'inlineaggr' is a CLR aggregate I wrote years ago.)
You can do this via recursion. Assuming you have a table of find-replace pairs, you can number the rows and then use recursive cte:
create table #t(c1 nvarchar(100), c2 nvarchar(100));
insert into #t(c1, c2) values
('bla', ''),
('table', 'TABLE'),
('value', '000'),
('...', '');
declare @s nvarchar(max) = 'this my string and i want to replace all values that are in table #t';
with ncte as (
select row_number() over (order by (select null)) as rn, *
from #t
), rcte as (
select rn, replace(@s, c1, c2) as newstr
from ncte
where rn = 1
union all
select ncte.rn, replace(rcte.newstr, ncte.c1, ncte.c2)
from ncte
join rcte on ncte.rn = rcte.rn + 1
)
select *
from rcte
where rn = 4
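Note that rn = 4 is simply the number of rows in #t; with a varying number of find-replace pairs you could instead end with where rn = (select max(rn) from ncte).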
I want to generate a WITH clause/UNIONs — for the purpose of easily sharing small samples of data (10-20 rows).
I want to do that without creating tables or inserting rows.
Example:
Take a table or query like this:

ASSET_ID | VERTEX_NUM | X | Y
10 | 1 | 118.56 | 3.8
10 | 2 | 118.62 | 1.03
10 | 3 | 121.93 | 1.03

...and generate this:
with cte as(
select 10 as asset_id, 1 as vertex_num, 118.56 as x, 3.8 as y from dual
union all
select 10 as asset_id, 2 as vertex_num, 118.62 as x, 1.03 as y from dual
union all
select 10 as asset_id, 3 as vertex_num, 121.93 as x, 1.03 as y from dual)
--There are lots more rows. But it's too much work to write them all out.
select * from cte
Using SQL, how can I automatically generate a WITH clause/UNIONs from the resultset?
I believe there is OOTB export functionality in Toad that can do that. But I don't think SQL Developer, which is what I'm using, has any tools that can do it.
When attempting this with SQL, I think the main challenge is to loop through n columns. I'm not sure how to do that.
It would be easier to use xmltable (or json_table for Oracle 12+) for such purposes.
Example with xmltable:
Just aggregate all the required data into xmltype. You can use xmltype(cursor(select ... from ...)):
select xmltype(cursor(select * from test)) xml from dual;
or dbms_xmlgen.getxmltype(query_string):
select dbms_xmlgen.getxmltype('select * from test') xml from dual;
then you can use the returned XML with
xmltable('/ROWSET/ROW' passing xmltype(your_xml) columns ...)
Example:
select *
from xmltable(
'/ROWSET/ROW'
passing xmltype(q'[<?xml version="1.0"?>
<ROWSET>
<ROW>
<ASSET_ID>10</ASSET_ID>
<VERTEX_NUM>1</VERTEX_NUM>
<X>118.56</X>
<Y>3.8</Y>
</ROW>
<ROW>
<ASSET_ID>10</ASSET_ID>
<VERTEX_NUM>2</VERTEX_NUM>
<X>118.62</X>
<Y>1.03</Y>
</ROW>
</ROWSET>
]')
columns
asset_id,vertex_num,x,y
) test
Full example on DBFiddle: https://dbfiddle.uk/?rdbms=oracle_11.2&fiddle=036b718f2b18df898c3e3de722c97378
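For completeness, the two steps can also be composed directly, feeding the generated XML straight into xmltable (test stands in for your real table, as above):

select *
from xmltable(
'/ROWSET/ROW'
passing dbms_xmlgen.getxmltype('select * from test')
columns
asset_id,vertex_num,x,y
) test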
You could use dbms_sql to execute a query against your real table, interrogate the data types, and use that information to generate the CTE and its inner queries.
As a first stab:
create or replace procedure print_cte (p_statement varchar2) as
-- dbms_sql variables
l_c pls_integer;
l_col_cnt pls_integer;
l_rows pls_integer;
l_desc_t dbms_sql.desc_tab;
l_first_row boolean := true;
l_varchar2 varchar2(4000);
l_number number;
l_date date;
-- etc.
begin
-- ideally add some checks for p_statement being a simple query
l_c := dbms_sql.open_cursor;
dbms_sql.parse(c => l_c, statement => p_statement, language_flag => dbms_sql.native);
l_rows := dbms_sql.execute(c => l_c);
dbms_sql.describe_columns(c => l_c, col_cnt => l_col_cnt, desc_t => l_desc_t);
-- define columns, and output CTE columns at the same time
dbms_output.put('with cte (');
for i in 1..l_col_cnt loop
case l_desc_t(i).col_type
when 1 then
dbms_sql.define_column(c => l_c, position=> i, column => l_varchar2, column_size => 4000);
when 2 then
dbms_sql.define_column(c => l_c, position=> i, column => l_number);
when 12 then
dbms_sql.define_column(c => l_c, position=> i, column => l_date);
-- etc. plus else to skip or throw error for anything not handled
end case;
if i > 1 then
dbms_output.put(', ');
end if;
dbms_output.put('"' || l_desc_t(i).col_name || '"');
end loop;
dbms_output.put(') as (');
while dbms_sql.fetch_rows(c => l_c) > 0 loop
if (l_first_row) then
l_first_row := false;
else
dbms_output.put(' union all');
end if;
dbms_output.new_line;
for i in 1..l_col_cnt loop
if i = 1 then
dbms_output.put(' select ');
else
dbms_output.put(', ');
end if;
case l_desc_t(i).col_type
when 1 then
dbms_sql.column_value(c => l_c, position => i, value => l_varchar2);
dbms_output.put(q'[']' || l_varchar2 || q'[']');
when 2 then
dbms_sql.column_value(c => l_c, position => i, value => l_number);
dbms_output.put(l_number);
when 12 then
dbms_sql.column_value(c => l_c, position => i, value => l_date);
dbms_output.put(q'[to_date(']'
|| to_char(l_date, 'SYYYY-MM-DD HH24:MI:SS')
|| q'[', 'SYYYY-MM-DD HH24:MI:SS')]');
-- etc. plus else to skip or throw error for anything not handled
end case;
end loop;
dbms_output.put(' from dual');
dbms_output.new_line;
end loop;
dbms_output.put_line(')');
dbms_output.put_line('select * from cte;');
dbms_sql.close_cursor(c => l_c);
end print_cte;
/
and then you can do:
begin
print_cte('select * from your_table');
end;
/
which produces:
with cte ("ASSET_ID", "VERTEX_NUM", "X", "Y") as (
select 10, 1, 118.56, 3.8 from dual
union all
select 10, 2, 118.62, 1.03 from dual
union all
select 10, 3, 121.93, 1.03 from dual
)
select * from cte;
Your client has to be configured to handle dbms_output, of course.
As noted in the inline comments you should check the passed-in statement isn't going to do something nasty; and you need to add handling for other data types. This is just a starting point.
db<>fiddle
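As an illustration of extending it to more data types, a sketch of one extra branch for TIMESTAMP columns (dbms_sql type code 180; this assumes an l_timestamp timestamp variable declared alongside the others):

-- in the define-columns loop:
when 180 then
dbms_sql.define_column(c => l_c, position => i, column => l_timestamp);
-- and in the fetch loop:
when 180 then
dbms_sql.column_value(c => l_c, position => i, value => l_timestamp);
dbms_output.put(q'[to_timestamp(']'
|| to_char(l_timestamp, 'SYYYY-MM-DD HH24:MI:SS.FF')
|| q'[', 'SYYYY-MM-DD HH24:MI:SS.FF')]');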
I know my answer is not pure SQL or PL/SQL.
But I suggest using JavaScript to generate the CTE query, because the data is small.
JavaScript is much easier to write and maintain than SQL or PL/SQL in your case.
You can use this small script at the link below whenever you want (no additional editor needed):
https://jsfiddle.net/pLvgr8oh/
Or you can run the script in the Chrome browser if you are worried about security:
https://developer.chrome.com/docs/devtools/javascript/snippets/
function convertToSelect(tsv, firstRowIsColumn, columnsComma, typesComma) {
function getCol(column, value, type) {
// In case type is 'date', after colon is date format
const [ typeNew, dateFormat ] = type.split(':')
switch (typeNew) {
case 'string': return `'${value}' as ${column}`
case 'number': return `${value} as ${column}`
case 'date': return `to_date('${value}', '${dateFormat}') as ${column}`
}
}
const columns = columnsComma ? columnsComma.split(',') : []
const types = typesComma ? typesComma.split(',') : []
// Split row by '\r\n' or '\n'
const list = tsv.split(/\r*\n/)
const colCount = list[0].split(/\t/).length
let columnsNew = []
let typesNew = types
// If first row is column name
if (firstRowIsColumn) {
columnsNew = list[0].split(/\t/);
list.shift()
}
// If column name is specified then override column names in first row
if (columns.length > 0) {
columnsNew = columns
}
// If type is not specified set all type to 'string'
if (typesNew.length === 0) {
typesNew = [...Array(colCount)].map(t => 'string')
}
const rows = list.map(item => {
// [ '2 as F_INT', '2.223 as F_LONG'... ]
const cols = item
.split(/\t/)
.map((value, index) => {
return getCol(columnsNew[index], value, typesNew[index])
})
.join(', ')
// select 2 as F_INT, 2.223 as F_LONG... from dual
return ` select ${cols} from dual`
})
const selectUnion = rows.join(`
union all
`)
return `with cte as
(
${selectUnion}
)
select * from cte;
`
}
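// note: the sample data below must use real TAB characters between fields,
// since rows are split with /\t/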
const tsv = `F_INT F_LONG F_VARCHAR F_DATE
1 1.123 a 2022-12-01
2 2.223 b 2022-12-02
3 3.323 c 2022-12-03`
const firstRowIsColumn = true
const columnsComma = 'v_num,v_lng,v_str,v_date'
//const columnsComma = ''
const typesComma = 'number,number,string,date:YYYY-MM-DD'
//const typesComma = ''
const ret = convertToSelect(tsv, firstRowIsColumn, columnsComma, typesComma)
console.log(ret)
which generates (if the snippet doesn't mangle the tab characters):
with cte as
(
select 1 as v_num, 1.123 as v_lng, 'a' as v_str, to_date('2022-12-01', 'YYYY-MM-DD') as v_date from dual
union all
select 2 as v_num, 2.223 as v_lng, 'b' as v_str, to_date('2022-12-02', 'YYYY-MM-DD') as v_date from dual
union all
select 3 as v_num, 3.323 as v_lng, 'c' as v_str, to_date('2022-12-03', 'YYYY-MM-DD') as v_date from dual
)
select * from cte;
The following BigQuery code does not display guillemets (« and ») correctly. In the output of the code below, notice that the guillemets are 'translated' as &#xAB; and &#xBB;. The expected answer should preserve the current translation but replace &#xAB; with « and &#xBB; with ».
CREATE TEMP FUNCTION
decode(word string) AS ((
SELECT
IF
(STARTS_WITH(word, '&#x'),
safe.code_points_to_STRING(ARRAY(
SELECT
ifnull(SAFE_CAST(value AS int64),
ASCII(value))
FROM
UNNEST(SPLIT(REPLACE(word, '&#', '0'),';')) value
WHERE
NOT value = '' )),
word) ));
WITH
DATA AS (
SELECT
'Arabic' AS lang,
'https://www.elwatannews.com/news/details/5516935' AS url,
'تطورات «مذبحة أبو حزام ».. دفن 10 جثث وضبط 19 من عائلتي المجزرة' AS title)
SELECT
url,
lang,
(
SELECT
STRING_AGG(decode(chars), ''
ORDER BY
OFFSET
)
FROM
UNNEST(REGEXP_EXTRACT_ALL(title, r'(?:&#x.{3};)+|[^&]+')) chars
WITH
OFFSET
) AS translate
FROM
DATA
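The problem is in the regular expression: &#xAB; and &#xBB; are two-digit hex entities, so the pattern (?:&#x.{3};)+ never matches them; widening it to (?:&#x.{2,3};)+ makes the decoding catch them as well: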
CREATE TEMP FUNCTION
decode(word string) AS ((
SELECT
IF
(STARTS_WITH(word, '&#x'),
safe.code_points_to_STRING(ARRAY(
SELECT
ifnull(SAFE_CAST(value AS int64),
ASCII(value))
FROM
UNNEST(SPLIT(REPLACE(word, '&#', '0'),';')) value
WHERE
NOT value = '' )),
word) ));
WITH
DATA AS (
SELECT
'Arabic' AS lang,
'https://www.elwatannews.com/news/details/5516935' AS url,
'تطورات «مذبحة أبو حزام ».. دفن 10 جثث وضبط 19 من عائلتي المجزرة' AS title)
SELECT
# url,
lang,
(
SELECT
STRING_AGG(decode(chars), ''
ORDER BY
OFFSET
)
FROM
UNNEST(REGEXP_EXTRACT_ALL(title, r'(?:&#x.{2,3};)+|[^&]+')) chars
WITH
OFFSET
) AS translate
FROM
DATA
with the guillemets now decoded correctly in the output.
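The decode function handles these because REPLACE(word, '&#', '0') turns an entity such as &#xAB; into 0xAB, which SAFE_CAST parses as a hexadecimal INT64 (171), and code_points_to_string then maps it back to «.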