Convert String to Tuple in BigQuery - sql

I have a variable passed as an argument in BigQuery which is in the format "('a','b','c')"
-- Question snippet: var1 is a double-quoted string literal whose content is the
-- placeholder {0}; presumably the caller substitutes {0} into the query text before
-- running it (TODO confirm), so var1 holds the whole list as one string.
with vars as (
select "{0}" as var1,
)
-- where, {0} = "('a','b','c')"
To use it in BigQuery I need to make it a tuple ('a','b','c').
How can it be done?
Any alternate approach is also welcome.
Example:
-- Question example: vars.index is one string containing the whole list, so the
-- IN subquery compares each table index against that single string value.
with vars as (
select "('a','b','c')" as index
)
select * from `<some_other_db>.table` where index in (
select index from vars)
-- gives me empty results because index is now a string
Present output:
select * from <db_name>.table where index in "('a','b','c')"
Required output:
select * from <db_name>.table where index in ('a','b','c')

Below is for BigQuery Standard SQL
#standardSQL
-- Strip the parentheses and single quotes from the list string, split what is
-- left on commas, and keep only the table rows whose index is one of the items.
WITH vars AS (
  SELECT "('a','b','c')" AS var
),
wanted AS (
  -- one row per list element
  SELECT item
  FROM vars
  CROSS JOIN UNNEST(SPLIT(REGEXP_REPLACE(var, r'[()\']', ''))) AS item
)
SELECT *
FROM `<some_other_db>.table`
WHERE index IN (SELECT item FROM wanted)
You can test and play with the above using some dummy data, as in the example below:
#standardSQL
-- Self-contained demo: a four-row stand-in for the real table, filtered by the
-- items parsed out of the list string.
WITH vars AS (
  SELECT "('a','b','c')" AS var
),
`<some_other_db>.table` AS (
  SELECT 1 id, 'a' index UNION ALL
  SELECT 2, 'd' UNION ALL
  SELECT 3, 'c' UNION ALL
  SELECT 4, 'e'
),
wanted AS (
  -- parentheses and single quotes removed, then split on commas: one row per item
  SELECT item
  FROM vars
  CROSS JOIN UNNEST(SPLIT(REGEXP_REPLACE(var, r'[()\']', ''))) AS item
)
SELECT *
FROM `<some_other_db>.table`
WHERE index IN (SELECT item FROM wanted)
with output
Row id index
1 1 a
2 3 c

I think this does what you are asking for:
-- Remove the parentheses, split the list string on commas and spread the first
-- three pieces into struct fields f1..f3 according to their position.
-- Only the parentheses are removed, so each piece keeps its single quotes.
with vars as (
  select "('a','b','c')" as var1
)
select as struct
  max(if(pos = 0, piece, null)) as f1,
  max(if(pos = 1, piece, null)) as f2,
  max(if(pos = 2, piece, null)) as f3
from vars as v
cross join unnest(split(replace(replace(v.var1, '(', ''), ')', ''), ',')) as piece with offset as pos;

Related

Replace some variables by data of another table in sql oracle

I have a table with two columns
type
TXT
A
this is some text for %1 and %2
B
this is another step for %1
In a translation table I have the meaning of the variables %X, which looks like this:
Type
variable
descr
A
%1
#person1#
A
%2
#person2#
B
%1
#manager#
I want to replace all the variables in my first table with their descriptions, so the result has to look like this:
type
TXT
A
this is some text for #person1# and #person2#
B
this is another step for #manager#
I tried with a replace, but I couldn't figure out how to make it work.
To replace all variables you could use a recursive algorithm:
-- Replace every placeholder in data.txt with its description from translations,
-- applying one translation per recursion step.
with data(typ, txt) as (
  -- mock-up of the text table
  select 'A', 'this is some text for %1 and %2' from dual union all
  select 'B', 'this is another step for %1' from dual
),
translations(typ, var, description) as (
  -- mock-up of the translation table
  select 'A', '%1', '#person1#' from dual union all
  select 'A', '%2', '#person2#' from dual union all
  select 'B', '%1', '#manager#' from dual
),
rtranslations(typ, var, description, rn) as (
  -- Number the translations of each type. Longer placeholders come first so that
  -- '%1' cannot clobber the leading part of '%10'.
  select t.typ, t.var, t.description,
         row_number() over (partition by t.typ order by length(t.var) desc, t.var) as rn
  from translations t
),
replacecte(typ, txt, replaced_txt, rn) as (
  -- Step 0: the untouched text. Starting here keeps types that have no translation.
  -- The cast widens the column, whose type is fixed by this anchor branch, so the
  -- longer replaced strings fit.
  select d.typ, d.txt, cast(d.txt as varchar2(4000)), 0
  from data d
  union all
  -- Step n: apply translation number n on top of the result of step n-1.
  select r.typ, r.txt, replace(r.replaced_txt, t.var, t.description), t.rn
  from replacecte r
  join rtranslations t on t.typ = r.typ and t.rn = r.rn + 1
)
select r.typ, r.txt, r.replaced_txt
from replacecte r
-- Keep the row where every translation of the type has been applied. Counting the
-- translations, rather than the '%' characters in the text, stays correct when the
-- text has a literal '%', repeats a placeholder, or does not use every translation.
where r.rn = (select count(*) from translations t where t.typ = r.typ)
;
You can also do it this way without recursion. data and descr are of course just mock ups for your tables, you would not need any WITH clauses. This method uses the steps (1) break up the sentences into words, (2) outer join using those words to your description table, replacing any matches with the description values, (3) reassemble the words back into sentences using LISTAGG.
-- Replace placeholders without recursion: (1) break each text into words,
-- (2) outer join the words to the description table, (3) glue the words back
-- together with LISTAGG in their original order.
WITH data AS (
  -- mock-up of the text table
  SELECT 'A' type, 'this is some text for %1 and %2' txt FROM dual
  UNION ALL
  SELECT 'B' type, 'this is another step for %1' txt FROM dual
),
descr AS (
  -- mock-up of the description table
  SELECT 'A' type, '%1' variable, '#person1#' description FROM dual
  UNION ALL
  SELECT 'A' type, '%2' variable, '#person2#' description FROM dual
  UNION ALL
  SELECT 'B' type, '%1' variable, '#manager#' description FROM dual
),
positions AS (
  -- Word positions 1..N, where N is the word count of the longest text
  -- (number of spaces + 1). Deriving N from the data avoids a fixed cap that
  -- would silently truncate longer texts.
  SELECT LEVEL AS seq
  FROM (SELECT MAX(REGEXP_COUNT(txt, ' ')) + 1 AS max_words FROM data)
  CONNECT BY LEVEL <= max_words
),
words AS (
  -- The seq-th word lies between the seq-th and (seq+1)-th space of ' '||txt||' '.
  -- Positions past the last word give a negative length, so word is NULL there.
  SELECT d.type,
         SUBSTR(' '||d.txt,
                INSTR(' '||d.txt, ' ', 1, p.seq) + 1,
                INSTR(' '||d.txt||' ', ' ', 1, p.seq + 1) - (INSTR(' '||d.txt, ' ', 1, p.seq) + 1)) AS word,
         p.seq
  FROM data d
  CROSS JOIN positions p
)
SELECT w.type,
       -- LISTAGG skips the NULL words produced for unused positions
       LISTAGG(NVL(ds.description, w.word), ' ') WITHIN GROUP (ORDER BY w.seq) txt
FROM words w
LEFT JOIN descr ds
  ON ds.type = w.type
 AND ds.variable = w.word
GROUP BY w.type
You could use PIVOT to turn the var values from rows into columns (getting all vars on the same row as the text) and then apply multiple replaces, depending on the number of var values:
-- Pivot the descriptions so that each A_TYPE has its '%1', '%2' and '%3' values on one
-- row (V1, V2, V3), then substitute them into the text.
-- The CASE picks how many REPLACE calls to nest from the highest-numbered description
-- that is present, so absent trailing placeholders are never passed to REPLACE with a
-- NULL replacement (which would remove them from the text).
SELECT t.A_TYPE,
CASE WHEN d.V3 Is Not Null THEN REPLACE(REPLACE(REPLACE(t.TXT, '%1', d.V1), '%2', d.V2), '%3', d.V3)
WHEN d.V2 Is Not Null THEN REPLACE(REPLACE(t.TXT, '%1', d.V1), '%2', d.V2)
WHEN d.V1 Is Not Null THEN REPLACE(t.TXT, '%1', d.V1)
ELSE t.TXT
END "TXT"
FROM tbl t
INNER JOIN ( SELECT *
FROM ( Select A_TYPE, VAR, DESCRIPTION FROM descr )
-- V3 must pivot on '%3'; pivoting on '%' matches no VAR, leaving V3 always NULL
PIVOT ( MAX(DESCRIPTION) For VAR IN('%1' "V1", '%2' "V2", '%3' "V3") )
) d ON(d.A_TYPE = t.A_TYPE)
With sample data as:
-- Sample data to put in front of the PIVOT query:
--   tbl   : texts containing placeholders
--   descr : description for each placeholder, per A_TYPE
WITH
    tbl (A_TYPE, TXT) AS (
        SELECT 'A', 'this is some text for %1 and %2' FROM dual
        UNION ALL
        SELECT 'B', 'this is another step for %1' FROM dual
    ),
    descr (A_TYPE, VAR, DESCRIPTION) AS (
        SELECT 'A', '%1', '#person1#' FROM dual
        UNION ALL
        SELECT 'A', '%2', '#person2#' FROM dual
        UNION ALL
        SELECT 'B', '%1', '#manager#' FROM dual
    )
... the result should be
A_TYPE TXT
------ -----------------------------------------------
A this is some text for #person1# and #person2#
B this is another step for #manager#

Oracle JSON output group by key

I generate SQL statements dynamically from the input file and I want to have the output in JSON format grouped by a key which I provide in an alias in the select statement.
The input file comes from another system and it looks like this:
'abc' AS [x.test1],
'cde' AS [y.test2],
'fgh' AS [y.test3]
In SQL Server I have a working query like this:
-- SQL Server reference query: the inner SELECT is rendered with FOR JSON PATH and
-- exposed as the single column RESULT; the dotted aliases carry the grouping key.
SELECT
    (
        SELECT
            'abc' AS [x.test1],
            'cde' AS [y.test2],
            'fgh' AS [y.test3]
        FROM "dbo"."TEST"
        FOR JSON PATH, WITHOUT_ARRAY_WRAPPER
    ) AS RESULT
It returns this output which is grouped by key and this is working perfectly:
{"x":{"test1":"abc"},"y":{"test2":"cde","test3":"fgh"}}
I want to achieve the same output with oracle.
Currently, I got to here:
-- Oracle attempt from the question: every key is passed as a plain string literal,
-- so json_object emits the bracketed, dotted names as flat top-level keys.
SELECT
(
SELECT json_object(
KEY '[x.test1]' IS 'abc',
KEY '[y.test2]' IS 'cde',
KEY '[y.test3]' IS 'fgh'
)
FROM test
)
AS RESULT from DUAL;
Problem is that this doesn't group my output by the key:
{"[x.test1]":"abc","[y.test2]":"cde","[y.test3]":"fgh"}
You could nest json_object() calls:
-- Build the grouping by hand: one inner json_object per top-level key.
SELECT
    json_object(
        'x' VALUE json_object('test1' VALUE 'abc'),
        'y' VALUE json_object(
            'test2' VALUE 'cde',
            'test3' VALUE 'fgh'
        )
    ) AS RESULT
FROM DUAL;
RESULT
{"x":{"test1":"abc"},"y":{"test2":"cde","test3":"fgh"}}
fiddle
Or as you refer to grouping, if your data is really coming from tables, you could use json_objectagg() and the table data, with something like:
-- Table-driven variant. Tables x and y are not shown here; the query reads
-- id, j_key and j_value from both.
-- For each x row, the "x" object is built from that row's key/value and the "y"
-- object aggregates the key/value pairs of all y rows joined on the same id.
select json_object(
'x' value json_object(x.j_key value x.j_value),
'y' value json_objectagg(y.j_key, y.j_value)
) as result
from x
left join y on y.id = x.id
-- grouping by all x columns used above gives one output row per x row
group by x.id, x.j_key, x.j_value
RESULT
{"x":{"test1":"abc"},"y":{"test2":"cde","test3":"fgh"}}
fiddle
-- Builds the grouped JSON directly from the raw alias list ('value' AS [key.subkey], ...).
-- data: the raw input as a single string
WITH data (expr) AS (
SELECT q'~'abc' AS [x.test1],'cde' AS [y.test2],'fgh' AS [y.test3]~' FROM DUAL
),
-- rdata: one row per comma-separated expression
rdata(expr) AS (
SELECT regexp_substr(expr,'[^,]+',1,LEVEL) AS expr
FROM data
CONNECT BY regexp_substr(expr,'[^,]+',1,LEVEL) IS NOT NULL
),
-- exprs: val is the text between the single quotes, path the text between the brackets
exprs AS (
SELECT expr, regexp_substr(expr, q'~'(.*)'~', 1, 1, 'i', 1) as val,
regexp_substr(expr, q'~\[(.*)\]~', 1, 1, 'i', 1) as path
FROM rdata
),
-- spaths: one row per dot-separated path item; lvl is the item's position in the path.
-- "prior val = val" keeps each expansion within rows that share the same val, and
-- "PRIOR sys_guid() IS NOT NULL" makes every row unique so no cycle is reported.
-- NOTE(review): this relies on val being distinct per expression -- confirm for real input.
spaths AS (
SELECT e.*, LEVEL as lvl, regexp_substr(path,'[^\.]+',1,LEVEL) as pitem
FROM exprs e
CONNECT BY regexp_substr(path,'[^\.]+',1,LEVEL) IS NOT NULL AND prior val = val AND PRIOR sys_guid() IS NOT NULL
)
-- Pair each first path item (s) with the second path item (p) of the same expression
-- and aggregate the second-level key/value pairs per first-level key.
SELECT json_object(
s.pitem VALUE json_objectagg(
p.pitem VALUE p.val
)
) AS js
FROM spaths s
JOIN spaths p ON s.val = p.val AND p.lvl = 2
WHERE s.lvl = 1
-- one output row (one JSON document) per first-level key
GROUP BY s.pitem
;
JS
{"x":{"test1":"abc"}}
{"y":{"test2":"cde","test3":"fgh"}}

Bigquery SQL to convert single value string to multi value based on separator/prefix

I have a column value string with a + or - prefix, as below:
id val
1 +a+b+c-d-e-f+g
Now based on + or - separator I need to build the dataset as follows :
id new_val prefix
1 a +
1 b +
1 c +
1 d -
1 e -
1 f -
1 g +
Also, the string is not of fixed length, i.e. it can continue with either separator (+ or -), and this differs between rows.
Any guide on big-query SQL to do this transformation would be helpful.
Update :
I am using this query, but some values are missing:
-- Question's attempt: split once on '+' and once on '-', tagging each piece with the
-- sign it was split on.
-- Each branch splits on one sign only, so a piece that still contains the other sign
-- (e.g. 'c-d-f') is longer than one character and is removed by the length filter.
with mytable as (
select 1 as id, '+a+b+c-d-f+g' as val1,
)
select * from (
select id, new_val1 , '+' symbol
from mytable, unnest(split(val1, '+')) as new_val1 WITH OFFSET AS val1_offset
union all
select id, new_val1 , '-' symbol
from mytable, unnest(split(val1, '-')) as new_val1 WITH OFFSET AS val1_offset
) where length(new_val1) = 1 and new_val1 is not null
Consider below approach
-- Pull out every "sign followed by non-sign characters" token, then separate the
-- leading sign (prefix) from the rest (new_val).
select
  t.id,
  substr(token, 2) as new_val,
  left(token, 1) as prefix
from `project.dataset.table` as t
cross join unnest(regexp_extract_all(t.val, r'[+-][^+-]+')) as token
If applied to sample data in your question - output is
The split into substrings can be done by first inserting an additional separator that the string does not contain:
-- Put a ';' in front of every sign, drop the leading ';', split on ';' and then
-- peel the sign off each resulting token.
with src as (
  select 1 as id, "+a+b+c-d-e-f+g" as val
),
marked as (
  select
    id,
    split(substr(replace(replace(val, '-', ';-'), '+', ';+'), 2), ';') as val_tmp
  from src
)
select
  m.id,
  substr(vals, 2) as new_val,
  substr(vals, 1, 1) as prefix
from marked as m
cross join unnest(m.val_tmp) as vals
If you have more than + and -, regex would be a better option:
-- Regex variant of the inner SELECT: REGEXP_REPLACE puts ';' in front of each run of sign
-- characters (\\1 re-inserts the matched run); more signs only need adding to the class.
SELECT id, split(substr(REGEXP_REPLACE(val,r"([+-]+)", ";\\1"),2) ,';') as val_tmp

Set variable with an array element

I have a table with apps versions (v1.1.1, v1.1.2, v1.10.1, etc.).
Using REGEXP_EXTRACT_ALL, I get arrays with the numbers as elements.
Any idea why I can't set the max of each element to a variable?
This is the code I use:
-- Question's attempt: take the second number of every app_version and store the maximum in x.
-- NOTE(review): x has no declared type, so it takes INT64 from DEFAULT 0, while the
-- elements returned by REGEXP_EXTRACT_ALL are STRING -- confirm whether a CAST is needed.
DECLARE x DEFAULT 0;
SET x = (
SELECT
max(REGEXP_EXTRACT_ALL(app_version, "\\d+")[SAFE_ORDINAL(2)])
FROM
-- NOTE(review): the table name is in single quotes (a string literal), not backticks
'table_with_app_version');
Thanks
Below is for BigQuery Standard SQL
#standardsql
-- Turns a version string into a form that sorts correctly as plain text.
--   semanticversion: version text such as 'v1.10.1' or 'v1.0.0-beta.2'
--   returns: the same text with every run of digits left-padded with zeros to 8
--            characters, followed by the suffix '..zzzzzzzzzzzzzz'
-- The suffix makes a plain release compare higher than its pre-releases: the '.'
-- after the last number is greater than the '-' that starts a pre-release tag.
create temp function normaizedsemanticversion(semanticversion string)
as ((
  select string_agg(
      -- greatest() keeps the repeat count non-negative for digit runs longer than 8
      if(isdigit, repeat('0', greatest(8 - length(chars), 0)) || chars, chars),
      -- A separator and the digit run after it share one grp. Ordering by isdigit
      -- as well puts the separator (false) first, making the result deterministic.
      '' order by grp, isdigit
    ) || '..zzzzzzzzzzzzzz'
  from (
    -- one row per separator character and one row per run of digits
    select grp, isdigit, string_agg(char, '' order by pos) as chars
    from (
      select pos, char, isdigit,
        -- running count of non-digit characters: each non-digit starts a new group
        -- that also contains the digits directly following it
        countif(not isdigit) over (order by pos) as grp
      from unnest(split(semanticversion, '')) as char with offset as pos,
        unnest([char in ('1','2','3','4','5','6','7','8','9','0')]) as isdigit
    )
    group by grp, isdigit
  )
));
-- Compares two version strings after normalizing both with normaizedsemanticversion.
--   returns: 'v2 newer than v1', 'v1 newer than v2' or 'same versions'
create temp function comparesemanticversions(
  normsemanticversion1 string,
  normsemanticversion2 string)
as (
  case
    when normaizedsemanticversion(normsemanticversion1)
       < normaizedsemanticversion(normsemanticversion2) then 'v2 newer than v1'
    when normaizedsemanticversion(normsemanticversion1)
       > normaizedsemanticversion(normsemanticversion2) then 'v1 newer than v2'
    else 'same versions'
  end
);
-- Pick the newest version from the sample list: order by the normalized form,
-- newest first, and keep only the first value.
WITH test AS (
  SELECT 'v1.0.0-alpha' version UNION ALL
  SELECT 'v1.0.0-alpha.1' UNION ALL
  SELECT 'v1.0.0-alpha.beta' UNION ALL
  SELECT 'v1.0.0-beta' UNION ALL
  SELECT 'v1.0.0-beta.2' UNION ALL
  SELECT 'v1.0.0-beta.11' UNION ALL
  SELECT 'v1.0.0-rc.1' UNION ALL
  SELECT 'v1.0.0' UNION ALL
  SELECT 'v1.1.1' UNION ALL
  SELECT 'v1.1.2' UNION ALL
  SELECT 'v1.10.1'
)
SELECT
  STRING_AGG(version ORDER BY normaizedsemanticversion(version) DESC LIMIT 1)
FROM test
with output
As alternative you can use below variation of final select statement
-- Same result without aggregation: sort newest first and keep the first row.
SELECT version
FROM test
ORDER BY normaizedsemanticversion(version) DESC
LIMIT 1

Find way for gathering data and replace with values from another table

I am looking for an Oracle SQL query to find a specific pattern and replace them with values from another table.
Scenario:
Table 1:
No column1
-----------------------------------------
12345 user:12345;group:56789;group:6785;...
Note: column1 may contain one or more such patterns.
Table2 :
Id name type
----------------------
12345 admin user
56789 testgroup group
Result must be the same
No column1
-----------------------------------
12345 user: admin;group:testgroup
Logic:
First split the concatenated string to individual rows using connect
by clause and regex.
Join the newly created table(split_tab) with Table2(tab2).
Use listagg function to concatenate data in the columns.
Query:
-- Split column1 into its type:id pairs, look each pair up in tab2 and rebuild the
-- string as type:name pairs. Pairs without a match in tab2 are left out.
WITH tab1 AS (
    -- mock-up of Table 1
    SELECT '12345' no,
           'user:12345;group:56789;group:6785;' column1
    FROM dual
),
tab2 AS (
    -- mock-up of Table 2
    SELECT 12345 id, 'admin' name, 'user' type FROM dual
    UNION ALL
    SELECT 56789 id, 'testgroup' name, 'group' type FROM dual
),
split_tab AS (
    -- One row per type:id pair. Both parts come from the same regex match, so they
    -- stay paired even if the string contains other letters or digits.
    SELECT no,
           LEVEL AS pos,
           REGEXP_SUBSTR(column1, '([a-z]+):(\d+)', 1, LEVEL, NULL, 1) AS category,
           TO_NUMBER(REGEXP_SUBSTR(column1, '([a-z]+):(\d+)', 1, LEVEL, NULL, 2)) AS id
    FROM tab1
    CONNECT BY LEVEL <= REGEXP_COUNT(column1, '[a-z]+:\d+')
           -- Keep each expansion inside its own source row; without this guard the row
           -- count multiplies as soon as tab1 holds more than one row.
           AND PRIOR no = no
           AND PRIOR SYS_GUID() IS NOT NULL
)
SELECT s.no,
       -- ordering by pos keeps the pairs in the order they had in the original string
       LISTAGG(s.category || ':' || t.name, ';') WITHIN GROUP (ORDER BY s.pos) column1
FROM split_tab s
INNER JOIN tab2 t
    ON t.id = s.id
   -- match the type too, so a user id is never resolved through a group row
   AND t.type = s.category
GROUP BY s.no
Output:
No Column1
12345 user:admin;group:testgroup
-- Replaces each type:id substring of t1.col with type:name from the lookup table,
-- keeping substrings that have no lookup entry unchanged.
with t1 (no, col) as
(
-- start of test data
select 1, 'user:12345;group:56789;group:6785;' from dual union all
select 2, 'user:12345;group:56789;group:6785;' from dual
-- end of test data
)
-- the lookup table which has the substitute strings
-- nid : type and id joined with ':' (e.g. user:12345), the form used in t1.col
-- tname : type and name joined with ':', the substitute for each nid
, t2 (id, name, type, nid, tname) as
(
select t.*, type || ':' || id, type || ':' || name from
(
select 12345 id, 'admin' name, 'user' type from dual union all
select 56789, 'testgroup', 'group' from dual
) t
)
-- debugging aid: select * from t2;
-- cte table calculates the indexes for the substrings (eg, user:12345)
-- no : sequence no in t1
-- col : the input string in t1
-- si : starting index of each substring in the 'col' input string
-- ei : index of the ';' that ends the substring, or length(col)+1 when there is none
-- idx : the position of the substring, used to put them back together in order later
,cte (no, col, si, ei, idx) as
(
-- anchor: the first substring starts at 1 and ends at the first ';'
select no, col, 1, case when instr(col,';') = 0 then length(col)+1 else instr(col,';') end, 1 from t1 union all
-- recursion: the next substring starts right after the previous ';'; stop at the end of col
select no, col, ei+1, case when instr(col,';', ei+1) = 0 then length(col)+1 else instr(col,';', ei+1) end, idx+1 from cte where ei + 1 <= length(col)
)
-- coll : each substring (sstr) together with the string that replaces it (newstr)
,coll(no, col, sstr, idx, newstr) as
(
select
a.no, a.col, a.sstr, a.idx,
-- when a substitute is not found in t2, use the same input substring (eg. group:6785)
case when t2.tname is null then a.sstr else t2.tname end
from
-- ei-si is the substring length; the ';' at position ei is excluded
(select cte.*, substr(col, si, ei-si) as sstr from cte) a
-- we don't want to miss if there is no substitute available in t2 for a substring
left outer join
t2
on (a.sstr = t2.nid)
)
-- reassemble the substrings of each input row in their original order
select no, col, listagg(newstr, ';') within group (order by no, col, idx) from coll
group by no, col;