Generate CASE WHEN statement using another table - google-bigquery

I would like to create a query that does the following:
Using a regex_mapping table, find all rows in the sample data that REGEXP_MATCH on x
WITH sample_data AS (
SELECT x, y
FROM (SELECT "asd rmkt asdf" AS x, true AS y UNION ALL -- should map to remekartier
SELECT "as asdf", true UNION ALL -- should map to ali sneider
SELECT "asdafsd", false) -- should map to NULL
),
regex_mapping AS (
SELECT regex, map
FROM (SELECT "as" AS regex, "ali sneider" AS map UNION ALL
SELECT "rmkt" AS regex, "remekartier" AS map )
)
SELECT sample_data.*, mapped_item
FROM sample_data
-- but here, use multiple REGEXP_MATCH with CASE WHEN looping over the regex_mappings.
-- e.g. CASE WHEN REGEXP_MATCH(x, "as") THEN "ali sneider"
WHEN REGEXP_MATCH(x, "rmkt") THEN "remakrtier" END AS mapped_item)

Try this -
WITH sample_data AS (
SELECT x, y
FROM (SELECT "asd rmkt asdf" AS x, true AS y UNION ALL -- should map to remekartier
SELECT "as asdf", true UNION ALL -- should map to ali sneider
SELECT "asdafsd", false)
),
regex_mapping AS (
SELECT regex, map
FROM (SELECT "as" AS regex, "ali sneider" AS map UNION ALL
SELECT "rmkt" AS regex, "remekartier" AS map )
)
SELECT s.*, r.map
FROM sample_data s, regex_mapping r
WHERE regexp_contains(s.x,concat('\\b',r.regex,'\\b'))
The results ->
Second way: Instead of cross-join, use a scalar subquery. I have used limit so that the subquery doesn't return more than 1 row and if multiple regexp matches, then it will return only one of them
--- same WITH clause as above query ---
SELECT s.*, (SELECT r.map
FROM regex_mapping r
WHERE regexp_contains(s.x,concat('\\b',r.regex,'\\b'))
LIMIT 1) as map
FROM sample_data s
The results ->
Third way: Deduplicated Data
WITH sample_data AS (
SELECT campaign_name, placement_name
FROM (SELECT "as_rmkt_asdf" AS campaign_name, "xdd" AS placement_name UNION ALL -- should map to remekartier
SELECT "as_asdf", "sdfsdf" UNION ALL -- should map to ali sneider
SELECT "as_rmkt_dafsd", "sdfg" UNION ALL -- should map to rmkt
SELECT "asf_adsdf", "gdf" -- should map to NULL (because higher priority)
)
),
regex_mapping AS (
SELECT regex, map, priority
FROM (SELECT "rmkt" AS regex, "remekartier" AS map, 1 AS priority UNION ALL
SELECT "as" AS regex, "ali sneider" AS map, 2 AS priority)
),
X AS (
SELECT s.*,
CASE WHEN regexp_contains(s.campaign_name, concat('(^|_)',r.regex,'($|_)')) THEN r.map ELSE NULL END AS map,
ROW_NUMBER() OVER (PARTITION BY s.campaign_name ORDER BY regexp_contains(s.campaign_name, concat('(^|_)',r.regex,'($|_)')) DESC, r.priority) AS rn
FROM sample_data s
CROSS JOIN regex_mapping r
)
SELECT * EXCEPT (rn)
FROM X
WHERE rn = 1

Related

how to delete repeated values in UNION query using count

Hello I have been trying to delete a repeated value on the following UNION query with the following results (image). How can I filter out the value LW_ID=8232 with AANTALLN =0. I need to find a way taht if in the first query AANTALLN >0 is found, then on the second part of the union query not insert it again. Thanks "
With LESEENHEIDLOOPBAAN as (
SELECT
LE_AGENDA_FK,
LE_CODE,
LE_ID,
LE_KLAS_FK,
LE_KLASPARTITIE_FK,
LE_OMSCHRIJVING,
LE_VERANDERDDOOR,
LE_VERANDERDOP,
Count(LH_ID) As AantalLln
FROM
LESEENHEID
INNER JOIN LOOPBAANLESEENHEID on (LH_LESEENHEID_FK = LE_ID)
INNER JOIN LOOPBAAN ON (LH_LOOPBAAN_FK = LB_ID)
WHERE
(
'2022/09/28' BETWEEN LB_VAN
AND LB_TOT
)
AND (
LE_ID in (8277, 8276, 8232)
)
GROUP BY
LE_AGENDA_FK,
LE_CODE,
LE_ID,
LE_KLAS_FK,
LE_KLASPARTITIE_FK,
LE_OMSCHRIJVING,
LE_VERANDERDDOOR,
LE_VERANDERDOP
),
LESEENHEIDLOOPBAANNULL AS (
SELECT
LE_AGENDA_FK,
LE_CODE,
LE_ID,
LE_KLAS_FK,
LE_KLASPARTITIE_FK,
LE_OMSCHRIJVING,
LE_VERANDERDDOOR,
LE_VERANDERDOP,
0 As AantalLln
FROM
LESEENHEID
where
LE_ID in (8277, 8276, 8232)
and EXISTS (
SELECT
*
FROM
LESEENHEIDLOOPBAAN
)
)
SELECT
*
FROM
LESEENHEIDLOOPBAAN
UNION
SELECT
*
FROM
LESEENHEIDLOOPBAANNULL ROWS 1000
Try this out using ROW_NUMBER:
SELECT * FROM (
SELECT
ROW_NUMBER () OVER (PARTITION BY LW_ID ORDER BY AANTALLN DESC) AS RN
,* FROM
(
SELECT * FROM
LESEENHEIDLOOPBAAN
UNION
SELECT
*
FROM
LESEENHEIDLOOPBAANNULL ROWS 1000
)
)
) WHERE RN = 1
This way you eliminate the duplicates.

Set variable with an array element

I have a table with apps versions (v1.1.1, v1.1.2, v1.10.1, etc.).
Using REGEXP_EXTRACT_ALL, I have an arrays with the numbers as elements.
Any idea why I can't set the max of each element to a variable?
This is the code I use:
DECLARE x DEFAULT 0;
SET x = (
SELECT
max(REGEXP_EXTRACT_ALL(app_version, "\\d+")[SAFE_ORDINAL(2)])
FROM
'table_with_app_version');
Thanks
Below is for BigQuery Standard SQL
#standardsql
create temp function normaizedsemanticversion(semanticversion string)
as ((
select string_agg(
if(isdigit, repeat('0', 8 - length(chars)) || chars, chars), '' order by grp
) || '..zzzzzzzzzzzzzz'
from (
select grp, isdigit, string_agg(char, '' order by offset) chars,
from (
select offset, char, isdigit,
countif(not isdigit) over(order by offset) as grp
from unnest(split(semanticversion, '')) as char with offset,
unnest([char in ('1','2','3','4','5','6','7','8','9','0')]) isdigit
)
group by grp, isdigit
)));
create temp function comparesemanticversions(
normsemanticversion1 string,
normsemanticversion2 string)
as ((
select
case
when v1 < v2 then 'v2 newer than v1'
when v1 > v2 then 'v1 newer than v2'
else 'same versions'
end
from unnest([struct(
normaizedsemanticversion(normsemanticversion1) as v1,
normaizedsemanticversion(normsemanticversion2) as v2
)])
));
with test as (
select 'v1.0.0-alpha' version union all
select 'v1.0.0-alpha.1' union all
select 'v1.0.0-alpha.beta' union all
select 'v1.0.0-beta' union all
select 'v1.0.0-beta.2' union all
select 'v1.0.0-beta.11' union all
select 'v1.0.0-rc.1' union all
select 'v1.0.0' union all
select 'v1.1.1' union all
select 'v1.1.2' union all
select 'v1.10.1'
)
select string_agg(version order by normaizedsemanticversion(version) desc limit 1)
from test
with output
As alternative you can use below variation of final select statement
select version
from test
order by normaizedsemanticversion(version) desc
limit 1

Find way for gathering data and replace with values from another table

I am looking for an Oracle SQL query to find a specific pattern and replace them with values from another table.
Scenario:
Table 1:
No column1
-----------------------------------------
12345 user:12345;group:56789;group:6785;...
Note: field 1 may be has one or more pattern
Table2 :
Id name type
----------------------
12345 admin user
56789 testgroup group
Result must be the same
No column1
-----------------------------------
12345 user: admin;group:testgroup
Logic:
First split the concatenated string to individual rows using connect
by clause and regex.
Join the newly created table(split_tab) with Table2(tab2).
Use listagg function to concatenate data in the columns.
Query:
WITH tab1 AS
( SELECT '12345' NO
,'user:12345;group:56789;group:6785;' column1
FROM DUAL )
,tab2 AS
( SELECT 12345 id
,'admin' name
,'user' TYPE
FROM DUAL
UNION
SELECT 56789 id
,'testgroup' name
,'group' TYPE
FROM DUAL )
SELECT no
,listagg(category||':'||name,';') WITHIN GROUP (ORDER BY tab2.id) column1
FROM ( SELECT NO
,REGEXP_SUBSTR( column1, '(\d+)', 1, LEVEL ) id
,REGEXP_SUBSTR( column1, '([a-z]+)', 1, LEVEL ) CATEGORY
FROM tab1
CONNECT BY LEVEL <= regexp_count( column1, '\d+' ) ) split_tab
,tab2
WHERE split_tab.id = tab2.id
GROUP BY no
Output:
No Column1
12345 user:admin;group:testgroup
with t1 (no, col) as
(
-- start of test data
select 1, 'user:12345;group:56789;group:6785;' from dual union all
select 2, 'user:12345;group:56789;group:6785;' from dual
-- end of test data
)
-- the lookup table which has the substitute strings
-- nid : concatenation of name and id as in table t1 which requires the lookup
-- tname : required substitute for each nid
, t2 (id, name, type, nid, tname) as
(
select t.*, type || ':' || id, type || ':' || name from
(
select 12345 id, 'admin' name, 'user' type from dual union all
select 56789, 'testgroup', 'group' from dual
) t
)
--select * from t2;
-- cte table calculates the indexes for the substrings (eg, user:12345)
-- no : sequence no in t1
-- col : the input string in t1
-- si : starting index of each substring in the 'col' input string that needs attention later
-- ei : ending index of each substring in the 'col' input string
-- idx : the order of substring to put them together later
,cte (no, col, si, ei, idx) as
(
select no, col, 1, case when instr(col,';') = 0 then length(col)+1 else instr(col,';') end, 1 from t1 union all
select no, col, ei+1, case when instr(col,';', ei+1) = 0 then length(col)+1 else instr(col,';', ei+1) end, idx+1 from cte where ei + 1 <= length(col)
)
,coll(no, col, sstr, idx, newstr) as
(
select
a.no, a.col, a.sstr, a.idx,
-- when a substitute is not found in t2, use the same input substring (eg. group:6785)
case when t2.tname is null then a.sstr else t2.tname end
from
(select cte.*, substr(col, si, ei-si) as sstr from cte) a
-- we don't want to miss if there is no substitute available in t2 for a substring
left outer join
t2
on (a.sstr = t2.nid)
)
select no, col, listagg(newstr, ';') within group (order by no, col, idx) from coll
group by no, col;

Get every combination of sort order and value of a csv

If I have a string with numbers separated by commas, like this:
Declare #string varchar(20) = '123,456,789'
And would like to return every possible combination + sort order of the values by doing this:
Select Combination FROM dbo.GetAllCombinations(#string)
Which would in result return this:
123
456
789
123,456
456,123
123,789
789,123
456,789
789,456
123,456,789
123,789,456
456,789,123
456,123,789
789,456,123
789,123,456
As you can see not only is every combination returned, but also each combination+sort order as well. The example shows only 3 values separated by commas, but should parse any amount--Recursive.
The logic needed would be somewhere in the realm of using a WITH CUBE statement, but the problem with using WITH CUBE (in a table structure instead of CSV of course), is that it won't shuffle the order of the values 123,456 456,123 etc., and will only provide each combination, which is only half of the battle.
Currently I have no idea what to try. If someone can provide some assistance it would be appreciated.
I use a User Defined Table-valued Function called split_delimiter that takes 2 values: the #delimited_string and the #delimiter_type.
CREATE FUNCTION [dbo].[split_delimiter](#delimited_string VARCHAR(8000), #delimiter_type CHAR(1))
RETURNS TABLE AS
RETURN
WITH cte10(num) AS
(
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
)
,cte100(num) AS
(
SELECT 1
FROM cte10 t1, cte10 t2
)
,cte10000(num) AS
(
SELECT 1
FROM cte100 t1, cte100 t2
)
,cte1(num) AS
(
SELECT TOP (ISNULL(DATALENGTH(#delimited_string),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM cte10000
)
,cte2(num) AS
(
SELECT 1
UNION ALL
SELECT t.num+1
FROM cte1 t
WHERE SUBSTRING(#delimited_string,t.num,1) = #delimiter_type
)
,cte3(num,[len]) AS
(
SELECT t.num
,ISNULL(NULLIF(CHARINDEX(#delimiter_type,#delimited_string,t.num),0)-t.num,8000)
FROM cte2 t
)
SELECT delimited_item_num = ROW_NUMBER() OVER(ORDER BY t.num)
,delimited_value = SUBSTRING(#delimited_string, t.num, t.[len])
FROM cte3 t;
Using that I was able to parse the CSV to a table and join it back to itself multiple times and use WITH ROLLUP to get the permutations you are looking for.
WITH Numbers as
(
SELECT delimited_value
FROM dbo.split_delimiter('123,456,789',',')
)
SELECT CAST(Nums1.delimited_value AS VARCHAR)
,ISNULL(CAST(Nums2.delimited_value AS VARCHAR),'')
,ISNULL(CAST(Nums3.delimited_value AS VARCHAR),'')
,CAST(Nums4.delimited_value AS VARCHAR)
FROM Numbers as Nums1
LEFT JOIN Numbers as Nums2
ON Nums2.delimited_value not in (Nums1.delimited_value)
LEFT JOIN Numbers as Nums3
ON Nums3.delimited_value not in (Nums1.delimited_value, Nums2.delimited_value)
LEFT JOIN Numbers as Nums4
ON Nums4.delimited_value not in (Nums1.delimited_value, Nums2.delimited_value, Nums3.delimited_value)
GROUP BY CAST(Nums1.delimited_value AS VARCHAR)
,ISNULL(CAST(Nums2.delimited_value AS VARCHAR),'')
,ISNULL(CAST(Nums3.delimited_value AS VARCHAR),'')
,CAST(Nums4.delimited_value AS VARCHAR) WITH ROLLUP
If you will potentially have more than 3 or 4, you'll want to expand your code accordingly.

Can you create nested WITH clauses for Common Table Expressions?

WITH y AS (
WITH x AS (
SELECT * FROM MyTable
)
SELECT * FROM x
)
SELECT * FROM y
Does something like this work? I tried it earlier but I couldn't get it to work.
While not strictly nested, you can use common table expressions to reuse previous queries in subsequent ones.
To do this, the form of the statement you are looking for would be
WITH x AS
(
SELECT * FROM MyTable
),
y AS
(
SELECT * FROM x
)
SELECT * FROM y
You can do the following, which is referred to as a recursive query:
WITH y
AS
(
SELECT x, y, z
FROM MyTable
WHERE [base_condition]
UNION ALL
SELECT x, y, z
FROM MyTable M
INNER JOIN y ON M.[some_other_condition] = y.[some_other_condition]
)
SELECT *
FROM y
You may not need this functionality. I've done the following just to organize my queries better:
WITH y
AS
(
SELECT *
FROM MyTable
WHERE [base_condition]
),
x
AS
(
SELECT *
FROM y
WHERE [something_else]
)
SELECT *
FROM x
With does not work embedded, but it does work consecutive
;WITH A AS(
...
),
B AS(
...
)
SELECT *
FROM A
UNION ALL
SELECT *
FROM B
EDIT
Fixed the syntax...
Also, have a look at the following example
SQLFiddle DEMO
These answers are pretty good, but as far as getting the items to order properly, you'd be better off looking at this article
http://dataeducation.com/dr-output-or-how-i-learned-to-stop-worrying-and-love-the-merge
Here's an example of his query.
WITH paths AS (
SELECT
EmployeeID,
CONVERT(VARCHAR(900), CONCAT('.', EmployeeID, '.')) AS FullPath
FROM EmployeeHierarchyWide
WHERE ManagerID IS NULL
UNION ALL
SELECT
ehw.EmployeeID,
CONVERT(VARCHAR(900), CONCAT(p.FullPath, ehw.EmployeeID, '.')) AS FullPath
FROM paths AS p
JOIN EmployeeHierarchyWide AS ehw ON ehw.ManagerID = p.EmployeeID
)
SELECT * FROM paths order by FullPath
we can create nested cte.please see the below cte in example
;with cte_data as
(
Select * from [HumanResources].[Department]
),cte_data1 as
(
Select * from [HumanResources].[Department]
)
select * from cte_data,cte_data1
I was trying to measure the time between events with the exception of what one entry that has multiple processes between the start and end. I needed this in the context of other single line processes.
I used a select with an inner join as my select statement within the Nth cte. The second cte I needed to extract the start date on X and end date on Y and used 1 as an id value to left join to put them on a single line.
Works for me, hope this helps.
cte_extract
as
(
select ps.Process as ProcessEvent
, ps.ProcessStartDate
, ps.ProcessEndDate
-- select strt.*
from dbo.tbl_some_table ps
inner join (select max(ProcessStatusId) ProcessStatusId
from dbo.tbl_some_table
where Process = 'some_extract_tbl'
and convert(varchar(10), ProcessStartDate, 112) < '29991231'
) strt on strt.ProcessStatusId = ps.ProcessStatusID
),
cte_rls
as
(
select 'Sample' as ProcessEvent,
x.ProcessStartDate, y.ProcessEndDate from (
select 1 as Id, ps.Process as ProcessEvent
, ps.ProcessStartDate
, ps.ProcessEndDate
-- select strt.*
from dbo.tbl_some_table ps
inner join (select max(ProcessStatusId) ProcessStatusId
from dbo.tbl_some_table
where Process = 'XX Prcss'
and convert(varchar(10), ProcessStartDate, 112) < '29991231'
) strt on strt.ProcessStatusId = ps.ProcessStatusID
) x
left join (
select 1 as Id, ps.Process as ProcessEvent
, ps.ProcessStartDate
, ps.ProcessEndDate
-- select strt.*
from dbo.tbl_some_table ps
inner join (select max(ProcessStatusId) ProcessStatusId
from dbo.tbl_some_table
where Process = 'YY Prcss Cmpltd'
and convert(varchar(10), ProcessEndDate, 112) < '29991231'
) enddt on enddt.ProcessStatusId = ps.ProcessStatusID
) y on y.Id = x.Id
),
.... other ctes
Nested 'With' is not supported, but you can always use the second With as a subquery, for example:
WITH A AS (
--WITH B AS ( SELECT COUNT(1) AS _CT FROM C ) SELECT CASE _CT WHEN 1 THEN 1 ELSE 0 END FROM B --doesn't work
SELECT CASE WHEN count = 1 THEN 1 ELSE 0 END AS CT FROM (SELECT COUNT(1) AS count FROM dual)
union all
select 100 AS CT from dual
)
select CT FROM A