SQL - Two Columns into One Distinct Ordered Column - sql

If I have a table like this:
Col 1 | Col 2
-------------
A | 1
A | 2
B | 1
C | 1
C | 2
C | 3
How can I write a query to pull one column that looks like this --
Col 1
------
A
1
2
B
1
C
1
2
3

-- Stack col1 and col2 into one result column (named after the first branch).
-- UNION ALL keeps duplicates and does not guarantee any row order.
SELECT
    src.col1
FROM Some_Table_You_Did_Not_Name src
UNION ALL
SELECT
    src.col2
FROM Some_Table_You_Did_Not_Name src
If the order matters in your example then you want this:
-- Emits each distinct col1 value once, followed by that group's col2 values.
-- RN numbers the col1 GROUPS (DENSE_RANK), not the individual rows: with a
-- per-row ROW_NUMBER() every (col1, RN) pair is unique, DISTINCT removes
-- nothing, and the col1 header is repeated before every single col2 value.
WITH data AS
(
    SELECT
        col1,
        col2,
        DENSE_RANK() OVER (ORDER BY col1) AS RN
    FROM Some_Table_You_Did_Not_Name
)
SELECT col
FROM (
    -- O = 1: one header row per col1 group.
    SELECT DISTINCT col1 AS col, RN, 1 AS O
    FROM data
    UNION ALL
    -- O = 2: the group's col2 values. The cast gives both UNION branches a
    -- character type (assumes col2 is numeric and fits in 20 characters --
    -- TODO confirm against the real table).
    SELECT DISTINCT CAST(col2 AS VARCHAR(20)) AS col, RN, 2 AS O
    FROM data
) JC_IS_THAT_GUY
-- col is text after the cast, so values inside a group sort as strings.
ORDER BY RN ASC, O ASC, col ASC

You can use a query like the following:
-- First branch: one header row per distinct Col1 (seq = 0).
-- Second branch: every Col2 value tagged with its owning Col1 (seq = 1).
-- Sorting by owner, then seq, then value puts each header ahead of its values.
SELECT
    merged.Col1
FROM (
    SELECT DISTINCT
        Col1,
        Col1 AS owner_key,
        0 AS seq
    FROM mytable
    UNION ALL
    SELECT
        Col2,
        Col1,
        1
    FROM mytable
) AS merged
ORDER BY
    merged.owner_key,
    merged.seq,
    merged.Col1
Demo here

There is absolutely no need to do a UNION, UNION ALL or reference the table more than once to unpivot data...
-- If Col2 is always a well-ordered sequence starting at 1 (like the test data)...
-- Each source row is unpivoted into two candidates: the Col1 header (only on
-- the row where Col2 = 1, otherwise NULL and filtered out) and the Col2 value.
SELECT
    Col1 = x.Value
FROM
    #TestData td
    CROSS APPLY ( VALUES
        (0, IIF(td.Col2 = 1, td.Col1, NULL)),
        -- VARCHAR(11) instead of CHAR(1): SQL Server returns '*' when an
        -- integer does not fit the target length (assumes Col2 is INT --
        -- TODO confirm).
        (1, CAST(td.Col2 AS VARCHAR(11)))
    ) x (Pos, Value)
WHERE
    x.Value IS NOT NULL
-- Without ORDER BY the header-before-values order is not guaranteed.
ORDER BY
    td.Col1,
    td.Col2,
    x.Pos;
-- If it isn't...
-- RN marks the first row of each Col1 group, so the header is emitted exactly
-- once per group regardless of which values Col2 holds.
WITH
    cre_Add_RN AS (
        SELECT
            td.Col1,
            td.Col2,
            RN = ROW_NUMBER() OVER (PARTITION BY td.Col1 ORDER BY td.Col2)
        FROM
            #TestData td
    )
SELECT
    x.Value
FROM
    cre_Add_RN arn
    CROSS APPLY ( VALUES
        (0, IIF(arn.RN = 1, arn.Col1, NULL)),
        -- VARCHAR(11) instead of CHAR(1): SQL Server returns '*' when an
        -- integer does not fit the target length (assumes Col2 is INT --
        -- TODO confirm).
        (1, CAST(arn.Col2 AS VARCHAR(11)))
    ) x (Pos, Value)
WHERE
    x.Value IS NOT NULL
-- Without ORDER BY the header-before-values order is not guaranteed.
ORDER BY
    arn.Col1,
    arn.RN,
    x.Pos;
HTH,
Jason

Related

In BigQuery, how can I check if at least one element from one array is in another array? [duplicate]

I have a column, like ['11999999999','12999999999','31999999999'], and another column, like ['5511777777777','5512888888888','5531999999999']. I want to write a CASE WHEN that returns 1 if any item in the first column is in any item of the second column. How can I do this?
Consider below approach
-- has_overlap is 1 when the col1 and col2 arrays of a row share at least one
-- element, and 0 otherwise.
select
  t.*,
  case
    when exists (
      select * from t.col1
      intersect distinct
      select * from t.col2
    ) then 1
    else 0
  end as has_overlap
from your_table t
if applied to sample data like in your question - output is
See if following helps:
-- Build two sample arrays from three literal rows, then report whether the
-- arrays have any element in common.
with sample as (
  select
    array_agg(pair.col1) as col1,
    array_agg(pair.col2) as col2
  from (
    select '11999999999' as col1, '123345567' as col2
    union all
    select '12999999999', '31999999999'
    union all
    select '31999999999', '5512888888888'
  ) as pair
)
select
  (
    case
      when array_length(array(
        (select c1 from unnest(sample.col1) as c1)
        intersect distinct
        (select c2 from unnest(sample.col2) as c2)
      )) > 0 then true
      else false
    end
  )
from sample
results => true (because 31999999999 from col1 is in col2 as well)
You can use a JOIN to check whether an element exists in both arrays.
-- Returns 1 when col1 and col2 share an element, NULL otherwise.
-- LIMIT 1 keeps the scalar subquery to a single row: without it, two or more
-- matching pairs make the query fail with "Scalar subquery produced more than
-- one element".
WITH sample AS (
  SELECT ['11999999999','12999999999','31999999999' ] col1,
         ['5511777777777','5512888888888','5531999999999', '11999999999'] col2
)
SELECT (SELECT 1 FROM UNNEST(col1) c1 JOIN UNNEST(col2) c2 ON c1 = c2 LIMIT 1)
FROM sample;
--or
-- The CTE is repeated because a WITH clause is scoped to a single statement.
WITH sample AS (
  SELECT ['11999999999','12999999999','31999999999' ] col1,
         ['5511777777777','5512888888888','5531999999999', '11999999999'] col2
)
SELECT (SELECT 1 FROM UNNEST(col1) c1, UNNEST(col2) c2 WHERE c1 = c2 LIMIT 1)
FROM sample;
Query results:
+-----+------+
| Row | f0_ |
+-----+------+
| 1 | 1 |
+-----+------+

How to check if an item of a list is in another item of list on Bigquery?

I have a column, like ['11999999999','12999999999','31999999999'], and another column, like ['5511777777777','5512888888888','5531999999999']. I want to write a CASE WHEN that returns 1 if any item in the first column is in any item of the second column. How can I do this?
Consider below approach
-- has_overlap is 1 when the col1 and col2 arrays of a row share at least one
-- element, and 0 otherwise.
select
  t.*,
  case
    when exists (
      select * from t.col1
      intersect distinct
      select * from t.col2
    ) then 1
    else 0
  end as has_overlap
from your_table t
if applied to sample data like in your question - output is
See if following helps:
-- Build two sample arrays from three literal rows, then report whether the
-- arrays have any element in common.
with sample as (
  select
    array_agg(pair.col1) as col1,
    array_agg(pair.col2) as col2
  from (
    select '11999999999' as col1, '123345567' as col2
    union all
    select '12999999999', '31999999999'
    union all
    select '31999999999', '5512888888888'
  ) as pair
)
select
  (
    case
      when array_length(array(
        (select c1 from unnest(sample.col1) as c1)
        intersect distinct
        (select c2 from unnest(sample.col2) as c2)
      )) > 0 then true
      else false
    end
  )
from sample
results => true (because 31999999999 from col1 is in col2 as well)
You can use a JOIN to check whether an element exists in both arrays.
-- Returns 1 when col1 and col2 share an element, NULL otherwise.
-- LIMIT 1 keeps the scalar subquery to a single row: without it, two or more
-- matching pairs make the query fail with "Scalar subquery produced more than
-- one element".
WITH sample AS (
  SELECT ['11999999999','12999999999','31999999999' ] col1,
         ['5511777777777','5512888888888','5531999999999', '11999999999'] col2
)
SELECT (SELECT 1 FROM UNNEST(col1) c1 JOIN UNNEST(col2) c2 ON c1 = c2 LIMIT 1)
FROM sample;
--or
-- The CTE is repeated because a WITH clause is scoped to a single statement.
WITH sample AS (
  SELECT ['11999999999','12999999999','31999999999' ] col1,
         ['5511777777777','5512888888888','5531999999999', '11999999999'] col2
)
SELECT (SELECT 1 FROM UNNEST(col1) c1, UNNEST(col2) c2 WHERE c1 = c2 LIMIT 1)
FROM sample;
Query results:
+-----+------+
| Row | f0_ |
+-----+------+
| 1 | 1 |
+-----+------+

SQL Server : get max of the column2 and column3 value must be 1

I have output from part of my stored procedure that looks like this:
col1 col2 col3 col4
--------------------------
2016-05-05 1 2 2
2016-05-05 1 3 32
2016-05-12 2 1 11
2016-05-12 3 1 31
Now I need to get result based on this condition
col2 = 1 and col3 = max or col3 = 1
and col2 = max
The final result should be
col1 col2 col3 col4
-------------------------
2016-05-05 1 3 32
2016-05-12 3 1 31
I'm not sure if that's the most efficient way, but you can use ROW_NUMBER():
-- Branch 1: among rows with col2 = 1, rank by col3 descending within each col1.
-- Branch 2: among rows with col3 = 1, rank by col2 descending within each col1.
-- Keeping rnk = 1 returns the top row of each branch per col1.
-- Both branches need their own FROM clause, and the select list must not end
-- in a comma before WHERE. YourTable is a placeholder for the table or
-- result set that holds col1..col4.
SELECT * FROM (
    SELECT t.*,
           ROW_NUMBER() OVER (PARTITION BY t.col1 ORDER BY t.col3 DESC) AS rnk
    FROM YourTable t
    WHERE t.col2 = 1
    UNION ALL
    SELECT t.*,
           ROW_NUMBER() OVER (PARTITION BY t.col1 ORDER BY t.col2 DESC) AS rnk
    FROM YourTable t
    WHERE t.col3 = 1
) tt
WHERE rnk = 1
This will give you all the records with
(col2=1 and col3=max) or (col3=1 and col2=max)
This is a bit tricky. Your data has no ambiguities, such as duplicate maximums in col4 or "1" values in both col2 and col3.
The following is a direct translation of the logic in your question:
-- Keep the rows whose col4 equals the largest col4 found, for the same col1,
-- among rows that have col2 = 1 or col3 = 1.
select
    cur.*
from t cur
where cur.col4 = (
    select max(peer.col4)
    from t peer
    where peer.col1 = cur.col1
      and (peer.col2 = 1 or peer.col3 = 1)
);
Try this. Note if there are more than 1 same max value, then you need all of those in output. And it will work for all scenarios, even when col1 is not in sync with col2 and col3.
I am first finding highest values of col2 and col3 and assigning them value as 1. Then in outer query, I am using your join condition. Demo created for Postgres DB as SQLServer wasn't available.
SQLFiddle Demo
-- Rank every row by col3 and by col2 (descending, across the whole table; ties
-- share a rank), then keep rows that pair a 1 in one column with the top rank
-- in the other.
with ranked as (
    select
        src.*,
        rank() over (order by src.col3 desc) as col3_max,
        rank() over (order by src.col2 desc) as col2_max
    from your_table src
)
select
    ranked.col1,
    ranked.col2,
    ranked.col3,
    ranked.col4
from ranked
where (ranked.col2 = 1 and ranked.col3_max = 1)
   or (ranked.col3 = 1 and ranked.col2_max = 1)
Alternative way:
-- Per col1, pick the single row with the largest sort key, where the key is
-- col3 when col2 = 1 and col2 otherwise.
SELECT *
FROM (
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY col1
            ORDER BY CASE WHEN col2 = 1 THEN col3 ELSE col2 END DESC
        ) AS r
    FROM tbl
) ranked
WHERE r = 1

Cumulative string concatenation

I have a requirement where I have to show data in cumulative concatenation style, just like running total by group.
Sample data
Col1 Col2
1 a
1 b
2 c
2 d
2 e
Expected output:
Col1 Col2
1 a
1 b,a
2 c
2 d,c
2 e,d,c
The concatenation needs to be broken down by Col1. Any help regarding how to get this result by Oracle SQL will be appreciated.
Assuming something on the way you need to order, this can be a solution, based on Hierarchical Queries:
-- Cumulative concatenation per col1 using a hierarchical query.
with test as
(
    -- Inline sample data standing in for the real table.
    select 1 as col1, 'a' as col2 from dual union all
    select 1 as col1, 'b' as col2 from dual union all
    select 2 as col1, 'c' as col2 from dual union all
    select 2 as col1, 'd' as col2 from dual union all
    select 2 as col1, 'e' as col2 from dual
)
select col1, col2
from (
    select col1 AS col1,
           -- SYS_CONNECT_BY_PATH prefixes every element with the separator,
           -- so the raw path is ',b,a'; LTRIM drops the leading comma to
           -- give the expected 'b,a'.
           ltrim(sys_connect_by_path(col2, ','), ',') AS col2,
           connect_by_isleaf leaf
    from (
        -- num runs through the rows ordered by col1, then col2 descending.
        select row_number() over (order by col1 asc, col2 desc) as num, col1, col2
        from test
    )
    -- No START WITH: every row is a root, and each path walks to the next
    -- num within the same col1.
    connect by nocycle prior col1 = col1 and prior num = num - 1
)
-- Keep only the paths that reached the last row of their col1 group.
where leaf = 1
order by col1, col2
Try:
-- numbered: position (x) of every row inside its col1 group, ordered by col2.
WITH numbered AS (
    SELECT
        col1,
        col2,
        ROW_NUMBER() OVER (PARTITION BY col1 ORDER BY col2) AS x
    FROM tab_le
),
-- chain: starts at position 1 and prepends each following col2 to the text
-- accumulated so far (col22).
chain (col1, col2, x, col22) AS (
    SELECT
        col1,
        col2,
        x,
        col2 AS col22
    FROM numbered
    WHERE x = 1
    UNION ALL
    SELECT
        numbered.col1,
        numbered.col2,
        numbered.x,
        numbered.col2 || ',' || chain.col22
    FROM numbered
    INNER JOIN chain
        ON numbered.col1 = chain.col1
       AND numbered.x = chain.x + 1
)
SELECT
    chain.col1,
    chain.col2,
    chain.x,
    chain.col22
FROM chain
ORDER BY
    chain.col1,
    chain.col2;
I'm not sure you can do this with listagg as it doesn't seem to support windowing clauses. If you're on 11g or higher you can use recursive subquery factoring to achieve your result.
-- your_table: inline sample rows.
with your_table (col1, col2) as (
    select 1, 'a' from dual union all
    select 1, 'b' from dual union all
    select 2, 'c' from dual union all
    select 2, 'd' from dual union all
    select 2, 'e' from dual
),
-- t: adds rn, the row's position inside its col1 group (ordered by col2).
t as (
    select
        col1,
        col2,
        row_number() over (partition by col1 order by col2) as rn
    from your_table
),
-- r: anchor is the first row of each group; each recursive step prepends the
-- next row's col2 to the string built so far.
r (col1, col2, rn) as (
    select
        col1,
        col2,
        rn
    from t
    where rn = 1
    union all
    select
        r.col1,
        t.col2 ||','|| r.col2,
        t.rn
    from r
    inner join t
        on r.col1 = t.col1
       and r.rn + 1 = t.rn
)
select
    col1,
    col2
from r
order by
    col1,
    rn;
COL1 COL2
---------- --------------------
1 a
1 b,a
2 c
2 d,c
2 e,d,c
The your_table CTE is just to mimic your base data. The t CTE adds a row_number() analytic column to provide a sequence for the next part. The interesting part is the r recursive CTE. The anchor member starts with the first row (according to rn from the previous CTE). The recursive member then finds the next row (again according to rn) for that col1, and for that it concatenates the current col2 with the previous one, which may itself already be a concatenation.

Apply the distinct on 2 fields and also fetch the unique data for each columns

According to some weird requirement, I need to select the records such that all the output values in both columns are unique.
Input looks like this:
col1 col2
1 x
1 y
2 x
2 y
3 x
3 y
3 z
Expected Output is:
col1 col2
1 x
2 y
3 z
or
col1 col2
1 y
2 x
3 z
I tried applying the distinct on 2 fields but that returns all the records as overall they are distinct on both the fields. What we want to do is that if any value is present in the col1, then it cannot be repeated in the col2.
Please let me know if this is even possible and if yes, how to go about it.
Great problem! Armunin has picked up on the deeper structural issue here: this is a recursively enumerable problem description and can only be resolved with a recursive solution - base relational operators (join/union/etc.) are not going to get you there. As Armunin cited, one approach is to bring out the PL/SQL, and though I haven't checked it in detail, I'd assume the PL/SQL code will work just fine. However, Oracle is kind enough to support recursive SQL, through which we can build the solution in just SQL:
-- Note - this SQL will generate every solution - you will need to filter for SOLUTION_NUMBER=1 at the end
-- t: inline sample rows standing in for the real table.
with t as (
select 1 col1, 'x' col2 from dual union all
select 1 col1, 'y' col2 from dual union all
select 2 col1, 'x' col2 from dual union all
select 2 col1, 'y' col2 from dual union all
select 3 col1, 'x' col2 from dual union all
select 3 col1, 'y' col2 from dual union all
select 3 col1, 'z' col2 from dual
),
-- t0: tags each row with id (a row number in col1 order) and c2_rnk (the
-- dense rank of its col2 value).
t0 as
(select t.*,
row_number() over (order by col1) id,
dense_rank() over (order by col2) c2_rnk
from t),
-- recursive step...
-- t1 builds partial solutions one col2 rank at a time: ids collects the
-- chosen row ids as '(id)' tokens, str collects the chosen col1 values as
-- '(col1)' tokens.
t1 (c2_rnk,ids, str) as
(-- base row
-- Every row with the lowest col2 rank starts its own partial solution.
select c2_rnk, '('||id||')' ids, '('||col1||')' str
from t0
where c2_rnk=1
union all
-- induction
-- Extend a partial solution with a row of the next col2 rank whose col1
-- token does not already appear in str.
select t0.c2_rnk, ids||'('||t0.id||')' ids, str||','||'('||t0.col1||')'
from t1, t0
where t0.c2_rnk = t1.c2_rnk+1
and instr(t1.str,'('||t0.col1||')') =0
),
-- t2: keeps only the solutions that reached the highest col2 rank and numbers
-- them with rownum.
t2 as
(select t1.*,
rownum solution_number
from t1
where c2_rnk = (select max(c2_rnk) from t1)
)
-- Expand each solution back into rows: a t0 row belongs to a solution when
-- its '(id)' token occurs in that solution's ids string.
select solution_number, col1, col2
from t0, t2
where instr(t2.ids,'('||t0.id||')') <> 0
order by 1,2,3
SOLUTION_NUMBER COL1 COL2
1 1 x
1 2 y
1 3 z
2 1 y
2 2 x
2 3 z
You can use a full outer join to merge two numbered lists together:
-- Number the distinct col1 values and the distinct col2 values independently,
-- then pair them up by position; the longer list is padded with NULLs.
SELECT
    col1,
    col2
FROM (
    SELECT
        col1,
        ROW_NUMBER() OVER (ORDER BY col1) AS col1_num
    FROM your_table
    GROUP BY col1
) first_list
FULL OUTER JOIN (
    SELECT
        col2,
        ROW_NUMBER() OVER (ORDER BY col2) AS col2_num
    FROM your_table
    GROUP BY col2
) second_list
    ON first_list.col1_num = second_list.col2_num
Change ORDER BY if you require a different order and use ORDER BY NULL if you're happy to let Oracle decide.
What would be the result if another row of
col1 value as 1 and col2 value as xx ?
A single row is better in this case:
-- Distinct col1 values (as text) followed by distinct col2 values, in a
-- single result column.
SELECT TO_CHAR(col1)
FROM your_table
GROUP BY TO_CHAR(col1)
UNION ALL
SELECT col2
FROM your_table
GROUP BY col2;
My suggestion is something like this:
-- Scratch table that collects the chosen (col1, col2) pairs.
begin
  EXECUTE IMMEDIATE 'CREATE global TEMPORARY TABLE tmp(col1 NUMBER, col2 VARCHAR2(50))';
end;
/
-- For each distinct col1, store one row whose col1 and col2 have not been
-- stored yet, then print the stored pairs and drop the scratch table.
DECLARE
  CURSOR cur_dist
  IS
    SELECT DISTINCT
      col1
    FROM
      ttable;
BEGIN
  FOR rec IN cur_dist
  LOOP
    -- Explicit column list so the insert does not depend on tmp's column order.
    INSERT INTO tmp (col1, col2)
    SELECT
      t1.col1,
      t1.col2
    FROM
      ttable t1
    WHERE
      t1.col1 = rec.col1
      -- NOT EXISTS instead of NOT IN: a NULL already stored in tmp would make
      -- NOT IN reject every remaining candidate.
      AND NOT EXISTS
      (
        SELECT 1
        FROM tmp
        WHERE tmp.col2 = t1.col2
      )
      AND NOT EXISTS
      (
        SELECT 1
        FROM tmp
        WHERE tmp.col1 = t1.col1
      )
      -- Take a single (arbitrary) qualifying row per col1.
      AND ROWNUM = 1;
  END LOOP;
  FOR rec in (select col1, col2 from tmp) LOOP
    DBMS_OUTPUT.PUT_LINE('col1: ' || rec.col1 || '|| col2: ' || rec.col2);
  END LOOP;
  EXECUTE IMMEDIATE 'DROP TABLE tmp';
END;
/
May still need some refining, I am especially not happy with the ROWNUM = 1 part.
SQL Fiddle
Oracle 11g R2 Schema Setup:
-- Sample data: col1 values 1..3 each paired with 'x' and 'y', plus (4, 'z').
CREATE TABLE tbl (col1, col2) AS
    SELECT 1, 'x' FROM DUAL UNION ALL
    SELECT 1, 'y' FROM DUAL UNION ALL
    SELECT 2, 'x' FROM DUAL UNION ALL
    SELECT 2, 'y' FROM DUAL UNION ALL
    SELECT 3, 'x' FROM DUAL UNION ALL
    SELECT 3, 'y' FROM DUAL UNION ALL
    SELECT 4, 'z' FROM DUAL;
Query 1:
-- Rank the distinct col1 values and the distinct col2 values separately, then
-- pair them by rank; the shorter list is padded with NULLs.
WITH col1_ranked AS (
    SELECT DISTINCT
        col1,
        DENSE_RANK() OVER (ORDER BY col1) AS pos
    FROM tbl
),
col2_ranked AS (
    SELECT DISTINCT
        col2,
        DENSE_RANK() OVER (ORDER BY col2) AS pos
    FROM tbl
)
SELECT
    col1_ranked.col1,
    col2_ranked.col2
FROM col1_ranked
FULL OUTER JOIN col2_ranked
    ON col1_ranked.pos = col2_ranked.pos
ORDER BY COALESCE(col1_ranked.pos, col2_ranked.pos)
Results:
| COL1 | COL2 |
|------|--------|
| 1 | x |
| 2 | y |
| 3 | z |
| 4 | (null) |
And to address the additional requirement:
What we want to do is that if any value is present in the col1, then it cannot be repeated in the col2.
Query 2:
-- Same pairing as Query 1, but col2 values that also occur (as text) in col1
-- are removed before ranking.
WITH c1 AS (
    SELECT DISTINCT
        col1,
        DENSE_RANK() OVER (ORDER BY col1) AS rank
    FROM tbl
),
c2 AS (
    SELECT DISTINCT
        col2,
        DENSE_RANK() OVER (ORDER BY col2) AS rank
    FROM tbl
    -- The IS NOT NULL guard keeps a NULL col1 out of the NOT IN list; a single
    -- NULL there would make NOT IN reject every row and leave c2 empty.
    WHERE col2 NOT IN (
        SELECT TO_CHAR( col1 )
        FROM c1
        WHERE col1 IS NOT NULL
    )
)
SELECT c1.col1,
       c2.col2
FROM c1
FULL OUTER JOIN c2
    ON ( c1.rank = c2.rank)
ORDER BY COALESCE( c1.rank, c2.rank)