Select distinct values based on multiple column from table - sql

I am having below dummy table
-- Sample data: five (col1, col2) pairs built inline from DUAL.
-- Column names are taken from the first branch of the UNION.
SELECT *
FROM (
    SELECT 'A' AS col1, 'B' AS col2 FROM dual
    UNION
    SELECT 'B', 'A' FROM dual
    UNION
    SELECT 'A', 'C' FROM dual
    UNION
    SELECT 'C', 'A' FROM dual
    UNION
    SELECT 'A', 'D' FROM dual
) a
which will give output as below
col1 col2
A B
A C
A D
B A
C A
I want to find the distinct values from that table, like below:
col1 col2
A B
A C
A D
The first row can be either A B or B A; likewise, the second can be either A C or C A.
Is it possible??
We got the solution for above problem which is below
-- Put the smaller value of each pair first so that (A, B) and (B, A) become
-- the same row, then let DISTINCT collapse the duplicates.
select distinct least(col1, col2), greatest(col1, col2)
from the_table;
but if there are more than 2 columns, then it wouldn't work.
Let us assume the below scenario
Input
col1 col2 col3
A B E
A C E
A D E
B A F
C A E
Output
col1 col2 col3
A B E
A D E
B A F
C A E
then what would be the possible solution ?

Here is one method:
-- Pairs that are already in ascending order are kept as they are ...
SELECT t.col1, t.col2
FROM t
WHERE t.col1 <= t.col2
UNION ALL
-- ... and a descending pair is kept only when its mirrored row is absent.
SELECT t.col1, t.col2
FROM t
WHERE t.col1 > t.col2
  AND NOT EXISTS (
        SELECT 1
        FROM t t2
        WHERE t2.col1 = t.col2
          AND t2.col2 = t.col1
      );

The following will work for Oracle and Postgres:
-- Order each pair (smaller value first), then de-duplicate the ordered pairs.
SELECT DISTINCT
    LEAST(col1, col2),
    GREATEST(col1, col2)
FROM the_table;
Online example: http://rextester.com/BZXC69735

-- DISTINCT on its own cannot merge (A, B) with (B, A): the two rows differ
-- column by column, and UNION has already removed exact duplicates.
-- Normalise each pair with LEAST/GREATEST first so mirrored pairs collapse
-- into a single row (A B, A C, A D for the sample data).
select distinct
       least(col1, col2)    as col1,
       greatest(col1, col2) as col2
from (
select 'A' as col1, 'B' as col2 from dual
union
select 'B' as col1, 'A' as col2 from dual
union
select 'A' as col1, 'C' as col2 from dual
union
select 'C' as col1, 'A' as col2 from dual
union
select 'A' as col1, 'D' as col2 from dual
)a

-- Emit every pair with the smaller value first; UNION then removes the
-- duplicates that appear once the descending pairs have been flipped.
SELECT col1, col2
FROM t
WHERE col1 <= col2
UNION
SELECT col2, col1
FROM t
WHERE col1 > col2

Related

Ordering within ARRAY_AGG after GROUP BY in BigQuery [duplicate]

This question already has an answer here:
How to create lines from ordered groupbyed points in BigQuery?
(1 answer)
Closed 10 months ago.
I have a BigQuery table:
-- Mock table with five rows; column names come from the first SELECT.
CREATE OR REPLACE TABLE `project.table.mock` AS (
    SELECT 1 AS col0, 'a' AS col1, 'x' AS col2
    UNION ALL
    SELECT 2, 'a', 'y'
    UNION ALL
    SELECT 4, 'b', 'z'
    UNION ALL
    SELECT 8, 'b', 'X'
    UNION ALL
    SELECT 7, 'b', 'Y'
)
Visualization:
I would like to group by column col1, and array_agg the results from col2. I would like to have the elements appearing in each array to be sorted by col0.
I am now at:
SELECT ARRAY_AGG(col2) AS col1arrays
FROM `project.table.mock`
GROUP BY col1;
which gives me:
The desired output in the second row would be [z, Y, X] (as the row where z appears in col2 has 4 in col0, the row where Y appears in col2 has 7 in col0, and the row where X appears in col2 has 8 in col0, and 4 < 7 < 8).
How can I achieve ordering within array_agg, as described above, in BigQuery?
You can add ORDER BY clause in ARRAY_AGG() function.
-- Sort the elements of each array by col0, the key the question asks for.
-- Ordering by col1 would have no effect: col1 is the GROUP BY key and is
-- therefore constant inside every group.
SELECT ARRAY_AGG(col2 ORDER BY col0 ASC) AS col1arrays
FROM `project.table.mock`
GROUP BY col1;
https://cloud.google.com/bigquery/docs/reference/standard-sql/aggregate_functions#array_agg
-- Inline copy of the mock rows; col0 drives the order inside each array.
WITH mock AS (
    SELECT 1 AS col0, 'a' AS col1, 'x' AS col2
    UNION ALL
    SELECT 2, 'a', 'y'
    UNION ALL
    SELECT 4, 'b', 'z'
    UNION ALL
    SELECT 8, 'b', 'X'
    UNION ALL
    SELECT 7, 'b', 'Y'
)
SELECT ARRAY_AGG(col2 ORDER BY col0) AS col1arrays
FROM mock
GROUP BY col1;
output:
+------------+
| col1arrays |
+------------+
| [x,y] |
| [z,Y,X] |
+------------+

Count on case Oracle

We have the data below in an Oracle database:
col1 col2
Z1 A
Z1 B
Z2 A
Z2 C
Z3 A
Z4 D
I want a count on column two such that:
Output:
col2 count
A 3 (Z1,Z2,Z3)
B 0 (Dont count if A is already present for record)
C 0
D 1 (Z4)
Best Regards
You can use window function rank() to achieve this.
-- Within each col1, rank() gives the 'A' row rank 1 and pushes every other
-- row to rank 2; when a col1 has no 'A' row, all of its rows share rank 1.
-- Only rank-1 rows are counted per col2.
-- your_table is a placeholder for the real table name: the bare word TABLE
-- is reserved in Oracle and cannot be used as a table name here.
select col2, count(case when rn = 1 then 1 end) cnt from (
select t.*,
rank() over (partition by col1 order by case when col2 = 'A' then 1 else 2 end) rn
from your_table t
) group by col2;
The most general solution to your problem, where each COL1 key is counted only for the first COL2 value it occurs with (in alphabetical order):
-- tab  : the sample rows.
-- tab2 : numbers the rows of each COL1 by COL2, so rn = 1 marks the
--        alphabetically first COL2 of that key.
WITH tab AS
(
    SELECT 'Z1' col1, 'A' col2 FROM dual UNION ALL
    SELECT 'Z1' col1, 'B' col2 FROM dual UNION ALL
    SELECT 'Z2' col1, 'A' col2 FROM dual UNION ALL
    SELECT 'Z2' col1, 'C' col2 FROM dual UNION ALL
    SELECT 'Z3' col1, 'A' col2 FROM dual UNION ALL
    SELECT 'Z4' col1, 'D' col2 FROM dual
),
tab2 AS
(
    SELECT col1,
           col2,
           ROW_NUMBER() OVER (PARTITION BY col1 ORDER BY col2) AS rn
    FROM tab
)
SELECT col1,
       col2,
       CASE rn WHEN 1 THEN 1 ELSE 0 END is_valid
FROM tab2
ORDER BY col1, col2
;
COL1 COL2 IS_VALID
---- ---- ----------
Z1 A 1
Z1 B 0
Z2 A 1
Z2 C 0
Z3 A 1
Z4 D 1
The rest is simple group by with a SUM on IS_VALID
-- TAB3 is the above row source
SELECT col2,
       SUM(is_valid) cnt
FROM tab3
GROUP BY col2
ORDER BY col2
COL2 CNT
---- ----------
A 3
B 0
C 0
D 1
Thanks Guys. But I could do this way -
-- For every col1 group the col2 values are joined with '-'; each output
-- column counts the groups whose joined list matches that letter's test.
-- A CASE without ELSE yields NULL, which COUNT ignores.
SELECT COUNT(CASE WHEN LISTAGG(col2, '-') WITHIN GROUP (ORDER BY col2) LIKE '%A%' THEN 1 END) A,
       COUNT(CASE WHEN LISTAGG(col2, '-') WITHIN GROUP (ORDER BY col2) = 'B' THEN 1 END) B,
       COUNT(CASE WHEN LISTAGG(col2, '-') WITHIN GROUP (ORDER BY col2) = 'C' THEN 1 END) C,
       COUNT(CASE WHEN LISTAGG(col2, '-') WITHIN GROUP (ORDER BY col2) = 'D' THEN 1 END) D
FROM T
GROUP BY col1
Thanks for your replies
Assume your table name is table_name, One way to do it is using this:
-- table_a: the col1 keys that have an 'A' row.
-- A row adds 1 to its col2 total when its key is not in table_a, or when
-- the row itself is the 'A' row; other rows of a table_a key add 0.
WITH table_a AS
(
    SELECT DISTINCT col1
    FROM table_name
    WHERE col2 = 'A'
)
SELECT col2,
       SUM(CASE
               WHEN col1 IN (SELECT col1 FROM table_a)
                   THEN CASE WHEN col2 = 'A' THEN 1 ELSE 0 END
               ELSE 1
           END) count
FROM table_name
GROUP BY col2
ORDER BY col2;
Tested ok:
-- Same query as above, with table_name supplied inline from DUAL.
WITH table_name AS
(
    SELECT 'Z1' col1, 'A' col2 FROM dual UNION ALL
    SELECT 'Z1' col1, 'B' col2 FROM dual UNION ALL
    SELECT 'Z2' col1, 'A' col2 FROM dual UNION ALL
    SELECT 'Z2' col1, 'C' col2 FROM dual UNION ALL
    SELECT 'Z3' col1, 'A' col2 FROM dual UNION ALL
    --SELECT 'Z4' col1, 'B' col2 FROM dual UNION ALL
    SELECT 'Z4' col1, 'D' col2 FROM dual
),
table_a AS
(
    SELECT DISTINCT col1
    FROM table_name
    WHERE col2 = 'A'
)
SELECT col2,
       SUM(CASE
               WHEN col1 IN (SELECT col1 FROM table_a)
                   THEN CASE WHEN col2 = 'A' THEN 1 ELSE 0 END
               ELSE 1
           END) count
FROM table_name
GROUP BY col2
ORDER BY col2;
You want to count each record where either col2 is 'A' or no 'A' record exists for col1.
-- Count a row when its col2 is 'A', or when its col1 has no 'A' row at all.
-- The subquery excludes NULL col1 values: a single NULL in a NOT IN list
-- makes the predicate UNKNOWN for every row, which would silently drop all
-- non-'A' rows from the counts.
select
col2,
count(
case
when col2 = 'A'
  or col1 not in (select col1 from table_name where col2 = 'A' and col1 is not null) then 1
end) as cnt
from table_name
group by col2;
select col2, count(case when col2 = col3 then 'x' end) as ct  -- counts the rows whose col2 equals col3
from ( select col2, min(col2) over (partition by col1) as col3  -- col3: smallest col2 among the rows sharing this row's col1
from your_table
)
group by col2
order by col2 -- if needed
;
Explanation:
There is an inner query (a.k.a. "subquery") which returns one row for each row in the original table. It returns col2 as is, and an additional (new) column, labeled col3. col3 is calculated as the "first" or min() value of col2 (in alphabetical order) for all the rows in the original table that have the same value in col1 as the current row does. This is a typical example of an analytic function; partition by col1 is similar to group by col1 but it returns all the rows in the group (all the original rows from the original table) instead of one row per group, as would an aggregate function.
To see what the inner query does by itself, select it and run it in your favorite front-end. You may add col1 to the select in the inner query - that will make what's going on in this query even clearer. You'll get the initial table, with one more column, col3, that shows the "min" col2 for each value of col1. I didn't include col1 in the subquery because I don't need it, but add it back to see what the subquery really does.
Then in the outer query I take the results from the inner query and I group by col2. For each col2 I count just how many times it is equal to the "min" value of col2 for the corresponding col1 value. That's what the case expression does in the count() function; when col2 is not equal to col3, then case returns null (by default) so the expression - and therefore the row - is not counted.
I should add that the query written this way assumes there are no duplicate (col1, col2) rows in the original table. If there are, then the inner subquery should select from a sub-subquery; line 3 of my code should be
from (select distinct col1, col2 from your_table)
Use the below script:
-- For every COL1, find its first COL2 value (MIN), count how many COL1 keys
-- start with each value, and left-join those counts to the full list of
-- COL2 values so that values never ranked first are reported with 0.
-- MIN(COL2) replaces the LISTAGG + SUBSTR/INSTR round trip, which cut off
-- values containing a comma and could exceed the LISTAGG length limit; the
-- ROW_NUMBER column that was computed but never read is dropped.
SELECT A.COL2, NVL(B.CNT, 0) AS CNT
FROM (SELECT DISTINCT COL2 FROM TET) A
LEFT JOIN (SELECT FIRST_COL2 AS COL2, COUNT(FIRST_COL2) AS CNT
           FROM (SELECT COL1,
                        MIN(COL2) AS FIRST_COL2
                 FROM TET
                 GROUP BY COL1)
           GROUP BY FIRST_COL2) B
ON A.COL2 = B.COL2
ORDER BY A.COL2;

Cumulative string concatenation

I have a requirement where I have to show data in cumulative concatenation style, just like running total by group.
Sample data
Col1 Col2
1 a
1 b
2 c
2 d
2 e
Expected output:
Col1 Col2
1 a
1 b,a
2 c
2 d,c
2 e,d,c
The concatenation needs to be broken down by Col1. Any help regarding how to get this result by Oracle SQL will be appreciated.
Assuming a particular ordering (ascending by col1, then descending by col2), this can be a solution based on hierarchical queries:
-- Rows are numbered by col1 ascending, col2 descending; CONNECT BY then
-- chains each row to the next-numbered row of the same col1. There is no
-- START WITH, so every row begins its own chain, and keeping only the leaf
-- of each chain yields one cumulative list per starting row.
with test as
(
select 1 as col1, 'a' as col2 from dual union all
select 1 as col1, 'b' as col2 from dual union all
select 2 as col1, 'c' as col2 from dual union all
select 2 as col1, 'd' as col2 from dual union all
select 2 as col1, 'e' as col2 from dual
)
select col1, col2
from (
-- sys_connect_by_path prefixes every element, including the first, with the
-- separator; substr(..., 2) removes that leading comma so the result reads
-- 'b,a' rather than ',b,a'.
select col1 AS col1, substr(sys_connect_by_path(col2, ','), 2) AS col2, connect_by_isleaf leaf
from (
select row_number() over (order by col1 asc, col2 desc) as num, col1, col2
from test
)
connect by nocycle prior col1 = col1 and prior num = num -1
)
where leaf = 1
order by col1, col2
Try:
-- d  : numbers the rows of each col1 by col2 (x = 1, 2, ...).
-- d1 : starts from x = 1 and, step by step, prepends the next row's col2 to
--      the list accumulated so far (col22).
WITH d AS (
    SELECT col1,
           col2,
           ROW_NUMBER() OVER (PARTITION BY col1 ORDER BY col2) AS x
    FROM tab_le
),
d1 (col1, col2, x, col22) AS (
    SELECT col1, col2, x, col2 col22
    FROM d
    WHERE x = 1
    UNION ALL
    SELECT d.col1, d.col2, d.x, d.col2 || ',' || d1.col22
    FROM d
    JOIN d1
      ON (d.col1 = d1.col1 AND d.x = d1.x + 1)
)
SELECT col1, col2, x, col22
FROM d1
ORDER BY col1, col2;
I'm not sure you can do this with listagg as it doesn't seem to support windowing clauses. If you're on 11g or higher you can use recursive subquery factoring to achieve your result.
WITH your_table (col1, col2) AS (
    -- stand-in for the real base data
    SELECT 1, 'a' FROM dual
    UNION ALL SELECT 1, 'b' FROM dual
    UNION ALL SELECT 2, 'c' FROM dual
    UNION ALL SELECT 2, 'd' FROM dual
    UNION ALL SELECT 2, 'e' FROM dual
),
t AS (
    -- rn gives each col1 group a sequence to walk through
    SELECT col1,
           col2,
           ROW_NUMBER() OVER (PARTITION BY col1 ORDER BY col2) AS rn
    FROM your_table
),
r (col1, col2, rn) AS (
    -- anchor member: the first row of every col1
    SELECT col1, col2, rn
    FROM t
    WHERE rn = 1
    UNION ALL
    -- recursive member: prepend the next row's col2 to the list built so far
    SELECT r.col1, t.col2 ||','|| r.col2, t.rn
    FROM r
    JOIN t
      ON t.col1 = r.col1
     AND t.rn = r.rn + 1
)
SELECT col1, col2
FROM r
ORDER BY col1, rn;
COL1 COL2
---------- --------------------
1 a
1 b,a
2 c
2 d,c
2 e,d,c
The your_table CTE is just to mimic your base data. The t CTE adds a row_number() analytic column to provide a sequence for the next part. The interesting part is the r recursive CTE. The anchor member starts with the first row (according to rn from the previous CTE). The recursive member then finds the next row (again according to rn) for that col1, and for that it concatenates the current col2 with the previous one, which may itself already be a concatenation.

How to select rows with column containing provided values

Hi all consider following as my table structure
Col1 Col2 Col3
A 1 Aa
A 2 Bb
A 1 Aa
A 4 Bb
B 2 Bb
C 1 Aa
C 5 Bb
D 3 Aa
As you can see, Col3 contains the distinct values Aa and Bb.
I am trying to write a query which returns only the rows whose Col1 value has both Aa and Bb, or Aa alone, in Col3.
The point is to remove those rows whose Col1 value has only Bb associated with it.
Example: the distinct Col1 value A should have Aa and Bb, or Aa alone, in the corresponding Col3. This requirement is violated by the value B in Col1, hence the result set should not contain the rows associated with B.
Expected output -
Col1 Col2 Col3
A 1 Aa
A 2 Bb
A 1 Aa
A 4 Bb
C 1 Aa
C 5 Bb
D 3 Aa
-- Keep every row whose Col1 has at least one 'Aa' row.
SELECT T.*
FROM TableName T
WHERE EXISTS (
    SELECT 1
    FROM TableName Probe
    WHERE Probe.Col1 = T.Col1
      AND Probe.Col3 = 'Aa'
)
One other approach is to use intersect and union.
Fiddle with sample data
-- Keys that have both 'Aa' and 'Bb', plus keys that have 'Aa'.
-- The parentheses spell out that INTERSECT is applied before UNION.
SELECT *
FROM t
WHERE col1 IN (
    (
        SELECT col1 FROM t WHERE col3 = 'Aa'
        INTERSECT
        SELECT col1 FROM t WHERE col3 = 'Bb'
    )
    UNION
    SELECT col1 FROM t WHERE col3 = 'Aa'
)
-- keep: the Col1 values whose 'Aa' rows sum to more than zero; joining to it
-- filters Table1 down to the rows of those keys.
SELECT table1.Col1,
       table1.Col2,
       table1.Col3
FROM Table1
JOIN (
    SELECT Col1,
           SUM(CASE WHEN col3 = 'Aa' THEN 1 WHEN Col3 = 'Bb' THEN 0 END) AS [Counting]
    FROM [dbo].[Table1]
    GROUP BY Col1
    HAVING SUM(CASE WHEN col3 = 'Aa' THEN 1 WHEN Col3 = 'Bb' THEN 0 END) > 0
) keep
    ON table1.Col1 = keep.Col1
Here's another option, using your values instead of a table.:
-- The literal rows are written once in a CTE and used twice: as the row
-- source (T) and as the input to the per-Col1 count of 'Aa' rows (T3).
With SampleRows As
    (Select 'A' As Col1, 1 As Col2, 'Aa' As Col3
     Union All
     Select 'A', 2, 'Bb'
     Union All
     Select 'A', 1, 'Aa'
     Union All
     Select 'A', 4, 'Bb'
     Union All
     Select 'B', 2, 'Bb'
     Union All
     Select 'C', 1, 'Aa'
     Union All
     Select 'C', 5, 'Bb'
     Union All
     Select 'D', 3, 'Aa'
    )
Select
    T.*
From
    SampleRows T
    Inner Join
        (Select
             Col1
             ,Sum(Case When Col3 = 'Aa' Then 1 Else 0 End) As CountAa
         From
             SampleRows T2
         Group By
             Col1
        ) T3 On T.Col1 = T3.Col1
Where
    T3.CountAa > 0
Simplified with a table, that's:
-- T3 holds, per Col1, the number of 'Aa' rows; only keys with at least one
-- such row survive the filter.
Select T.*
From YourTable T
Inner Join (
        Select Col1,
               Sum(Case When Col3 = 'Aa' Then 1 Else 0 End) As CountAa
        From YourTable T2
        Group By Col1
    ) T3
    On T.Col1 = T3.Col1
Where T3.CountAa > 0
The nice thing about this method is that you can add in lots of conditions in that case statement on a more complicated data set.
Depending on what your actual data looks like, row_number() may be another possibility:
-- Uses a windowed MAX rather than row_number(): the filter "i > 1" kept the
-- second and later rows of every Col1 group, so a group holding several 'Bb'
-- rows and no 'Aa' row was still returned. has_aa is 1 for every row of a
-- Col1 group that contains at least one 'Aa' row, and 0 otherwise.
select Col1, Col2, Col3
from (
    select *,
           has_aa = max(case when Col3 = 'Aa' then 1 else 0 end) over(partition by Col1)
    from TableName
) a
where has_aa = 1

Select records where all rows have same value in two columns

Here is my sample table
Col1 Col2
A 1
B 1
A 1
B 2
C 3
I want to be able to select distinct records where all rows have the same value in Col1 and Col2. So my answer should be
Col1 Col2
A 1
C 3
I tried
SELECT Col1,
       Col2
FROM Table
GROUP BY Col1,
         Col2
This gives me
Col1 Col2
A 1
B 1
B 2
C 3
which is not the result I am looking for. Any tips would be appreciated.
Try this out:
-- A col1 qualifies when all of its rows share one col2 value; MAX(col2)
-- then simply reports that value.
SELECT col1,
       MAX(col2) aCol2
FROM t
GROUP BY col1
HAVING COUNT(DISTINCT col2) = 1
Output:
| COL1 | ACOL2 |
|------|-------|
| A | 1 |
| C | 3 |
Fiddle here.
Basically, this makes sure that there is only one distinct value of col2 for a given col1.
Try this:
-- Return each Col1 whose rows all carry the same Col2 (MIN = MAX), once.
-- Grouping by (Col1, Col2) with COUNT(*) > 1 only found repeated pairs, so
-- it returned A 1 but missed C 3; SELECT * is also not valid together with
-- GROUP BY.
SELECT Col1, MIN(Col2) AS Col2
FROM MYTABLE
GROUP BY Col1
HAVING MIN(Col2) = MAX(Col2)
For example SQLFiddle here
you can try either of the below -
-- Group by col1 alone and keep the keys that have exactly one distinct col2.
-- Grouping by (col1, col2) with count(*) > 1 returned only the repeated pair
-- A 1. The sample row C 3 from the question is added so that the expected
-- output (A 1, C 3) can be reproduced.
select col1, min(col2) as col2 from
(
select 'A' Col1 , 1 Col2
from dual
union all
select 'B' , 1
from dual
union all
select 'A' ,1
from dual
union all
select 'B' ,2
from dual
union all
select 'C' ,3
from dual
)
group by col1
having count(distinct col2) = 1;
OR
-- rn = 1 keeps a single copy of every (col1, col2) pair; lo = hi keeps only
-- the col1 values whose rows all share one col2. The previous filter
-- (row number > 1) returned just the duplicate copy of A 1. The sample row
-- C 3 from the question is added so that the expected output (A 1, C 3) can
-- be reproduced.
select col1, col2
from
(
select col1, col2,
       row_number() over (partition by col1, col2 order by col1, col2) rn,
       min(col2) over (partition by col1) lo,
       max(col2) over (partition by col1) hi
from
(
select 'A' Col1 , 1 Col2
from dual
union all
select 'B' , 1
from dual
union all
select 'A' ,1
from dual
union all
select 'B' ,2
from dual
union all
select 'C' ,3
from dual
)
)
where rn = 1 and lo = hi;