SQL: Analytical Function SUM with Distinct

SQL: Analytical Function SUM with Distinct - sql

Here We have a duplicate ID, Need to get distinct of ID and SUM, So it should be 13 as Amount
I know it is possible to do this by first getting the distinct query and then on top of the query getting the SUM.
Is there a way to achieve this in one single query
with data as
(
select 1 as ID ,'ABC' as Name, 'Paid' as Status, 10 as Amount
union all
select 1 as ID ,'ABC' as Name, 'Paid' as Status, 10 as Amount
union all
select 2 as ID ,'ABC' as Name, 'Paid' as Status, 1 as Amount
union all
select 3 as ID ,'ABC' as Name, 'Paid' as Status, 1 as Amount
union all
select 4 as ID ,'ABC' as Name, 'Paid' as Status, 1 as Amount
)
select * , sum(amount) over (partition by name ,status) from data

There might be a better way to do, but here's my take:
with data as
(
select 1 as ID ,'ABC' as Name, 'Paid' as Status, 10 as Amount
union all
select 1 as ID ,'ABC' as Name, 'Paid' as Status, 10 as Amount
union all
select 2 as ID ,'ABC' as Name, 'Paid' as Status, 1 as Amount
union all
select 3 as ID ,'ABC' as Name, 'Paid' as Status, 1 as Amount
union all
select 4 as ID ,'ABC' as Name, 'Paid' as Status, 1 as Amount
)
select sum(Amount) from (select distinct * from data) a

Here is my take,
with data as
(
select 1 as ID ,'ABC' as Name, 'Paid' as Status, 10 as Amount
union all
select 1 as ID ,'ABC' as Name, 'Paid' as Status, 10 as Amount
union all
select 2 as ID ,'ABC' as Name, 'Paid' as Status, 1 as Amount
union all
select 3 as ID ,'ABC' as Name, 'Paid' as Status, 1 as Amount
union all
select 4 as ID ,'ABC' as Name, 'Paid' as Status, 1 as Amount
)
select sum(Amount) from data d where not exists(select 'x' from data d2 where d2.amount = d.amount and d2.id > d.id)

I'd leverage Snowflake's QUALIFY function to do something along these lines:
with data as
(
select 1 as ID ,'ABC' as Name, 'Paid' as Status, 10 as Amount
union all
select 1 as ID ,'ABC' as Name, 'Paid' as Status, 10 as Amount
union all
select 2 as ID ,'ABC' as Name, 'Paid' as Status, 1 as Amount
union all
select 3 as ID ,'ABC' as Name, 'Paid' as Status, 1 as Amount
union all
select 4 as ID ,'ABC' as Name, 'Paid' as Status, 1 as Amount
),
filter as (
select *
from data
qualify row_number() over (partition by id order by name) = 1
)
select * , sum(amount) over (partition by name ,status)
from filter;

Related

How to get first row of each status?

I want to get the first row of each status for each id.
There can be multiple rows for each status. So I want to get the first occurrence of each status based on the previous status.
e.g. info_required first occurs at row 2, then it changes to another status pending at row 4, and then info_required again at row 6.
Likewise, status pending first at row 4, then at row 8 since the status changed after row4, it needs to be in the resultset.
Hence below I want to get the row number 1, 2, 4, 6, and 8.
WITH t1 AS (
SELECT 1 AS row, 'A' AS id, 'created' AS status, '2021-05-18 18:30:00'::timestamp AS created_at UNION ALL
SELECT 2 AS row, 'A' AS id, 'info_required' AS status, '2021-05-19 11:30:00'::timestamp AS created_at UNION ALL
SELECT 3 AS row, 'A' AS id, 'info_required' AS status, '2021-05-19 12:00:00'::timestamp AS created_at UNION ALL
SELECT 4 AS row, 'A' AS id, 'pending' AS status, '2021-05-19 12:30:00'::timestamp AS created_at UNION ALL
SELECT 5 AS row, 'A' AS id, 'pending' AS status, '2021-05-20 13:30:00'::timestamp AS created_at UNION ALL
SELECT 6 AS row, 'A' AS id, 'info_required' AS status, '2021-05-20 14:30:00'::timestamp AS created_at UNION ALL
SELECT 7 AS row, 'A' AS id, 'info_required' AS status, '2021-05-20 15:30:00'::timestamp AS created_at UNION ALL
SELECT 8 AS row, 'A' AS id, 'pending' AS status, '2021-05-20 16:30:00'::timestamp AS created_at
)
SELECT *
FROM t1

Using CONDITIONAL_CHANGE_EVENT
WITH cte AS (
SELECT *, CONDITIONAL_CHANGE_EVENT(status) over (partition by id
order by created_at) AS cce
FROM t1
)
SELECT *
FROM cte
QUALIFY ROW_NUMBER() OVER(PARTITION BY id, cce ORDER BY created_at) = 1;
Data preparation:
CREATE TABLE t1 AS
WITH t1 AS (
SELECT 1 AS row_, 'A' AS id, 'created' AS status, '2021-05-18 18:30:00'::timestamp AS created_at UNION ALL
SELECT 2 AS row_, 'A' AS id, 'info_required' AS status, '2021-05-19 11:30:00'::timestamp AS created_at UNION ALL
SELECT 3 AS row_, 'A' AS id, 'info_required' AS status, '2021-05-19 12:00:00'::timestamp AS created_at UNION ALL
SELECT 4 AS row_, 'A' AS id, 'pending' AS status, '2021-05-19 12:30:00'::timestamp AS created_at UNION ALL
SELECT 5 AS row_, 'A' AS id, 'pending' AS status, '2021-05-20 13:30:00'::timestamp AS created_at UNION ALL
SELECT 6 AS row_, 'A' AS id, 'info_required' AS status, '2021-05-20 14:30:00'::timestamp AS created_at UNION ALL
SELECT 7 AS row_, 'A' AS id, 'info_required' AS status, '2021-05-20 15:30:00'::timestamp AS created_at UNION ALL
SELECT 8 AS row_, 'A' AS id, 'pending' AS status, '2021-05-20 16:30:00'::timestamp AS created_at
)
SELECT *
FROM t1;
Cte part:
SELECT *, CONDITIONAL_CHANGE_EVENT(status) over (partition by id
order by created_at) AS cce
FROM t1;

You can use lag() and qualify():
select t.*
from t
qualify lag(status) over (partition by id order by created_at) is distinct from status;

How to find max of counts of a group where count is calculated using a function in SQL?

I have a table made up of customer_id and keyword_id. There are multiple occurance of different combinations of customer_id and keyword_id, and I want to find the highest occurring keyword_id for each customer_id. How should I do that?
Customer_ID . Keyword_ID
1 a
1 a
1 a
1 b
1 b
2 c
2 c
2 c
2 d
Expected Result
Customer_ID . Max_Keyword_ID . Count
1 a 3
2 c 3

You can make use of count and dense_rank to get your expected output. Get the rank =1 to make sure that you are getting the rows where you have maximum occurrences of a given output.
with cte as (
select 1 as customer_id, 'a' as Keyword_ID union all
select 1 as customer_id, 'a' as Keyword_ID union all
select 1 as customer_id, 'a' as Keyword_ID union all
select 1 as customer_id, 'b' as Keyword_ID union all
select 1 as customer_id, 'b' as Keyword_ID union all
select 2 as customer_id, 'c' as Keyword_ID union all
select 2 as customer_id, 'c' as Keyword_ID union all
select 2 as customer_id, 'c' as Keyword_ID union all
select 2 as customer_id, 'd' as Keyword_ID)
SELECT customer_id, Keyword_ID, [COUNT] FROM (
select customer_id, Keyword_ID, count(1) [COUNT],
dENSE_RANK() OVER (PARTITION BY customer_id ORDER BY COUNT(1) DESC) RANKED from cte C
group by customer_id, Keyword_ID ) Z
WHERE Z.RANKED = 1
Output:
customer_id Keyword_ID COUNT
1 a 3
2 c 3

You can try below - using correlated subquery
with cte as
(
select Customer_ID,Keyword_ID,count(Keyword_ID) as cnt
from tablename
group by Customer_ID,Keyword_ID
)
select * from cte a where cnt in (select max(cnt) from cte b where a.Customer_ID=b.Customer_ID )

You can try the following query
select Customer_ID,Keyword_ID,Count(Keyword_ID) as Count from tab group by
Customer_ID,Keyword_ID
Having Count(Keyword_ID)=(
SELECT MAX(mycount)
FROM (
SELECT Keyword_ID, COUNT(Keyword_ID) mycount
FROM tab
GROUP BY Keyword_ID) checkMaxValue)
Click here to view the reference

Another way of doing it using ROW_NUMBER() with PARTITION BY Customer_ID column.
You can try like following.
select *
from
(
select *, row_number() over(partition by Customer_ID order by ct desc) rn
from
(
select Customer_ID , Keyword_ID, count(*) ct
from YOURTABLE
GROUP BY Customer_ID , Keyword_ID
) t
) t1
where rn=1

Count total and count having in the same query

is there a way to get a total count of rows per {id, date} and the count > 1 per {id, date, columnX} in the same query?
For example, having such a table:
id date columnX
1 2017-04-20 a
1 2017-04-20 a
1 2017-04-18 b
1 2017-04-17 c
2 2017-04-20 a
2 2017-04-20 a
2 2017-04-20 c
2 2017-04-19 b
2 2017-04-19 b
2 2017-04-19 b
2 2017-04-19 b
2 2017-04-19 c
As the result, I wanna get the following table:
id date columnX count>1 count_total
1 2017-04-20 a 2 2
2 2017-04-20 a 2 3
2 2017-04-19 b 4 5
I tried to do it with partition by but receive weird results. I've heard Rollup function might be used but it seems like it's applicable only in legacy SQL, which is not the option for me.

If I understand correctly, you can use window functions:
select id, date, columnx, cnt,
(case when cnt > 1 then cnt else 0 end) as cnt_gt_1,
total_cnt
from (select id, date, columnx, count(*) as cnt
sum(count(*)) over (partition by id, date) as total_cnt
from t
group by id, date, columnx
) x
where cnt > 1;

Another possibility:
SELECT
id,
date,
data.columnX columnX,
data.count_ count_bigger_1,
count_total
FROM(
SELECT
id,
date,
ARRAY_AGG(columnX) data,
COUNT(1) count_total
FROM
`your_table_name`
GROUP BY
id, date
),
UNNEST(ARRAY(SELECT AS STRUCT columnX, count(1) count_ FROM UNNEST(data) columnX GROUP BY columnX HAVING count(1) > 1)) data
You can test it with simulated data:
WITH data AS(
SELECT 1 AS id, '2017-04-20' AS date, 'a' AS columnX UNION ALL
SELECT 1 AS id, '2017-04-20' AS date, 'a' AS columnX UNION ALL
SELECT 1 AS id, '2017-04-18' AS date, 'b' AS columnX UNION ALL
SELECT 1 AS id, '2017-04-17' AS date, 'c' AS columnX UNION ALL
SELECT 2 AS id, '2017-04-20' AS date, 'a' AS columnX UNION ALL
SELECT 2 AS id, '2017-04-20' AS date, 'a' AS columnX UNION ALL
SELECT 2 AS id, '2017-04-20' AS date, 'c' AS columnX UNION ALL
SELECT 2 AS id, '2017-04-19' AS date, 'b' AS columnX UNION ALL
SELECT 2 AS id, '2017-04-19' AS date, 'b' AS columnX UNION ALL
SELECT 2 AS id, '2017-04-19' AS date, 'b' AS columnX UNION ALL
SELECT 2 AS id, '2017-04-19' AS date, 'b' AS columnX UNION ALL
SELECT 2 AS id, '2017-04-19' AS date, 'c' AS columnX
)
SELECT
id,
date,
data.columnX columnX,
data.count_ count_bigger_1,
count_total
FROM(
SELECT
id,
date,
ARRAY_AGG(columnX) data,
COUNT(1) count_total
FROM
data
GROUP BY
id, date
),
UNNEST(ARRAY(SELECT AS STRUCT columnX, count(1) count_ FROM UNNEST(data) columnX GROUP BY columnX HAVING count(1) > 1)) data
This solution avoids the analytical function (which can be quite expensive depending on the input) and scales well to large volumes of data.

I recommend you to add into your example two more below rows
1 2017-04-20 x
1 2017-04-20 x
and check what solutions in two previous answers will give you:
It will be something like below:
id date columnX count>1 count_total
1 2017-04-20 a 2 4
1 2017-04-20 x 2 4
2 2017-04-20 a 2 3
2 2017-04-19 b 4 5
Notice two rows for id=1 and date=2017-04-20 and both having count_total=4
I am not sure if this is what you want - even though you might not even considered this scenario in your question
Anyway, I feel that to support more generic case like above your expectation of output should of be like below
Row id date x.columnX x.countX count_total
1 1 2017-04-20 x 2 4
a 2
2 2 2017-04-20 a 2 3
3 2 2017-04-19 b 4 5
where x is repeated field and each value represents respective columnX with its count
Below query does exactly this
#standardSQL
SELECT id, date,
ARRAY(SELECT x FROM UNNEST(x) AS x WHERE countX > 1) AS x,
count_total
FROM (
SELECT id, date, SUM(countX) AS count_total,
ARRAY_AGG(STRUCT<columnX STRING, countX INT64>(columnX, countX) ORDER BY countX DESC) AS X
FROM (
SELECT id, date,
columnX, COUNT(1) countX
FROM `yourTable`
GROUP BY id, date, columnX
)
GROUP BY id, date
HAVING count_total > 1
)
you can play/test it with dummy data from your question
#standardSQL
WITH `yourTable` AS(
SELECT 1 AS id, '2017-04-20' AS date, 'a' AS columnX UNION ALL
SELECT 1, '2017-04-20', 'a' UNION ALL
SELECT 1, '2017-04-20', 'x' UNION ALL
SELECT 1, '2017-04-20', 'x' UNION ALL
SELECT 1, '2017-04-18', 'b' UNION ALL
SELECT 1, '2017-04-17', 'c' UNION ALL
SELECT 2, '2017-04-20', 'a' UNION ALL
SELECT 2, '2017-04-20', 'a' UNION ALL
SELECT 2, '2017-04-20', 'c' UNION ALL
SELECT 2, '2017-04-19', 'b' UNION ALL
SELECT 2, '2017-04-19', 'b' UNION ALL
SELECT 2, '2017-04-19', 'b' UNION ALL
SELECT 2, '2017-04-19', 'b' UNION ALL
SELECT 2, '2017-04-19', 'c'
)
SELECT id, date,
ARRAY(SELECT x FROM UNNEST(x) AS x WHERE countX > 1) AS x,
count_total
FROM (
SELECT id, date, SUM(countX) AS count_total,
ARRAY_AGG(STRUCT<columnX STRING, countX INT64>(columnX, countX) ORDER BY countX DESC) AS X
FROM (
SELECT id, date,
columnX, COUNT(1) countX
FROM `yourTable`
GROUP BY id, date, columnX
)
GROUP BY id, date
HAVING count_total > 1
)

Pivot table with multi values of one column

Everything find with one value for each column, but does it support multi value?
Example for my query:
WITH
INPUT_LIST AS
(SELECT 1 PRODUCT_ID, 1 TYPE_ID, 1000 PRICE FROM DUAL
UNION ALL
SELECT 2 PRODUCT_ID, 1 TYPE_ID, 1500 PRICE FROM DUAL
UNION ALL
SELECT 3 PRODUCT_ID, 2 TYPE_ID, 500 PRICE FROM DUAL
UNION ALL
SELECT 4 PRODUCT_ID, 3 TYPE_ID, 2000 PRICE FROM DUAL
UNION ALL
SELECT 1 PRODUCT_ID, 4 TYPE_ID, 1000 PRICE FROM DUAL
UNION ALL
SELECT 2 PRODUCT_ID, 5 TYPE_ID, 1500 PRICE FROM DUAL
UNION ALL
SELECT 3 PRODUCT_ID, 2 TYPE_ID, 500 PRICE FROM DUAL
UNION ALL
SELECT 2 PRODUCT_ID, 3 TYPE_ID, 2000 PRICE FROM DUAL
)
SELECT * FROM
(SELECT PRODUCT_ID, TYPE_ID, SUM(PRICE) TOTAL FROM INPUT_LIST GROUP BY PRODUCT_ID, TYPE_ID)
PIVOT (SUM(TOTAL) FOR TYPE_ID IN (1 AS "FIRST_TYPE", 2 AS "SECOND_TYPE", 3 AS "THIRD_TYPE", 4 AS "FOURTH_TYPE", 5 AS "FIFTH"))
ORDER BY PRODUCT_ID;
Multi value mean I want to mark TYPE_ID in (3,4,5) to "OTHER_TYPE". Something like:
PIVOT (SUM(TOTAL) FOR TYPE_ID IN (1 AS "FIRST_TYPE", 2 AS "SECOND_TYPE", (3,4,5) AS "OTHER_TYPE"))
I can use other way to query but I want to know can pivot do that?

No PIVOT clause does not have such a feature.
But You can still do a pivot the old fashioned way:
SELECT PRODUCT_ID,
sum( case when type_id = 1 then PRICE end ) As FIRST_TYPE,
sum( case when type_id = 2 then PRICE end ) As SEcOND_TYPE,
sum( case when type_id in ( 3,4,5) then PRICE end ) ANOTHER_TYPE
FROM INPUT_LIST
GROUP BY PRODUCT_ID
ORDER BY PRODUCT_ID;

Just group the types in the sub-query first:
SELECT *
FROM (
SELECT PRODUCT_ID,
CASE
WHEN TYPE_ID IN (1,2)
THEN TYPE_ID
ELSE 3
END AS TYPE_ID,
PRICE
FROM INPUT_LIST
)
PIVOT (
SUM(PRICE) FOR TYPE_ID IN (
1 AS "FIRST_TYPE",
2 AS "SECOND_TYPE",
3 AS "OTHER_TYPE"
)
)
ORDER BY PRODUCT_ID;

Get distinct rows based on priority?

I have a table as below.i am using oracle 10g.
TableA
------
id status
---------------
1 R
1 S
1 W
2 R
i need to get distinct ids along with their status. if i query for distinct ids and their status i get all 4 rows.
but i should get only 2. one per id.
here id 1 has 3 distinct statuses. here i should get only one row based on priority.
first priority is to 'S' , second priority to 'W' and third priority to 'R'.
in my case i should get two records as below.
id status
--------------
1 S
2 R
How can i do that? Please help me.
Thanks!

select
id,
max(status) keep (dense_rank first order by instr('SWR', status)) as status
from TableA
group by id
order by 1
fiddle

select id , status from (
select TableA.*, ROW_NUMBER()
OVER (PARTITION BY TableA.id ORDER BY DECODE(
TableA.status,
'S',1,
'W',2,
'R',3,
4)) AS row_no
FROM TableA)
where row_no = 1

This is first thing i would do, but there may be a better way.
Select id, case when status=1 then 'S'
when status=2 then 'W'
when status=3 then 'R' end as status
from(
select id, max(case when status='S' then 3
when status='W' then 2
when status='R' then 1
end) status
from tableA
group by id
);

To get it done you can write a similar query:
-- sample of data from your question
SQL> with t1(id , status) as (
2 select 1, 'R' from dual union all
3 select 1, 'S' from dual union all
4 select 1, 'W' from dual union all
5 select 2, 'R' from dual
6 )
7 select id -- actual query
8 , status
9 from ( select id
10 , status
11 , row_number() over(partition by id
12 order by case
13 when upper(status) = 'S'
14 then 1
15 when upper(status) = 'W'
16 then 2
17 when upper(status) = 'R'
18 then 3
19 end
20 ) as rn
21 from t1
22 ) q
23 where q.rn = 1
24 ;
ID STATUS
---------- ------
1 S
2 R

select id,status from
(select id,status,decode(status,'S',1,'W',2,'R',3) st from table) where (id,st) in
(select id,min(st) from (select id,status,decode(status,'S',1,'W',2,'R',3) st from table))

Something like this???
SQL> with xx as(
2 select 1 id, 'R' status from dual UNION ALL
3 select 1, 'S' from dual UNION ALL
4 select 1, 'W' from dual UNION ALL
5 select 2, 'R' from dual
6 )
7 select
8 id,
9 DECODE(
10 MIN(
11 DECODE(status,'S',1,'W',2,'R',3)
12 ),
13 1,'S',2,'W',3,'R') "status"
14 from xx
15 group by id;
ID s
---------- -
1 S
2 R
Here, logic is quite simple.
Do a DECODE for setting the 'Priority', then find the MIN (i.e. one with Higher Priority) value and again DECODE it back to get its 'Status'

Using MOD() example with added values:
SELECT id, val, distinct_val
FROM
(
SELECT id, val
, ROW_NUMBER() OVER (ORDER BY id) row_seq
, MOD(ROW_NUMBER() OVER (ORDER BY id), 2) even_row
, (CASE WHEN id = MOD(ROW_NUMBER() OVER (ORDER BY id), 2) THEN NULL ELSE val END) distinct_val
FROM
(
SELECT 1 id, 'R' val FROM dual
UNION
SELECT 1 id, 'S' val FROM dual
UNION
SELECT 1 id, 'W' val FROM dual
UNION
SELECT 2 id, 'R' val FROM dual
UNION -- comment below for orig data
SELECT 3 id, 'K' val FROM dual
UNION
SELECT 4 id, 'G' val FROM dual
UNION
SELECT 1 id, 'W' val FROM dual
))
WHERE distinct_val IS NOT NULL
/
ID VAL DISTINCT_VAL
--------------------------
1 S S
2 R R
3 K K
4 G G

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

SQL: Analytical Function SUM with Distinct - sql

Related

How to get first row of each status?

How to find max of counts of a group where count is calculated using a function in SQL?

Count total and count having in the same query

Pivot table with multi values of one column

Get distinct rows based on priority?

Categories

Resources