SQL: An alternative to Group By approach using Partition By

SQL: An alternative to Group By approach using Partition By - sql

I have a table in a DW system (say AWS SnowFlake):
UPC_CODE A_PRICE A_QTY DATE COMPANY_CODE A_CAT
1001 100.25 2 2021-05-06 1 PB
1001 2122.75 10 2021-05-01 1 PB
1002 212.75 5 2021-05-07 2 PT
1002 3100.75 10 2021-05-01 2 PB
What I am looking for is :
For each UPC_CODE and COMPANY_CODE the latest data should be picked up
So the resultant table should be like below:
UPC_CODE A_PRICE A_QTY DATE COMPANY_CODE A_CAT
1001 100.25 2 2021-05-06 1 PB
1002 212.75 5 2021-05-07 2 PT
Approach: Below SQL string
-- Asker's attempt: takes MAX(DATE) per distinct combination of the other
-- five columns (positions 1,2,3,5,6 of the select list).
-- NOTE(review): because A_PRICE, A_QTY and A_CAT are grouping keys, rows of
-- the same UPC_CODE/COMPANY_CODE with different prices, quantities or
-- categories land in separate groups, so this does not reduce the data to
-- one "latest" row per UPC_CODE and COMPANY_CODE.
SELECT UPC_CODE,A_PRICE,A_QTY,MAX(DATE) AS F_DATE,COMPANY_CODE,A_CAT
FROM <table_name>
GROUP BY 1,2,3,5,6
Is there an alternative approach using PARTITION BY?

Your current GROUP BY query doesn't really do what you have in mind. One canonical approach here uses ROW_NUMBER:
-- Number the rows of each (UPC_CODE, COMPANY_CODE) pair from newest to
-- oldest, then keep row 1 of every pair.
-- If several rows share the newest DATE, ROW_NUMBER picks one arbitrarily.
WITH ranked AS (
    SELECT
        UPC_CODE,
        A_PRICE,
        A_QTY,
        DATE,
        COMPANY_CODE,
        A_CAT,
        ROW_NUMBER() OVER (
            PARTITION BY UPC_CODE, COMPANY_CODE
            ORDER BY DATE DESC
        ) AS rn
    FROM yourTable
)
SELECT
    UPC_CODE,
    A_PRICE,
    A_QTY,
    DATE,
    COMPANY_CODE,
    A_CAT
FROM ranked
WHERE rn = 1;
If you did want to use a GROUP BY approach, here is one way to do that:
-- Compute the newest DATE of every (UPC_CODE, COMPANY_CODE) pair, then join
-- the table back to it so that only rows dated on that newest DATE remain.
-- Rows tied on the newest DATE are all returned.
SELECT src.*
FROM yourTable AS src
INNER JOIN (
    SELECT
        UPC_CODE,
        COMPANY_CODE,
        MAX(DATE) AS MAX_DATE
    FROM yourTable
    GROUP BY UPC_CODE, COMPANY_CODE
) AS latest
    ON  latest.UPC_CODE     = src.UPC_CODE
    AND latest.COMPANY_CODE = src.COMPANY_CODE
    AND latest.MAX_DATE     = src.DATE;

In Snowflake (which your first line suggests), you would use QUALIFY:
-- Keep, for every (UPC_CODE, COMPANY_CODE) pair, only the row with the
-- newest DATE. QUALIFY filters on the window function result directly, so
-- no subquery or CTE is needed.
-- The partition lists only the two key columns: adding A_PRICE, A_QTY or
-- A_CAT would create one partition per distinct price/quantity/category and
-- return older rows as well.
SELECT UPC_CODE, A_PRICE, A_QTY, DATE AS F_DATE, COMPANY_CODE, A_CAT
FROM <table_name>
QUALIFY ROW_NUMBER() OVER (
    PARTITION BY UPC_CODE, COMPANY_CODE
    ORDER BY DATE DESC
) = 1;

Related

SQL Server Find Max and Min in Group

I'm using Microsoft SQL Server and have a table like this:
DATE
ITEM
BUYER
QTY_BUY
2022-01-01
ITEM A
TOMMY
5
2022-01-01
ITEM A
BENNY
3
2022-01-01
ITEM A
ANDY
1
2022-01-01
ITEM A
JOHN
8
2022-01-01
ITEM B
TOMMY
2
2022-01-01
ITEM B
BENNY
10
2022-01-01
ITEM B
ANDY
3
2022-01-01
ITEM B
JOHN
6
2022-01-02
ITEM A
TOMMY
3
2022-01-02
ITEM A
BENNY
0
2022-01-02
ITEM A
ANDY
5
2022-01-02
ITEM A
JOHN
6
I want to show top buyer and min buyer group by date and item, so it will look like:
DATE
ITEM
TOP_BUYER
TOP_QTY
MIN_BUYER
QTY_MIN
2022-01-01
ITEM A
JOHN
8
ANDY
1
2022-01-01
ITEM B
BENNY
10
TOMMY
2
2022-01-02
ITEM A
JOHN
6
BENNY
0
Please help me do this; I have tried many approaches but could not get this result. Thanks in advance.

We can handle this requirement using ROW_NUMBER:
-- Number the rows of each (DATE, ITEM) group from both ends of QTY_BUY:
-- rn_low = 1 is the smallest quantity, rn_high = 1 the largest.
-- The outer query then folds those two rows into one output row per group.
WITH ranked AS (
    SELECT
        DATE,
        ITEM,
        BUYER,
        QTY_BUY,
        ROW_NUMBER() OVER (PARTITION BY DATE, ITEM ORDER BY QTY_BUY)      AS rn_low,
        ROW_NUMBER() OVER (PARTITION BY DATE, ITEM ORDER BY QTY_BUY DESC) AS rn_high
    FROM yourTable
)
SELECT
    DATE,
    ITEM,
    MAX(CASE WHEN rn_high = 1 THEN BUYER END)   AS TOP_BUYER,
    MAX(CASE WHEN rn_high = 1 THEN QTY_BUY END) AS TOP_QTY,
    MAX(CASE WHEN rn_low = 1 THEN BUYER END)    AS MIN_BUYER,
    MAX(CASE WHEN rn_low = 1 THEN QTY_BUY END)  AS QTY_MIN
FROM ranked
GROUP BY DATE, ITEM
ORDER BY DATE, ITEM;

The solution is to use first_value + partition over
This query was tested inside SQL Server
-- FIRST_VALUE over each ([date], item) partition: ordered by qty_buy
-- descending it yields the top buyer/quantity, ascending the minimum ones.
-- Every input row carries the same four values for its partition, so
-- DISTINCT collapses the result to one row per ([date], item).
select distinct
    [date],
    Item,
    first_value(buyer)   over (partition by [date], item order by qty_buy desc) as Top_Buyer,
    first_value(qty_buy) over (partition by [date], item order by qty_buy desc) as Top_Qty,
    first_value(buyer)   over (partition by [date], item order by qty_buy asc)  as Min_Buyer,
    first_value(qty_buy) over (partition by [date], item order by qty_buy asc)  as Qty_Min
from testtable

It can also be done using simple group by and outer apply,
see this dbfiddle
-- One output row per (bdate, item): the largest and smallest qty come from
-- plain aggregates, the matching buyer names from two correlated lookups.
select t.bdate,
t.item,
-- tb.buyer is the same value on every row of a group, so max() only
-- satisfies the GROUP BY rules; it does not choose between buyers.
max(tb.buyer) as top_buyer,
max(t.qty) as top_qty,
max(mb.buyer) as min_buyer,
min(t.qty) as qty_min
from test t
-- Buyer holding the largest qty for this row's bdate and item.
outer apply ( select top 1 t2.buyer
from test t2
where t2.bdate = t.bdate
and t2.item = t.item
order by t2.qty desc
) tb
-- Buyer holding the smallest qty for this row's bdate and item.
outer apply ( select top 1 t2.buyer
from test t2
where t2.bdate = t.bdate
and t2.item = t.item
order by t2.qty
) mb
group by t.bdate,
t.item
-- NOTE(review): sorted by bdate only, so the order of items within one
-- date is not fixed by this query.
order by t.bdate

Select rows based on one column value exists more than once

I have a table like this
Pcode LogId ExtDate
------------------------------
p123 2 2021-01-02
p342 3 2021-01-16
p456 4 2021-05-02
p456 5 2021-07-26
p634 6 2021-05-02
p764 7 2021-01-18
p764 8 2021-06-25
I am looking for a query that returns only those rows whose Pcode value occurs more than once.
The output should look like this; that is, we keep only the rows whose Pcode appears more than once:
Pcode LogId ExtDate
-----------------------------
p456 4 2021-05-02
p456 5 2021-07-26
p764 7 2021-01-18
p764 8 2021-06-25
I tried using dense_rank, but I got stuck here... can't move any further
-- Asker's attempt: ranks each pcode's rows by extdate, newest first.
-- NOTE(review): rn is selected but never filtered on, so every joined row
-- is returned; a rank alone also cannot tell how many rows a pcode has.
select
pcode,
LogId,
extdate,
rn
from
(select
pcode,
L.logid,
extdate,
dense_rank() over (partition by pcode order by L.extdate desc) rn
from
kip_project_master P
inner join
kip_report_extraction_log L on L.LogId = P.LogId) tbl

You can try the below - demo here
-- Attach to every row the number of rows sharing its pcode, then keep the
-- rows whose pcode occurs more than once.
select
    pcode,
    logid,
    extdate
from (
    select
        t1.*,
        count(pcode) over (partition by pcode) as pcode_rows
    from t1
) counted
where pcode_rows > 1

maybe ?
-- Self-contained demo: load the sample rows into a temp table, then return
-- only the rows whose Pcode occurs more than once.
DROP TABLE IF EXISTS #yourtable;
CREATE TABLE #yourtable(
    Pcode   VARCHAR(30) NOT NULL
   ,LogId   INT
   ,ExtDate DATE
);
INSERT INTO #yourtable(Pcode,LogId,ExtDate) VALUES
 ('p123',2,'2021-01-02'),('p342',3,'2021-01-16'),('p456',4,'2021-05-02')
,('p456',5,'2021-07-26'),('p634',6,'2021-05-02'),('p764',7,'2021-01-18')
,('p764',8,'2021-06-25');
-- cte lists the Pcode values that appear on more than one row.
WITH cte AS
(
    SELECT Pcode
    FROM #yourtable
    GROUP BY Pcode
    HAVING (COUNT(Pcode) > 1)
)
SELECT yt.Pcode, yt.LogId, yt.ExtDate
-- Read from the temp table populated above (the "#" prefix is required;
-- plain "yourtable" would name a different, permanent table).
FROM #yourtable yt
INNER JOIN
    cte ON yt.Pcode = cte.Pcode;

Assuming another column is unique, just use exists:
-- Keep a row when some other row (one with a different logid) carries the
-- same pcode. Relies on logid telling rows of the same pcode apart.
select p.*
from kip_project_master p
where exists (
          select 1
          from kip_project_master dup
          where dup.pcode = p.pcode
            and dup.logid <> p.logid
      )
order by p.pcode, p.logid;
In particular, this can take advantage of an index on (pcode, logid), which should make it pretty fast.

you can do this like,
-- Join every row to the list of pcodes that occur more than once.
-- The outer table needs the alias "a" that the join condition refers to;
-- a.* keeps the output to the table's own columns instead of also
-- repeating pcode and the helper count from the derived table.
select a.*
from yourtable a
inner join (
    select pcode, count(1) as cnt
    from yourtable
    group by pcode
    having count(1) > 1
) b
    on a.pcode = b.pcode

How to create GROUP BY on min and max date

I have a database table like this
emp_id start-date end_date title location
111 1-JAN-2000 31-DEC-2003 MANAGER NYO
111 1-JAN-2003 31-DEC-2005 MANAGER BOM
111 1-JAN-2006 31-DEC-2007 CFO NYO
111 1-JAN-2008 31-DEC-2015 MANAGER NYO
I have created a SQL code already with GROUP BY and min , max function
-- Asker's attempt: one row per (emp_id, title) with the overall earliest
-- start and latest end.
-- NOTE(review): all rows with the same title fall into one group, so two
-- separate MANAGER periods with a CFO period in between are merged.
select emp_id,min(start_date),max(end_date),title
from table1
group by emp_id,title
What I expect is this:
111 1-JAN-2000 31-DEC-2005 MANAGER
111 1-JAN-2006 31-DEC-2007 CFO
111 1-JAN-2008 31-DEC-2015 MANAGER
What i am getting is:
111 1-JAN-2000 31-DEC-2015 MANAGER
111 1-JAN-2006 31-DEC-2007 CFO

This is a type of gaps-and-islands problem with date-chains. I would suggest using a left join to find where the islands start. Then a cumulative sum and aggregation:
-- Gaps-and-islands over date chains.
-- A row starts a new island when no row of the same emp_id and title ends
-- on the day before it starts (the left join then finds nothing).
-- The running count of such starts gives every island its own number
-- within (emp_id, title); aggregating by it yields one row per island.
select emp_id, title, min(start_date), max(end_date)
from (
    select
        t.*,
        sum(case when tprev.emp_id is null then 1 else 0 end)
            over (partition by t.emp_id, t.title order by t.start_date) as island_no
    from t
    left join t tprev
        on  tprev.emp_id       = t.emp_id
        and tprev.title        = t.title
        and tprev.end_date + 1 = t.start_date
) islands
group by island_no, emp_id, title;

Try the query below, which uses window functions to find the gaps and turn each run of consecutive rows into a group:
-- Gaps-and-islands via the difference of two row numbers.
-- rn_in_emp numbers an employee's rows by start_date; rn_in_title numbers
-- them by start_date within one title. While consecutive rows keep the same
-- title both counters advance together, so their difference stays constant;
-- it changes as soon as another title interrupts the run.
with numbered as
(
    select a.*,
           row_number() over (partition by emp_id, title order by start_date) as rn_in_title,
           -- partitioned by emp_id so other employees' rows do not shift the numbering
           row_number() over (partition by emp_id order by start_date) as rn_in_emp
    from table_name a
)
select emp_id,
       min(start_date) as first_start_date,
       max(end_date)   as last_end_date,
       title
from numbered
-- title is part of the key: two different titles can produce the same
-- difference, and without it their rows would be merged into one group.
group by emp_id, title, rn_in_emp - rn_in_title
order by emp_id, first_start_date
demo link

First value in DATE minus 30 days SQL

I have a bunch of data, out of which I'm showing the ID, the max date and its corresponding values (user id, type, ...). Then I need to take the MAX date for each ID, subtract 30 days, and show the first date within that period together with its corresponding values.
Example:
ID Date Name
1 01.05.2018 AAA
1 21.04.2018 CCC
1 05.04.2018 BBB
1 28.03.2018 AAA
expected:
ID max_date max_name previous_date previous_name
1 01.05.2018 AAA 05.04.2018 BBB
I have working solution using subselects, but as I have quite huge WHERE part, refresh takes ages.
SUBSELECT looks like that:
(SELECT MIN(N.name)
FROM t1 N
WHERE N.ID = T.ID
AND (N.date < MAX(T.date) AND N.date >= (MAX(T.date)-30))
AND (...)) AS PreviousName
How'd you write the select?
I'm using TSQL
Thanks

I can do this with 2 CTEs to build up the dates and names.
SQL Fiddle
MS SQL Server 2017 Schema Setup:
-- Sample fixture for the main query: four dated names for each of two IDs.
CREATE TABLE t1 (ID int, theDate date, theName varchar(10)) ;
INSERT INTO t1 (ID, theDate, theName)
VALUES
-- ID 1: latest date is 2018-05-01
( 1,'2018-05-01','AAA' )
, ( 1,'2018-04-21','CCC' )
, ( 1,'2018-04-05','BBB' )
, ( 1,'2018-03-27','AAA' )
-- ID 2: latest date is 2018-05-21
, ( 2,'2018-05-02','AAA' )
, ( 2,'2018-05-21','CCC' )
, ( 2,'2018-03-03','BBB' )
, ( 2,'2018-01-20','AAA' )
;
Main Query:
-- For every ID: the latest row, plus the earliest row dated no more than
-- 30 days before that latest row.
--
-- cte1: every row of t1 with its date minus 30 days and a per-ID row number
--       counting from the newest date (rn = 1 is the latest row).
;WITH cte1 AS (
SELECT t1.ID, t1.theDate, t1.theName
, DATEADD(day,-30,t1.theDate) AS dMinus30
, ROW_NUMBER() OVER (PARTITION BY t1.ID ORDER BY t1.theDate DESC) AS rn
FROM t1
)
-- cte2: starting from each ID's latest row (cte1.rn = 1), all rows of the
--       same ID dated on or after that row's dMinus30. The latest row itself
--       satisfies the condition and is part of the set.
--       GROUP BY runs before the window functions, so rn numbers the
--       distinct (date, name) rows from oldest to newest and theCount is
--       the number of those distinct rows per ID.
, cte2 AS (
SELECT c2.ID, c2.theDate, c2.theName
, ROW_NUMBER() OVER (PARTITION BY c2.ID ORDER BY c2.theDate) AS rn
, COUNT(*) OVER (PARTITION BY c2.ID) AS theCount
FROM cte1
INNER JOIN cte1 c2 ON cte1.ID = c2.ID
AND c2.theDate >= cte1.dMinus30
WHERE cte1.rn = 1
GROUP BY c2.ID, c2.theDate, c2.theName
)
-- Pair the latest row (cte1.rn = 1) with the oldest row of the 30-day
-- window (cte2.rn = 1).
-- NOTE(review): when no other row falls inside the window, cte2.rn = 1 is
-- the latest row itself, so previous_date equals max_date.
SELECT cte1.ID, cte1.theDate AS max_date, cte1.theName AS max_name
, cte2.theDate AS previous_date, cte2.theName AS previous_name
, cte2.theCount
FROM cte1
INNER JOIN cte2 ON cte1.ID = cte2.ID
AND cte2.rn=1
WHERE cte1.rn = 1
Results:
| ID | max_date | max_name | previous_date | previous_name |
|----|------------|----------|---------------|---------------|
| 1 | 2018-05-01 | AAA | 2018-04-05 | BBB |
| 2 | 2018-05-21 | CCC | 2018-05-02 | AAA |
cte1 lists every row together with its date minus 30 days and uses a ROW_NUMBER() window function to number each ID's rows from the most recent date backwards, so the row with rn = 1 supplies max_date and max_name. cte2 joins back to that row to collect all dates of the same ID that fall within the 30 days up to cte1's max date, and numbers them in ascending order so that rn = 1 is the earliest date in that window. The outer query then joins those two results together to get the columns needed, selecting only the most recent row from cte1 and the earliest in-window row from cte2.
I'm not sure how well it will scale with your data, but using the CTEs should optimize pretty well.
EDIT: For the additional requirement, I just added in another COUNT() window function to cte2.

I would do:
-- One row per id: the latest row, and the earliest row dated within the
-- 30 days before it.
--   w: every row plus its id's latest date.
--   r: only the rows inside the 30-day window, numbered from both ends
--      (seqnum_desc = 1 is the latest row, seqnum_asc = 1 the earliest).
-- prev_* stay NULL when the latest row is the only one in the window
-- (then the same row has seqnum_asc = 1 and seqnum_desc = 1).
select id,
       max(case when seqnum_desc = 1 then date end) as max_date,
       max(case when seqnum_desc = 1 then name end) as max_name,
       max(case when seqnum_asc = 1 and seqnum_desc > 1 then date end) as prev_date,
       max(case when seqnum_asc = 1 and seqnum_desc > 1 then name end) as prev_name
from (select w.*,
             row_number() over (partition by id order by date desc) as seqnum_desc,
             row_number() over (partition by id order by date asc) as seqnum_asc
      from (select e.*,
                   max(date) over (partition by id) as latest_date
            from example e
           ) w
      where w.date >= dateadd(day, -30, w.latest_date)
     ) r
group by id;

TSQL getting max and min date with a separate but not unique record

example table:
test_date | test_result | unique_ID
12/25/15 | 100 | 50
12/01/15 | 150 | 75
10/01/15 | 135 | 75
09/22/14 | 99 | 50
04/10/13 | 125 | 50
I need to find the first and last test date as well as the test result to match said date by user. So, I can group by ID, but not test result.
SELECT MAX(test_date)[need matching test_result],
MIN(test_date) [need matching test_result],
unique_id
from [table]
group by unique_id
THANKS!

-- Demo: return, for every Unique_id, the row of its earliest test and the
-- row of its latest test (two labelled rows per id).
CREATE TABLE #t
(
    test_date    date,
    Test_results int,
    Unique_id    int
);
-- NOTE(review): these mm/dd/yy literals are presumably read as month/day/year;
-- how they convert depends on the session's date format settings - confirm.
INSERT INTO #t (test_date, Test_results, Unique_id)
VALUES ( '12/25/15',100,50 ),
       ( '12/01/15',150,75 ),
       ( '10/01/15',135,75 ),
       ( '09/22/14',99,50 ),
       ( '04/10/13',125,50 );

-- bounds: earliest and latest test_date per Unique_id.
WITH bounds AS (
    select Unique_id,
           min(test_date) as test_datemin,
           max(test_date) as test_datemax
    from #t
    group by Unique_id
)
-- Both joins match on Unique_id as well as on the date; matching on the
-- date alone would also pick up another id's row that happens to share it.
select 'MinTestDate' as Type, a.test_date, a.Test_results, a.Unique_id
from #t a
inner join bounds b
    on  a.Unique_id = b.Unique_id
    and a.test_date = b.test_datemin
union all
select 'MaxTestDate' as Type, a.test_date, a.Test_results, a.Unique_id
from #t a
inner join bounds b
    on  a.Unique_id = b.Unique_id
    and a.test_date = b.test_datemax;

I would recommend window functions. The following returns the information on 2 rows per id:
-- Number each unique_id's rows by test_date from both ends and keep only
-- the first (seqnum_asc = 1) and last (seqnum_desc = 1) test.
-- An id with a single row returns that one row.
-- [table] is bracket-quoted because TABLE is a reserved word.
select t.*
from (select t.*,
             row_number() over (partition by unique_id order by test_date) as seqnum_asc,
             row_number() over (partition by unique_id order by test_date desc) as seqnum_desc
      from [table] t
     ) t
where seqnum_asc = 1
   or seqnum_desc = 1;
For one row, use conditional aggregation (or pivot if you prefer):
-- One row per unique_id: first and last test_date, each with the
-- test_result recorded on that date.
-- The CASE expressions are non-NULL only on the row ranked 1 from the
-- respective end, so max() simply returns that row's test_result.
-- [table] is bracket-quoted because TABLE is a reserved word.
select unique_id,
       min(test_date) as first_test_date,
       max(case when seqnum_asc = 1 then test_result end) as first_test_result,
       max(test_date) as last_test_date,
       max(case when seqnum_desc = 1 then test_result end) as last_test_result
from (select t.*,
             row_number() over (partition by unique_id order by test_date) as seqnum_asc,
             row_number() over (partition by unique_id order by test_date desc) as seqnum_desc
      from [table] t
     ) t
group by unique_id;

Consider using a combination of self-joins and derived tables:
-- For every unique_id, pair the result of its earliest test (t1) with the
-- result of its latest test (t2).
--   minTable / maxTable: earliest / latest test_date per unique_ID.
--   t1 / t2: the TestTable rows recorded on those two dates.
SELECT
    t1.unique_id,
    minTable.MinOftest_date,
    t1.test_result AS Mintestdate_result,
    maxTable.MaxOftest_date,
    t2.test_result AS Maxtestdate_result
FROM TestTable AS t1
INNER JOIN (
    SELECT
        TestTable.unique_ID,
        MIN(TestTable.test_date) AS MinOftest_date
    FROM TestTable
    GROUP BY TestTable.unique_ID
) AS minTable
    ON  t1.unique_id = minTable.unique_id
    AND t1.test_date = minTable.MinOftest_date
INNER JOIN (
    SELECT
        TestTable.unique_ID,
        MAX(TestTable.test_date) AS MaxOftest_date
    FROM TestTable
    GROUP BY TestTable.unique_ID
) AS maxTable
    ON  maxTable.unique_id = minTable.unique_id
INNER JOIN TestTable AS t2
    ON  t2.unique_ID = maxTable.unique_ID
    AND t2.test_date = maxTable.MaxOftest_date;
OUTPUT
unique_id MinOftest_date Mintestdate_result MaxOftest_date Maxtestdate_result
50 4/10/2013 125 12/25/2015 100
75 10/1/2015 135 12/1/2015 150

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

SQL: An alternative to Group By approach using Partition By - sql

In Snowflake (which your first line suggests), you would use QUALIFY: SELECT UPC_CODE, A_PRICE, A_QTY, DATE AS F_DATE, COMPANY_CODE, A_CAT FROM <table_name> QUALIFY ROW_NUMBER() OVER (PARTITION BY UPC_CODE, COMPANY_CODE ORDER BY DATE DESC ) = 1;

Related

SQL Server Find Max and Min in Group

Select rows based on one column value exists more than once

How to create GROUP BY on min and max date

First value in DATE minus 30 days SQL

TSQL getting max and min date with a separate but not unique record

Categories

Resources