SQL to Find Max date Value from each group- SQL Server

SQL to Find Max date Value from each group- SQL Server - sql

So I have 2 tables which I am joining using an inner join.
Table 1 :
Name
Batch_Date
AcctID
Bob
18-08-11
32
Bob
19-08-11
32
Shawn
18-08-11
42
Shawn
20-08-11
42
Paul
18-08-11
36
Paul
19-08-11
36
Table 2
Code
order_Date
AcctID
1
18-08-11
32
0
NULL
32
0
NULL
42
0
NULL
42
1
18-08-11
36
1
18-08-11
36
So I want to get the name, last batch_date , AcctID from the table 1
and code, order date from table 2.
The challenge for me here is as there are multiple rows of same AcctId in table 2, if for any acctid, the date column is not null, I want to select that date and if date column is null for each row, I want to select the null value for date.
SO resulting dataset should look like below:
Name
Batch_Date
AcctID
Code
Order_Date
Bob
19-08-11
32
1
18-08-11
Shawn
20-08-11
42
0
NULL
Paul
19-08-11
36
1
18-08-11

OK, try this
-- Sample data only: these two CTEs stand in for the real tables, so leave them out of your actual solution.
-- NOTE: the date values below are string literals, so the ORDER BY clauses further down compare them as text.
WITH cteT1 AS (
    SELECT src.CustName, src.BatchDate, src.AcctID
    FROM (VALUES
          ('Bob', '18-08-11', 32)
        , ('Bob', '19-08-11', 32)
        , ('Shawn', '18-08-11', 42)
        , ('Shawn', '20-08-11', 42)
        , ('Paul', '18-08-11', 36)
        , ('Paul', '19-08-11', 36)
    ) AS src (CustName, BatchDate, AcctID)
),
cteT2 AS (
    SELECT src.OrderCode, src.OrderDate, src.AcctID
    FROM (VALUES
          (1, '18-08-11', 32)
        , (0, NULL, 32)
        , (0, NULL, 42)
        , (0, NULL, 42)
        , (1, '18-08-11', 36)
        , (1, '18-08-11', 36)
    ) AS src (OrderCode, OrderDate, AcctID)
),
-- Number each account's batches from newest (1) to oldest.
cteTopBatches AS (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY b.AcctID ORDER BY b.BatchDate DESC) AS BatchNewness,
        b.CustName,
        b.BatchDate,
        b.AcctID
    FROM cteT1 AS b
),
-- Number each account's orders from newest (1) to oldest.
-- NULL order dates sort below real dates, so a NULL row is ranked 1 only when the account has no dated order.
-- Use COALESCE (ideally in its own CTE) if NULLs should be treated as a specific date instead.
cteTopOrders AS (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY o.AcctID ORDER BY o.OrderDate DESC) AS OrderNewness,
        o.OrderCode,
        o.OrderDate,
        o.AcctID
    FROM cteT2 AS o
)
-- Pair the newest batch with the newest order of the same account.
SELECT
    tb.AcctID,
    tb.CustName,
    tb.BatchDate,
    tor.OrderCode,
    tor.OrderDate
FROM cteTopBatches AS tb
INNER JOIN cteTopOrders AS tor
    ON tor.AcctID = tb.AcctID
WHERE tb.BatchNewness = 1
  AND tor.OrderNewness = 1

Related

Select most recent record (with expiration date)

Let's say that we have 2 tables named Records and Opportunities:
Records:
RecordID
CustomerID
CreateDate
777
1
1/1/2021
888
2
1/1/2021
999
1
2/1/2021
Opportunities:
OppID
CustomerID
OppCreateDate
10
1
12/31/2020
11
1
1/10/2021
12
2
2/1/2021
13
1
4/1/2021
14
1
8/5/2025
Desired Output:
RecordID
CustomerID
CreateDate
#Opportunities
777
1
1/1/2021
1
888
2
1/1/2021
1
999
1
2/1/2021
1
As you can see, the Records table provides the first 3 columns of the desired output, and the "#Opportunities" column is created by counting the number of opportunities that happen after the record is created for a given customer.
Two key things to note on this logic:
Only count opportunities when they occur within 6 months of a record.
If another record is created for a customer, only count opportunities for the most recent record.
More specifically, OppID = 11 will get credited to RecordID = 777; 12 to 888; and 13 to 999. 10 and 14 will not get credited to either RecordID.
I wrote the below code, which does not take into account #2 above:
-- Sample data: one row per record created for a customer.
CREATE TABLE #Records
(
RecordID int
, CustomerID int
, CreateDate Date
)
INSERT INTO #Records
VALUES
(777, 1, '2021-01-01')
, (888, 2, '2021-01-31')
, (999, 1, '2021-02-01')
-- Sample data: one row per opportunity created for a customer.
CREATE TABLE #Opportunities
(
OppID int
, CustomerID int
, OppCreateDate Date
)
INSERT INTO #Opportunities
VALUES
(10, 1, '2020-12-31')
, (11, 1, '2021-01-10')
, (12, 2, '2021-02-01')
, (13, 1, '2021-04-01')
, (14, 1, '2025-08-25')
-- Show the raw sample data.
select *
from #Records
select *
from #Opportunities
-- For every record, count the same customer's opportunities created on or after the record.
-- Each record is evaluated independently, so an opportunity can be counted for several
-- records of the same customer (requirement #2 is not handled here).
select rec.*
, (select count(*)
from #Opportunities opp
where rec.CustomerID=opp.CustomerID
and rec.CreateDate<=opp.OppCreateDate --record happened on the same day or before the opportunity
-- NOTE: DATEDIFF(month, ...) counts calendar-month boundaries crossed, not elapsed full months.
and datediff(month,rec.CreateDate,opp.OppCreateDate) < 6 --opened and created within 6 months
) as [#Opportunities]
from #Records rec
Any suggestions to incorporate #2 above and generate the desired output?

Decide on which #records row is related to an #Opportunities row based on #records.CreateDate
-- Credit every opportunity to the most recently created qualifying record of its customer,
-- then count the credited opportunities per record.
SELECT
    credited.RecordID,
    credited.CustomerID,
    credited.CreateDate,
    COUNT(*) AS cnt
FROM (
    SELECT
        rec.RecordID,
        rec.CustomerID,
        rec.CreateDate,
        -- 1 marks the latest record that still qualifies for this opportunity
        ROW_NUMBER() OVER (PARTITION BY opp.OppID ORDER BY rec.CreateDate DESC) AS recency
    FROM #records AS rec
    INNER JOIN #Opportunities AS opp
        ON opp.CustomerID = rec.CustomerID
        AND opp.OppCreateDate >= rec.CreateDate
        AND DATEDIFF(month, rec.CreateDate, opp.OppCreateDate) < 6
) AS credited
WHERE credited.recency = 1
GROUP BY
    credited.RecordID,
    credited.CustomerID,
    credited.CreateDate
Returns
RecordID CustomerID CreateDate cnt
777 1 2021-01-01 1
888 2 2021-01-31 1
999 1 2021-02-01 1

Try this:
-- Sample data held in table variables (table variables are named with '@', not '#').
DECLARE @Records table ( RecordID int, CustomerID int, CreateDate date );
INSERT INTO @Records ( RecordID, CustomerID, CreateDate ) VALUES
( 777, 1, '2021-01-01' ), ( 888, 2, '2021-01-31' ), ( 999, 1, '2021-02-01' );
DECLARE @Opportunities table ( OppID int, CustomerID int, OppCreateDate date );
INSERT INTO @Opportunities ( OppID, CustomerID, OppCreateDate ) VALUES
( 10, 1, '2020-12-31' )
, ( 11, 1, '2021-01-10' )
, ( 12, 2, '2021-02-01' )
, ( 13, 1, '2021-04-01' )
, ( 14, 1, '2025-08-25' );
-- One output row per record, with the number of opportunities credited to it.
SELECT
    r.RecordID
    , r.CustomerID
    , r.CreateDate
    , Opps.[#Opportunities]
FROM @Records AS r
OUTER APPLY (
    SELECT
        COUNT ( * ) AS [#Opportunities]
    FROM @Opportunities AS o
    WHERE
        o.CustomerID = r.CustomerID
        -- the opportunity was created on or after the record ...
        AND o.OppCreateDate >= r.CreateDate
        -- ... and at most 6 month boundaries later
        AND DATEDIFF ( month, r.CreateDate, o.OppCreateDate ) <= 6
        -- Skip opportunities that also follow a record with a higher RecordID for the
        -- same customer; those belong to that other record.
        -- NOTE(review): this treats a higher RecordID as "more recent" - confirm.
        -- NOT EXISTS is used instead of NOT IN so a NULL OppID cannot empty the result.
        AND NOT EXISTS (
            SELECT
                1
            FROM @Records AS r2
            INNER JOIN @Opportunities AS o2
                ON r2.CustomerID = o2.CustomerID
            WHERE
                o2.OppID = o.OppID
                AND r2.CustomerID = o.CustomerID
                AND o2.OppCreateDate >= r2.CreateDate
                AND r2.RecordID > r.RecordID
        )
) AS Opps
ORDER BY
    r.RecordID;
RETURNS
+----------+------------+------------+----------------+
| RecordID | CustomerID | CreateDate | #Opportunities |
+----------+------------+------------+----------------+
| 777 | 1 | 2021-01-01 | 1 |
| 888 | 2 | 2021-01-31 | 1 |
| 999 | 1 | 2021-02-01 | 1 |
+----------+------------+------------+----------------+

SQL: Update Column with increment numbers based on 2 Columns

I have 2 Columns and i need to reorder one of them with an Update statement.
Here is an Example:
Date_time---------Priority
20.07.2018 10
21.07.2018 3
21.07.2018 13
21.07.2018 4
22.07.2018 23
23.07.2018 3
23.07.2018 7
And i need to get this:
Date_time---------Priority
20.07.2018 10
21.07.2018 10
21.07.2018 20
21.07.2018 30
22.07.2018 10
23.07.2018 10
23.07.2018 20
I need to change the Priority column based on current order and Date. The new order should be separated by 10... 10, 20, 30, 40, 50...
Can someone help? Thanks.

You can try the query below, which uses the row_number function:
-- Renumber Priority within each Date_time as 10, 20, 30, ... following the current Priority order.
-- The update goes through the CTE so every row receives exactly one row number; joining the
-- numbered rows back on Date_time alone would match each row to every rn of its date.
;with numbered as
(
    select Priority,
           row_number() over(partition by Date_time order by Priority) as rn
    from TableA
)
update numbered
set Priority = 10 * rn

-- Sample data; Date_time is stored as text here.
CREATE TABLE #Table1
([Date_time] varchar(10), [Priority] int)
;
INSERT INTO #Table1
([Date_time], [Priority])
VALUES
('20.07.2018', 10),
('21.07.2018', 3),
('21.07.2018', 13),
('21.07.2018', 4),
('22.07.2018', 23),
('23.07.2018', 3),
('23.07.2018', 7)
;
-- The statement before WITH must be terminated with ';', otherwise the CTE does not parse.
with cte as
(
    SELECT [Date_time]
        ,[Priority]
        -- position of the row inside its date, following the current Priority order
        ,row_number() OVER (
            PARTITION BY [Date_time] ORDER BY [Priority]
        ) AS rn
    FROM #Table1
)
-- New priorities are spaced by 10: 10, 20, 30, ...
select [Date_time], (10*rn) [Priority] from cte
output
Date_time (No column name)
20.07.2018 10
21.07.2018 10
21.07.2018 20
21.07.2018 30
22.07.2018 10
23.07.2018 10
23.07.2018 20

replace [Table Name] with your table name
-- Renumber [priority] within each date_time as 10, 20, 30, ... following the current priority order.
-- ROW_NUMBER (not DENSE_RANK) is used so that rows with equal priority still get distinct values,
-- and the update goes through the CTE so no join back to the table is needed.
;with cte as
(
select [priority],
       row_number() over(partition by date_time order by [priority])*10 as newPriority
from [Table Name]
)
update cte set [priority] = newPriority

Find the most recent record from the table for the given criteria

It may seem like a duplicate question, but all the answers I found on SO didn't help me solve this.
So, I have this database that stores every update on an item. Essentially, when the item is first created, the statusId is 1 and the date it's created. If someone updated the item and changed the status of the item, the statusId for that item is added. For eg. a new row with statusId 2 is added with the current date. And so on and so forth. One example of the table is shown below:
id statusId updatedDate userId authId
1 1 2016-12-20 15:43:17.703 14 14
2 1 2016-12-20 15:54:01.523 14 15
3 2 2016-12-21 16:05:48.157 14 14
4 3 2016-12-21 16:27:58.610 14 15
5 1 2016-12-20 17:16:47.627 14 18
6 1 2016-12-20 17:27:58.930 14 19
7 1 2017-01-18 14:13:35.800 18 20
So, what I want to do next is query the table where the most recent statusId is given. For the table above, the query for statusid = 1 should show the following result:
id statusId updatedDate userId authId
5 1 2016-12-20 17:16:47.627 14 18
6 1 2016-12-20 17:27:58.930 14 19
7 1 2017-01-18 14:13:35.800 18 20
Notice how the list doesn't show for authIds 14 and 15 even though it has status 1 but have different statusId in the later date.
One way I tried doing is the following:
-- Self-join: A2 is any row of the same authId with an earlier updatedDate than A1.
select A1.id, A1.statusId, A1.updatedDate, A1.userId, A1.authId from AuthProgressTracker A1
left join AuthProgressTracker A2
on (A1.authId = A2.authId and A1.updatedDate > A2.updatedDate)
-- Keeps only A1 rows that have at least one earlier row for the same authId,
-- so an authId with a single row is dropped and there is no filter on statusId.
where A2.authId is not null
That didn't show the result I was looking for. I tried another one
-- Joins every row to the MAX(updatedDate) of its (authId, statusId, id) group.
SELECT *
FROM AuthProgressTracker T
INNER JOIN (
-- NOTE(review): id is part of the GROUP BY; if id is unique per row (as in the sample data),
-- every group is a single row and maxDate is simply that row's own updatedDate.
SELECT id, authId, statusId, MAX(updatedDate) as maxDate FROM AuthProgressTracker GROUP BY authId, statusId, id
) AP
ON AP.id = T.id AND T.updatedDate = AP.maxDate
order by T.id
This didn't produce the desired result either.
What am I missing?
And how can I break down the problems in SQL Server 2012 so that I can learn to figure out the problems like this in the future?

Your problem statement may have led you a bit astray, because while you want the most recent records, the timestamp may not be how you arrive at your result set. In the query below, I use a subquery which identifies every authId that has no statusId other than 1. This then filters the original table to leave you with the results you want.
-- Return every row whose authId has never been given a statusId other than 1.
SELECT tracker.*
FROM AuthProgressTracker AS tracker
WHERE tracker.authId IN
(
    -- authIds for which no row carries a statusId different from 1
    SELECT candidate.authId
    FROM AuthProgressTracker AS candidate
    GROUP BY candidate.authId
    HAVING COUNT(CASE WHEN candidate.statusId <> 1 THEN 1 END) = 0
)

(You haven't stated what RDBMS you're using, so you'll need to adjust your queries accordingly. E.g. If using mysql, use LIMIT syntax instead of TOP.)
-- Sample data held in a table variable (table variables are named with '@', not '#').
declare @AuthProgressTracker table (
id int,
statusId int,
updatedDate datetime,
userId int,
authId int
)
insert into @AuthProgressTracker (id, statusId, updatedDate, userId, authId)
values
(1, 1, '2016-12-20 15:43:17.703', 14, 14),
(2, 1, '2016-12-20 15:54:01.523', 14, 15),
(3, 2, '2016-12-21 16:05:48.157', 14, 14),
(4, 3, '2016-12-21 16:27:58.610', 14, 15),
(5, 1, '2016-12-20 17:16:47.627', 14, 18),
(6, 1, '2016-12-20 17:27:58.930', 14, 19),
(7, 1, '2017-01-18 14:13:35.800', 18, 20)
/* Determine id's of latest status updates per authId */
/* NOTE(review): MAX(id) stands in for "latest"; this assumes ids grow with updatedDate within an authId - confirm. */
SELECT MAX(id) as LatestSatus
FROM @AuthProgressTracker
GROUP BY authId
/* Note the above includes row id's you've chosen to exclude, so... */
/* Determine most recent statusId */
SELECT TOP 1 statusId
FROM @AuthProgressTracker
ORDER BY updatedDate DESC
/* Putting it all together: keep each authId's latest row when its status equals the most recent statusId */
SELECT i.*
FROM @AuthProgressTracker i
INNER JOIN (
SELECT MAX(id) as LatestSatus
FROM @AuthProgressTracker
GROUP BY authId
) ls ON
ls.LatestSatus = i.id
WHERE i.statusId = (
SELECT TOP 1 statusId
FROM @AuthProgressTracker
ORDER BY updatedDate DESC
)

GroupBy with respect to record intervals on another table

I prepared a SQL Fiddle for my question; it contains working code. I am asking whether there is an alternative solution that I did not think of.
-- Product values, keyed by the timestamp at which they were recorded.
CREATE TABLE [Product]
([Timestamp] bigint NOT NULL PRIMARY KEY,
[Value] float NOT NULL
)
;
-- Prices, keyed by the timestamp at which each price was set.
CREATE TABLE [PriceTable]
([Timestamp] bigint NOT NULL PRIMARY KEY,
[Price] float NOT NULL
)
;
-- Sample product rows.
INSERT INTO [Product]
([Timestamp], [Value])
VALUES
(1, 5),
(2, 3),
(4, 9),
(5, 2),
(7, 11),
(9, 3)
;
-- Sample price rows; the last one (10) is later than every product timestamp.
INSERT INTO [PriceTable]
([Timestamp], [Price])
VALUES
(1, 1),
(3, 4),
(7, 2.5),
(10, 3)
;
Query:
-- Total product value per price, where a product belongs to the latest price
-- set at or before the product's timestamp.
SELECT
    [Totals].[Timestamp],
    [Totals].[TotalValue],
    cur.[Price]
FROM
(
    SELECT
        active.[Timestamp],
        SUM(prod.[Value]) AS [TotalValue]
    FROM [Product] AS prod
    INNER JOIN [PriceTable] AS active
        ON active.[Timestamp] <= prod.[Timestamp]
    -- discard a price when a newer one was already in force for this product
    WHERE NOT EXISTS (
        SELECT *
        FROM [dbo].[PriceTable] AS newer
        WHERE newer.[Timestamp] <= prod.[Timestamp]
          AND newer.[Timestamp] > active.[Timestamp]
    )
    GROUP BY active.[Timestamp]
) AS [Totals]
INNER JOIN [dbo].[PriceTable] AS cur
    ON cur.[Timestamp] = [Totals].[Timestamp]
ORDER BY cur.[Timestamp]
Result
| Timestamp | TotalValue | Price |
|-----------|------------|-------|
| 1 | 8 | 1 |
| 3 | 11 | 4 |
| 7 | 14 | 2.5 |
Here, my first table [Product] contains the product values for different timestamps. And second table [PriceTable] contains the prices for different time intervals. A given price is valid until a new price is set. Therefore the price with timestamp 1 is valid for Products with timestamps 1 and 2.
I am trying to get the total number of products with respect to given prices. The SQL on the fiddle produces what I expect.
Is there a smarter way to get the same result?
By the way, I am using SQLServer 2014.

-- Sample data held in table variables (table variables are named with '@', not '#').
DECLARE @Product TABLE
    (
      [Timestamp] BIGINT NOT NULL
                         PRIMARY KEY ,
      [Value] FLOAT NOT NULL
    );
DECLARE @PriceTable TABLE
    (
      [Timestamp] BIGINT NOT NULL
                         PRIMARY KEY ,
      [Price] FLOAT NOT NULL
    );
INSERT  INTO @Product
        ( [Timestamp], [Value] )
VALUES  ( 1, 5 ),
        ( 2, 3 ),
        ( 4, 9 ),
        ( 5, 2 ),
        ( 7, 11 ),
        ( 9, 3 );
INSERT  INTO @PriceTable
        ( [Timestamp], [Price] )
VALUES  ( 1, 1 ),
        ( 3, 4 ),
        ( 7, 2.5 ),
        ( 10, 3 );
-- Pair every price with the timestamp of the next price; the last price gets NULL.
WITH    cte
          AS ( SELECT   pt.[Timestamp] ,
                        pt.[Price] ,
                        LEAD(pt.[Timestamp]) OVER ( ORDER BY pt.[Timestamp] ) AS [lTimestamp]
               FROM     @PriceTable pt
             )
    -- Sum the product values inside each half-open interval [Timestamp, lTimestamp).
    -- The last price has a NULL upper bound, so its TotalValue is NULL.
    SELECT  cte.[Timestamp] ,
            ( SELECT    SUM(p.[Value])
              FROM      @Product p
              WHERE     p.[Timestamp] >= cte.[Timestamp]
                        AND p.[Timestamp] < cte.[lTimestamp]
            ) AS [TotalValue] ,
            cte.[Price]
    FROM    cte
Idea is to generate intervals from price table like:
1 - 3
3 - 7
7 - 10
and sum up all values in those intervals.
Output:
Timestamp TotalValue Price
1 8 1
3 11 4
7 14 2.5
10 NULL 3
You can simply add WHERE clause if you want to filter out rows where no orders are sold.
Also you can indicate the default value for LEAD window function if you want to close the last interval like:
LEAD(pt.[Timestamp], 1, 100)
and I guess it would be something like this in production:
LEAD(pt.[Timestamp], 1, GETDATE())

I think I've got a query which is easier to read. Does this work for you?
-- For each price, total the product values from that price's timestamp up to
-- (but not including) the timestamp of the next price.
select pt.*,
(select sum(P.[Value]) from Product P where
P.[TimeStamp] >= pt.[TimeStamp]
-- the latest price has no successor, so its interval is left open-ended
and (nxt.NextTimeStamp is null or P.[TimeStamp] < nxt.NextTimeStamp)
) as TotalValue from PriceTable pt
--get the next time stamp
outer apply (
select min(n.[TimeStamp]) as NextTimeStamp from PriceTable n where n.[TimeStamp] > pt.[TimeStamp]
) nxt
--exclude entries with timestamps greater than those in Product table
--(<= keeps a price set at exactly the latest product timestamp)
where pt.[TimeStamp] <= (select max([TimeStamp]) from Product)
Very detailed question BTW

You could use a cte
-- Build one interval per price: from its own timestamp up to the next price's timestamp.
-- LEAD replaces the row_number self-join, and the last interval is open-ended rather than
-- capped at a hard-coded 999999, which larger bigint timestamps would exceed.
;with cte as
(
select pt.[timestamp] as lowval,
lead(pt.[timestamp]) over (order by pt.[timestamp]) as nextval,
pt.price
from pricetable pt
)
-- Sum the product values that fall inside each price interval.
select cte.lowval as 'timestamp',sum(p1.value) TotalValue,cte.price
from product p1
join cte on p1.[Timestamp] >= cte.lowval
and (cte.nextval is null or p1.[Timestamp] < cte.nextval)
group by cte.lowval,cte.price
order by cte.lowval
It's a lot easier to understand, and the execution plan compares favourably with your query (about 10% cheaper).

Need to write SQL Server query to return sum of unique values (based on one column)

My table looks like this:
Supplier Reference Description Total
--------------------------------------------------
smiths BP657869510L NULL 42
smiths BP657869510L NULL 42
smiths BP654669510L No. 5621 13
smiths BP654669510L No. 5621 13
corrigan 15:51 Order 23542 23
corrigan 15:51 Order 23542 23
williams 14015 Block B 19
williams 14015 Block B 19
I would like to write a T-SQL query to
return the list of transactions with each supplier, eliminating duplicate entries based on the Reference column.
return the total sum of transactions with each supplier, again eliminating duplicate entries based on the Reference column.
So the results I would want to return based on the data above would be
Supplier Reference Description Total
---------------------------------------------------
smiths BP657869510L NULL 42
smiths BP654669510L No. 5621 13
corrigan 15:51 Order 23542 23
williams 14015 Block B 19
and for the second requirement:
Supplier Total
---------------------
smiths 55
corrigan 23
williams 19
Is this possible? Please note that values in other columns may differ even though the Reference column contains the same value. It doesn't matter if this occurs, I am only concerned with rows which contain a distinct or unique Reference value.

-- Sample data held in a table variable (table variables are named with '@', not '#').
declare @tempData table
(
supplier nvarchar(20),
reference nvarchar (20),
xDescription nvarchar(20),
total int
);
insert into @tempData (supplier, reference, xDescription, total)
select 'smiths', 'BP657869510L' ,NULL, 42 union all
select 'smiths', 'BP657869510L' ,NULL, 42 union all
select 'smiths', 'BP654669510L' ,'No. 5621', 13 union all
select 'smiths', 'BP654669510L' ,'No. 5621', 13 union all
select 'corrigan', '15:51' ,'Order 23542', 23 union all
select 'corrigan', '15:51' ,'Order 23542', 23 union all
select 'williams', '14015' ,'Block B', 19 union all
select 'williams', '14015' ,'Block B', 19
;
-- Total per supplier, counting each reference once.
-- The inner query de-duplicates on (supplier, reference, total) only, so a differing
-- description cannot make a reference count twice; the outer query groups by supplier
-- alone, so two references that share the same total are both included in the sum.
select x.supplier,
sum(x.total) as Total
from
(select distinct a.supplier, a.reference, a.total from @tempData a) x
group by x.supplier

As per a comment from the OP Total is always the same for Reference, but Description can change. DISTINCT is equivalent to a GROUP BY all the columns in the SELECT
To get the first requirement a distinct is enough, if it's possible to drop the Description column
-- One row per (Supplier, Reference, Total) combination.
SELECT
    src.Supplier,
    src.Reference,
    src.Total
FROM myTable AS src
GROUP BY
    src.Supplier,
    src.Reference,
    src.Total
If that's not possible, then a NULL, a MAX or something along the same lines can be used. In the query below, NULL is returned if there is more than one value in the group; otherwise the single value is output.
-- One row per (Supplier, Reference, Total); Description is shown only when the
-- group holds a single distinct value, otherwise it is NULL.
SELECT
    src.Supplier,
    src.Reference,
    CASE
        WHEN MIN(src.Description) = MAX(src.Description) THEN MAX(src.Description)
        ELSE NULL
    END AS Description,
    src.Total
FROM myTable AS src
GROUP BY
    src.Supplier,
    src.Reference,
    src.Total
To get the second the above query can be used as a CTE for the main query where a GROUP BY is added, in this case the Description columns is not needed so is dropped.
-- De-duplicate the transactions first, then total them per supplier.
WITH dValue AS (
    SELECT
        src.Supplier,
        src.Reference,
        src.Total
    FROM myTable AS src
    GROUP BY
        src.Supplier,
        src.Reference,
        src.Total
)
SELECT
    dv.Supplier,
    SUM(dv.Total) AS Total
FROM dValue AS dv
GROUP BY dv.Supplier
If you have a version of SQLServer where the CTE are not possible the first query can be used as a subquery to get the same result
-- Same totals per supplier as the CTE version, using a derived table instead.
SELECT
    dValue.Supplier,
    SUM(dValue.Total) AS Total
FROM (
    SELECT
        src.Supplier,
        src.Reference,
        src.Total
    FROM myTable AS src
    GROUP BY
        src.Supplier,
        src.Reference,
        src.Total
) AS dValue
GROUP BY dValue.Supplier

Try below sql
assuming #tempData is your table name.
-- Sample data in a temp table. A '#' name needs CREATE TABLE; DECLARE ... TABLE only
-- accepts '@' table variables.
if object_id('tempdb..#tempData') is not null
drop table #tempData;
create table #tempData
(
supplier nvarchar(20),
reference nvarchar (20),
xDescription nvarchar(20),
total int
);
insert into #tempData (supplier, reference, xDescription, total)
select 'smiths', 'BP657869510L' ,NULL, 42 union all
select 'smiths', 'BP657869510L' ,NULL, 42 union all
select 'smiths', 'BP654669510L' ,'No. 5621', 13 union all
select 'smiths', 'BP654669510L' ,'No. 5621', 13 union all
select 'corrigan', '15:51' ,'Order 23542', 23 union all
select 'corrigan', '15:51' ,'Order 23542', 23 union all
select 'williams', '14015' ,'Block B', 19 union all
select 'williams', '14015' ,'Block B', 19
;
-- Requirement 1: list the transactions with exact duplicate rows removed.
select
a.supplier
, a.reference
, a.xDescription
, a.total
from #tempData a
group by a.supplier
, a.reference
, a.xDescription
, a.total
;
/*
supplier reference xDescription total
-------------------- -------------------- -------------------- -----------
corrigan 15:51 Order 23542 23
smiths BP654669510L No. 5621 13
smiths BP657869510L NULL 42
williams 14015 Block B 19
*/
-- Requirement 2: total per supplier over the de-duplicated rows.
-- A plain GROUP BY replaces DISTINCT combined with a windowed SUM.
with cte as
(
select
a.supplier
, a.reference
, a.xDescription
, a.total
from #tempData a
group by a.supplier
, a.reference
, a.xDescription
, a.total
)
select
c.supplier, sum(c.total) as total
from cte c
group by c.supplier
;
/*
supplier total
-------------------- -----------
corrigan 23
smiths 55
williams 19
*/
UPDATE
as requested, the aim for this query is to include Separate record that has the same supplier with different description: example supplier smith
Dense_Rank() will fulfill this request (http://technet.microsoft.com/en-us/library/ms173825(v=sql.90).aspx)
-- Total per supplier and description over the de-duplicated rows.
-- Grouping by (supplier, xDescription) replaces dense_rank plus DISTINCT, which would
-- merge two description groups of one supplier whenever their sums happen to be equal.
with cte as
(
select
a.supplier
, a.reference
, a.xDescription
, a.total
from #tempData a
group by a.supplier
, a.reference
, a.xDescription
, a.total
)
select
c.supplier, sum(c.total) as total
from cte c
group by c.supplier
, c.xDescription
;
/*
supplier total
-------------------- -----------
corrigan 23
smiths 13
smiths 42
williams 19
*/
View All field
-- Every de-duplicated row, with the total of its (supplier, description) group alongside.
with deduped as
(
    select distinct
        t.supplier
        , t.reference
        , t.xDescription
        , t.total
    from #tempData t
)
select distinct
    d.supplier
    , d.reference
    , d.xDescription
    -- rows sharing a supplier and description (NULLs included) are summed together
    , sum(d.total) over(partition by d.supplier, d.xDescription) as total
from deduped d
;

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

SQL to Find Max date Value from each group- SQL Server - sql

Related

Select most recent record (with expiration date)

SQL: Update Column with increment numbers based on 2 Columns

Find the most recent record from the table for the given criteria

GroupBy with respect to record intervals on another table

Need to write SQL Server query to return sum of unique values (based on one column)

Categories

Resources