SQL: Getting previous record from other table - sql

I want to get the previous record of each record in Table A from Table B.
For convenience, here is the sample data:
-- Sample data for the question.
-- #A holds the reference rows; #B holds the candidate "previous" rows.
drop table if exists #A
drop table if exists #B

CREATE TABLE #A(Name varchar(10), time datetime, value int)
insert into #A (Name, time, value) values
('A', '2020-03-31 18:00:00', 56),
('A', '2020-03-31 19:00:00', 3),
('B', '2020-03-31 14:00:00', 14),
('C', '2020-03-31 15:00:00', 26)

CREATE TABLE #B(Name varchar(10), time datetime, value int)
-- Fixed: these rows belong in #B. The original inserted them into #A,
-- which left #B empty and put every row in #A.
insert into #B (Name, time, value) values
('A', '2020-03-31 21:00:00', 79),
('A', '2020-03-31 17:00:00', 44),
('A', '2020-03-31 14:00:00', 76),
('B', '2020-03-31 18:00:00', 89),
('C', '2020-03-31 11:00:00', 29),
('C', '2020-03-31 08:00:00', 6)
EDIT:
It should include only last previous record from TableB.
Sorry for the confusion. Changed image and sample data also.

I think you want:
-- All rows of #a, plus the single latest #b row that precedes the earliest
-- #a row of the same name. seqnum is numbered across ALL names (there is no
-- PARTITION BY), so at most one #b row is returned in total.
SELECT a.name, a.time, a.value
FROM #a AS a
UNION ALL
SELECT prev.name, prev.time, prev.value
FROM (
    SELECT b.*,
           ROW_NUMBER() OVER (ORDER BY b.time DESC) AS seqnum
    FROM #b AS b
    WHERE b.time < (
        SELECT MIN(a.time)
        FROM #a AS a
        WHERE a.name = b.name
    )
) AS prev
WHERE prev.seqnum = 1
ORDER BY name, time;
Here is a db<>fiddle.
EDIT:
If b could have multiple "previous" records, then:
-- All rows of #a, plus, for each name, the latest #b row that precedes the
-- earliest #a row of that name (seqnum restarts at 1 for every name).
SELECT a.name, a.time, a.value
FROM #a AS a
UNION ALL
SELECT prev.name, prev.time, prev.value
FROM (
    SELECT b.*,
           ROW_NUMBER() OVER (PARTITION BY b.name ORDER BY b.time DESC) AS seqnum
    FROM #b AS b
    WHERE b.time < (
        SELECT MIN(a.time)
        FROM #a AS a
        WHERE a.name = b.name
    )
) AS prev
WHERE prev.seqnum = 1
ORDER BY name, time;
Here is a db<>fiddle for this version.

Related

SQL Server : create a column based on the first occurrence of a value in another column

Consider the following table in SQL Server:
I want to write a SQL query to generate the column Indicator. This column should be set to 1 on the first occurrence of Flag = 1 for each category.
For instance, for Category A, column Flag is set to 1 for the dates 1/3/2019, 1/4/2019, 1/5/2019, 1/6/2019. Since 1/3/2019 is the earliest date when Flag was set to 1, the Indicator column for that record should also be set to 1.
What SQL Server query should I write for this?
PS: The figure already shows the desired output for the Indicator column.
Below is the code to generate the table in SQL Server:
-- Sample table: one row per (Category, Date) with a 0/1 Flag.
CREATE TABLE myTable
(
    Category CHAR(1),
    Date     DATE,
    Flag     INT
)

INSERT INTO myTable (Category, Date, Flag)
VALUES
    ('A', '2019-01-01', 0),
    ('A', '2019-02-01', 0),
    ('A', '2019-03-01', 1),
    ('A', '2019-04-01', 1),
    ('A', '2019-05-01', 1),
    ('A', '2019-06-01', 1),
    ('B', '2019-01-01', 0),
    ('B', '2019-02-01', 0),
    ('B', '2019-03-01', 0),
    ('B', '2019-04-01', 0),
    ('B', '2019-05-01', 1),
    ('B', '2019-06-01', 1),
    ('C', '2019-01-01', 0),
    ('C', '2019-02-01', 0),
    ('C', '2019-03-01', 0),
    ('C', '2019-04-01', 1),
    ('C', '2019-05-01', 1),
    ('C', '2019-06-01', 1),
    ('C', '2019-07-01', 1)
One way using a derived table and MIN() to figure out which is the first date for a category that has the flag. Join that back to the original table.
DEMO
-- Marks, per category, the earliest row whose Flag = 1.
-- The derived table b yields one (Category, first flagged Date) pair per
-- category; rows of myTable that do not match it get Indicator = 0.
-- Fixed: the query now reads from myTable, the table created in the
-- question, instead of the unrelated names YourTable / dbo.YourTable.
SELECT
    yt.*
  , ISNULL(b.Indicator, 0) AS Indicator
FROM myTable yt
LEFT JOIN
    (SELECT Category, MIN(Date) AS Date, 1 AS Indicator
     FROM myTable
     WHERE Flag = 1
     GROUP BY Category) b
    ON b.Category = yt.Category AND b.Date = yt.Date
I am thinking of using the min() function as a window function:
-- Marks, per category, the earliest row whose Flag = 1 using MIN() as a
-- window function. Partitioning by (category, flag) makes the minimum date
-- be computed separately for flagged and unflagged rows.
-- Fixed: "case then" was a syntax error; the keyword is WHEN.
select t.*,
       (case when t.flag = 1 and
                  t.date = min(t.date) over (partition by t.category, t.flag)
             then 1 else 0
        end) as indicator
from myTable t
order by t.Category, t.date
Another way
DEMO
-- Sample table: one row per (Category, Date) with a 0/1 Flag.
CREATE TABLE myTable
(
    Category char(1),
    Date     date,
    Flag     int
)

INSERT INTO myTable (Category, Date, Flag)
VALUES
    ('A','2019-01-01',0), ('A','2019-02-01',0), ('A','2019-03-01',1),
    ('A','2019-04-01',1), ('A','2019-05-01',1), ('A','2019-06-01',1),
    ('B','2019-01-01',0), ('B','2019-02-01',0), ('B','2019-03-01',0),
    ('B','2019-04-01',0), ('B','2019-05-01',1), ('B','2019-06-01',1),
    ('C','2019-01-01',0), ('C','2019-02-01',0), ('C','2019-03-01',0),
    ('C','2019-04-01',1), ('C','2019-05-01',1), ('C','2019-06-01',1),
    ('C','2019-07-01',1);

-- Within each category the window sorts highest Flag first and then by date,
-- so FIRST_VALUE returns the earliest date among the highest-flag rows.
-- A row is marked only when it is flagged itself and sits on that date.
SELECT
    t.*,
    CASE
        WHEN t.Flag = 1
         AND t.Date = FIRST_VALUE(t.Date) OVER (PARTITION BY t.Category
                                                ORDER BY t.Flag DESC, t.Date ASC)
        THEN 1
        ELSE 0
    END Indicator
FROM myTable t
ORDER BY t.Category, t.Date

Multiple SQL MAX when items are not in order

I have some data as below:
-- Sample data for the question; test2 defines the row order.
-- Fixed: table variables must be named with a leading @ (DECLARE #MyTable
-- is not valid T-SQL).
DECLARE @MyTable AS TABLE
(productName varchar(13), test1 int, test2 int)

INSERT INTO @MyTable
(productName, test1, test2)
VALUES
('a', 1,1),
('a', 2,2),
('a', 3,3),
('b', 1,4),
('b', 2,5),
('b', 3,6),
('a', 1,7),
('a', 4,8),
('a', 5,9)
;
-- Plain grouping: one row per productName, regardless of row order.
SELECT productname, MAX(test1) from @MyTable group BY productname
a MAX query on test1 column gives
a,5
b,3
but I need to have result as
a,3
b,3
a,5
when I have order by test2
You can solve this by using a trick with row_numbers, so that you assign 2 different row numbers, one for the whole data and one that is partitioned by productname. If you compare the difference between these numbers, you can figure out when product name has changed, and use that to determine the max values for each group.
-- MAX(test1) per consecutive run of the same productName (ordered by test2).
-- GRP is the overall row number minus the per-product row number; that
-- difference stays constant inside a run and changes when the product
-- changes, so (productname, GRP) identifies each run.
-- Fixed: references the table variable as @MyTable (was #MyTable).
select productname, max(test1) from (
    SELECT *,
           row_number() over (order by test2 asc) -
           row_number() over (partition by productname order by test2 asc) as GRP
    from @MyTable
) X
group by productname, GRP
You can test this in SQL Fiddle
If the test2 column is always a row number without gaps, you can use it in place of the first row_number() column. If you need the output in a particular order, you will have to add that ordering yourself, for example by using the max of test1.
Please check the following SQL Select statement
-- MAX(test1) per consecutive run of the same productName, using LAG().
-- Fixed: table variables are named with @ (the # forms were invalid).
DECLARE @MyTable AS TABLE (productName varchar(13), test1 int, test2 int)
INSERT INTO @MyTable
(productName, test1, test2)
VALUES
('a', 1,1),
('a', 2,2),
('a', 3,3),
('b', 1,4),
('b', 2,5),
('b', 3,6),
('a', 1,7),
('a', 4,8),
('a', 5,9)

DECLARE @MyTableNew AS TABLE (id int identity(1,1), productName varchar(13), test1 int, test2 int)

-- Fixed: explicit column list plus ORDER BY test2, so the identity values
-- follow the intended row order instead of an unspecified scan order.
INSERT INTO @MyTableNew (productName, test1, test2)
SELECT productName, test1, test2
FROM @MyTable
ORDER BY test2

;with cte as (
    -- ischange = 1 on the first row of every run (the previous row, by id,
    -- has a different productName; the first row compares against '').
    SELECT
        id, productName, test1, test2,
        case when (lag(productName, 1, '') over (order by id)) = productName then 0 else 1 end ischange
    from @MyTableNew
), cte2 as (
    -- Running total of ischange numbers the runs 1, 2, 3, ...
    -- (windowed SUM replaces the per-row correlated subquery).
    select t.*,
           sum(t.ischange) over (order by t.id rows unbounded preceding) grp
    from cte t
)
-- productName is constant within a grp, so grouping by both yields one row
-- per run; ORDER BY grp returns the runs in their original sequence.
select grp, productName, max(test1) as max_test1
from cte2
group by grp, productName
order by grp
This is implemented according to the following SQL Server Lag() function tutorial
The Lag() function is used to identify and order the groups in table data
Please try this query
-- Splits the rows into "test1 = test2" and "test1 != test2" and takes
-- MAX(test1) per productName in each half.
-- NOTE(review): this relies on the sample data, where only the first three
-- rows have test1 = test2; it is not a general solution for consecutive runs.
-- Fixed: the table variable is named @MyTable (DECLARE #MyTable is invalid).
DECLARE @MyTable AS TABLE
(productName varchar(13), test1 int, test2 int)

INSERT INTO @MyTable
(productName, test1, test2)
VALUES
('a', 1,1),
('a', 2,2),
('a', 3,3),
('b', 1,4),
('b', 2,5),
('b', 3,6),
('a', 1,7),
('a', 4,8),
('a', 5,9)
;
SELECT productname, MAX(test1)
from @MyTable
where test1 = test2
group BY productname
union all
SELECT productname, MAX(test1)
from @MyTable
where test1 != test2
group BY productname

Summing up the records as per given conditions

I have a table like the one below. For a particular fund, I need to sum the Amount values up to each of a set of dates. Say I need the sums for three dates: 01/28/2015, 03/30/2015 and 04/01/2015. For the first date, the logic should check how many records exist in the table up to that date and, if it finds more than one, sum their Amount values. For each following date, it should sum the records after the previous date, up to and including that date.
Id Fund Date Amount
1 A 01/20/2015 250
2 A 02/28/2015 300
3 A 03/20/2015 400
4 A 03/30/2015 200
5 B 04/01/2015 500
6 B 04/01/2015 600
I want result to be like below
Id Fund Date SumOfAmount
1 A 02/28/2015 550
2 A 03/30/2015 600
3 B 04/01/2015 1100
Based on your question, it seems that you want to select a set of dates, and then for each fund and selected date, get the sum of the fund amounts from the selected date to the previous selected date. Here is the result set I think you should be expecting:
Fund Date SumOfAmount
A 2015-02-28 550.00
A 2015-03-30 600.00
B 2015-04-01 1100.00
Here is the code to produce this output:
-- Sums fund amounts into buckets that end on each selected date.
-- Fixed: table variables are named with @ (DECLARE #Dates / #FundAmounts
-- is not valid T-SQL).
DECLARE @Dates TABLE
(
    SelectedDate DATE PRIMARY KEY
)
INSERT INTO @Dates
VALUES
 ('02/28/2015')
,('03/30/2015')
,('04/01/2015')

DECLARE @FundAmounts TABLE
(
     Id INT PRIMARY KEY
    ,Fund VARCHAR(5)
    ,Date DATE
    ,Amount MONEY
);
INSERT INTO @FundAmounts
VALUES
 (1, 'A', '01/20/2015', 250)
,(2, 'A', '02/28/2015', 300)
,(3, 'A', '03/20/2015', 400)
,(4, 'A', '03/30/2015', 200)
,(5, 'B', '04/01/2015', 500)
,(6, 'B', '04/01/2015', 600);

SELECT
     F.Fund
    ,D.SelectedDate AS Date
    ,SUM(F.Amount) AS SumOfAmount
FROM
(
    -- Pair every selected date with the one before it; the first selected
    -- date gets 1900-01-01 as its lower bound.
    SELECT
         SelectedDate
        ,LAG(SelectedDate,1,'1/1/1900') OVER (ORDER BY SelectedDate ASC) AS PreviousDate
    FROM @Dates
) D
JOIN
    @FundAmounts F
ON
    -- Bucket = day after the previous selected date through the selected
    -- date, inclusive.
    F.Date BETWEEN DATEADD(DAY,1,D.PreviousDate) AND D.SelectedDate
GROUP BY
     D.SelectedDate
    ,F.Fund
EDIT: Here is alternative to the LAG function for this example:
-- Drop-in replacement for the derived table D that avoids LAG(): the
-- previous selected date is fetched with a correlated TOP 1 subquery, and
-- 1900-01-01 is used when there is none.
-- Fixed: references the table variable as @Dates (was #Dates).
FROM
(
    SELECT
         SelectedDate
        ,ISNULL((SELECT TOP 1 SelectedDate FROM @Dates WHERE SelectedDate < Dates.SelectedDate ORDER BY SelectedDate DESC),'1/1/1900') AS PreviousDate
    FROM @Dates Dates
) D
If I change your incorrect sample data to ...
-- Adjusted sample data: rows that should be summed together share a date.
CREATE TABLE TableName
(
    [Id]     int,
    [Fund]   varchar(1),
    [Date]   datetime,
    [Amount] int
);

INSERT INTO TableName ([Id], [Fund], [Date], [Amount])
VALUES
    (1, 'A', '2015-01-28 00:00:00', 250),
    (2, 'A', '2015-01-28 00:00:00', 300),
    (3, 'A', '2015-03-30 00:00:00', 400),
    (4, 'A', '2015-03-30 00:00:00', 200),
    (5, 'B', '2015-04-01 00:00:00', 500),
    (6, 'B', '2015-04-01 00:00:00', 600);
this query using GROUP BY works:
-- One row per requested date: the lowest Id and Fund seen on that date and
-- the total Amount for it.
select
    min(t.Id)     as Id,
    min(t.Fund)   as Fund,
    t.[Date],
    sum(t.Amount) as SumOfAmount
from dbo.TableName t
where t.[Date] in ('01/28/2015','03/30/2015','04/01/2015')
group by t.[Date]
Demo
Initially I used ROW_NUMBER and a month function to pick the max date of every month; in the second CTE I summed the amounts, and then I joined them. Maybe this result set matches your output.
-- Picks the latest row of every month and pairs it with a monthly total.
-- Fixed: the table variable is named @t (declare #t is not valid T-SQL).
declare @t table (Id int, Fund Varchar(1), Dated date, amount int)
insert into @t (id, Fund, dated, amount) values
(1,'A','01/20/2015',250),
(2,'A','01/28/2015',300),
(3,'A','03/20/2015',400),
(4,'A','03/30/2015',200),
(5,'B','04/01/2015',600),
(6,'B','04/01/2015',500)

;with cte as (
    -- RN = 1 marks the latest-dated row within each calendar month.
    select ID, Fund, Amount, Dated,
           ROW_NUMBER() OVER (PARTITION BY DATEDIFF(MONTH, '20000101', dated)
                              ORDER BY dated desc) AS RN
    from @t
    group by ID, Fund, DATED, Amount
),
CTE2 AS
(
    -- Total amount per month number.
    select SUM(amount) Amt
    from @t
    GROUP BY MONTH(dated)
),
CTE3 AS
(
    -- Numbers the monthly totals by ascending amount.
    Select Amt, ROW_NUMBER() OVER (ORDER BY amt) R
    from cte2
),
CTE4 AS
(
    -- NOTE(review): R here is assigned with ORDER BY (SELECT NULL), i.e. in
    -- no defined order, and is later matched to CTE3's amount ranking, so
    -- which date ends up next to which total is not guaranteed - verify
    -- before relying on this beyond the sample data.
    Select DISTINCT C.ID As ID,
           C.Fund As Fund,
           C.Dated As Dated,
           ROW_NUMBER() OVER (PARTITION BY RN ORDER BY (SELECT NULL)) R
    from cte C INNER JOIN CTE3 CC ON c.RN = CC.R
    Where C.RN = 1
    GROUP BY C.ID, C.Fund, C.RN, C.Dated
)
select C.R, C.Fund, C.Dated, cc.Amt
from CTE4 C INNER JOIN CTE3 CC
    ON c.R = cc.R
-- Sums amounts per fund into buckets ending on each sample date.
-- Fixed: table variables are named with @ (declare #TableName / #Sample is
-- not valid T-SQL).
declare @TableName table([Id] int, [Fund] varchar(1), [Date] datetime, [Amount] int)
declare @Sample table([SampleDate] datetime)

INSERT INTO @TableName
([Id], [Fund], [Date], [Amount])
VALUES
(1, 'A', '20150120 00:00:00', 250),
(2, 'A', '20150128 00:00:00', 300),
(3, 'A', '20150320 00:00:00', 400),
(4, 'A', '20150330 00:00:00', 200),
(5, 'B', '20150401 00:00:00', 500),
(6, 'B', '20150401 00:00:00', 600)

INSERT INTO @Sample ([SampleDate])
values ('20150128 00:00:00'), ('20150330 00:00:00'), ('20150401 00:00:00')

;WITH groups AS (
    -- For every distinct (Fund, Date, Amount), the earliest sample date that
    -- is on or after the row's date.
    -- NOTE(review): grouping by (Fund, Date, AMOUNT) collapses rows that
    -- share all three values into one - confirm that cannot occur.
    SELECT [Fund], [Date], [AMOUNT], MIN([SampleDate]) [SampleDate]
    FROM @TableName
    JOIN @Sample ON [Date] <= [SampleDate]
    GROUP BY [Fund], [Date], [AMOUNT]
)
SELECT [Fund], [SampleDate], SUM([AMOUNT])
FROM groups
GROUP BY [Fund], [SampleDate]
Explanation:
The CTE groups finds the earliest SampleDate which is later than (or equals to) your
data's date and enriches your data accordingly, thus giving them the group to be summed up in.
After that, you can group on the derived date.

Updating Using Aggregate Function

I am trying to update a column of the table using the below query.. But I get an error
An aggregate may not appear in the set list of an UPDATE statement
Code:
-- The statement from the question, kept as posted. It is rejected with
-- "An aggregate may not appear in the set list of an UPDATE statement"
-- because MIN() is used directly in the SET clause.
UPDATE Test.dbo.Table1
SET InDate = MIN(b.Date)
FROM
Test.dbo.Table1 a
LEFT OUTER JOIN
Test.dbo.Table2 b
ON
a.ID1 = b.ID2
-- Filtering on b in WHERE discards the unmatched rows of the outer join.
WHERE b.Code = 'IN';
I want to update the InDate column in my table with the oldest date from Table2 (b.Date) column where (b.code) is 'IN'
What is wrong in here?
You need to put the aggregate in a temp table or subquery and you need an explicit GROUP BY statement.
-- Sets Table1.InDate to the oldest Table2.Date with Code = 'IN' for the
-- matching ID. The aggregate is computed in a grouped derived table, which
-- is then joined to the rows being updated.
-- Fixed: the 'IN' filter was applied to c.Code (Table1), but Code is a
-- Table2 column; it now filters Table2 inside the subquery, so only 'IN'
-- dates take part in MIN().
UPDATE c
SET InDate = d.min_date
FROM Test.dbo.Table1 c
INNER JOIN (
    SELECT b.ID2, MIN(b.Date) AS min_date
    FROM Test.dbo.Table2 b
    WHERE b.Code = 'IN'
    GROUP BY b.ID2
) d
    ON c.ID1 = d.ID2;
I think this will do what you want. I've removed the aliases to make it as clear as possible:
-- Every row of Table1 receives the oldest 'IN' date among the Table2 rows
-- with the same ID; rows without such a match are set to NULL because MIN()
-- over no rows is NULL and the UPDATE has no WHERE clause.
update Table1
set InDate = (
        select min(Table2.Date)
        from Table2
        where Table2.Code = 'IN'
          and Table2.ID2 = Table1.ID1
    )
You could use apply to get the min date and then use that in the update statement:
-- Sets Table1.InDate to the oldest 'IN' date from Table2 for the same ID.
-- OUTER APPLY keeps every Table1 row, so rows without an 'IN' match are set
-- to NULL.
-- Fixed: column names now match the question's schema - the date column on
-- Table2 is Date (not InDate) and the join keys are Table2.ID2 = Table1.ID1
-- (not Id = Id).
UPDATE a
SET a.InDate = b.MinBDate
FROM Table1 a
OUTER APPLY
(
    SELECT MIN(b.Date) AS MinBDate
    FROM Table2 b
    WHERE b.ID2 = a.ID1
      AND b.Code = 'IN'
) b
Maybe this?
-- Sets Table1.InDate to the oldest 'IN' date from Table2 for the same ID;
-- Table1 rows without an 'IN' match are left untouched (inner join).
-- Fixed: the derived table had a duplicated WHERE keyword ("where WHERE"),
-- which is a syntax error. The UPDATE also targets the alias a explicitly.
UPDATE a
SET InDate = b.Date
FROM
    Test.dbo.Table1 a
INNER JOIN (
    select
        b.ID2,
        MIN(b.Date) Date
    from Test.dbo.Table2 b
    where b.Code = 'IN'
    group by
        b.ID2
) b
ON
    a.ID1 = b.ID2
Assuming your data model is something like the following, joining to a derived table should do the trick:
--Data Setup:
-- Fixed: table variables are named with @ (DECLARE #Table1 / #Table2 is not
-- valid T-SQL).
DECLARE @Table1 TABLE (ID1 INT, InDate DATETIME)
DECLARE @Table2 TABLE (ID2 INT, ID1 INT, Date DATETIME, Code VARCHAR(12))

INSERT INTO @Table1 (ID1)
VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10)

INSERT INTO @Table2 (ID2, ID1, Date, Code)
VALUES
(1, 1, '1/1/2014', 'OUT'),
(2, 1, '5/1/2014', 'IN'),
(3, 1, '3/1/2013', 'IN'),
(4, 2, '1/1/2014', 'OUT'),
(5, 2, '1/1/2014', 'IN'),
(6, 3, '1/1/2014', 'IN'),
(7, 4, '1/1/2014', 'IN'),
(8, 5, '1/1/2014', 'IN'),
(9, 6, '2/1/2014', 'OUT'),
(10, 7, '3/1/2014', 'IN'),
(11, 8, '4/1/2014', 'IN'),
(12, 9, '2/1/2014', 'IN'),
(12, 9, '2/1/2014', 'IN'),
(12, 10, '1/2/2014', 'IN'),
(12, 10, '1/3/2014', 'IN'),
(12, 10, '1/4/2014', 'IN'),
(12, 10, '1/1/2014', 'OUT')

--Actual Update:
-- The derived table yields one row per ID1 with its oldest 'IN' date; the
-- inner join leaves InDate untouched for IDs that have no 'IN' row.
UPDATE T1
SET InDate = T2.MinDate
FROM @Table1 T1
JOIN (SELECT T2.ID1, MIN(Date) AS MinDate
      FROM @Table2 T2
      WHERE T2.Code = 'IN'
      GROUP BY T2.ID1) T2 ON T2.ID1 = T1.ID1

--Results
SELECT *
FROM @Table1

T-SQL: Paging WITH TIES

I am trying to implement a paging routine that's a little different.
For the sake of a simple example, let's assume that I have a table defined and populated as follows:
-- Sample data: three parents, each with the same three hourly timestamps.
-- Fixed: the table variable is named @Temp (DECLARE #Temp is not valid T-SQL).
DECLARE @Temp TABLE
(
    ParentId INT,
    [TimeStamp] DATETIME,
    Value INT
);
INSERT INTO @Temp VALUES (1, '1/1/2013 00:00', 6);
INSERT INTO @Temp VALUES (1, '1/1/2013 01:00', 7);
INSERT INTO @Temp VALUES (1, '1/1/2013 02:00', 8);
INSERT INTO @Temp VALUES (2, '1/1/2013 00:00', 6);
INSERT INTO @Temp VALUES (2, '1/1/2013 01:00', 7);
INSERT INTO @Temp VALUES (2, '1/1/2013 02:00', 8);
INSERT INTO @Temp VALUES (3, '1/1/2013 00:00', 6);
INSERT INTO @Temp VALUES (3, '1/1/2013 01:00', 7);
INSERT INTO @Temp VALUES (3, '1/1/2013 02:00', 8);
TimeStamp will always be the same interval, e.g. daily data, 1 hour data, 1 minute data, etc. It will not be mixed.
For reporting and presentation purposes, I want to implement paging that:
Orders by TimeStamp
Starts out using a suggested pageSize (say 4), but will automatically adjust to include additional records matching on TimeStamp. In other words, if 1/1/2013 01:00 is included for one ParentId, the suggested pageSize will be overridden and all records for hour 01:00 will be included for all ParentId's. It's almost like the TOP WITH TIES option.
So running this query with pageSize of 4 would return 6 records. There are 3 hour 00:00 and 1 hour 01:00 by default, but because there are more hour 01:00's, the pageSize would be overridden to return all hour 00:00 and 01:00.
Here's what I have so far. I think I'm close, as it works for the first iteration, but subsequent queries for the next pageSize+ rows don't work.
-- The asker's first-page attempt: a row qualifies when either its row number
-- or its rank (by TimeStamp) falls in 1..4, so ties on the last timestamp
-- are pulled in.
-- Fixed: reads from the table variable @Temp (was #Temp).
WITH CTE AS
(
    SELECT ParentId, [TimeStamp], Value,
           RANK() OVER(ORDER BY [TimeStamp]) AS rnk,
           ROW_NUMBER() OVER(ORDER BY [TimeStamp]) AS rownum
    FROM @Temp
)
SELECT *
FROM CTE
WHERE (rownum BETWEEN 1 AND 4) OR (rnk BETWEEN 1 AND 4)
ORDER BY TimeStamp, ParentId
The ROW_NUMBER ensures the minimum pageSize is met, but the RANK will include additional ties.
-- Returns one page of rows ordered by TimeStamp, widened to include every
-- row that ties on a timestamp touched by the page.
-- Fixed: variables are named with @ (#Temp, #PageSize and #Page were invalid).
declare @Temp as Table ( ParentId Int, [TimeStamp] DateTime, [Value] Int );
insert into @Temp ( ParentId, [TimeStamp], [Value] ) values
(1, '1/1/2013 00:00', 6),
(1, '1/1/2013 01:00', 7),
(1, '1/1/2013 02:00', 8),
(2, '1/1/2013 00:00', 6),
(2, '1/1/2013 01:00', 7),
(2, '1/1/2013 02:00', 8),
(3, '1/1/2013 00:00', 6),
(3, '1/1/2013 01:00', 7),
(3, '1/1/2013 02:00', 8);
declare @PageSize as Int = 4;
declare @Page as Int = 1;
with Alpha as (
    -- Rnk is shared by rows with the same TimeStamp; RowNum is unique.
    select ParentId, [TimeStamp], Value,
           Rank() over ( order by [TimeStamp] ) as Rnk,
           Row_Number() over ( order by [TimeStamp] ) as RowNum
    from @Temp ),
Beta as (
    -- Rank range covered by the nominal page (row numbers of page @Page).
    select Min( Rnk ) as MinRnk, Max( Rnk ) as MaxRnk
    from Alpha
    where ( @Page - 1 ) * @PageSize < RowNum and RowNum <= @Page * @PageSize )
-- Every row whose rank lies in that range, i.e. the page plus its ties.
select A.*
from Alpha as A inner join
     Beta as B on B.MinRnk <= A.Rnk and A.Rnk <= B.MaxRnk
order by [TimeStamp], ParentId;
EDIT:
An alternative query that assigns page numbers as it goes, so that next/previous page can be implemented without overlapping rows:
-- Walks the rows in RowNum order and assigns a Page / PageRow to each one.
-- A new page starts only when the current page already holds at least
-- @PageSize rows AND the timestamp changes, so ties stay on the same page.
-- Fixed: @Temp and @PageSize are referenced with @ (were #Temp / #PageSize).
with Alpha as (
    select ParentId, [TimeStamp], Value,
           Rank() over ( order by [TimeStamp] ) as Rnk,
           Row_Number() over ( order by [TimeStamp] ) as RowNum
    from @Temp ),
Beta as (
    -- Anchor: the first row opens page 1.
    select ParentId, [TimeStamp], Value, Rnk, RowNum, 1 as Page, 1 as PageRow
    from Alpha
    where RowNum = 1
    union all
    -- Recursive step: visit the next row and extend or roll over the page.
    select A.ParentId, A.[TimeStamp], A.Value, A.Rnk, A.RowNum,
           case when B.PageRow >= @PageSize and A.TimeStamp <> B.TimeStamp then B.Page + 1 else B.Page end,
           case when B.PageRow >= @PageSize and A.TimeStamp <> B.TimeStamp then 1 else B.PageRow + 1 end
    from Alpha as A inner join
         Beta as B on B.RowNum + 1 = A.RowNum
)
select * from Beta
-- One recursion level per row, so the default recursion limit is lifted.
option ( MaxRecursion 0 )
Note that recursive CTEs often scale poorly.
I think your strategy of using row_number() and rank() is overcomplicating things.
Just pick the top 4 timestamps from the data. Then choose any timestamps that match those:
-- Takes the first 4 timestamp values (duplicates included) and returns every
-- row carrying one of them, which pulls in all ties.
-- Fixed: reads from the table variable @Temp (was #temp).
select *
from @Temp
where [timestamp] in (select top 4 [timestamp] from @Temp order by [TimeStamp])