Self join issue

Self join issue - sql

I have a table called tblAccInfo, below is the table data.
I need output like below.
Input
PolicyNumber BankAc StorageDate VerNum
6003210400 123 2012-01-01 1
6003210400 164 2012-01-03 2
6003210400 860 2012-01-05 3
6004317654 301 2012-02-05 1
6004317654 615 2012-03-01 2
6004317654 253 2012-03-12 3
6004317654 887 2012-04-03 4
OUTPUT
PolicyNumber IntialBankAc IntialSDate VerNum LatestBankAc LatestSDate VerNum
6003210400 123 2012-01-01 1 860 2012-01-05 3
6004317654 301 2012-02-05 1 887 2012-04-03 4
I tried the self join below, but it did not succeed. Please help me out with this.
-- Asker's original (non-working) attempt, kept for reference.
-- The <> predicates pair each row with every other row of the same policy,
-- so the result is not reduced to one initial/latest pair per PolicyNumber.
SELECT DISTINCT
    P.PolicyNumber,
    P.BankAc [IntialBankAc],
    P.StorageDate [IntialSDate],  -- opening bracket was missing on this alias
    P.VerNum,
    P1.BankAc [LatestBankAc],
    P1.StorageDate [LatestSDate],
    P1.VerNum
FROM tblAccInfo P
INNER JOIN tblAccInfo P1
    ON P1.PolicyNumber = P.PolicyNumber
    AND (P.BankAc <> P1.BankAc AND P.StorageDate <> P1.StorageDate AND P.VerNum <> P1.VerNum)

Try this:
-- One output row per policy: the row with the lowest VerNum (initial) placed
-- next to the row with the highest VerNum (latest).
SELECT
    T1.PolicyNumber,
    T2.BankAc      AS IntialBankAc,
    T2.StorageDate AS IntialSDate,
    T2.VerNum      AS InitalVerNum,
    T3.BankAc      AS LatestBankAc,
    T3.StorageDate AS LatestSDate,
    T3.VerNum      AS LatestVerNum  -- casing aligned with the column name for case-sensitive collations
FROM
(
    -- Lowest and highest version number recorded for each policy.
    SELECT
        PolicyNumber,
        MIN(VerNum) AS MinVerNum,
        MAX(VerNum) AS MaxVerNum
    FROM tblAccInfo
    GROUP BY PolicyNumber
) AS T1
INNER JOIN tblAccInfo AS T2
    ON T1.PolicyNumber = T2.PolicyNumber
    AND T1.MinVerNum = T2.VerNum
INNER JOIN tblAccInfo AS T3
    ON T1.PolicyNumber = T3.PolicyNumber
    AND T1.MaxVerNum = T3.VerNum
See it working online: sqlfiddle

-- Sample data in a table variable. Table variables are named with @;
-- "DECLARE #x TABLE" is not valid T-SQL.
DECLARE @x TABLE
(
    PolicyNumber VARCHAR(32),
    BankAc INT,
    StorageDate DATE,
    VerNum INT
);
INSERT INTO @x (PolicyNumber, BankAc, StorageDate, VerNum) VALUES
('6003210400', 123, '2012-01-01', 1),
('6003210400', 164, '2012-01-03', 2),
('6003210400', 860, '2012-01-05', 3),
('6004317654', 301, '2012-02-05', 1),
('6004317654', 615, '2012-03-01', 2),
('6004317654', 253, '2012-03-12', 3),
('6004317654', 887, '2012-04-03', 4);
-- Number each policy's rows from both ends: f = 1 marks the first version,
-- l = 1 marks the last version.
WITH x AS
(
    SELECT PolicyNumber, BankAc, StorageDate, VerNum,
        f = ROW_NUMBER() OVER (PARTITION BY PolicyNumber ORDER BY VerNum),
        l = ROW_NUMBER() OVER (PARTITION BY PolicyNumber ORDER BY VerNum DESC)
    FROM @x
)
-- Join the first-version row to the last-version row of the same policy.
SELECT
    x.PolicyNumber,
    InitialBankAc = x.BankAc,
    InitialSDate = x.StorageDate,
    InitialVerNum = x.VerNum,
    LatestBankAc = x2.BankAc,
    LatestSDate = x2.StorageDate,
    LatestVerNum = x2.VerNum
FROM x INNER JOIN x AS x2
    ON x.PolicyNumber = x2.PolicyNumber
WHERE x.f = 1 AND x2.l = 1
ORDER BY x.PolicyNumber;

not tested - but should give you the idea. (There may be a more efficient way of doing this - it was just the approach that jumped out at me.)
-- initial: the row with the earliest storagedate per policy.
-- final:   the row with the most recent storagedate per policy.
-- Row columns are read from the base-table aliases (a1, b1); the aggregate
-- subqueries (a, b) only supply policynumber and the chosen storagedate.
select initial.policynumber
    ,initial.initialbankaccount
    ,initial.initialstoragedate
    ,initial.initialvernum
    ,final.latestbankaccount
    ,final.lateststoragedate
    ,final.latestvernum
from
    (select a1.policynumber, a1.BankAc as initialbankaccount, a1.storagedate as initialstoragedate, a1.vernum as initialvernum
     from tblAccInfo a1
     inner join (select min(storagedate) as storagedate, policynumber
                 from tblAccInfo
                 group by policynumber) a
         on a.policynumber = a1.policynumber
         and a.storagedate = a1.storagedate) initial
inner join
    (select b1.policynumber, b1.BankAc as latestbankaccount, b1.storagedate as lateststoragedate, b1.vernum as latestvernum
     from tblAccInfo b1
     -- max, not min: this side picks the latest row
     inner join (select max(storagedate) as storagedate, policynumber
                 from tblAccInfo
                 group by policynumber) b
         on b.policynumber = b1.policynumber
         and b.storagedate = b1.storagedate) final
    on final.policynumber = initial.policynumber

Related

SQL Server : smallest ROW_NUM in where condition, with subgroup pre-condition

Thanks all in advance! I am trying to describe this as clear as I can.
I got two sub-tables, 1st table retrieves Comfirmed_Date and the 2nd table retrieves Mail_Date with condition Mail_Date >= Comfirmed_Date.
-- Asker's query: for each SOURCE_U row, the latest confirmation date (f) joined
-- to every letter row (g) whose mail date is on or after that confirmation date.
select
a.ID
,g.ROWNUM
,f.CORM_DT
,g.MAIL_DT
from
SOURCE_U a
left join
-- f: latest ATUF_DATE3 (cast to date) per SOURCE_ID
(select
a.SOURCE_ID
, Max(Cast(b.ATUF_DATE3 as date)) as [CORM_DT]
from
ATTACH_U a
inner join
USERFLD_D b on a.DEST_CK = b.DEST_CK
group by
a.SOURCE_ID) f on f.SOURCE_ID = a.SOURCE_ID
left join
-- g: attachment/letter rows numbered per SOURCE_ID in CREATE_DT order
(select
a.SOURCE_ID
, cast(b.MAILED_DT as date) as MAIL_DT
, row_number() over (partition by SOURCE_ID order by CREATE_DT) as ROWNUM
from
ATTACH_U a
left join
LETTER_D b on b.DEST_CK = a.DEST_CK) g on g.SOURCE_ID = a.SOURCE_ID
-- ROWNUM was assigned inside g, before this date filter runs, so the rows that
-- survive the filter do not necessarily start at ROWNUM = 1
and g.MAIL_DT >= f.CORM_DT
I need the first line (smallest row_num) for the tables, how can I achieve that?
Originally I thought I could add a condition like
where g.ROWNUM = 1
but because I have the condition on the joined table, it does not work for the situations below.
ID gROWNUM CORM_DT MAIL_DT
1001 3 2020-10-20 2020-10-22
1001 4 2020-10-20 2020-10-30
1002 2 2020-10-20 2020-10-21
1002 3 2020-10-20 2020-10-23
1002 4 2020-10-20 2020-10-28
1003 1 2020-10-20 2020-10-30
1004 1 2020-10-20 2020-10-21
1004 2 2020-10-20 2020-10-23
1005 4 2020-10-20 2020-10-28
1006 1 2020-10-20 2020-10-30
I only want one line for each ID here.

Try this:
-- Returns one row per ID: the qualifying mailing with the smallest ROWNUM.
-- TOP 1 would return a single row for the whole result set, so the rows are
-- ranked per ID after the date filter has been applied and rank 1 is kept.
-- IDs with no qualifying mailing still appear once, with NULL mail columns.
WITH ranked AS (
    SELECT
        a.ID
        , g.ROWNUM
        , f.CORM_DT
        , g.MAIL_DT
        , ROW_NUMBER() OVER ( PARTITION BY a.ID ORDER BY g.ROWNUM ) AS pick
    FROM SOURCE_U a
    LEFT JOIN (
        -- latest confirmation date per SOURCE_ID
        SELECT
            a.SOURCE_ID
            , Max(Cast(b.ATUF_DATE3 as date)) as [CORM_DT]
        FROM ATTACH_U a
        INNER JOIN USERFLD_D b
            ON a.DEST_CK = b.DEST_CK
        GROUP BY a.SOURCE_ID
    ) f
        ON f.SOURCE_ID = a.SOURCE_ID
    LEFT JOIN (
        -- mailings numbered per SOURCE_ID in CREATE_DT order
        SELECT
            a.SOURCE_ID
            , CAST( b.MAILED_DT AS date) AS MAIL_DT
            , ROW_NUMBER() OVER( PARTITION BY SOURCE_ID ORDER BY CREATE_DT ) AS ROWNUM
        FROM ATTACH_U a
        LEFT JOIN LETTER_D b
            ON b.DEST_CK = a.DEST_CK
    ) g
        ON g.SOURCE_ID = a.SOURCE_ID
        AND g.MAIL_DT >= f.CORM_DT
)
SELECT
    ID
    , ROWNUM
    , CORM_DT
    , MAIL_DT
FROM ranked
WHERE pick = 1
ORDER BY ID;

All you need is a window function in your select.
-- Template: rank the rows per ID by MAIL_DT and keep the first one.
-- row_number() guarantees exactly one row per ID even when MAIL_DT values tie;
-- the derived table needs an alias in SQL Server.
select rn, columns... from (
select row_number() over ( partition by a.ID order by MAIL_DT) as rn, columns...
...
) ranked
where rn = 1

SQL - spread previous values from one column into multiple new columns

I have a SQL table of Customer_ID, showing Payments by Year. The first (of many) customer appears like this:
ID Payment Year
112 0 2004
112 0 2005
112 0 2006
112 9592 2007
112 12332 2008
112 9234 2011
112 5400 2012
112 7392 2014
112 8321 2015
Note that some years are missing. I need to create 10 new columns, showing the Payments in the previous 10 years, for each row. The resulting table should look like this:
ID Payment Year T-1 T-2 T-3 T-4 T-5 T-6 T-7 T-8 T-9 T-10
112 0 2004 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
112 0 2005 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL
112 0 2006 0 0 NULL NULL NULL NULL NULL NULL NULL NULL
112 952 2007 0 0 0 NULL NULL NULL NULL NULL NULL NULL
112 1232 2008 952 0 0 0 NULL NULL NULL NULL NULL NULL
112 924 2011 NULL NULL 1232 952 0 0 0 NULL NULL NULL
112 500 2012 924 NULL NULL 1232 952 0 0 0 NULL NULL
112 392 2014 NULL 500 924 NULL NULL 1232 952 0 0 0
112 821 2015 392 NULL 500 924 NULL NULL 1232 952 0 0
I am well aware that this is a large duplication of data, and so seems like a strange thing to do. However, I would still like to do it! (the data is being prepared for a predictive model, in which previous payments (and other info) will be used to predict the current year's payment)
I'm not really sure where to start with this. I have been looking at using pivot, but can't figure out how to get it to select values from a customer's previous year.
I would very much like to do this in SQL. If that is not possible I may be able to copy the table into R - but SQL is my preference.
Any help much appreciated.

You could use lag() if you had full data:
-- lag(payment, n) reads the payment n rows earlier within the same id, ordered by year;
-- that is "n years ago" only when no year is missing for the id.
select t.*,
lag(payment, 1) over (partition by id order by year) as t_1,
lag(payment, 2) over (partition by id order by year) as t_2,
. . .
from t;
However, for your situation with missing intermediate years, left join may be simpler:
-- One self left join per look-back year; a missing year simply yields NULL.
-- Each join must filter on its own alias (t1 for year - 1, t2 for year - 2, ...).
select t.*,
       t1.payment as t_1,
       t2.payment as t_2,
       . . .
from t left join
     t t1
     on t1.id = t.id and
        t1.year = t.year - 1 left join
     t t2
     on t2.id = t.id and
        t2.year = t.year - 2 left join
     . . .;

I think your friend will be LAG
Here's an implementation:
-- Sample data in a table variable. Table variables are named with @;
-- "Declare #t table" is not valid T-SQL.
Declare @t table (
    ID int,
    Payment int,
    Yr int
)
Insert Into @t Values(112,0,2004)
Insert Into @t Values(112,0,2005)
Insert Into @t Values(112,0,2006)
Insert Into @t Values(112,9592,2007)
Insert Into @t Values(112,12332,2008)
Insert Into @t Values(112,9234,2011)
Insert Into @t Values(112,5400,2012)
Insert Into @t Values(112,7392,2014)
Insert Into @t Values(112,8321,2015)
Insert Into @t Values(113,0,2009)
Insert Into @t Values(113,9234,2011)
Insert Into @t Values(113,5400,2013)
Insert Into @t Values(113,8321,2015)
-- E1..E5 build a numbers list 1..100000 by cross-joining a 10-row seed.
;with E1(n) as (Select 1 Union All Select 1 Union All Select 1 Union All Select 1 Union All Select 1 Union All Select 1 Union All Select 1 Union All Select 1 Union All Select 1 Union All Select 1)
,E2(n) as (Select 1 From E1 a, E1 b)
,E4(n) as (Select 1 From E2 a, E2 b)
,E5(n) as (Select row_number() over(order by isnull(null,1)) From E4 a, E1 b)
-- IDYears: one row per ID for every year between that ID's first and last
-- payment year, so missing years become explicit rows.
,IDYears as (
    Select z.ID, Yr = y.n
    From (
        Select
            Id,
            MinYear = min(Yr),
            MaxYear = max(Yr)
        From @t a
        Group By Id
    ) z
    Inner Join E5 y On y.n between z.MinYear and z.MaxYear
)
-- With every year present, Lag(n) is the payment from n years earlier
-- (NULL where that year had no payment row). Filled-in years also appear as
-- rows of their own, with NULL in the columns coming from b.
Select
    *,
    [t-1] = Lag(B.Payment, 1) Over(Partition By a.ID Order By a.Yr),
    [t-2] = Lag(B.Payment, 2) Over(Partition By a.ID Order By a.Yr),
    [t-3] = Lag(B.Payment, 3) Over(Partition By a.ID Order By a.Yr),
    [t-4] = Lag(B.Payment, 4) Over(Partition By a.ID Order By a.Yr),
    [t-5] = Lag(B.Payment, 5) Over(Partition By a.ID Order By a.Yr),
    [t-6] = Lag(B.Payment, 6) Over(Partition By a.ID Order By a.Yr),
    [t-7] = Lag(B.Payment, 7) Over(Partition By a.ID Order By a.Yr),
    [t-8] = Lag(B.Payment, 8) Over(Partition By a.ID Order By a.Yr),
    [t-9] = Lag(B.Payment, 9) Over(Partition By a.ID Order By a.Yr),
    [t-10] = Lag(B.Payment, 10) Over(Partition By a.ID Order By a.Yr)
From IDYears a
Left Join @t b On a.ID = b.ID and a.Yr = b.Yr
Order By a.ID, a.Yr  -- Yr added so rows within an ID come back in a fixed order

Concrete Rows of Id 2 with Id 1 IF Date is Same and All Row Names Should be different in SQL Server 2008R2 and

I have following Data in myRecords Table
Id Date Name Cash
1 11/25/2016 4:23.123 Ramesh 10000
2 11/25/2016 4:23.173 Suresh 15000
1 11/27/2016 5:23.320 Ramesh 30000
2 11/27/2016 5:23.670 Suresh 40000
and I want to create view So I can get data in following Format
Id1 Date1 Name1 Cash1 Id2 Date2 Name2 Cash2
1 11/25/2016 4:23.123 Ramesh 10000 2 11/25/2016 4:23.173 Suresh 15000
1 11/27/2016 5:23.320 Ramesh 30000 2 11/27/2016 5:23.670 Suresh 40000
How can I do it.

If you are doing date and there will always only be 2 records per day you could convert to drop off the time and do a self join:
-- Sample data in a table variable. Table variables are named with @;
-- "DECLARE #myRecords AS TABLE" is not valid T-SQL.
DECLARE @myRecords AS TABLE (Id INT, DATE DATETIME, Name VARCHAR(20), CASH INT)
INSERT INTO @myRecords VALUES (1,'11/25/2016 4:23','Ramesh',10000),(2,'11/25/2016 4:23','Suresh',15000)
,(1,'11/27/2016 5:23','Ramesh',30000),(2,'11/27/2016 4:23','Suresh',40000)
-- Pair each Id = 1 row with the other row recorded on the same calendar day.
SELECT
    m1.Id as Id1
    ,m1.Date as Date1
    ,m1.Name as Name1
    ,m1.Cash as Cash1
    ,m2.Id as Id2
    ,m2.Date as Date2
    ,m2.Name as Name2
    ,m2.Cash as Cash2
FROM
    @myRecords m1
    LEFT JOIN @myRecords m2
        ON CAST(m1.DATE AS DATE) = CAST(m2.DATE AS DATE)
        AND m1.Id <> m2.Id
WHERE
    m1.Id = 1
-- Then you can also introduce ROW_NUMBER() to figure out whatever order you want then take all of the ODD RowNumbers and SELF JOIN to the Even RowNumbers:
-- Rows are numbered by calendar day, then Id, so each day's Id 1 row gets the
-- odd number and its Id 2 row the following even number, whatever the time of day.
;WITH cte AS (
    SELECT
        *
        ,RowNum = ROW_NUMBER() OVER (ORDER BY CAST([DATE] AS DATE), Id)
    FROM
        @myRecords
)
SELECT *
FROM
    cte c1
    LEFT JOIN cte c2
        ON c1.RowNum + 1 = c2.RowNum
WHERE
    c1.RowNum % 2 <> 0

As long as your Id joining logic is unclear, this will help In this case but you will need to add Id Filter or additional Identity column and row_number() in future I guess.
-- Pair rows recorded on the same calendar day. The sample timestamps differ by
-- milliseconds, so comparing the raw datetime would only match a row to itself.
-- T.Id < TT.Id stops self-matches and mirrored duplicate pairs.
SELECT
    T.*, TT.*
FROM
    [Table] AS T
INNER JOIN
    [Table] AS TT
    ON CAST(T.[Date] AS DATE) = CAST(TT.[Date] AS DATE)
    AND T.Id < TT.Id

You can use Cross Apply for the required result set.
-- For every row, attach each same-day row that has a higher ID, exposing the
-- attached row's columns with a "2" suffix.
SELECT
    A.[ID],
    A.[DATE],
    A.[NAME],
    A.[CASH],
    B.*
FROM #TABLE1 AS A
CROSS APPLY (
    SELECT
        nxt.[ID]   AS ID2,
        nxt.[DATE] AS DATE2,
        nxt.[NAME] AS NAME2,
        nxt.[CASH] AS CASH2
    FROM #TABLE1 AS nxt
    WHERE nxt.[ID] > A.[ID]
        AND CAST(nxt.[DATE] AS DATE) = CAST(A.[DATE] AS DATE)
) AS B

This will also return the same result:
-- Pair each row with the row whose id is one higher on the same calendar day.
-- The date comparison must be equality; <> would pair rows from different days.
select a.id, a.date, a.name, a.cash, b.id as id2, b.date as date2,
       b.name as name2, b.cash as cash2
from myTable a
inner join myTable b on a.id+1 = b.id
    and cast(a.date as date) = cast(b.date as date)

How to SUM Only One Time Per UniqueId in SQL?

I have two tables that look roughly like this:
Table A
DocumentId (*is unique) DocumentDate
1 2016-01-01
2 2016-01-01
3 2016-02-01
4 2016-03-01
and Table B
ContractId SnapshotTimeId NetFinanced
1 20160231 300
1 20160331 300
1 20160431 300
2 20160231 450
2 20160331 450
2 20160431 450
3 20160331 500
3 20160431 500
4 20160431 150
I would like the final table to look something like this:
DocumentDate NetFinanced
2016-01-01 750
2016-02-01 500
2016-03-01 150
I have tried the following and it doesn't work:
-- Asker's attempt: B holds one row per ContractId per SnapshotTimeId, so this
-- join adds NetFinanced once for every snapshot of a contract.
SELECT A.DocumentDate, SUM(B.NetFinanced)
FROM A
JOIN B on B.ContractId=A.DocumentId
GROUP BY A.DocumentDate
Any ideas? Thanks in advance

you can use distinct
-- Reduce B to one row per (ContractId, NetFinanced) pair before joining, so a
-- contract's amount is added once unless the amount differs between snapshots.
SELECT
    A.DocumentDate,
    SUM(contract_amount.NetFinanced)
FROM A
INNER JOIN (
    SELECT
        ContractId,
        NetFinanced
    FROM B
    GROUP BY
        ContractId,
        NetFinanced
) AS contract_amount
    ON contract_amount.ContractId = A.DocumentId
GROUP BY A.DocumentDate
the result of this will be different if the NetFinanced amount changes per SnapshotTimeId
if you want the most recent NetFinanced amount, you can use Row_number() to order the values.
-- Number each contract's snapshots from newest to oldest, then add up only the
-- newest (Rn = 1) amount per contract.
WITH snapshot_rank AS (
    SELECT
        ContractId,
        NetFinanced,
        ROW_NUMBER() OVER (PARTITION BY ContractId ORDER BY SnapshotTimeId DESC) AS Rn
    FROM B
)
SELECT
    A.DocumentDate,
    SUM(latest.NetFinanced)
FROM A
INNER JOIN snapshot_rank AS latest
    ON latest.ContractId = A.DocumentId
    AND latest.Rn = 1
GROUP BY A.DocumentDate

You have duplicate values for NetFinanced in TableB, of course the results won't give you what you want. You need to join TableA with the unique values (I assume) of ContractId and NetFinanced columns from TableB:
-- Distinct (ContractId, NetFinanced) pairs from TableB, joined to TableA and
-- totalled per DocumentDate.
WITH unique_amounts AS (
    SELECT DISTINCT
        ContractId,
        NetFinanced
    FROM dbo.TableB
)
SELECT
    A.DocumentDate,
    SUM(B.NetFinanced) AS NetFinanced
FROM dbo.TableA AS A
INNER JOIN unique_amounts AS B
    ON B.ContractId = A.DocumentId
GROUP BY A.DocumentDate;

Try Like this
-- Collapse B to one row per contract carrying its largest NetFinanced, then
-- total those amounts per DocumentDate.
SELECT
    A.DocumentDate,
    SUM(per_contract.NetFinanced)
FROM A
INNER JOIN (
    SELECT
        ContractId,
        MAX(NetFinanced) AS NetFinanced
    FROM B
    GROUP BY ContractId
) AS per_contract
    ON per_contract.ContractId = A.DocumentId
GROUP BY A.DocumentDate

SQL MIN Datetime based on first occuranceof a value in another column

This is what I have
ID Name DateTime Value Group
1 Mark 1/1/2010 0 1
2 Mark 1/2/2010 1 1
3 Mark 1/3/2010 0 1
4 Mark 1/4/2010 0 2
40 Mark 1/5/2010 1 2
5 Mark 1/9/2010 1 2
6 Mark 1/6/2010 1 2
7 Kelly 1/1/2010 0 3
8 Kelly 1/2/2010 1 3
9 Kelly 1/3/2010 1 3
10 Nancy 1/4/2010 0 4
11 Nancy 1/5/2010 0 4
12 Nancy 1/6/2010 1 5
13 Nancy 1/7/2010 0 5
What I want is to get the rows per "name" per "group" with minimum datetime after the value becomes 1. From the above example, I would need to get
3 Mark 1/3/2010 0 1
6 Mark 1/6/2010 1 2
9 Kelly 1/3/2010 1 3
13 Nancy 1/7/2010 0 5

Based on the description of your rules, I believe the output will actually be a bit different since 2010-01-05 was the first DateTime where the Value = 1 for Group 2 for Mark.
ID Name DateTime Value Group
3 Mark 2010-01-03 0 1
6 Mark 2010-01-06 1 2
9 Kelly 2010-01-03 1 3
13 Nancy 2010-01-07 0 5
The below code will work as demonstrated in this SQLFiddle.
-- For each Name/Group, return the earliest row dated strictly after the first
-- DateTime at which Value = 1.
SELECT sub.ID
, sub.Name
, sub.[DateTime]
, sub.Value
, sub.[Group]
FROM
(SELECT t.ID
, t.Name
, t.[DateTime]
, t.Value
, t.[Group]
-- numbers the rows that survive the WHERE below, earliest first, per Name/Group
, SequentialOrder = ROW_NUMBER() OVER
(PARTITION BY t.Name, t.[Group]
ORDER BY t.[DateTime])
FROM Test t
JOIN
-- mint: earliest DateTime with Value = 1 for each Name/Group
(SELECT Name
, [Group]
, MinimumDateTime = MIN([DateTime])
FROM Test
WHERE Value = 1
GROUP BY Name
, [Group]) mint
ON t.Name = mint.Name
AND t.[Group] = mint.[Group]
-- keep only rows later than that first Value = 1 row
WHERE t.[DateTime] > mint.MinimumDateTime) sub
WHERE sub.SequentialOrder = 1
ORDER BY ID;

Below is my query; it relies on the assumption that records are received in order of their dates.
-- TABLE, GROUP and DATE stand in for the real table and column names.
-- TBL_1: rows of every (NAME, GROUP) that has at least one VALUE = 1, numbered by DATE.
WITH TBL_1 AS
(
SELECT A.*, ROW_NUMBER() OVER(PARTITION BY NAME, GROUP ORDER BY DATE) AS RN
FROM TABLE AS A  -- alias added: A.* above had nothing to refer to
WHERE (NAME, GROUP) IN
(SELECT NAME, GROUP FROM TABLE WHERE VALUE = 1)
),
-- TBL_2: the VALUE = 1 rows themselves.
TBL_2 AS
(
SELECT * FROM TBL_1 WHERE VALUE = 1
),
-- TBL_3: rows that come after some VALUE = 1 row of the same (NAME, GROUP).
TBL_3 AS
(
SELECT A.*
FROM TBL_1 AS A
INNER JOIN TBL_2 AS B
ON B.NAME = A.NAME
AND B.GROUP = A.GROUP
AND A.RN > B.RN
)
-- Keep, per (NAME, GROUP), the earliest of those rows.
SELECT *
FROM TBL_3
WHERE (NAME, GROUP, DATE) IN
(SELECT NAME, GROUP, MIN(DATE) FROM TBL_3 GROUP BY NAME, GROUP)

In SQL Server 2012 you can do this:
-- Returns, per Name/Gr, the first row whose immediately preceding row (by DateTime) has Value = 1.
SELECT * FROM (
SELECT DISTINCT
ID,
Name,
DateTime,
Value,
Gr,
-- previous ID among the rows kept by WHERE F = 1 below; NULL marks the first such row
LAG(ID) OVER (PARTITION BY Name, Gr ORDER BY DateTime) F
FROM (
SELECT
ID,
Name,
DateTime,
Value,
Gr,
-- F = 1 when the preceding row in the same Name/Gr has Value = 1
CASE WHEN LAG(Value) OVER (PARTITION BY Name, Gr ORDER BY DateTime) = 1 THEN 1 ELSE 0 END F
FROM
T
) TT
WHERE F = 1
) TT WHERE F IS NULL
ORDER BY Gr, Name, DateTime
Fiddle: http://www.sqlfiddle.com/#!6/5a0fa2/19

using window functions:
-- rn orders the rows inside each Group/Name by DateTime; rnk identifies the Group/Name itself.
with cte as (
select
*,
row_number() over(partition by [Group], Name order by [DateTime]) as rn,
dense_rank() over(order by [Group], Name) as rnk
from Table1
)
select c1.*
from cte as c1
-- c2 is the row directly before c1 in the same Group/Name and must have Value = 1
inner join cte as c2 on c2.rn = c1.rn - 1 and c2.rnk = c1.rnk and c2.Value = 1
where
-- no row two or more positions earlier has Value = 1, i.e. c2 is the first Value = 1 row
not exists (select * from cte as c3 where c3.rn <= c1.rn - 2 and c3.rnk = c1.rnk and c3.Value = 1)
or apply:
-- Same result using APPLY: keep t1 when the row just before it has Value = 1
-- and no earlier row in the same Group/Name has Value = 1.
select t1.*
from Table1 as t1
-- t2: the latest row of the same Group/Name dated before t1
cross apply (
select top 1 t2.Value, t2.DateTime
from Table1 as t2
where
t2.[Group] = t1.[Group] and t2.Name = t1.Name and
t2.[DateTime] < t1.[DateTime]
order by t2.[Datetime] desc
) as t2
where
t2.Value = 1 and
-- t2 must be the first Value = 1 row: nothing before it has Value = 1
not exists (
select *
from Table1 as t3
where
t3.[Group] = t1.[Group] and t3.Name = t1.Name and
t3.[DateTime] < t2.[DateTime] and t3.Value = 1
)
sql fiddle demo
Update: I forgot to mention that your output seems to be incorrect - there should be id = 6 instead of 5 in the second row (see sql fiddle).

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Self join issue - sql

Related

SQL Server : smallest ROW_NUM in where condition, with subgroup pre-condition

SQL - spread previous values from one column into multiple new columns

Concrete Rows of Id 2 with Id 1 IF Date is Same and All Row Names Should be different in SQL Server 2008R2 and

How to SUM Only One Time Per UniqueId in SQL?

SQL MIN Datetime based on first occuranceof a value in another column

Categories

Resources