Lead and partition function - sql

This is my input table
-- Sample data for the question: several rows per (id, FN), some sharing a startdate.
CREATE TABLE #table1 (
    id        int,
    FN        varchar(20),
    startdate date,        -- the yyyy-mm-dd literals below convert implicitly to date
    id1       varchar(1)   -- explicit length; a bare varchar in DDL means varchar(1)
);

-- The rows are already distinct, so a VALUES list replaces the deduplicating UNION chain.
INSERT INTO #table1 (id, FN, startdate, id1)
VALUES
    (1, 'Joe',  '2019-01-01', 'A'),
    (1, 'Joe',  '2019-01-01', 'B'),
    (1, 'Joe',  '2019-01-05', 'C'),
    (1, 'Joe',  '2019-01-05', 'D'),
    (1, 'Joe',  '2019-01-06', 'E'),
    (2, 'john', '2019-01-05', 'F'),
    (2, 'john', '2019-01-06', 'G'),
    (2, 'john', '2019-01-06', 'H'),
    (2, 'john', '2019-01-07', 'I');
I tried the following code
-- Attempt: per (id, fn), rank the start dates and fetch the following row's start date.
-- LEAD steps to the next ROW in the ordering, so a row whose startdate is shared with
-- another row can get that same date back instead of the next distinct one.
SELECT
    t.*,
    DENSE_RANK() OVER (PARTITION BY t.id, t.fn ORDER BY t.startdate),
    LEAD(t.startdate) OVER (PARTITION BY t.id, t.fn ORDER BY t.startdate)  -- default offset is 1
FROM #table1 AS t
ORDER BY t.id;
But I require the following output:

I know that there might be a better approach, but at least this is a working solution:
-- For every row, enddate is the smallest startdate of the same (id, fn)
-- that is strictly later than the row's own startdate (NULL when there is none).
SELECT
    cur.*,
    (
        SELECT MIN(nxt.startdate)
        FROM #table1 AS nxt
        WHERE nxt.id = cur.id
          AND nxt.fn = cur.fn
          AND nxt.startdate > cur.startdate
    ) AS enddate
FROM #table1 AS cur;
Result

Related

Find duplicate sets of data grouped by foreign key

How can I check whether the table above contains duplicate groups of rows, where a group is all the rows sharing an id? For example, the first two rows of id 1 match two rows of id 2, but id 2 also has a third row that matches no row of id 1, so the two groups are not duplicates. There can be any number of ids.
I tried to do it with GROUP BY and string_agg, but it didn't work.
Here what I tried:
-- Sample data: id identifies a group; (m1, m2, n, n2) is the row content to compare.
-- A table variable's name must start with @ (a # prefix is only valid for temp tables).
DECLARE @t2 TABLE (m1 int, m2 int, n varchar(50), n2 varchar(50), id int);

INSERT INTO @t2 (m1, m2, n, n2, id)
VALUES
    (3, 1, 'c', '',  1),
    (2, 1, 's', 'o', 1),
    (2, 1, 's', 'o', 2),
    (3, 1, 'c', '',  2),
    (3, 1, 'f', '',  2);

-- Attempt: this reports 'Same.' as soon as ANY single row content occurs more than
-- once; it never checks that the complete row sets of two ids match.
IF EXISTS (
    SELECT *
    FROM @t2
    GROUP BY m1, m2, n, n2
    HAVING COUNT(*) > 1
)
BEGIN
    SELECT 'Same.';
END
ELSE
BEGIN
    SELECT 'not found';
END
Any help here will be great.
Thanks
Thanks, Iptr. As per your solution in the comments, I am posting it here:
-- Sample data: id identifies a group; (m1, m2, n, n2) is the row content to compare.
-- A table variable's name must start with @ (a # prefix is only valid for temp tables).
DECLARE @t2 TABLE (m1 int, m2 int, n varchar(5), n2 varchar(5), id int);

INSERT INTO @t2 (m1, m2, n, n2, id)
VALUES
    (3, 1, 'c', '',  1),
    (2, 1, 's', 'o', 1),
    (2, 1, 's', 'o', 2),
    (3, 1, 'c', '',  2),
    (3, 1, 'f', '',  2),
    (3, 1, 'c', '',  4),
    (2, 1, 's', 'o', 4),
    (3, 1, 'c', '',  10),
    (2, 1, 's', 'o', 10),
    (3, 1, 'c', '',  5);

-- Option 1: list the pairs of ids whose row sets are identical.
-- Rows of two different ids with the same row count are matched on content; a pair
-- qualifies only when the number of matched rows equals that row count.
-- To use it as a yes/no check, wrap the query in IF EXISTS (...).
-- NOTE(review): assumes an id never contains the same row twice; repeated rows would
-- inflate the number of matches.
SELECT a.id, b.id
FROM
(
    SELECT *, COUNT(*) OVER (PARTITION BY id) AS idcnt
    FROM @t2
) AS a
INNER JOIN
(
    SELECT *, COUNT(*) OVER (PARTITION BY id) AS idcnt
    FROM @t2
) AS b
    ON  a.id < b.id          -- "<" reports each pair once; "<>" would report it in both orders
    AND a.m1 = b.m1
    AND a.m2 = b.m2
    AND a.n  = b.n
    AND a.n2 = b.n2
    AND a.idcnt = b.idcnt
GROUP BY a.id, b.id, a.idcnt
HAVING COUNT(*) = a.idcnt;

-- Option 2: serialise each id's rows, in a fixed order, to one JSON string and group
-- the ids by that string; every group with more than one id is a set of duplicates.
-- To use it as a yes/no check, wrap the query in IF EXISTS (...).
SELECT STRING_AGG(i.id, ',')
FROM
(
    SELECT DISTINCT id
    FROM @t2
) AS i
CROSS APPLY
(
    SELECT r.m1, r.m2, r.n, r.n2
    FROM @t2 AS r
    WHERE r.id = i.id
    ORDER BY r.m1, r.m2, r.n, r.n2   -- fixed order so equal sets serialise identically
    FOR JSON PATH
) AS j(j)
GROUP BY j.j
HAVING COUNT(*) > 1;
You can count how many different ids for each set of rows. If the count is more than one, then there are duplicates. For example:
-- Row contents that occur under more than one id, with the number of distinct ids.
SELECT
    src.m1,
    src.m2,
    src.n,
    src.n2,
    COUNT(DISTINCT src.id) AS cnt
FROM t AS src
GROUP BY
    src.m1,
    src.m2,
    src.n,
    src.n2
HAVING COUNT(DISTINCT src.id) > 1

How to use ROW_NUMBER when grouping records?

I have the following:
-- Sample data: nine items, each with a description and an amount.
-- A table variable's name must start with @ (a # prefix is only valid for temp tables).
DECLARE @items TABLE
(
    ItemId int NOT NULL,
    [Description] varchar(255) NOT NULL,
    Amount money NOT NULL
);

INSERT INTO @items (ItemId, [Description], Amount)
VALUES
    (1, 'A', 10),
    (2, 'A', 10),
    (3, 'B', 11),
    (4, 'B', 11),
    (5, 'B', 11),
    (6, 'C', 12),
    (7, 'C', 12),
    (8, 'A', 10),
    (9, 'A', 10);

-- Attempt: every window is partitioned by ItemId, and each ItemId occurs only once in
-- the sample data, so each partition is a single row and nothing is grouped.
SELECT
    ROW_NUMBER() OVER (PARTITION BY b.ItemId ORDER BY b.[Description]),
    [Description],
    COUNT(ItemId) OVER (PARTITION BY b.ItemId),
    SUM(Amount) OVER (PARTITION BY b.ItemId)
FROM @items b
The result should be:
1, A, 4, 40
2, B, 3, 33
3, C, 2, 24
However the items are not being grouped.
So how do I need to use ROW_NUMBER to group records?
Is this what you want?
-- One output row per description: a running number, the description, the number of
-- items and their total amount. GROUP BY does the grouping; ROW_NUMBER only numbers
-- the grouped rows. The source is the @items table variable (a #items temp table is
-- never created).
SELECT
    ROW_NUMBER() OVER (ORDER BY i.[Description]) AS RowNum,
    i.[Description],
    COUNT(*) AS ItemCount,
    SUM(i.Amount) AS TotalAmount
FROM @items AS i
GROUP BY i.[Description]
ORDER BY i.[Description];
Here is a rextester.
If you don't want to use GROUP BY by itself, you may use a subquery with two ROW_NUMBER() calls, something like this:
-- Alternative without GROUP BY: compute the per-description count and sum as window
-- aggregates, keep one row per description (rn = 1), then number the survivors.
-- The source is the @items table variable (a #items temp table is never created).
SELECT
    ROW_NUMBER() OVER (ORDER BY t.[Description]),
    t.[Description],
    t.cnt,
    t.summ
FROM (
    SELECT
        -- Ordering inside a partition by its own key is arbitrary; any one row will do.
        ROW_NUMBER() OVER (PARTITION BY b.[Description] ORDER BY b.[Description]) AS rn,
        b.[Description],
        COUNT(b.ItemId) OVER (PARTITION BY b.[Description]) AS cnt,
        SUM(b.Amount) OVER (PARTITION BY b.[Description]) AS summ
    FROM @items AS b
) AS t
WHERE t.rn = 1
And anyway you shouldn't group data by the ItemId - it's a wrong way to achieve your aim

SQL Server 2016 GROUP BY + HAVING only get different records by JOIN

We have two tables, one for users and one for records. Records have a reference number and link to a user by a FK. We want to get reference numbers from the records table where the same reference number was used for different users. Two users are the same user if they have the same last name and same date of birth. Two users are different users if either last name or DOB are different.
Sample data:
-- Sample data. Table variable names must start with @ (a # prefix is only valid for
-- temp tables). VARCHAR lengths are explicit: a bare VARCHAR in a declaration means
-- VARCHAR(1).
DECLARE @user TABLE (id INT, fname VARCHAR(1), lname VARCHAR(1), dob VARCHAR(3));
DECLARE @record TABLE (id INT, refno VARCHAR(1), fk INT);

INSERT INTO @user (id, fname, lname, dob)
VALUES
    -- first two are the same person: same first name + last name + date of birth
    (1, 'a', 'a', 'jan'),
    (2, 'a', 'a', 'jan'),
    -- next two are the same person: same first name + last name + date of birth
    (3, 'b', 'b', 'feb'),
    (4, 'b', 'b', 'feb'),
    -- next two are different people: same first name + last name but different date of birth
    (5, 'c', 'c', 'mar'),
    (6, 'c', 'c', 'apr'),
    -- next two are different people: same first name + date of birth but different last name
    (7, 'd', 'e', 'may'),
    (8, 'd', 'f', 'may'),
    -- next two are different people: same first name + date of birth but different last name
    -- (same values as users 7 and 8)
    (9, 'd', 'e', 'may'),
    (10, 'd', 'f', 'may');

-- Records with simplified reference numbers; fk holds a user id.
INSERT INTO @record (id, refno, fk)
VALUES
    (10, '1', 1),
    (11, '1', 2),
    (12, '2', 3),
    (13, '2', 4),
    (14, '3', 5),
    (15, '3', 6),
    (16, '3', 7),
    (17, '3', 8),
    (18, 'a', 9),
    (19, 'a', 10);
What I've tried:
-- Attempt: the grouping key includes the person (lname, dob), so COUNT > 1 finds
-- reference numbers used more than once by the SAME person, which is the opposite
-- of what is wanted. Sources are the @record and @user table variables.
SELECT
    r.refno,
    COUNT(1) AS cnt
FROM @record AS r
INNER JOIN @user AS u ON u.id = r.fk
GROUP BY
    u.lname,
    r.refno,
    u.dob
HAVING COUNT(1) > 1
Which returns:
refno cnt
1 2
2 2
but the expected output is:
refno cnt
3 4
a 2
Thanks.
Just fix your SQL Query with below :
-- Reference numbers used by more than one person, with the number of records per
-- reference number. A person is identified by (lname, dob), so a refno qualifies when
-- its records do not all share one lname or do not all share one dob.
-- Filtering on "exactly one record per person" would also report a refno that has a
-- single record, and would miscount when one of several people reuses the refno.
-- NOTE(review): MIN/MAX skip NULLs; confirm how a missing lname/dob should be treated.
SELECT
    r.refno,
    COUNT(*) AS cnt
FROM @record AS r
INNER JOIN @user AS u ON u.id = r.fk
GROUP BY r.refno
HAVING MIN(u.lname) <> MAX(u.lname)
    OR MIN(u.dob)   <> MAX(u.dob);
Result :
refno cnt
3 4
a 2
You could try this:
-- Reduce the records to distinct (person, refno) combinations, where a person is
-- (lname, dob), then keep the reference numbers that belong to more than one person.
-- cnt is the number of distinct people per refno. Sources are the @record and @user
-- table variables.
SELECT
    t.refno,
    COUNT(1) AS cnt
FROM (
    SELECT DISTINCT
        u.lname,
        r.refno,
        u.dob
    FROM @record AS r
    INNER JOIN @user AS u ON u.id = r.fk
) AS t
GROUP BY t.refno
HAVING COUNT(1) > 1;

SQL Query Using AND OR

I have two tables: Customers and Orders.
I need to know which Customers have ordered "Bread" AND (Milk or Ice Cream) in a given month.
How should I write the SQL query?
Thanks.
As others have said in the comments, you should really try to improve your question so that people are more inclined to answer and help you.
Setting up some test data (making assumptions about your schema):
-- Assumed schema: one row per customer, one row per ordered product.
CREATE TABLE Customers (
    CustomerID INT,
    CustomerName VARCHAR(50)
);

CREATE TABLE Orders (
    OrderId INT,
    OrderDate DATETIME,
    CustomerID INT,
    Product VARCHAR(50)
);

-- The rows are already distinct, so VALUES lists replace the deduplicating UNION chains.
INSERT INTO Customers (CustomerID, CustomerName)
VALUES
    (1, 'John Smith'),
    (2, 'Jane Doe'),
    (3, 'Rachel Booth'),
    (4, 'Steve Barnes'),
    (5, 'Nancy Green'),
    (6, 'Phil Jones');

INSERT INTO Orders (OrderId, OrderDate, CustomerID, Product)
VALUES
    (1,  '2016-09-01', 1, 'Bread'),
    (2,  '2016-09-10', 3, 'Milk'),
    (3,  '2016-09-19', 1, 'Milk'),
    (4,  '2016-10-07', 4, 'Bread'),
    (5,  '2016-10-13', 2, 'Milk'),
    (6,  '2016-10-15', 4, 'Milk'),
    (7,  '2016-10-19', 6, 'Milk'),
    (8,  '2016-10-21', 6, 'Ice Cream'),
    (9,  '2016-10-27', 1, 'Milk'),
    (10, '2016-10-28', 4, 'Milk'),
    (11, '2016-11-06', 5, 'Ice Cream'),
    (12, '2016-11-09', 5, 'Bread');
And setting the desired month to check:
-- Month to report on, given as the first day of that month.
-- A local variable's name must start with @.
DECLARE @Month DATETIME;
SET @Month = '2016-09-01';
I would get a list of Customers fitting your logic like this:
-- Customers who, within the month of @Month, ordered Bread and also ordered Milk or
-- Ice Cream. Orders is joined twice: ob supplies the Bread order, omi the Milk /
-- Ice Cream order. DISTINCT collapses customers with several qualifying order pairs.
-- Both order dates are tested against a half-open range [month start, next month
-- start) so the OrderDate column is compared directly instead of being wrapped in
-- date functions; the month start is derived from @Month, so any date in the month
-- selects that month.
SELECT DISTINCT c.CustomerName
FROM Customers AS c
INNER JOIN Orders AS ob  ON ob.CustomerID  = c.CustomerID
INNER JOIN Orders AS omi ON omi.CustomerID = c.CustomerID
WHERE ob.Product = 'Bread'
  AND omi.Product IN ('Milk', 'Ice Cream')
  AND ob.OrderDate  >= DATEADD(month, DATEDIFF(month, 0, @Month), 0)
  AND ob.OrderDate  <  DATEADD(month, DATEDIFF(month, 0, @Month) + 1, 0)
  AND omi.OrderDate >= DATEADD(month, DATEDIFF(month, 0, @Month), 0)
  AND omi.OrderDate <  DATEADD(month, DATEDIFF(month, 0, @Month) + 1, 0);

Query multiple records of a group that has max date....sorta

Example code:
-- Sample data: every ID has three processing dates (expressed as day offsets from
-- now), and the names 'abc', 'def' and 'hij' are recorded on each of those dates.
-- A table variable's name must start with @ (a # prefix is only valid for temp tables).
DECLARE @TABLE TABLE (ID int, Name varchar(50), Date Datetime2);

-- Capture the current time once so rows with the same offset get identical values.
DECLARE @now datetime = GETDATE();

-- 12 (ID, offset) pairs x 3 names = the same 36 rows as one UNION ALL branch per row.
INSERT INTO @TABLE (ID, Name, Date)
SELECT b.ID, n.Name, DATEADD(day, b.day_offset, @now)
FROM (VALUES
        (1,  0), (1, -2), (1, -4),
        (2, -1), (2, -3), (2, -4),
        (3,  2), (3, -4), (3, -5),
        (4,  1), (4, -4), (4, -5)
     ) AS b (ID, day_offset)
CROSS JOIN (VALUES ('abc'), ('def'), ('hij')) AS n (Name);

SELECT ID, Name, Date FROM @TABLE;
The data output that I want reflects output of the following records
3 abc 2013-09-18
3 def 2013-09-18
3 hij 2013-09-18
Description: I would like to query the data for the ID/Name that was processed most recently (Max(Date)) (there can be multiple ID's if they were processed at the same date/time)
My try...
-- Attempt: RANK and MAX are both partitioned by (ID, Name), so every ID/Name pair
-- keeps its own latest row; nothing compares the dates of different IDs.
-- The dates are also compared as style-101 (mm/dd/yyyy) text, so MAX and ORDER BY
-- sort them as strings rather than chronologically.
-- NOTE(review): varchar(8) is shorter than the 10-character mm/dd/yyyy text that
-- style 101 produces; confirm what the conversion yields for a Datetime2 column.
-- The source is the @TABLE table variable.
SELECT DISTINCT A.*, B.My_Rank
FROM @TABLE A
INNER JOIN
(
    SELECT DISTINCT
        ID,
        Name,
        CONVERT(varchar(8), Date, 101) AS Date,
        RANK() OVER (PARTITION BY ID, Name ORDER BY CONVERT(varchar(8), Date, 101) DESC) My_Rank,
        MAX(CONVERT(varchar(8), Date, 101)) OVER (PARTITION BY ID, Name) Max_Date
    FROM @TABLE
    GROUP BY ID, Name, CONVERT(varchar(8), Date, 101)
) B
    ON A.ID = B.ID AND CONVERT(varchar(8), A.Date, 101) = B.Max_Date
WHERE My_Rank = 1
Clearly, this logic is not working out. I want to get records for ID/Name/Date based on the latest processed ID/Name.
Thank you
You were close, but you don't have to join back (and you probably don't want the PARTITION BY id):
-- Rank all rows by calendar day, newest first, with no partitioning, and keep every
-- row that falls on the most recent day (RANK gives all ties rank 1).
-- The source is the @TABLE table variable.
SELECT *
FROM
(
    SELECT
        ID,
        Name,
        Date,
        RANK() OVER (ORDER BY CAST(Date AS DATE) DESC) AS My_Rank
    FROM @TABLE
) AS dt
WHERE dt.My_Rank = 1
AFAIK SS2008 supports CAST(x AS DATE)
You want to use max() with the partition by clause:
-- For each id, keep the rows whose date equals that id's latest date; ties are all
-- returned. "date" is written as a quoted identifier.
-- The source is the @TABLE table variable.
SELECT *
FROM (
    SELECT
        a.*,
        MAX("date") OVER (PARTITION BY id) AS maxdate
    FROM @TABLE AS a
) AS a
WHERE "date" = maxdate;
This can return multiple rows. If you only want one, even when there are ties, then use row_number():
-- For each id, keep exactly one row: the one with the latest date. When several rows
-- tie on that date, ROW_NUMBER picks one of them arbitrarily.
-- The source is the @TABLE table variable.
SELECT *
FROM (
    SELECT
        a.*,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY "date" DESC) AS seqnum
    FROM @TABLE AS a
) AS a
WHERE seqnum = 1;