Related
Following @mdb's answer on applying pagination in SQL Server, I find it hard to retrieve distinct records when the main table is joined to other tables in a one-to-many relationship, e.g. a person has many addresses.
As a use case, suppose I want to retrieve all persons who have an address in New York. Given the tables #temp_person and #temp_addresses, I would join them on PersonID = OwnerID.
The problem arises when a person has multiple matching addresses: the result set then contains duplicate records.
To make it clearer, here's a sample query with data:
Sample Data:
-- Session-scoped temp tables for the example: one person can own many addresses.
CREATE TABLE #temp_person (
    PersonID INT          NOT NULL,
    FullName VARCHAR(MAX) NOT NULL
);

CREATE TABLE #temp_addresses (
    AddressID INT IDENTITY NOT NULL,  -- generated automatically on insert
    OwnerID   INT          NOT NULL,  -- joined to #temp_person.PersonID by the queries
    Address1  VARCHAR(MAX),
    City      VARCHAR(MAX)
);

INSERT INTO #temp_person (PersonID, FullName)
VALUES
    (1, 'Sample One'),
    (2, 'Sample Two'),
    (3, 'Sample Three');

-- Every person has at least one 'New York' address; person 1 has two of them.
INSERT INTO #temp_addresses (OwnerID, Address1, City)
VALUES
    (1, 'Somewhere East Of', 'New York'),
    (1, 'Somewhere West Of', 'New York'),
    (2, 'blah blah blah', 'Atlantis'),
    (2, 'Address2 Of Sample Two', 'New York'),
    (2, 'Address3 Of Sample Two', 'Nowhere City'),
    (3, 'Address1 Of Sample Three', 'New York'),
    (3, 'Address2 Of Sample Three', 'Seattle');

-- Cleanup, for re-running the example in the same session:
--drop table #temp_addresses, #temp_person
Pagination Query:
-- Asker's paginated query, kept as posted: it reproduces the duplicate-row problem.
-- The outer query formats "RowNum/TotalCount" and keeps only the requested row window.
SELECT
(
CAST( RowNum as varchar(MAX) )
+ '/'
+ CAST(TotalCount as varchar(MAX))
) as ResultPosition
, PersonID
, FullName
FROM (
-- DISTINCT cannot remove the duplicated persons here: ROW_NUMBER() gives every
-- joined row a different RowNum, so no two rows of this select list are equal.
SELECT DISTINCT
ROW_NUMBER() OVER(ORDER BY p.FullName ASC) as RowNum
, p.PersonID
, p.FullName
-- Counts joined (person, address) rows, not distinct persons.
, Count(1) OVER() as TotalCount
FROM #temp_person p
LEFT JOIN #temp_addresses a
ON p.PersonID = a.OwnerID
-- Filtering on the outer-joined table's column drops persons without a matching
-- address row, so this LEFT JOIN returns the same rows as an inner join.
WHERE City = 'New York'
) as RowConstrainedResult
-- Page window: rows 1 through 3.
WHERE RowNum > 0 AND RowNum <= 3
ORDER BY RowNum
Expected Results:
ResultPosition PersonID FullName
1/3 1 Sample One
2/3 2 Sample Two
3/3 3 Sample Three
Actual Results:
ResultPosition PersonID FullName
1/4 1 Sample One
2/4 1 Sample One
3/4 3 Sample Three
As you can see, the inner query returns multiple records per person because of the join with #temp_addresses.
Is there a way to return only unique records by PersonID?
UPDATE:
The actual use case is an "Advanced Search" feature where the user can search using different filters, e.g. name, first name, last name, birthdate, address, etc. The <WHERE_CLAUSE> and <JOIN_STATEMENTS> in the query are added dynamically, so GROUP BY is not applicable here.
Also, please address the "Pagination" scheme for this question. That is, I want to retrieve only N results starting from Start, while also retrieving the total count of the results as if they were not paged, e.g. I retrieve only 25 rows out of a total of 500 results.
Just group by PersonID; there is no need for a subquery:
-- One row per person who has at least one 'New York' address, labelled "<position>/<total>".
-- GROUP BY p.PersonID collapses the duplicate rows produced by the one-to-many join.
-- The window functions are evaluated after grouping, so they number and count
-- persons rather than addresses.
SELECT
    -- Ordering by the unique PersonID makes the numbering repeatable between runs;
    -- ORDER BY (SELECT 1) would leave the row order up to the engine.
    CAST(ROW_NUMBER() OVER (ORDER BY p.PersonID) AS varchar(max)) + '/' +
    CAST(COUNT(1) OVER () AS varchar(max)) AS ResultPosition,
    p.PersonID,
    MAX(p.FullName) AS FullName
FROM #temp_person p
-- The City filter discards unmatched persons anyway, so an inner join states the intent.
INNER JOIN #temp_addresses a ON p.PersonID = a.OwnerID
WHERE a.City = 'New York'
GROUP BY p.PersonID
ORDER BY p.PersonID
EDIT : I would use CTE for the pagination
-- Paginated variant: number the distinct persons once inside the CTE, then slice by rn.
;with cte as
(
    SELECT
        -- A deterministic order (unique PersonID) is required for stable paging:
        -- with ORDER BY (SELECT 1) the same person could land on different pages.
        row_number() over (order by p.PersonID) as rn,
        -- Evaluated after GROUP BY, so this is the number of distinct persons
        -- across all pages, not just the returned slice.
        count(1) over () as total,
        p.PersonID,
        max(p.FullName) as FullName
    FROM #temp_person p
    INNER JOIN #temp_addresses a ON p.PersonID = a.OwnerID
    WHERE a.City = 'New York'
    group by p.PersonID
)
select
    rn,
    cast(rn as varchar(max)) + '/' + cast(total as varchar(max)) as ResultPosition,
    PersonID,
    FullName
from cte
-- Page window: rows 1 through 2.
where rn > 0 and rn <= 2
order by rn
Result (all three matching persons, i.e. without the `rn <= 2` page filter):
ResultPosition PersonID FullName
1/3 1 Sample One
2/3 2 Sample Two
3/3 3 Sample Three
You need to have distinct rows before applying ROW_NUMBER().
If you filter by City, there is no need for a LEFT JOIN. Use an INNER JOIN instead.
-- Deduplicate the matching persons first, then number and count the distinct rows.
;with ny_persons as (
    select distinct
        p.PersonID,
        p.FullName
    from #temp_person as p
    inner join #temp_addresses as a
        on a.OwnerID = p.PersonID
    where a.City = 'New York'
)
select
    ResultPosition = cast(row_number() over (order by ny_persons.PersonID) as varchar(max))
                     + '/'
                     + cast(count(ny_persons.PersonID) over () as varchar(max)),
    ny_persons.PersonID,
    ny_persons.FullName
from ny_persons
EDIT:
Considering pagination
-- Page selector: @page is 1-based, @rowsPage is the page size.
declare @page int = 1, @rowsPage int = 25;

-- Step 1: one row per person with a 'New York' address (removes the join fan-out).
with matched as (
    select distinct
        p.PersonID,
        p.FullName
    from #temp_person p
    inner join #temp_addresses a on p.PersonID = a.OwnerID
    where a.City = 'New York'
),
-- Step 2: number the distinct persons and take the total BEFORE the page filter,
-- so the total covers all pages and is not inflated by duplicate join rows.
numbered as (
    select
        position = row_number() over (order by PersonID),
        total    = count(*) over (),
        PersonID,
        FullName
    from matched
)
-- Step 3: return only the requested page.
-- The duplicate "position" column that the earlier "position, ..., *" select list
-- produced is not repeated here.
select
    position,
    ResultPosition = cast(position as varchar(10)) + '/' + cast(total as varchar(10)),
    PersonID,
    FullName
from numbered
where position between @rowsPage * (@page - 1) + 1 and @rowsPage * @page
order by position
Geoman Yabes, check whether this helps. It gives the results expected in your example, and you can paginate using RowNum:
-- Paged list of persons with a 'New York' address: RowNum is the position,
-- TotalRows is the number of distinct matching persons.
SELECT
    Filtered.RowNum,
    Filtered.TotalRows,
    Filtered.PersonId,
    Filtered.FullName
FROM (
    SELECT
        ROW_NUMBER() OVER (ORDER BY OnePerPerson.PersonID ASC) AS RowNum,
        COUNT(1) OVER () AS TotalRows,
        OnePerPerson.PersonID AS PersonId,
        OnePerPerson.FullName
    FROM (
        SELECT
            -- ROW_NUMBER (not RANK) guarantees exactly one row with Ranking = 1 per
            -- person. RANK gives tied rows the same value, so two addresses with an
            -- identical Address1 would both pass the filter and duplicate the person.
            -- AddressID breaks ties so the chosen row is repeatable.
            ROW_NUMBER() OVER (
                PARTITION BY p.PersonID
                ORDER BY a.Address1 ASC, a.AddressID ASC
            ) AS Ranking,
            p.PersonID,
            p.FullName
        FROM #temp_person p
        INNER JOIN #temp_addresses a ON p.PersonID = a.OwnerID
        WHERE a.City = 'New York'
    ) AS OnePerPerson
    WHERE OnePerPerson.Ranking = 1
) AS Filtered
-- Page window: rows 1 through 4.
WHERE Filtered.RowNum > 0 AND Filtered.RowNum <= 4
ORDER BY Filtered.RowNum
Sample Data:
-- Extended sample set: six persons, each with exactly one or two 'New York' addresses.
INSERT INTO #temp_person (PersonID, FullName)
VALUES
    (1, 'Sample One'),
    (2, 'Sample Two'),
    (3, 'Sample Three'),
    (4, 'Sample 4'),
    (5, 'Sample 5'),
    (6, 'Sample 6');

INSERT INTO #temp_addresses (OwnerID, Address1, City)
VALUES
    (1, 'Somewhere East Of', 'New York'),
    (1, 'Somewhere West Of', 'New York'),
    (2, 'blah blah blah', 'Atlantis'),
    (2, 'Address2 Of Sample Two', 'New York'),
    (2, 'Address3 Of Sample Two', 'Nowhere City'),
    (3, 'Address1 Of Sample Three', 'New York'),
    (3, 'Address2 Of Sample Three', 'Seattle'),
    (4, 'Address1 Of Sample 4', 'New York'),
    (4, 'Address1 Of Sample 4', 'New York 2'),  -- same Address1, different City
    (5, 'Address1 Of Sample 5', 'New York'),
    (6, 'Address1 Of Sample 6', 'New York');
I am trying to produce the result shown in the picture below with a native SQL query, but I am not sure whether it can be done using SQL alone.
This is the query I have so far; I don't know how to take it further:
-- Asker's attempt, kept as posted: per-client monthly receipt counts joined to a
-- subquery holding the same per-client, per-month counts.
SELECT
receipts.client_code clientCode,
date_trunc('MON', receipts.create_date) monthYear,
COUNT(date_trunc('MON', receipts.create_date)) receipts,
subReceipts.total total
FROM receipts
-- Derived table: one row per (client, month) with that month's receipt count.
LEFT JOIN (SELECT
receipts.client_code clientCode,
date_trunc('MON', receipts.create_date) monthYear,
COUNT(date_trunc('MON', receipts.create_date)) total
FROM receipts
GROUP BY
receipts.client_code,
date_trunc('MON' ,receipts.create_date)
-- NOTE(review): this inner ORDER BY does not determine the order of the outer result.
ORDER BY
date_trunc('MON' ,receipts.create_date)
-- The join matches on client only, not on month, so every receipt row is paired
-- with every monthly row of the same client. "total" is therefore some month's
-- count, not a cumulative total.
) subReceipts ON subReceipts.clientCode = receipts.client_code
GROUP BY
receipts.client_code,
date_trunc('MON' ,receipts.create_date),
subReceipts.total
ORDER BY
date_trunc('MON' ,receipts.create_date)
Sample sql data and db table create script:
-- Receipts issued to clients; create_date is a calendar date with no time part.
CREATE TABLE receipts (
    receipt_id  int     PRIMARY KEY,
    client_code varchar NOT NULL,
    create_date date    NOT NULL
);

-- Sample rows for client 'fx90': 1 receipt in January, 2 in February, 3 in March 2016.
INSERT INTO receipts (receipt_id, client_code, create_date)
VALUES
    (1, 'fx90', to_date('2016/01/11', 'yyyy/MM/dd')),
    (2, 'fx90', to_date('2016/02/12', 'yyyy/MM/dd')),
    (3, 'fx90', to_date('2016/02/20', 'yyyy/MM/dd')),
    (4, 'fx90', to_date('2016/03/11', 'yyyy/MM/dd')),
    (5, 'fx90', to_date('2016/03/12', 'yyyy/MM/dd')),
    (6, 'fx90', to_date('2016/03/19', 'yyyy/MM/dd'));
Example result
Assuming mysql, you could just do:
-- MySQL: receipts per client and month plus a running total across the result.
-- User variables are written with '@'. A '#' starts a comment in MySQL, so it
-- cannot be used as the variable sigil.
SET @running_total := 0;

SELECT
    client_code,
    CONCAT(MONTH(create_date), ' - ', YEAR(create_date)) AS month_year,
    COUNT(receipt_id) AS receipts_month,
    -- NOTE(review): MySQL does not guarantee the evaluation order of user-variable
    -- assignments within a statement. On MySQL 8.0+ prefer a window function, e.g.
    -- SUM(COUNT(receipt_id)) OVER (ORDER BY YEAR(create_date), MONTH(create_date)).
    (@running_total := @running_total + COUNT(receipt_id)) AS total_receipts
FROM receipts
GROUP BY client_code, YEAR(create_date), MONTH(create_date)
-- Order chronologically by the grouped expressions. receipt_id is neither grouped
-- nor aggregated, so it has no single value per output row to sort on.
ORDER BY YEAR(create_date), MONTH(create_date);
For postgresql:
-- PostgreSQL: receipts per client and month, with a cumulative total per client.
SELECT clientCode,
       monthYear,
       receipts,
       -- PARTITION BY keeps each client's running total separate; without it the
       -- sum would accumulate across all clients that share the same months.
       sum(receipts) OVER (PARTITION BY clientCode ORDER BY monthYear) AS total
FROM (
    -- One row per (client, month) with the number of receipts in that month.
    SELECT receipts.client_code AS clientCode,
           date_trunc('MON', receipts.create_date) AS monthYear,
           count(*) AS receipts
    FROM receipts
    GROUP BY receipts.client_code,
             date_trunc('MON', receipts.create_date)
) x
-- clientCode is a tie-breaker so the output order is deterministic with several clients.
ORDER BY monthYear, clientCode
I create table Appointments with this structure:
-- Appointment log: one row per (person, appointment date).
CREATE TABLE Appointments
(
    [Id] bigint,
    [Name] varchar(250),
    [DateInit] date
);

-- Dates are written as unseparated yyyymmdd literals, which SQL Server reads the
-- same way under every language / DATEFORMAT setting. The day/month/year form
-- ('20/06/2016') fails to convert when the session date format is mdy.
INSERT INTO Appointments ([Id], [Name], [DateInit])
VALUES
    (1000, 'Lorena', '20160603'),
    (1000, 'Lorena', '20160601'),
    (1000, 'Lorena', '20160608'),
    (1000, 'Lorena', '20160610'),
    (1000, 'Lorena', '20160602'),
    (1000, 'Lorena', '20160620'),
    (7000, 'Susan', '20160604'),
    (7000, 'Susan', '20160608'),
    (7000, 'Susan', '20160609'),
    (7000, 'Susan', '20160601');
This is the final result:
For each person, I need the most recent appointment before today and the next appointment after today. For example, if today is '03/06/2016' (dd/MM/yyyy), the result I need is something like this:
Name Last Visit Next Visit
----- ---------- -----------
Lorena 2016-06-02 2016-06-08
Susan 2016-06-01 2016-06-04
How can I get this result?
Thanks
Do a GROUP BY, use case expressions to pick max previous appointment, and min future appointment:
-- One row per name: the latest appointment strictly before today and the earliest
-- strictly after today. A CASE without ELSE yields NULL, which MAX/MIN ignore.
SELECT
    name,
    MAX(CASE WHEN DateInit < CAST(GETDATE() AS date) THEN DateInit END) AS LastVisit,
    MIN(CASE WHEN DateInit > CAST(GETDATE() AS date) THEN DateInit END) AS NextVisit
FROM Appointments
GROUP BY name
I'd do this as joins to the previous and next visit, something like this;
-- Previous and next appointment per person, each computed in its own derived table
-- and joined back by ID. DISTINCT collapses the one-row-per-appointment fan-out of "a".
SELECT DISTINCT
     a.ID
    ,a.NAME
    ,l.LastVisit
    ,n.NextVisit
FROM Appointments a
LEFT JOIN (
    SELECT ID
        ,MIN(DateInit) NextVisit
    FROM Appointments
    -- Compare date to date. GETDATE() returns a datetime that includes the time of day.
    WHERE DateInit > CAST(GETDATE() AS date)
    GROUP BY ID
    ) n ON a.ID = n.ID
LEFT JOIN (
    SELECT ID
        ,MAX(DateInit) LastVisit
    FROM Appointments
    -- Without the cast, today's date (midnight) is less than the current datetime,
    -- so an appointment dated today would be reported as the last visit.
    WHERE DateInit < CAST(GETDATE() AS date)
    GROUP BY ID
    ) l ON a.ID = l.ID
-- Self-contained demo: previous and next appointment per person relative to today.
-- Table variables and scalar variables take the '@' prefix; '#' names a temp table
-- and is not valid in DECLARE.
DECLARE @Appointments TABLE
(
    [Id] bigint,
    [Name] varchar(250),
    [DateInit] date
);

-- Unseparated yyyymmdd literals are read the same under any DATEFORMAT setting.
INSERT INTO @Appointments ([Id], [Name], [DateInit])
VALUES
    (1000, 'Lorena', '20160603'),
    (1000, 'Lorena', '20160601'),
    (1000, 'Lorena', '20160608'),
    (1000, 'Lorena', '20160610'),
    (1000, 'Lorena', '20160602'),
    (1000, 'Lorena', '20160620'),
    (7000, 'Susan', '20160604'),
    (7000, 'Susan', '20160608'),
    (7000, 'Susan', '20160609'),
    (7000, 'Susan', '20160601');

-- Assigning GETDATE() to a DATE variable drops the time of day.
DECLARE @Today DATE = GETDATE();

WITH CTE
AS (
    SELECT A.NAME
        -- Numbers the rows of each ID so that one representative row can be kept below.
        ,ROW_NUMBER() OVER (
            PARTITION BY ID ORDER BY ID
            ) RN
        -- Latest appointment strictly before today for this ID (NULL when there is none).
        ,(
            SELECT TOP 1 DateInit
            FROM @Appointments B
            WHERE B.ID = A.ID
                AND DateInit < @Today
            ORDER BY DateInit DESC
            ) [Last Visit]
        -- Earliest appointment strictly after today for this ID (NULL when there is none).
        ,(
            SELECT TOP 1 DateInit
            FROM @Appointments B
            WHERE B.ID = A.ID
                AND DateInit > @Today
            ORDER BY DateInit
            ) [Next Visit]
    FROM @Appointments A
    )
SELECT C.NAME
    ,C.[Last Visit]
    ,C.[Next Visit]
    ,RN
FROM CTE C
-- Keep a single row per ID.
WHERE RN = 1
Below, I created a schema (using SQL Server table variables), inserted dummy data for testing, and attempted to write a query that displays the two (replaceable by N) employees from every department who have stayed with the company the longest. Can someone please help improve, correct and/or optimize this solution? (Feel free to execute the code as-is in SQL Server Management Studio to see the results.)
-- Demo script: the two longest-serving employees of every department.
-- Table variables take the '@' prefix; '#' is not valid in DECLARE ... TABLE.

--declare employee variable to hold employee data
DECLARE @employee TABLE
(
    id int,
    name varchar(50),
    startdate datetime,
    enddate datetime,      -- NULL is treated below as "still employed"
    departmentid int
)

--declare department variable to hold department data
DECLARE @department TABLE
(
    id int,
    name varchar(50)
)

--insert dummy department data
INSERT INTO @department VALUES (1, 'IT'),
                               (2, 'SALES'),
                               (3, 'HR')

--insert dummy employee data
INSERT INTO @employee VALUES (1, 'mikhail', '01/01/2005', '01/01/2013', 1),
                             (2, 'david', '01/01/2006', '01/01/2012', 1),
                             (3, 'andrew', '01/01/2002', null, 1),
                             (4, 'will', '01/01/2013', null, 1),
                             (5, 'dave', '01/01/2006', '01/01/2012', 2),
                             (6, 'mike', '01/01/2002', '01/01/2012', 2),
                             (7, 'brad', '01/01/2011', null, 2),
                             (8, 'thomas', '01/01/2002', '01/01/2003', 3),
                             (9, 'anthony', '01/01/2015', null, 3),
                             (10, 'vincent', '01/01/2002', null, 3),
                             (11, 'bobby', '01/01/2002', '01/01/2003', 3);

--declare variable to hold intermediate data
DECLARE @hold TABLE (rowid int, ename varchar(50), timew int, dname varchar(50))

-- insert intermediate data: rank employees inside each department by days worked,
-- longest first
INSERT INTO @hold
SELECT row_number() OVER (PARTITION BY dname ORDER BY timew DESC) rowid, ename, timew, dname
FROM
(
    SELECT E.name ename, E.startdate startdate, E.enddate enddate,
           -- days worked: up to enddate, or up to now when enddate is NULL
           CASE
               WHEN E.enddate is null then datediff(DAY, E.startdate, getdate())
               ELSE datediff(DAY, E.startdate, E.enddate)
           END timew, D.name dname
    FROM @employee E inner join @department D ON E.departmentid = D.id
) PART

-- final result: top two per department
SELECT ename, dname FROM @hold WHERE rowid < 3
You can combine some of the logic and make the query smaller but other than that you seem to have a good understanding of how it should be done.
-- Top two employees per department by days worked, without an intermediate table.
-- @employee and @department are table variables, so this query must run in the same
-- batch as their DECLARE statements.
SELECT ename, dname
FROM
(
    SELECT e.name ename,
           d.name dname,
           -- COALESCE substitutes the current time for a NULL enddate.
           -- The alias is written as "e" throughout so it also resolves under a
           -- case-sensitive collation.
           ROW_NUMBER() OVER (PARTITION BY d.name ORDER BY datediff(DAY, e.startdate, COALESCE(e.enddate, getdate())) desc) rn
    FROM @employee e
    INNER JOIN @department d ON e.departmentid = d.id
) t
WHERE rn < 3
One thing I might suggest would be to consider using DENSE_RANK instead of ROW_NUMBER if you want to include ties
-- Variant that keeps ties: DENSE_RANK gives employees with equal tenure the same
-- rank, so more than two rows per department can be returned.
-- @employee and @department are table variables, so this query must run in the same
-- batch as their DECLARE statements.
SELECT ename, dname
FROM
(
    SELECT e.name ename,
           d.name dname,
           -- COALESCE substitutes the current time for a NULL enddate.
           -- The alias is written as "e" throughout so it also resolves under a
           -- case-sensitive collation.
           DENSE_RANK() OVER (PARTITION BY d.name ORDER BY datediff(DAY, e.startdate, COALESCE(e.enddate, getdate())) desc) rnk
    FROM @employee e
    INNER JOIN @department d ON e.departmentid = d.id
) t
WHERE rnk < 3
More information on Ranking Functions
I have a table like the one below. For any particular fund, and up to any particular date, I need to sum the Amount values. Say I need the sums for three dates: 01/28/2015, 03/30/2015 and 04/01/2015. The logic should find the records in the table up to the first date and, if there is more than one, sum their amounts. For the next date it should sum the records up to that date, but starting after the previous date that was already summed.
Id Fund Date Amount
1 A 01/20/2015 250
2 A 02/28/2015 300
3 A 03/20/2015 400
4 A 03/30/2015 200
5 B 04/01/2015 500
6 B 04/01/2015 600
I want result to be like below
Id Fund Date SumOfAmount
1 A 02/28/2015 550
2 A 03/30/2015 600
3 B 04/01/2015 1100
Based on your question, it seems that you want to select a set of dates, and then for each fund and selected date, get the sum of the fund amounts from the selected date to the previous selected date. Here is the result set I think you should be expecting:
Fund Date SumOfAmount
A 2015-02-28 550.00
A 2015-03-30 600.00
B 2015-04-01 1100.00
Here is the code to produce this output:
-- For each selected date and fund: sum of the amounts dated after the previous
-- selected date, up to and including the selected date.
-- Table variables take the '@' prefix; '#' is not valid in DECLARE ... TABLE.
DECLARE @Dates TABLE
(
    SelectedDate DATE PRIMARY KEY
)

-- Unseparated yyyymmdd literals are read the same under any DATEFORMAT setting.
INSERT INTO @Dates
VALUES
     ('20150228')
    ,('20150330')
    ,('20150401')

DECLARE @FundAmounts TABLE
(
     Id INT PRIMARY KEY
    ,Fund VARCHAR(5)
    ,Date DATE
    ,Amount MONEY
);

INSERT INTO @FundAmounts
VALUES
     (1, 'A', '20150120', 250)
    ,(2, 'A', '20150228', 300)
    ,(3, 'A', '20150320', 400)
    ,(4, 'A', '20150330', 200)
    ,(5, 'B', '20150401', 500)
    ,(6, 'B', '20150401', 600);

SELECT
     F.Fund
    ,D.SelectedDate AS Date
    ,SUM(F.Amount) AS SumOfAmount
FROM
(
    -- Pair every selected date with the one before it. The first date has no
    -- predecessor, so LAG falls back to 1900-01-01 as the lower bound.
    SELECT
         SelectedDate
        ,LAG(SelectedDate, 1, '19000101') OVER (ORDER BY SelectedDate ASC) AS PreviousDate
    FROM @Dates
) D
JOIN
    @FundAmounts F
ON
    -- Both bounds are DATE values. The day after the previous selected date is the
    -- first day of this bucket, so a row never falls into two buckets.
    F.Date BETWEEN DATEADD(DAY, 1, D.PreviousDate) AND D.SelectedDate
GROUP BY
     D.SelectedDate
    ,F.Fund
ORDER BY
     F.Fund
    ,D.SelectedDate
EDIT: Here is alternative to the LAG function for this example:
-- Replacement FROM clause for engines without LAG: a correlated subquery looks up
-- the latest selected date before the current one, falling back to 1900-01-01.
-- @Dates is a table variable, so it is referenced with the '@' prefix.
FROM
(
    SELECT
         SelectedDate
        ,ISNULL((SELECT TOP 1 SelectedDate FROM @Dates WHERE SelectedDate < Dates.SelectedDate ORDER BY SelectedDate DESC),'1/1/1900') AS PreviousDate
    FROM @Dates Dates
) D
If I change your incorrect sample data to ...
-- Adjusted sample data: every row's date is one of the three dates being summed.
CREATE TABLE TableName (
    [Id]     int,
    [Fund]   varchar(1),
    [Date]   datetime,
    [Amount] int
);

INSERT INTO TableName ([Id], [Fund], [Date], [Amount])
VALUES
    (1, 'A', '2015-01-28 00:00:00', 250),
    (2, 'A', '2015-01-28 00:00:00', 300),
    (3, 'A', '2015-03-30 00:00:00', 400),
    (4, 'A', '2015-03-30 00:00:00', 200),
    (5, 'B', '2015-04-01 00:00:00', 500),
    (6, 'B', '2015-04-01 00:00:00', 600);
this query using GROUP BY works:
-- Sum of amounts per fund for each of the requested dates.
SELECT MIN(Id) AS Id,
       Fund,
       [Date],
       SUM(Amount) AS SumOfAmount
FROM dbo.TableName t
-- Unseparated yyyymmdd literals are read the same under any DATEFORMAT setting.
WHERE [Date] IN ('20150128', '20150330', '20150401')
-- Group by Fund as well as date: grouping by date alone would add together the
-- amounts of different funds that share a date and report them under MIN(Fund).
GROUP BY Fund, [Date]
ORDER BY [Date], Fund
Demo
First I used ROW_NUMBER partitioned by month to pick the latest date of every month; in the second CTE I summed the amounts per month; then I joined the two. This result set may match your expected output:
-- Table variables take the '@' prefix; '#' is not valid in DECLARE ... TABLE.
declare @t table (Id int, Fund Varchar(1), Dated date, amount int)

insert into @t (id, Fund, dated, amount) values
    (1,'A','01/20/2015',250),
    (2,'A','01/28/2015',300),
    (3,'A','03/20/2015',400),
    (4,'A','03/30/2015',200),
    (5,'B','04/01/2015',600),
    (6,'B','04/01/2015',500)

-- cte: numbers the rows of each calendar month, latest date first (RN = 1 is the
-- month's latest row).
;with cte as (
    select ID, Fund, Amount, Dated,
           ROW_NUMBER() OVER (PARTITION BY DATEDIFF(MONTH, '20000101', dated) ORDER BY dated desc) AS RN
    from @t
    group by ID, Fund, DATED, Amount
),
-- CTE2: total amount per month number.
-- NOTE(review): MONTH() ignores the year, unlike the DATEDIFF bucket in cte, so the
-- same month of different years would be summed together. Verify against real data.
CTE2 AS (
    select SUM(amount) Amt
    from @t
    GROUP BY MONTH(dated)
),
-- CTE3: numbers the monthly totals in ascending order of amount.
CTE3 AS (
    Select Amt, ROW_NUMBER() OVER (ORDER BY amt) R
    from cte2
),
-- CTE4: the latest row of each month, renumbered in an unspecified order.
CTE4 AS (
    Select DISTINCT
        C.ID As ID,
        C.Fund As Fund,
        C.Dated As Dated,
        ROW_NUMBER() OVER (PARTITION BY RN ORDER BY (SELECT NULL)) R
    from cte C
    INNER JOIN CTE3 CC ON c.RN = CC.R
    Where C.RN = 1
    GROUP BY C.ID, C.Fund, C.RN, C.Dated
)
-- NOTE(review): rows are paired with totals purely by row number (CTE4.R = CTE3.R),
-- not by month, and CTE4.R has no defined order. Confirm that the pairing is
-- correct for data other than this sample.
select C.R, C.Fund, C.Dated, cc.Amt
from CTE4 C
INNER JOIN CTE3 CC ON c.R = cc.R
-- Table variables take the '@' prefix; '#' is not valid in DECLARE ... TABLE.
declare @TableName table([Id] int, [Fund] varchar(1), [Date] datetime, [Amount] int)
declare @Sample table([SampleDate] datetime)

INSERT INTO @TableName
    ([Id], [Fund], [Date], [Amount])
VALUES
    (1, 'A', '20150120 00:00:00', 250),
    (2, 'A', '20150128 00:00:00', 300),
    (3, 'A', '20150320 00:00:00', 400),
    (4, 'A', '20150330 00:00:00', 200),
    (5, 'B', '20150401 00:00:00', 500),
    (6, 'B', '20150401 00:00:00', 600)

-- The dates at which the sums are reported.
INSERT INTO @Sample ([SampleDate])
values ('20150128 00:00:00'), ('20150330 00:00:00'), ('20150401 00:00:00')

-- select * from @TableName
-- select * from @Sample

-- groups: tag each distinct (Fund, Date, Amount) combination with the earliest
-- sample date that is on or after its own date. Combinations later than every
-- sample date have no match and drop out of the inner join.
;WITH groups AS (
    SELECT [Fund], [Date], [AMOUNT], MIN([SampleDate]) [SampleDate]
    FROM @TableName
    JOIN @Sample ON [Date] <= [SampleDate]
    GROUP BY [Fund], [Date], [AMOUNT]
)
-- Sum per fund and sample-date bucket; the aggregate now has a column name.
SELECT [Fund], [SampleDate], SUM([AMOUNT]) AS [SumOfAmount]
FROM groups
GROUP BY [Fund], [SampleDate]
Explanation:
The CTE `groups` finds, for each data row, the earliest SampleDate that is on or after
the row's date and attaches it to the row, thus assigning the row to the group it should be summed in.
After that, you can simply group by the derived date.