Select 1 Distinct Record per month by latest date

Select 1 Distinct Record per month by latest date - sql

I have a list of data that looks like this:
Name Date Weight
Person 1 01/01/2014 89KG
Person 2 01/01/2014 62KG
Person 1 07/01/2014 88KG
Person 2 07/01/2014 62KG
Person 1 21/01/2014 85KG
Person 2 21/01/2014 63KG
What I would like to do is select only the records with a distinct name and are the latest dates in a given month. So for this example I would like to only select the person 1 and person 2 records for 21/01/2014 (as this is the latest date). I'm using SQL 2008.

Please see if this works for you.
Sample Data:
-- Sample data for the "latest record per person per month" query.
-- OBJECT_ID returns NULL when the object does not exist, and object ids are
-- not guaranteed to be positive (temp tables get negative ids on newer
-- SQL Server versions), so test for NULL instead of "> 0".
IF OBJECT_ID(N'tempdb..#TEMP') IS NOT NULL
BEGIN
    DROP TABLE #TEMP;
END;

CREATE TABLE #TEMP
(
    Name   VARCHAR(20),
    WDate  VARCHAR(20),  -- dd/mm/yyyy text; the query converts it with style 103
    Weight VARCHAR(20)
);

INSERT INTO #TEMP (Name, WDate, Weight)
VALUES
    ('Person 1', '01/01/2014', '89KG'),
    ('Person 2', '01/01/2014', '62KG'),
    ('Person 1', '07/01/2014', '88KG'),
    ('Person 1', '07/01/2014', '88KG'),
    ('Person 2', '07/02/2014', '62KG'),
    ('Person 1', '21/01/2014', '85KG'),
    ('Person 2', '21/01/2014', '63KG');
Script:
-- Keep one row per person per calendar month: the row with the latest date.
;WITH parsed AS (
    -- Turn the dd/mm/yyyy text (style 103) into a real DATE.
    SELECT
        src.Name,
        CONVERT(DATE, src.WDate, 103) AS WDate,
        src.Weight
    FROM #TEMP AS src
),
ranked AS (
    -- Number each person's rows within a year+month, newest first.
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY p.Name, YEAR(p.WDate), MONTH(p.WDate)
            ORDER BY p.WDate DESC
        ) AS ID,
        p.Name,
        p.WDate,
        p.Weight
    FROM parsed AS p
)
SELECT
    r.Name,
    r.WDate,
    r.Weight
FROM ranked AS r
WHERE r.ID = 1
Cleanup Script:
-- Drop the sample table if it exists. OBJECT_ID returns NULL for a missing
-- object and ids are not guaranteed to be positive, so compare against NULL.
IF OBJECT_ID(N'tempdb..#TEMP') IS NOT NULL
BEGIN
    DROP TABLE #TEMP;
END;

Please try using DENSE_RANK:
-- Latest row(s) per person per calendar month.
-- Name is part of the partition so that each person is ranked on their own
-- dates; without it, a person whose last reading is earlier than the latest
-- date of anyone in that month would be dropped.
-- DENSE_RANK keeps ties: two rows for the same person on the same latest
-- date are both returned.
SELECT
    ranked.Name,
    ranked.[Date],
    ranked.Weight
FROM (
    SELECT
        t.Name,
        t.[Date],
        t.Weight,
        DENSE_RANK() OVER (
            PARTITION BY t.Name, YEAR(t.[Date]), MONTH(t.[Date])
            ORDER BY t.[Date] DESC
        ) AS Rnk
    FROM tbl AS t
) AS ranked
WHERE ranked.Rnk = 1;

This will also work, using row partitioning:
-- One row per person per calendar month: the row with the latest date.
-- The partition includes YEAR as well as MONTH; partitioning by MONTH alone
-- would merge e.g. January 2014 and January 2015 into one group.
SELECT
    Tbl.Name,
    Tbl.[Date],
    Tbl.Weight
FROM (
    SELECT
        Name,
        [Date],
        Weight,
        ROW_NUMBER() OVER (
            PARTITION BY Name, YEAR([Date]), MONTH([Date])
            ORDER BY [Date] DESC
        ) AS RowNum
    FROM [YourTableHere]
) AS Tbl
WHERE Tbl.RowNum = 1
ORDER BY YEAR(Tbl.[Date]), MONTH(Tbl.[Date]), Tbl.Name;
Test Script:
-- Self-contained test: table variables are prefixed with @ (a # prefix
-- denotes a temp table and is not valid in DECLARE ... TABLE).
DECLARE @Table TABLE (Name VARCHAR(20), [Date] DATE, Weight VARCHAR(20));

-- Dates use the unseparated yyyymmdd form so they parse the same way under
-- any DATEFORMAT / language setting.
INSERT INTO @Table (Name, [Date], Weight)
VALUES
    ('Person 1', '20140101', '89KG'),
    ('Person 2', '20140101', '62KG'),
    ('Person 1', '20140107', '88KG'),
    ('Person 2', '20140107', '62KG'),
    ('Person 1', '20140121', '85KG'),
    ('Person 2', '20140121', '63KG'),
    ('Person 1', '20140201', '84KG'),
    ('Person 2', '20140201', '61KG'),
    ('Person 1', '20140211', '83KG'),
    ('Person 2', '20140211', '60KG');

-- One row per person per calendar month (year + month), latest date wins.
SELECT
    Tbl.Name,
    Tbl.[Date],
    Tbl.Weight
FROM (
    SELECT
        Name,
        [Date],
        Weight,
        ROW_NUMBER() OVER (
            PARTITION BY Name, YEAR([Date]), MONTH([Date])
            ORDER BY [Date] DESC
        ) AS RowNum
    FROM @Table
) AS Tbl
WHERE Tbl.RowNum = 1
ORDER BY YEAR(Tbl.[Date]), MONTH(Tbl.[Date]), Tbl.Name;

Related

how to select all records which exists in last 6 months?

Can someone suggest how to select all the records which exist in the last N months?
N would be parameter.
I've written the sub-query below to return the desired result, but it is not dynamic, since I cannot pass the number of months N to select the last N months' records.
-- Suppliers that have a row for every one of the listed period start dates.
-- Each EXISTS checks one hard-coded month.
SELECT DISTINCT h.supplier_code
FROM API_StockAndSaleHeader AS h
WHERE EXISTS (
          SELECT 1
          FROM API_StockAndSaleHeader AS m
          WHERE m.supplier_code = h.supplier_code
            AND dbo.ConvertStringToDate(m.period_start_date) = '2020-03-01'
      )
  AND EXISTS (
          SELECT 1
          FROM API_StockAndSaleHeader AS m
          WHERE m.supplier_code = h.supplier_code
            AND dbo.ConvertStringToDate(m.period_start_date) = '2020-02-01'
      )
  AND EXISTS (
          SELECT 1
          FROM API_StockAndSaleHeader AS m
          WHERE m.supplier_code = h.supplier_code
            AND dbo.ConvertStringToDate(m.period_start_date) = '2020-01-01'
      )
  AND EXISTS (
          SELECT 1
          FROM API_StockAndSaleHeader AS m
          WHERE m.supplier_code = h.supplier_code
            AND dbo.ConvertStringToDate(m.period_start_date) = '2019-12-01'
      )
  AND EXISTS (
          SELECT 1
          FROM API_StockAndSaleHeader AS m
          WHERE m.supplier_code = h.supplier_code
            AND dbo.ConvertStringToDate(m.period_start_date) = '2019-11-01'
      )

This will get you the suppliers which have at least one entry for each of the last N months:
-- Suppliers with at least one entry in each of the last @N calendar months
-- (the current month plus the @N - 1 months before it).
-- Local variables are prefixed with @.
DECLARE @N AS INT = 6;

WITH supplier_month AS
(
    -- One row per supplier per yyyymm bucket (style 112 = yyyymmdd, trimmed
    -- to 6 characters). The window is aligned to month boundaries so it
    -- covers exactly @N calendar months; a window that starts mid-month
    -- would span @N + 1 buckets and let a supplier with a missing month pass.
    -- NOTE(review): CONVERT(date, <string>) depends on the session's date
    -- format; confirm how period_start_date is stored.
    SELECT DISTINCT
        supplier_code,
        CONVERT(varchar(6), CONVERT(date, period_start_date), 112) AS start_month
    FROM API_StockAndSaleHeader
    WHERE CONVERT(date, period_start_date) >= DATEADD(month, DATEDIFF(month, 0, GETDATE()) - (@N - 1), 0)
      AND CONVERT(date, period_start_date) <  DATEADD(month, DATEDIFF(month, 0, GETDATE()) + 1, 0)
)
SELECT supplier_code
FROM supplier_month
GROUP BY supplier_code
HAVING COUNT(*) >= @N;
This first gets an auxiliary set of distinct supplier_codes + months, in order to know which suppliers had at least one record in the last 6 months. The trick is to convert the date to varchar and trim it to 6 characters, getting a yyyymm format. Then you just need those with at least N records, which will mean all N months have data. I used >= just to play safe, it depends on how you are dividing months. Every 30/31? 30 days counting from today?
Note I'm using CONVERT to convert to a date instead. BTW, storing dates as other types is a bad thing.
With this data, it will display just Supplier 1:
-- Build the sample table in one statement from a row constructor.
-- Dates are mm/dd/yyyy text, exactly as in the scenario being discussed.
SELECT
    v.supplier_code,
    v.period_start_date
INTO API_StockAndSaleHeader
FROM (VALUES
    ('Supplier 1', '10/10/2019'),
    ('Supplier 1', '11/11/2019'),
    ('Supplier 1', '12/12/2019'),
    ('Supplier 1', '01/01/2020'),
    ('Supplier 1', '02/01/2020'),
    ('Supplier 1', '03/01/2020'),
    ('Supplier 1', '03/15/2020'),
    ('Supplier 1', '04/01/2020'),
    ('Supplier 1', '05/01/2020'),
    ('Supplier 2', '02/01/2020'),
    ('Supplier 2', '03/01/2020'),
    ('Supplier 2', '03/15/2020'),
    ('Supplier 2', '03/22/2020'),
    ('Supplier 2', '03/26/2020'),
    ('Supplier 2', '04/01/2020'),
    ('Supplier 2', '05/01/2020')
) AS v (supplier_code, period_start_date)
Note that Supplier 2 has more than 6 entries in total, but does not have an entry in each of the last N months.

-- Demo: suppliers that have at least one row in each of the last @months months.
CREATE TABLE #API_StockAndSaleHeader
(
    supplier_code     int,
    period_start_date DATETIME
);

-- The comments below describe the outcome when the script is run in
-- May 2020 with @months = 3; the filter is relative to GETDATE().
INSERT INTO #API_StockAndSaleHeader (supplier_code, period_start_date) VALUES
    (1, '2020-05-01'),
    (1, '2020-04-01'),
    (1, '2020-04-15'),  -- supplier 1 covers only two distinct months: should not show up
    (2, '2020-05-01'),
    (2, '2020-04-01'),
    (2, '2020-03-01'),  -- supplier 2 covers three distinct months: should show up
    (2, '2020-02-01'),
    (2, '2020-01-01');

-- Local variables are prefixed with @.
DECLARE @months int = 3;

SELECT A.supplier_code
FROM
(
    -- One row per supplier per calendar month inside the window.
    SELECT
        supplier_code,
        DATEPART(year, period_start_date)  AS [Year],
        DATEPART(month, period_start_date) AS [month]
    FROM #API_StockAndSaleHeader
    WHERE DATEADD(MONTH, @months, period_start_date) >= GETDATE()
    GROUP BY supplier_code, DATEPART(year, period_start_date), DATEPART(month, period_start_date)
) AS A
GROUP BY A.supplier_code
HAVING COUNT(A.supplier_code) >= @months;

-- Clean up the table that was actually created above.
IF OBJECT_ID('tempdb..#API_StockAndSaleHeader') IS NOT NULL
BEGIN
    DROP TABLE #API_StockAndSaleHeader;
END;

How to get columns from different unrelated tables having similar column names?

I have 3 tables:
Pay Group:
PayGroupId Name Description Code
1 US Weekly US Weekly USW
2 Can Weekly Canada Weekly CANW
3 US Monthly US Monthly USM
4 Can Monthly Can Monthly CANM
Pay Type:
PayTypeId Name Description Code
1 Hourly Hourly H
2 Salary Salaried S
Pay Code:
PayCodeId Name Description Code
1 Regular Regular REG
2 PTO PTO PTO
3 Sick Sick SICK
I need a report in following format:
PayGroup PayType PayCode
US Weekly Hourly Regular
Can Weekly Salary PTO
US Monthly Sick
Can we do this?

I suspect this gets you the result you are after, but seems like an odd requirement:
-- List the names of three unrelated tables side by side, pairing rows purely
-- by their position (row number ordered by each table's id).
WITH PG AS (
    SELECT [Name],
           ROW_NUMBER() OVER (ORDER BY PayGroupID ASC) AS RN
    FROM PayGroup
),
PT AS (
    -- Must read PayType: PayGroup has no PayTypeID column.
    SELECT [Name],
           ROW_NUMBER() OVER (ORDER BY PayTypeID ASC) AS RN
    FROM PayType
),
PC AS (
    SELECT [Name],
           ROW_NUMBER() OVER (ORDER BY PayCodeID ASC) AS RN
    FROM PayCode
)
SELECT PG.[Name] AS PayGroup,
       PT.[Name] AS PayType,
       PC.[Name] AS PayCode
FROM PG
FULL OUTER JOIN PT
    ON PT.RN = PG.RN
-- Match PC against whichever of the first two row numbers is present, so the
-- result is complete no matter which table is the longest.
FULL OUTER JOIN PC
    ON PC.RN = COALESCE(PG.RN, PT.RN)
ORDER BY COALESCE(PG.RN, PT.RN, PC.RN);

-- Pay groups
CREATE TABLE #table1
(
    [PayGroupId]  int,
    [Name]        varchar(11),
    [Description] varchar(13),
    [Code]        varchar(4)
);

INSERT INTO #table1 ([PayGroupId], [Name], [Description], [Code])
VALUES
    (1, 'US Weekly',   'US Weekly',     'USW'),
    (2, 'Can Weekly',  'Canada Weekly', 'CANW'),
    (3, 'US Monthly',  'US Monthly',    'USM'),
    (4, 'Can Monthly', 'Can Monthly',   'CANM');

-- Pay types
CREATE TABLE #table2
(
    [PayTypeId]   int,
    [Name]        varchar(6),
    [Description] varchar(8),
    [Code]        varchar(1)
);

INSERT INTO #table2 ([PayTypeId], [Name], [Description], [Code])
VALUES
    (1, 'Hourly', 'Hourly',   'H'),
    (2, 'Salary', 'Salaried', 'S');

-- Pay codes
CREATE TABLE #table3
(
    [PayCodeId]   int,
    [Name]        varchar(7),
    [Description] varchar(7),
    [Code]        varchar(4)
);

INSERT INTO #table3 ([PayCodeId], [Name], [Description], [Code])
VALUES
    (1, 'Regular', 'Regular', 'REG'),
    (2, 'PTO',     'PTO',     'PTO'),
    (3, 'Sick',    'Sick',    'SICK');

-- Pair the rows up by id, driven by the pay-group table; missing partners
-- are shown as empty strings.
SELECT
    grp.[Name]             AS PayGroup,
    ISNULL(typ.[Name], '') AS PayType,
    ISNULL(cod.[Name], '') AS PayCode
FROM #table1 AS grp
LEFT JOIN #table2 AS typ
    ON grp.[PayGroupId] = typ.[PayTypeId]
LEFT JOIN #table3 AS cod
    ON cod.[PayCodeId] = grp.[PayGroupId];
PayGroup PayType PayCode
US Weekly Hourly Regular
Can Weekly Salary PTO
US Monthly Sick
Can Monthly

Getting Paged Distinct Records Using SQL (Not Duplicate)

Following @mdb's answer to apply pagination using SQL Server, I find it hard to retrieve distinct records when the main table is joined to other tables in a one-to-many relationship, i.e., a person has many addresses.
Use case, suppose I want to retrieve all persons which has an address in New York given tables #temp_person and #temp_addresses, I would join them on PersonID and OwnerID.
The problem arises when there are multiple addresses for a person, the result set contains duplicate records.
To make it clearer, here's a sample query with data:
Sample Data:
-- People
CREATE TABLE #temp_person
(
    PersonID int          NOT NULL,
    FullName varchar(max) NOT NULL
)

-- Addresses; OwnerID refers to #temp_person.PersonID (one person, many addresses)
CREATE TABLE #temp_addresses
(
    AddressID int IDENTITY NOT NULL,
    OwnerID   int          NOT NULL,
    Address1  varchar(max),
    City      varchar(max)
)

INSERT INTO #temp_person (PersonID, FullName)
VALUES
    (1, 'Sample One'),
    (2, 'Sample Two'),
    (3, 'Sample Three')

INSERT INTO #temp_addresses (OwnerID, Address1, City)
VALUES
    (1, 'Somewhere East Of',        'New York'),
    (1, 'Somewhere West Of',        'New York'),
    (2, 'blah blah blah',           'Atlantis'),
    (2, 'Address2 Of Sample Two',   'New York'),
    (2, 'Address3 Of Sample Two',   'Nowhere City'),
    (3, 'Address1 Of Sample Three', 'New York'),
    (3, 'Address2 Of Sample Three', 'Seattle')

-- To clean up afterwards:
--drop table #temp_addresses, #temp_person
Pagination Query:
-- The query from the question, kept behaviorally as-is: it numbers the joined
-- rows, so a person with several matching addresses is numbered (and
-- counted) once per address, and DISTINCT cannot collapse those rows
-- because each carries a different RowNum.
;WITH RowConstrainedResult AS (
    SELECT DISTINCT
        ROW_NUMBER() OVER (ORDER BY p.FullName ASC) AS RowNum,
        p.PersonID,
        p.FullName,
        COUNT(*) OVER () AS TotalCount
    FROM #temp_person AS p
    LEFT JOIN #temp_addresses AS a
        ON a.OwnerID = p.PersonID
    WHERE a.City = 'New York'
)
SELECT
    CAST(r.RowNum AS varchar(MAX)) + '/' + CAST(r.TotalCount AS varchar(MAX)) AS ResultPosition,
    r.PersonID,
    r.FullName
FROM RowConstrainedResult AS r
WHERE r.RowNum BETWEEN 1 AND 3
ORDER BY r.RowNum
Expected Results:
ResultPosition PersonID FullName
1/3 1 Sample One
2/3 2 Sample Two
3/3 3 Sample Three
Actual Results:
ResultPosition PersonID FullName
1/4 1 Sample One
2/4 1 Sample One
3/4 3 Sample Three
As you can see, the inner query is returning multiple records due to the join with #temp_addresses.
Is there a way we could only return unique records by PersonID?
UPDATE:
Actual use case is for an "Advanced Search" functionality where the user can search using different filters, i.e, name, firstname, last name, birthdate, address, etc.. The <WHERE_CLAUSE> and <JOIN_STATEMENTS> in the query are added dynamically so GROUP BY is not applicable here.
Also, please address the "Pagination" scheme for this question. That is, I want to retrieve only N number of results from Start while also retrieving the total count of the results as if they are not paged. i.e, I retrieve only 25 rows out of a total of 500 results.

Just do group by PersonID and no need to use subquery
-- One row per person that has at least one New York address.
-- GROUP BY collapses the joined address rows before the window functions
-- run, so ROW_NUMBER and COUNT(*) OVER () operate on persons, not addresses.
SELECT
    -- Number by PersonID: ORDER BY (SELECT 1) would leave the numbering
    -- undefined and free to change between executions.
    CAST(ROW_NUMBER() OVER (ORDER BY p.PersonID) AS varchar(20)) + '/' +
    CAST(COUNT(*) OVER () AS varchar(20)) AS ResultPosition,
    p.PersonID,
    MAX(p.FullName) AS FullName
FROM #temp_person AS p
-- The City filter discards persons without addresses anyway, so this is an
-- inner join; say so explicitly.
INNER JOIN #temp_addresses AS a
    ON a.OwnerID = p.PersonID
WHERE a.City = 'New York'
GROUP BY p.PersonID
ORDER BY p.PersonID;
EDIT : I would use CTE for the pagination
-- Paged variant: number the distinct persons once, then slice by row number.
;WITH cte AS
(
    SELECT
        -- Deterministic numbering is required for paging: with
        -- ORDER BY (SELECT 1) the same person could land on different pages
        -- in different executions.
        ROW_NUMBER() OVER (ORDER BY p.PersonID) AS rn,
        CAST(ROW_NUMBER() OVER (ORDER BY p.PersonID) AS varchar(20)) + '/' +
        CAST(COUNT(*) OVER () AS varchar(20)) AS ResultPosition,
        p.PersonID,
        MAX(p.FullName) AS FullName
    FROM #temp_person AS p
    INNER JOIN #temp_addresses AS a
        ON a.OwnerID = p.PersonID
    WHERE a.City = 'New York'
    GROUP BY p.PersonID
)
SELECT
    cte.ResultPosition,
    cte.PersonID,
    cte.FullName
FROM cte
-- Page bounds: rows 1..3, the three-row page of the sample result.
WHERE cte.rn > 0 AND cte.rn <= 3
ORDER BY cte.rn;
Result:
ResultPosition PersonID FullName
1/3 1 Sample One
2/3 2 Sample Two
3/3 3 Sample Three

You need to have distinct rows before using ROW_NUMBER().
If you filter by City, there is no need to use LEFT JOIN. Use INNER JOIN instead.
-- De-duplicate the persons first, then number and count the distinct rows.
SELECT
    CAST(ROW_NUMBER() OVER (ORDER BY r.PersonID) AS varchar(max)) + '/' +
    CAST(COUNT(r.PersonID) OVER () AS varchar(max)) AS ResultPosition,
    r.PersonID,
    r.FullName
FROM (
    SELECT DISTINCT
        p.PersonID,
        p.FullName
    FROM #temp_person AS p
    INNER JOIN #temp_addresses AS a
        ON a.OwnerID = p.PersonID
    WHERE a.City = 'New York'
) AS r
EDIT:
Considering pagination
-- Paged list of distinct persons with a New York address, together with the
-- total number of matches across all pages.
-- Local variables are prefixed with @.
DECLARE @page int = 1, @rowsPage int = 25;

WITH matched AS (
    -- Collapse the one-to-many join to one row per person.
    SELECT DISTINCT
        p.PersonID,
        p.FullName
    FROM #temp_person AS p
    INNER JOIN #temp_addresses AS a
        ON a.OwnerID = p.PersonID
    WHERE a.City = 'New York'
),
numbered AS (
    -- Number and count BEFORE the page filter is applied; counting after it
    -- (or before de-duplication) would report only the rows of the current
    -- page, inflated by duplicate address matches.
    SELECT
        ROW_NUMBER() OVER (ORDER BY m.PersonID) AS position,
        COUNT(*) OVER () AS total_count,
        m.PersonID,
        m.FullName
    FROM matched AS m
)
SELECT
    n.position,
    CAST(n.position AS varchar(20)) + '/' + CAST(n.total_count AS varchar(20)) AS ResultPosition,
    n.PersonID,
    n.FullName
FROM numbered AS n
WHERE n.position BETWEEN @rowsPage * (@page - 1) + 1 AND @rowsPage * @page
ORDER BY n.position;

Geoman Yabes, Check if this help... Gives results expected in your example and you can have pagination using RowNum:-
-- Paged list of persons with a New York address: pick one address row per
-- person, then number the surviving rows for paging.
SELECT
    Filtered.RowNum,
    Filtered.TotalRows,
    Filtered.PersonID,
    Filtered.FullName
FROM (
    SELECT
        ROW_NUMBER() OVER (ORDER BY OnePerPerson.PersonID ASC) AS RowNum,
        COUNT(*) OVER () AS TotalRows,
        OnePerPerson.PersonID,
        OnePerPerson.FullName
    FROM (
        SELECT
            -- ROW_NUMBER rather than RANK: RANK gives every tied row rank 1,
            -- so two addresses with the same Address1 would return the
            -- person twice. AddressID breaks ties deterministically.
            ROW_NUMBER() OVER (
                PARTITION BY p.PersonID
                ORDER BY a.Address1 ASC, a.AddressID ASC
            ) AS Ranking,
            p.PersonID,
            p.FullName
        FROM #temp_person AS p
        INNER JOIN #temp_addresses AS a
            ON a.OwnerID = p.PersonID
        WHERE a.City = 'New York'
    ) AS OnePerPerson
    WHERE OnePerPerson.Ranking = 1
) AS Filtered
WHERE Filtered.RowNum > 0 AND Filtered.RowNum <= 4
ORDER BY Filtered.RowNum;
Sample Data:
-- Six persons ...
INSERT INTO #temp_person (PersonID, FullName)
VALUES
    (1, 'Sample One'),
    (2, 'Sample Two'),
    (3, 'Sample Three'),
    (4, 'Sample 4'),
    (5, 'Sample 5'),
    (6, 'Sample 6')

-- ... and their addresses (AddressID is generated by the identity column).
INSERT INTO #temp_addresses (OwnerID, Address1, City)
VALUES
    (1, 'Somewhere East Of',        'New York'),
    (1, 'Somewhere West Of',        'New York'),
    (2, 'blah blah blah',           'Atlantis'),
    (2, 'Address2 Of Sample Two',   'New York'),
    (2, 'Address3 Of Sample Two',   'Nowhere City'),
    (3, 'Address1 Of Sample Three', 'New York'),
    (3, 'Address2 Of Sample Three', 'Seattle'),
    (4, 'Address1 Of Sample 4',     'New York'),
    (4, 'Address1 Of Sample 4',     'New York 2'),
    (5, 'Address1 Of Sample 5',     'New York'),
    (6, 'Address1 Of Sample 6',     'New York')

Sybase Alternative for DENSE_RANK() OVER (PARTITION BY

I have a below SQL query, need to convert it into Sybase.
SELECT
    prd_name,
    DENSE_RANK() OVER (
        PARTITION BY prd_cat
        ORDER BY createddt
    )
FROM product
Table Script:
-- Products with their category and creation date.
CREATE TABLE product
(
    prd_name  varchar(10),
    prd_cat   varchar(10),
    createddt datetime
)

-- One INSERT per row.
INSERT INTO product (prd_name, prd_cat, createddt)
VALUES ('Product 1', 'Toy', CONVERT(DATE, '2017-05-30'))
INSERT INTO product (prd_name, prd_cat, createddt)
VALUES ('Product 2', 'Toy', CONVERT(DATE, '2017-05-31'))
INSERT INTO product (prd_name, prd_cat, createddt)
VALUES ('Product 3', 'Toy', CONVERT(DATE, '2017-05-31'))
INSERT INTO product (prd_name, prd_cat, createddt)
VALUES ('Product 4', 'Toy1', CONVERT(DATE, '2017-05-29'))
Version: Adaptive Server Enterprise/15.7

Some versions of Sybase support window functions. I'm assuming that you don't have such a version.
You can replace it with a correlated subquery:
-- DENSE_RANK() OVER (PARTITION BY prd_cat ORDER BY createddt) without window
-- functions: a row's dense rank is the number of distinct createddt values
-- in its category that are less than or equal to its own.
-- The comparison must be <= (not <): with < the first row of each category
-- would get 0, whereas DENSE_RANK starts at 1.
SELECT p.prd_name,
       (SELECT COUNT(DISTINCT p2.createddt)
          FROM product p2
         WHERE p2.prd_cat = p.prd_cat
           AND p2.createddt <= p.createddt
       ) AS prd_rank
  FROM product p;

How to concatenate strings from multiple rows?

I have a table with the following columns:
PRODUCT
YEAR_UPDATED
Example data:
PROD1,2017
PROD1,2015
PROD2,2014
PROD3,2017
How can I get a list of when each product was updated? Something like:
PRODUCT,2017,2016,2015,2014,etc
PROD1,Y,N,Y,N
PROD2,N,N,N,Y
PROD3,Y,N,N,N
or
PROD1,2017,2015
PROD2,2014
PROD3,2017
Oracle DB
Thanks!

I am assuming the table's name is Products, change it to whatever your table's name is.
Oracle
You achieve it by using the LISTAGG function.
-- Oracle: one line per product followed by its update years, newest first,
-- e.g. PROD1,2017,2015 (the second layout asked for in the question).
-- The column is YEAR_UPDATED, as listed in the question.
SELECT p.product || ',' ||
       LISTAGG(p.year_updated, ',') WITHIN GROUP (ORDER BY p.year_updated DESC) AS product_years
  FROM Products p
 GROUP BY p.product;
If you are using SQL Server, this is how you can do it.
-- SQL Server (pre-STRING_AGG): concatenate each product's years with
-- FOR XML PATH(''), then strip the leading ', ' with STUFF.
SELECT p.Product + ', ' +
       STUFF((SELECT ', ' + CAST(tp.YearUpdated AS varchar(4))
                FROM Products AS tp
               WHERE tp.Product = p.Product
               -- Without ORDER BY the order of the concatenated years is undefined.
               ORDER BY tp.YearUpdated
                 FOR XML PATH('')), 1, 2, '') AS ProductYears
  FROM Products AS p
 GROUP BY p.Product
In case you want to quickly test it, you can try this out (using an in-memory table).
-- Quick test with a table variable. Table variables are prefixed with @
-- (a # prefix denotes a temp table and is not valid in DECLARE ... TABLE).
DECLARE @Products TABLE (Product varchar(50), YearUpdated int);

INSERT INTO @Products (Product, YearUpdated) VALUES ('Product 1', 2000);
INSERT INTO @Products (Product, YearUpdated) VALUES ('Product 1', 2001);
INSERT INTO @Products (Product, YearUpdated) VALUES ('Product 1', 2002);
INSERT INTO @Products (Product, YearUpdated) VALUES ('Product 1', 2003);
INSERT INTO @Products (Product, YearUpdated) VALUES ('Product 2', 2010);
INSERT INTO @Products (Product, YearUpdated) VALUES ('Product 2', 2011);
INSERT INTO @Products (Product, YearUpdated) VALUES ('Product 4', 2012);
INSERT INTO @Products (Product, YearUpdated) VALUES ('Product 4', 2013);
INSERT INTO @Products (Product, YearUpdated) VALUES ('Product 4', 2015);
INSERT INTO @Products (Product, YearUpdated) VALUES ('Product 3', 2005);

-- One line per product: "<product>, <year>, <year>, ..." with years ascending.
SELECT p.Product + ', ' +
       STUFF((SELECT ', ' + CAST(tp.YearUpdated AS varchar(4))
                FROM @Products AS tp
               WHERE tp.Product = p.Product
               -- Without ORDER BY the order of the concatenated years is undefined.
               ORDER BY tp.YearUpdated
                 FOR XML PATH('')), 1, 2, '') AS ProductYears
  FROM @Products AS p
 GROUP BY p.Product

Assuming you have id and year columns in your table :
-- Emits "<id>,<year list>" where the year list is chosen from the NUMBER of
-- rows an id has (1, 2 or 3), not from the actual year values.
-- NOTE(review): the year strings are hard-coded, so the output is only
-- meaningful when every id's years are a prefix of 2015, 2016, 2017; ids
-- with more than three rows get an empty list. Prefer an aggregation-based
-- concatenation for real data.
SELECT CAST(t1.id AS varchar(30)) + ',' +
       CASE t1.rn2
           WHEN 1 THEN '2015'
           WHEN 2 THEN '2015,2016'
           WHEN 3 THEN '2015,2016,2017'
           ELSE ''
       END
FROM (
    -- Highest row number per id = number of rows for that id. The derived
    -- table is aliased t, so its columns must be referenced through t.
    SELECT t.id,
           MAX(t.rn) AS rn2
    FROM (
        SELECT id,
               year,
               ROW_NUMBER() OVER (PARTITION BY id ORDER BY year) AS rn
        FROM yourTBL
    ) AS t
    GROUP BY t.id
) AS t1

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Select 1 Distinct Record per month by latest date - sql

Please try using DENSE_RANK: select * From ( select *, DENSE_RANK() over(PARTITION BY YEAR([Date]), MONTH([Date]) ORDER BY [Date] desc) Rnk From tbl )x where Rnk=1

Related

how to select all records which exists in last 6 months?

How to get columns from different unrelated tables having similar column names?

Getting Paged Distinct Records Using SQL (Not Duplicate)

Sybase Alternative for DENSE_RANK() OVER (PARTITION BY

How to concatenate strings from multiple rows?

Categories

Resources