Join subquery and pull most recent date - sql

I have two tables:
CLIENTS
ID NAME
001 John
002 Sara
CLIENT_STATUS
CLIENT_ID STATUS DATE
001 3 2018-01-02
001 2 2018-01-04
002 2 2018-01-02
002 1 2018-01-03
I want to filter by status = 1 and I just want the most recent date within the specified time frame.
I have this so far:
-- Reporting window; both bounds are inclusive in the BETWEEN filter below.
DECLARE
    @StartDate DATE,
    @EndDate DATE;
SET @StartDate = '2016-07-01';
SET @EndDate = '2018-06-30';

-- Lists every client together with each of its status = 1 rows dated inside
-- the window. Clients without such a row are kept (LEFT JOIN) with NULLs.
-- Nothing here narrows the match to the most recent row per client.
SELECT
    c.NAME
    , c.ID
    , hst.STATUS
FROM CLIENTS AS c
LEFT JOIN (
    SELECT cs.CLIENT_ID, cs.STATUS, cs.DATE
    FROM CLIENT_STATUS AS cs
    WHERE cs.STATUS = 1
      AND cs.DATE BETWEEN @StartDate AND @EndDate
) AS hst ON hst.CLIENT_ID = c.ID
Only that doesn't Order by the most recent date.
Please note this is part of a larger query.

Left (or INNER) join with a sub-query that has row numbers:
-- One row per client. The derived table numbers each client's status = 1 rows
-- inside the window from newest to oldest; joining on rn = 1 keeps only the
-- newest one. Clients with no qualifying row still appear, with NULL status
-- columns, because the rn = 1 test lives in the ON clause of the LEFT JOIN.
-- @StartDate / @EndDate are expected to be declared by the surrounding batch.
SELECT
    c.ID,
    c.NAME,
    rs.STATUS,
    rs.DATE
FROM CLIENTS AS c
LEFT JOIN (
    SELECT
        cs.CLIENT_ID,
        cs.STATUS,
        cs.DATE,
        ROW_NUMBER() OVER (PARTITION BY cs.CLIENT_ID ORDER BY cs.DATE DESC) AS rn
    FROM CLIENT_STATUS AS cs
    WHERE cs.STATUS = 1
      AND cs.DATE BETWEEN @StartDate AND @EndDate
) AS rs
    ON rs.CLIENT_ID = c.ID
   AND rs.rn = 1

You can use row_number() function :
-- TOP (1) WITH TIES keeps every row tied for first place in the ORDER BY.
-- The ORDER BY is a per-client ROW_NUMBER(), so the tied rows are exactly the
-- rows numbered 1: the latest status = 1 row of each client inside the window.
-- @StartDate / @EndDate are expected to be declared by the surrounding batch.
SELECT TOP (1) WITH TIES
    c.ID,
    c.NAME,
    cs.DATE
FROM CLIENTS AS c
INNER JOIN CLIENT_STATUS AS cs
    ON cs.CLIENT_ID = c.ID
WHERE cs.STATUS = 1
  AND cs.DATE >= @StartDate
  AND cs.DATE <= @EndDate
ORDER BY ROW_NUMBER() OVER (PARTITION BY c.ID ORDER BY cs.DATE DESC);

Related

How do I merge 2 rows into 1 row in SQL?

ReportDateTime EuId1 EuId2
2020-02-01 1:00 1576 Null
2020-02-01 1:00 Null 1579
2020-02-01 2:00 Null 1573
2020-02-01 2:00 1566 Null
This is what I have and this is what I want...
ReportDateTime EuId1 EuId2
2020-02-01 1:00 1576 1579
2020-02-01 2:00 1566 1573
Here is my code...
-- cteEq: entities e of type -200020 that are children (through a relation row
-- whose EntRelEffEnd is '12/31/2078') of some entity fac, and that have an
-- 'AirPermitXref_Diesel' attribute row.
;WITH cteEq AS (
    SELECT e.EntHID, e.EntCode, e.EntName
    FROM cfEntity fac (NOLOCK)
    JOIN cfEntityRelation er (NOLOCK)
        ON fac.EntHID = er.EntParentHID
       AND er.EntRelEffEnd = '12/31/2078'
    JOIN cfEntity e (NOLOCK)
        ON er.EntChildHID = e.EntHID
       AND e.EntTypeTID = -200020
    WHERE e.EntHID IN (
        SELECT ea.EaHID
        FROM cfEntityAttribute ea (NOLOCK)
        WHERE ea.EaKey = 'AirPermitXref_Diesel'
    )
)
-- One output row per hourly reading. The power value is placed in EuId1 or
-- EuId2 depending on the entity name; the other column is NULL on that row.
SELECT
    ReportDateTime = a.EqReportDate
    , EuId1 = CASE
        WHEN EntName LIKE '%EU1%' THEN a.EqPwr
      END
    , EuId2 = CASE
        WHEN EntName LIKE '%EU2%' THEN a.EqPwr
      END
FROM eqHourlyAir a (NOLOCK)
JOIN cteEq e (NOLOCK)
    ON a.EqHID = e.EntHID
-- dwi / dsl: readings, at the same report time, of the equipment referenced by
-- this entity's 'AirPermitXref_DWI' / 'AirPermitXref_Diesel' attribute value.
-- Neither is referenced in the SELECT list of this excerpt.
OUTER APPLY (
    SELECT ad.EqHID, ad.EqReportDate, ad.EqRunTime
    FROM eqHourlyAir ad (NOLOCK)
    LEFT JOIN cfEntityAttribute ea (NOLOCK)
        ON ad.EqHID = CAST(ea.EaValue AS INT)
       AND ea.EaKey = 'AirPermitXref_DWI'
    WHERE ea.EaHID = a.EqHID
      AND ad.EqReportDate = a.EqReportDate
) dwi
OUTER APPLY (
    SELECT ad.EqHID, ad.EqReportDate, ad.EqRunTime
    FROM eqHourlyAir ad (NOLOCK)
    LEFT JOIN cfEntityAttribute ea (NOLOCK)
        ON ad.EqHID = CAST(ea.EaValue AS INT)
       AND ea.EaKey = 'AirPermitXref_Diesel'
    WHERE ea.EaHID = a.EqHID
      AND ad.EqReportDate = a.EqReportDate
) dsl
-- @StartDate / @EndDate are expected to be declared by the surrounding batch.
WHERE CAST(a.EqReportDate AS DATE) >= @StartDate
  AND CAST(a.EqReportDate AS DATE) <= @EndDate
ORDER BY a.EqReportDate
Using the NOLOCK hint is not the best idea!
If, for each ReportDateTime, every column has at most one non-NULL value (the other rows being NULL), you can group by ReportDateTime and take the MAX value:
...
-- MAX() ignores NULLs, so grouping by the report time folds the row that
-- carries EuId1 and the row that carries EuId2 into a single row.
-- {...} stands for the unchanged FROM / JOIN / APPLY clauses of the question.
-- @StartDate / @EndDate are expected to be declared by the surrounding batch.
SELECT
    ReportDateTime = a.EqReportDate,
    EuId1 = MAX(CASE WHEN EntName LIKE '%EU1%' THEN a.EqPwr END),
    EuId2 = MAX(CASE WHEN EntName LIKE '%EU2%' THEN a.EqPwr END)
FROM
{...}
WHERE
    CAST(a.EqReportDate AS DATE) >= @StartDate
    AND CAST(a.EqReportDate AS DATE) <= @EndDate
GROUP BY a.EqReportDate
ORDER BY
    a.EqReportDate
From your sample data, it's very easy to select your desired result as below:
-- Sketch only: the join condition is left open ("....") on purpose.
-- Collapses all rows sharing a ReportDateTime into one, keeping the largest
-- (i.e. the non-NULL) value of each EuId column.
select
    ReportDateTime,
    max(EuId1) as EuId1,
    max(EuId2) as EuId2
from (
    select *
    from table1
    join table2 on ....
) t
group by ReportDateTime
But your query indicates that there is more to the story. Please share some more information.

SQL - Find if column dates include at least partially a date range

I need to create a report and I am struggling with the SQL script.
The table I want to query is a company_status_history table which has entries like the following (the ones that I can't figure out)
Table company_status_history
Columns:
| id | company_id | status_id | effective_date |
Data:
| 1 | 10 | 1 | 2016-12-30 00:00:00.000 |
| 2 | 10 | 5 | 2017-02-04 00:00:00.000 |
| 3 | 11 | 5 | 2017-06-05 00:00:00.000 |
| 4 | 11 | 1 | 2018-04-30 00:00:00.000 |
I want to answer to the question "Get all companies that have been at least for some point in status 1 inside the time period 01/01/2017 - 31/12/2017"
Above are the cases that I don't know how to handle since I need to add some logic of type :
"If this row is status 1 and it's date is before the date range check the next row if it has a date inside the date range."
"If this row is status 1 and it's date is after the date range check the row before if it has a date inside the date range."
I think this can be handled as a gaps and islands problem. Consider the following input data: (same as sample data of OP plus two additional rows)
id company_id status_id effective_date
-------------------------------------------
1 10 1 2016-12-15
2 10 1 2016-12-30
3 10 5 2017-02-04
4 10 4 2017-02-08
5 11 5 2017-06-05
6 11 1 2018-04-30
You can use the following query:
-- For every history row, cnt is the number of status 1 rows of the same
-- company with a strictly earlier effective_date.
SELECT
    t.id,
    t.company_id,
    t.status_id,
    t.effective_date,
    (
        SELECT COUNT(*)
        FROM company_status_history AS earlier
        WHERE earlier.status_id = 1
          AND earlier.company_id = t.company_id
          AND earlier.effective_date < t.effective_date
    ) AS cnt
FROM company_status_history AS t
ORDER BY t.company_id, t.effective_date
to get:
id company_id status_id effective_date cnt
-----------------------------------------------
1 10 1 2016-12-15 0
2 10 1 2016-12-30 1
3 10 5 2017-02-04 2
4 10 4 2017-02-08 2
5 11 5 2017-06-05 0
6 11 1 2018-04-30 0
Now you can identify status = 1 islands using:
-- Step 1 (CTE): attach to each row the count of earlier status 1 rows of the
-- same company.
-- Step 2: grp = per-company row number (by effective_date) minus that count.
;WITH CTE AS
(
    SELECT
        t.id,
        t.company_id,
        t.status_id,
        t.effective_date,
        (
            SELECT COUNT(*)
            FROM company_status_history AS earlier
            WHERE earlier.status_id = 1
              AND earlier.company_id = t.company_id
              AND earlier.effective_date < t.effective_date
        ) AS cnt
    FROM company_status_history AS t
)
SELECT
    id,
    company_id,
    status_id,
    effective_date,
    ROW_NUMBER() OVER (PARTITION BY company_id ORDER BY effective_date) - cnt AS grp
FROM CTE
Output:
id company_id status_id effective_date grp
-----------------------------------------------
1 10 1 2016-12-15 1
2 10 1 2016-12-30 1
3 10 5 2017-02-04 1
4 10 4 2017-02-08 2
5 11 5 2017-06-05 1
6 11 1 2018-04-30 2
Calculated field grp will help us identify those islands:
-- CTE  : each row plus the count of earlier status 1 rows of its company.
-- CTE2 : grp = per-company row number minus that count.
-- Final: one row per (company_id, grp) group that contains a status 1 row.
--        start_date is the group's earliest date; end_date is the day before
--        the group's latest date when the group has more than one row,
--        otherwise the same as start_date.
;WITH CTE AS
(
    SELECT
        t.id,
        t.company_id,
        t.status_id,
        t.effective_date,
        (
            SELECT COUNT(*)
            FROM company_status_history AS earlier
            WHERE earlier.status_id = 1
              AND earlier.company_id = t.company_id
              AND earlier.effective_date < t.effective_date
        ) AS cnt
    FROM company_status_history AS t
),
CTE2 AS
(
    SELECT
        id,
        company_id,
        status_id,
        effective_date,
        ROW_NUMBER() OVER (PARTITION BY company_id ORDER BY effective_date) - cnt AS grp
    FROM CTE
)
SELECT
    company_id,
    MIN(effective_date) AS start_date,
    CASE
        WHEN COUNT(*) > 1 THEN DATEADD(DAY, -1, MAX(effective_date))
        ELSE MIN(effective_date)
    END AS end_date
FROM CTE2
GROUP BY company_id, grp
HAVING SUM(CASE WHEN status_id = 1 THEN 1 ELSE 0 END) > 0
Output:
company_id start_date end_date
-----------------------------------
10 2016-12-15 2017-02-03
11 2018-04-30 2018-04-30
All you want now is those records from above that overlap with the specified interval.
Demo here with somewhat more complicated use case.
Maybe this is what you are looking for? For this kind of question, you need to join two instances of your table. In this case I am just joining with the next record by id, which is probably not entirely correct. To do it better, you can create a new id using a window function such as row_number, ordering the table by your required criteria.
If this row is status 1 and it's date is before the date range check
the next row if it has a date inside the date range
declare @range_st date = '2017-01-01';
declare @range_en date = '2017-12-31';

-- For a status 1 row dated before the range, report whether the NEXT row
-- (id + 1, same company) falls inside the range: 1 = yes, 0 = no.
-- T-SQL has no TRUE / FALSE literals, hence 1 / 0.
-- Every other row yields NULL.
select
    case
        when csh1.status_id = 1 and csh1.effective_date < @range_st
        then
            case
                when csh2.effective_date between @range_st and @range_en then 1
                else 0
            end
        else null
    end as next_row_in_range
from company_status_history csh1
left join company_status_history csh2   -- csh2 is the row after csh1
    on csh2.id = csh1.id + 1
   and csh2.company_id = csh1.company_id
Implementing second criteria:
"If this row is status 1 and it's date is after the date range check
the row before if it has a date inside the date range."
declare @range_st date = '2017-01-01';
declare @range_en date = '2017-12-31';

-- Rule 1: status 1 row dated before the range -> is the NEXT row in range?
-- Rule 2: status 1 row dated after the range  -> is the PREVIOUS row in range?
-- Result is 1 / 0 (T-SQL has no TRUE / FALSE literals).
select
    case
        when csh1.status_id = 1 and csh1.effective_date < @range_st
        then
            case
                when csh2.effective_date between @range_st and @range_en then 1
                else 0
            end
        when csh1.status_id = 1 and csh1.effective_date > @range_en
        then
            case
                when csh3.effective_date between @range_st and @range_en then 1
                else 0
            end
        else null -- row is not status 1, or is dated inside the range: neither rule applies
    end as neighbour_in_range
from company_status_history csh1
left join company_status_history csh2   -- csh2 is the row after csh1
    on csh2.id = csh1.id + 1
   and csh2.company_id = csh1.company_id
left join company_status_history csh3   -- csh3 is the row before csh1
    on csh3.id = csh1.id - 1
   and csh3.company_id = csh1.company_id
I would suggest the use of a cte and the window functions ROW_NUMBER. With this you can find the desired records. An example:
-- Self-contained demo: sample rows in a table variable.
DECLARE @t TABLE(
    id INT
    ,company_id INT
    ,status_id INT
    ,effective_date DATETIME
);
INSERT INTO @t (id, company_id, status_id, effective_date) VALUES
 (1, 10, 1, '2016-12-30 00:00:00.000')
,(2, 10, 5, '2017-02-04 00:00:00.000')
,(3, 11, 5, '2017-06-05 00:00:00.000')
,(4, 11, 1, '2018-04-30 00:00:00.000');

DECLARE @StartDate DATETIME = '2017-01-01';
DECLARE @EndDate DATETIME = '2017-12-31';

-- cte        : rows numbered per company in effective_date order.
-- cteLeadLag : each row plus the effective_date of the previous (Lag) and
--              next (Lead) row of the same company; when there is no such
--              neighbour the row's own date is used instead.
WITH cte AS(
    SELECT *
        ,ROW_NUMBER() OVER (PARTITION BY company_id ORDER BY effective_date) AS rn
    FROM @t
),
cteLeadLag AS(
    SELECT c.*
        ,ISNULL(c2.effective_date, c.effective_date) AS LagEffective
        ,ISNULL(c3.effective_date, c.effective_date) AS LeadEffective
    FROM cte c
    LEFT JOIN cte c2 ON c2.company_id = c.company_id AND c2.rn = c.rn - 1
    LEFT JOIN cte c3 ON c3.company_id = c.company_id AND c3.rn = c.rn + 1
)
-- Status 1 row dated inside the range.
SELECT 'Included' AS RangeStatus, *
FROM cteLeadLag
WHERE status_id = 1
  AND effective_date BETWEEN @StartDate AND @EndDate
UNION ALL
-- Status 1 row dated after the range whose previous row is inside the range.
SELECT 'Following' AS RangeStatus, *
FROM cteLeadLag
WHERE status_id = 1
  AND effective_date > @EndDate
  AND LagEffective BETWEEN @StartDate AND @EndDate
UNION ALL
-- Status 1 row dated before the range whose next row is inside the range.
-- The bound is @StartDate (not @EndDate) so that rows already reported as
-- 'Included' are not returned a second time.
SELECT 'Trailing' AS RangeStatus, *
FROM cteLeadLag
WHERE status_id = 1
  AND effective_date < @StartDate
  AND LeadEffective BETWEEN @StartDate AND @EndDate;
I first select all records with their leading and lagging Dates and then I perform your checks on the inclusion in the desired timespan.
Try with this, self-explanatory. Responds to this part of your question:
I want to answer to the question "Get all companies that have been at
least for some point in status 1 inside the time period 01/01/2017 -
31/12/2017"
Case that you want to find those id's that have been in any moment in status 1 and have records in the period requested:
-- Rows dated inside the period that belong to a company which has a status 1
-- row anywhere in its history. The membership test is on company_id: testing
-- the row id would only ever match the status 1 rows themselves.
SELECT *
FROM company_status_history
WHERE company_id IN
    ( SELECT company_id
      FROM company_status_history
      WHERE status_id = 1 )
  AND effective_date BETWEEN '2017-01-01' AND '2017-12-31'
Case that you want to find id's in status 1 and inside the period:
-- Status 1 rows whose effective_date lies in the period, both bounds included.
SELECT h.*
FROM company_status_history AS h
WHERE h.status_id = 1
  AND h.effective_date >= '2017-01-01'
  AND h.effective_date <= '2017-12-31'

SQL Join two tables by unrelated date

I’m looking to join two tables that do not have a common data point, but common value (date). I want a table that lists the date and total number of hired/terminated employees on that day. Example is below:
Table 1
Hire Date Employee Number Employee Name
--------------------------------------------
5/5/2018 10078 Joe
5/5/2018 10077 Adam
5/5/2018 10078 Steve
5/8/2018 10079 Jane
5/8/2018 10080 Mary
Table 2
Termination Date Employee Number Employee Name
----------------------------------------------------
5/5/2018 10010 Tony
5/6/2018 10025 Jonathan
5/6/2018 10035 Mark
5/8/2018 10052 Chris
5/9/2018 10037 Sam
Desired result:
Date Total Hired Total Terminated
--------------------------------------
5/5/2018 3 1
5/6/2018 0 2
5/7/2018 0 0
5/8/2018 2 1
5/9/2018 0 1
Getting the total count is easy; I'm just unsure of the best approach for "adding" a date column.
If you need all dates within some window then you need to join the data to a calendar. You can then left join and sum flags for data points.
-- Calendar bounds: earliest and latest date found in either table.
DECLARE @StartDate DATETIME = (
    SELECT MIN(ActionDate)
    FROM (
        SELECT MIN(HireDate) AS ActionDate FROM Table1
        UNION ALL
        SELECT MIN(TerminationDate) FROM Table2
    ) AS X
);
DECLARE @EndDate DATETIME = (
    SELECT MAX(ActionDate)
    FROM (
        SELECT MAX(HireDate) AS ActionDate FROM Table1
        UNION ALL
        SELECT MAX(TerminationDate) FROM Table2
    ) AS X
);

-- AllDates   : one row per day from @StartDate to @EndDate (recursive CTE).
-- Hired      : hires counted per day.
-- Terminated : terminations counted per day.
-- Each table is aggregated BEFORE it is joined to the calendar. Joining both
-- detail tables directly would pair every hire of a day with every
-- termination of that day and inflate both counts.
-- NOTE(review): the equality joins assume HireDate / TerminationDate carry no
-- time-of-day part - confirm against the real column types.
;WITH AllDates AS
(
    SELECT CalendarDate = @StartDate
    UNION ALL
    SELECT DATEADD(DAY, 1, CalendarDate)
    FROM AllDates
    WHERE DATEADD(DAY, 1, CalendarDate) <= @EndDate
),
Hired AS
(
    SELECT HireDate, COUNT(*) AS TotalHired
    FROM Table1
    GROUP BY HireDate
),
Terminated AS
(
    SELECT TerminationDate, COUNT(*) AS TotalTerminated
    FROM Table2
    GROUP BY TerminationDate
)
SELECT
    D.CalendarDate,
    TotalHired = COALESCE(H.TotalHired, 0),            -- 0, not NULL, on quiet days
    TotalTerminated = COALESCE(T.TotalTerminated, 0)
FROM
    AllDates AS D
    LEFT JOIN Hired AS H ON H.HireDate = D.CalendarDate
    LEFT JOIN Terminated AS T ON T.TerminationDate = D.CalendarDate
/* If you only want dates with data points then uncomment out the where clause
WHERE
    NOT (H.HireDate IS NULL AND T.TerminationDate IS NULL)
*/
ORDER BY
    D.CalendarDate
-- The default recursion limit is 100 levels; 0 removes it so that ranges
-- longer than about 100 days do not fail.
OPTION (MAXRECURSION 0);
I would do this with a union all and aggregations:
-- Stack hires and terminations into one event list with a 1/0 flag per kind,
-- then total the flags per date. Dates with no event do not appear.
with events as (
    select hiredate as dte, 1 as is_hired, 0 as is_termed
    from table1
    union all
    select terminationdate, 0, 1
    from table2
)
select
    dte,
    sum(is_hired) as num_hired,
    sum(is_termed) as num_termed
from events
group by dte
order by dte;
This does not include the "missing" dates. If you want those, a calendar or recursive CTE works. For instance:
-- ht : hires and terminations totalled per date (only dates with an event).
-- d  : every day from the earliest to the latest date in ht (recursive CTE).
-- Final select: left join fills the gap days, with 0 for both counts.
with ht as (
    select dte, sum(is_hired) as num_hired, sum(is_termed) as num_termed
    from (
        select hiredate as dte, 1 as is_hired, 0 as is_termed from table1
        union all
        select terminationdate, 0 as is_hired, 1 as is_termed from table2
    ) events
    group by dte
),
d as (
    select min(dte) as dte, max(dte) as max_dte
    from ht
    union all
    select dateadd(day, 1, dte), max_dte
    from d
    where dte < max_dte
)
select
    d.dte,
    coalesce(ht.num_hired, 0) as num_hired,
    coalesce(ht.num_termed, 0) as num_termed
from d
left join ht
    on d.dte = ht.dte
order by d.dte
-- The default recursion limit is 100 levels; 0 removes it for longer ranges.
option (maxrecursion 0);
Try this one
-- a: hires per day; b: terminations per day. The FULL OUTER JOIN keeps days
-- that appear on only one side; the missing side's count is reported as 0.
-- Days with neither a hire nor a termination do not appear.
SELECT
    COALESCE(a.the_date, b.the_date) AS Date,
    COALESCE(a.Total_Hire, 0) AS Total_Hire,
    COALESCE(b.Total_Terminate, 0) AS Total_terminate
FROM (
    SELECT Hire_date AS the_date, COUNT(1) AS Total_Hire
    FROM TABLE_HIRE
    GROUP BY Hire_date
) a
FULL OUTER JOIN (
    SELECT Termination_Date AS the_date, COUNT(1) AS Total_Terminate
    FROM TABLE_TERMINATE
    GROUP BY Termination_Date
) b
    ON a.the_date = b.the_date

Selecting first entry per day

My table will be structured like this
temp
ID | Date
---|-----------
1 | 2018-01-01
2 | 2018-01-01
3 | 2018-01-01
4 | 2018-01-02
5 | 2018-01-02
6 | 2018-01-03
And I will have an input from the user for start and end dates:
@StartDate DATE = '2018-01-01'
@EndDate DATE = '2018-01-03'
And I want my return structured like so:
ID | Date
---|-----------
1 | 2018-01-01
4 | 2018-01-02
6 | 2018-01-03
I've tried doing this:
select distinct temp.ID, joinTable.Date
from temp
inner join (
    -- The group key includes ID as well as Date, so when ID is unique each
    -- source row forms its own group and nothing is reduced to one per day.
    select min(innerTemp.Date) as Date, innerTemp.ID
    from temp innerTemp
    where innerTemp.Date >= @StartDate
    and innerTemp.Date < @EndDate
    group by innerTemp.ID, innerTemp.Date
) as joinTable on joinTable.ID = temp.ID and joinTable.Date = temp.Date
where temp.Date >= @StartDate
and temp.Date < @EndDate
order by temp.Date desc
To try to join the table to itself with only one entry per day then choose from that but that isn't working. I am pretty stumped on this one. Any ideas?
That seems very complicated. This returns the result set you want:
-- Smallest id of each day. Both bounds are inclusive so that the row dated
-- @EndDate (2018-01-03 in the expected output) is returned.
select min(id) as id, date
from temp
where date >= @StartDate and date <= @EndDate
group by date;
If you have other columns you want to keep (so group by is not appropriate), a simple method with good performance is:
-- Keeps the whole row whose id is the smallest for its date. The range filter
-- sits inside the subquery: for a date outside the range the subquery yields
-- NULL, the comparison is not true, and the row is dropped.
-- Both bounds are inclusive so that rows dated @EndDate are returned.
select t.*
from temp t
where t.id = (
    select min(t2.id)
    from temp t2
    where t2.date = t.date
      and t2.date >= @StartDate
      and t2.date <= @EndDate
);
Of course, you can also use row_number(), but with an index on temp(date, id) and temp(id), the above should be pretty fast.
-- Number the rows of each day by ascending id, then keep number 1.
-- @StartDate / @EndDate (both inclusive) are the user-supplied bounds.
WITH cte AS
(
    SELECT
        t.id
        , t.date
        , ROW_NUMBER() OVER (PARTITION BY t.date ORDER BY t.id ASC) AS rn
    FROM
        temp AS t
    WHERE
        t.date >= @StartDate
        AND t.date <= @EndDate
)
SELECT
    id,
    date
FROM
    cte
WHERE
    rn = 1

Finding missing dates compared to date range

I have one table (A) with date ranges and another (B) with just a set date. There are missing months in B that are within the date range of A. I need to identify the missing months.
A
Person StartDate EndDate
123 1/1/2016 5/1/2016
B
Person EffectiveDate
123 1/1/2016
123 2/1/2016
123 4/1/2016
123 5/1/2016
Expected result would be
123 3/1/2016
I'm using SQL Server 2012. Any assistance would be appreciated. Thanks!
One approach is to generate all values between the two dates. Here is an approach using a numbers table:
-- n: 0, 1, 2, ... (one value per row of master.spt_values), used as offsets.
-- For each row of a, step from startdate in whole months up to enddate and
-- keep the steps that have no row in b for the same person and exact date.
-- Monthly steps match the data in the question (first-of-month dates).
with n as (
    select row_number() over (order by (select null)) - 1 as n
    from master.spt_values
)
select
    a.person,
    dateadd(month, n.n, a.startdate) as missingdate
from a
inner join n
    on dateadd(month, n.n, a.startdate) <= a.enddate
left join b
    on b.person = a.person
   and b.effectivedate = dateadd(month, n.n, a.startdate)
where b.person is null;
Try this:
-- Sample data in temp tables. Dates use the YYYYMMDD form, which does not
-- depend on the session's DATEFORMAT / language setting.
CREATE TABLE #A (Person INT, StartDate DATE, EndDate DATE);
INSERT INTO #A (Person, StartDate, EndDate)
SELECT 123, '20160101', '20160501';

CREATE TABLE #B (Person INT, EffectiveDate DATE);
INSERT INTO #B (Person, EffectiveDate)
SELECT 123, '20160101' UNION ALL
SELECT 123, '20160201' UNION ALL
SELECT 123, '20160401' UNION ALL
SELECT 123, '20160501';

-- A1: recursive CTE producing one row per month from StartDate to EndDate
-- for every person in #A.
;WITH A1
AS (
    SELECT PERSON, StartDate, EndDate
    FROM #A
    UNION ALL
    SELECT PERSON, DATEADD(MM, 1, STARTDATE), EndDate
    FROM A1
    WHERE DATEADD(MM, 1, STARTDATE) <= EndDate
)
-- Months for which the person has no #B row in the same year and month.
SELECT PERSON, StartDate
FROM A1
WHERE
    NOT EXISTS
    (
        SELECT 1
        FROM #B B1
        WHERE B1.Person = A1.PERSON
          AND YEAR(B1.EffectiveDate) = YEAR(A1.STARTDATE)
          AND MONTH(B1.EffectiveDate) = MONTH(A1.STARTDATE)
    )
This should work if you are interested in getting missing months
-- n: 0, 1, 2, ... (one value per row of master.dbo.spt_values), used as
-- month offsets from a.startdate.
;WITH n
AS (
    SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 AS n
    FROM master.dbo.spt_values
)
-- Months between startdate and enddate for which the SAME person has no row
-- in b with that year and month. Without the person match, a row belonging
-- to any other person would hide a genuinely missing month.
SELECT
    a.person,
    DATEADD(MONTH, n.n, a.startdate) AS missingdate
FROM a a
INNER JOIN n
    ON DATEADD(MONTH, n.n, a.startdate) <= a.enddate
LEFT JOIN b b
    ON b.person = a.person
   AND MONTH(DATEADD(MONTH, n.n, a.startdate)) = MONTH(b.effectivedate)
   AND YEAR(DATEADD(MONTH, n.n, a.startdate)) = YEAR(b.effectivedate)
WHERE b.person IS NULL;