select rows in sql with end_date >= start_date for each ID repeated multiple times - sql

The attached image shows what the data looks like. My table has three columns (id, start date, and end date) with values like this:
id start date end date
-------------------------------
100 2015-01-01 2015-12-31
100 2016-01-10 2018-12-31
200 2015-02-15 2016-03-15
200 2016-03-15 2016-12-31
300 2016-01-01 2016-12-31
400 2017-01-01 2017-12-31
500 2017-02-01 2017-12-31
600 2017-01-15 2017-03-05
600 2017-02-01 2018-12-31
I want my output to be
id start date end date
--------------------------------
100 2015-01-01 2015-12-31
100 2016-01-10 2018-12-31
200 2015-02-15 2016-12-31
300 2016-01-01 2016-12-31
400 2017-01-01 2017-12-31
500 2017-02-01 2017-12-31
600 2017-01-15 2018-12-31
Query:
-- Returns id followed by all columns for every row of dbo.test_sl whose id
-- has at least one row with end_date >= start_date.
select
    t.id,
    t.*
from dbo.test_sl as t
where exists (
    select 1
    from dbo.test_sl as chk
    where chk.id = t.id
      and chk.end_date >= chk.start_date
)
Please help me get the output I am looking for.

This is an example of a gaps-and-islands problem. In this case, you want to find adjacent rows that do not overlap for the same id. These are the starts of groups. A cumulative sum of the group-start flags then provides a grouping number, which can be used for aggregation.
In a query, this looks like:
-- Gaps-and-islands: collapse overlapping date ranges of test_sl per id.
-- Column names follow the question's table (start_date / end_date).
select id,
       min(start_date) as start_date,
       max(end_date) as end_date
from (select flagged.*,
             -- cumulative sum of the start flags numbers the islands within each id
             sum(is_start) over (partition by id order by start_date) as grp
      from (select t.*,
                   -- 0 when an earlier-starting row of the same id reaches
                   -- this row's start_date, 1 when this row opens a new island
                   (case when exists (select 1
                                      from test_sl t2
                                      where t2.id = t.id and
                                            t2.start_date < t.start_date and
                                            t2.end_date >= t.start_date
                                     )
                         then 0 else 1
                    end) as is_start
            from test_sl t
           ) flagged
     ) islands
group by id, grp;

Assuming that only two records can be combined together, you can LEFT JOIN the table with itself and then use a CASE to display the end date of the self-joined record, if available.
-- Merges overlapping ranges per id under the stated assumption that at most
-- two records can be combined. Each row is left-joined to a later-starting
-- row of the same id that begins on or before this row's end_date; when such
-- a row exists its end_date is used, otherwise the row's own end_date.
-- test_sl replaces the original placeholder name "table", which is a
-- reserved word and cannot be used unquoted.
SELECT
    t1.id,
    MIN(t1.start_date) AS start_date,
    COALESCE(t2.end_date, t1.end_date) AS end_date
FROM test_sl AS t1
LEFT JOIN test_sl AS t2
    ON t2.id = t1.id
    AND t2.start_date > t1.start_date
    AND t2.start_date <= t1.end_date
GROUP BY
    t1.id,
    COALESCE(t2.end_date, t1.end_date)
ORDER BY t1.id
Tested in this SQL Fiddle

Here's a solution that uses a Recursive CTE.
It basically loops through the dates per id, and keeps the smallest start_date for the overlapping end_date/start_date.
Then the result is grouped so there are no more overlaps.
Test here on rextester.
-- Merges overlapping date ranges per id with a recursive CTE.
-- SRC numbers each id's rows by start_date; RCTE walks them in that order,
-- carrying the start of the current island and the furthest end_date seen
-- in it.
WITH SRC AS
(
    SELECT id, start_date, end_date,
           row_number() over (partition by id order by start_date) as rn
    FROM test_sl
)
, RCTE AS
(
    SELECT id, rn, start_date, end_date
    FROM SRC
    WHERE rn = 1
    UNION ALL
    SELECT t.id, t.rn,
           -- still inside the current island: keep its start
           CASE WHEN r.end_date >= t.start_date THEN r.start_date ELSE t.start_date END,
           -- carry the running maximum end_date so a short range nested in an
           -- earlier, longer one does not cut the island short
           CASE WHEN r.end_date >= t.start_date AND r.end_date > t.end_date
                THEN r.end_date ELSE t.end_date END
    FROM RCTE r
    JOIN SRC t ON t.id = r.id AND t.rn = r.rn + 1
)
SELECT id, start_date, max(end_date) as end_date
FROM RCTE
GROUP BY id, start_date
ORDER BY id, start_date
-- one recursion level per row of an id; lift the default limit of 100 levels
OPTION (MAXRECURSION 0);

Related

SQL Between date column filed not null

I would like to count all unique customers that were active on 2019-01-01 with the condition that they also were active in the subsequent 3 days.
Main table
date customer_id time_spent_online_min
2019-01-01 1 5
2019-01-01 2 6
2019-01-01 3 4
2019-01-02 1 7
2019-01-02 2 5
2019-01-03 3 3
2019-01-04 1 4
2019-01-04 2 6
Output table
date total_active_customers
2019-01-01 2
This is what I have tried so far:
-- Counts distinct non-null customers per date among main_table rows dated
-- 2019-01-01 through 2019-01-04, then keeps only the 2019-01-01 group.
select
    date,
    count(distinct customer_id) as total_active_customers
from (
    select
        customer_id,
        date,
        time_spent_online_min
    from main_table
    where date >= date '2019-01-01'
      and date <= date '2019-01-04'
      and customer_id is not null
) as window_rows
where date = date '2019-01-01'
group by date
If you have only one record per day, you can use lead():
-- For each row, date_3 is the date found three rows later for the same
-- customer (ordered by date). Counts the 2019-01-01 rows whose date_3 is
-- 2019-01-04.
with ahead as (
    select t.*,
           lead(date, 3) over (partition by customer_id order by date) as date_3
    from main_table t
)
select date, count(*)
from ahead
where date_3 = '2019-01-04'
  and date = '2019-01-01'
group by date;
If you can have more than one record per day, then aggregate and then use lead():
-- Same check as the single-record-per-day version, but first collapses
-- main_table to one row per (customer_id, date) so lead() steps over days
-- rather than over duplicate records.
select date, count(*)
from (select t.*, lead(date, 3) over (partition by customer_id order by date) as date_3
      from (select customer_id, date, sum(time_spent_online_min) as time_spent_online_min
            from main_table t  -- was "maintable"; the table is main_table everywhere else
            group by customer_id, date
           ) t
     ) t
where date = '2019-01-01' and
      date_3 = '2019-01-04'
group by date;
You can also easily expand this to any dates:
-- For every date, counts the rows whose third-next row for the same customer
-- (ordered by date) falls exactly three days later.
with ahead as (
    select t.*,
           lead(date, 3) over (partition by customer_id order by date) as date_3
    from main_table t
)
select date, count(*)
from ahead
where date_3 = date + interval '3' day
group by date;
I would use exists logic here:
-- Counts the 2019-01-01 rows of main_table whose customer also has a row on
-- each of 2019-01-02, 2019-01-03 and 2019-01-04.
SELECT COUNT(*)
FROM main_table t1
WHERE t1.date = '2019-01-01'
  AND EXISTS (SELECT 1
              FROM main_table day2
              WHERE day2.date = '2019-01-02'
                AND day2.customer_id = t1.customer_id)
  AND EXISTS (SELECT 1
              FROM main_table day3
              WHERE day3.date = '2019-01-03'
                AND day3.customer_id = t1.customer_id)
  AND EXISTS (SELECT 1
              FROM main_table day4
              WHERE day4.date = '2019-01-04'
                AND day4.customer_id = t1.customer_id);
This answer assumes that a given customer would only have one record for one date of activity.
WITH
-- sample rows from the question, inlined so the query is self-contained
input(dt,customer_id,time_spent_online_min) AS (
          SELECT DATE '2019-01-01',1,5
UNION ALL SELECT DATE '2019-01-01',2,6
UNION ALL SELECT DATE '2019-01-01',3,4
UNION ALL SELECT DATE '2019-01-02',1,7
UNION ALL SELECT DATE '2019-01-02',2,5
UNION ALL SELECT DATE '2019-01-03',3,3
UNION ALL SELECT DATE '2019-01-04',1,4
UNION ALL SELECT DATE '2019-01-04',2,6
)
,
-- per row: number of the customer's rows dated from this row's dt up to
-- three days after it
activity_window AS (
  SELECT
    *
  , COUNT(customer_id) OVER(
      PARTITION BY customer_id ORDER BY dt
      RANGE BETWEEN CURRENT ROW AND INTERVAL '3 DAY' FOLLOWING
    ) AS days_active
  FROM input
)
-- keep 2019-01-01 rows with at least three rows inside that window
SELECT
  dt
, COUNT(*) AS total_active_customers
FROM activity_window
WHERE days_active >= 3
  AND dt='2019-01-01'
GROUP BY dt
;
-- out dt | total_active_customers
-- out ------------+------------------------
-- out 2019-01-01 | 2

How to combine date ranges in SQL with small gaps

I have a dataset where each row has a date range. I want to combine records into single date ranges if they overlap or there's a gap of less than 30 days and they share the same ID number. If it's more than 30 days, I want them to remain separate. I can figure out how to do it if they are overlapping, and I can figure out how to do it no matter the size of the gap, but I can't figure out how to do it with a limited gap allowance.
So, for example, if my data looks like this:
ID Date1 Date2
ABC 2018-01-01 2018-02-14
ABC 2018-02-13 2018-03-17
ABC 2018-04-01 2018-07-24
DEF 2017-01-01 2017-06-30
DEF 2017-10-01 2017-12-01
I want it to come out like this:
ID Date1 Date2
ABC 2018-01-01 2018-07-24
DEF 2017-01-01 2017-06-30
DEF 2017-10-01 2017-12-01
The three date ranges for ABC are combined, because they either overlap or the gaps are less than 30 days. The two date ranges for DEF stay separate, because the gap between them is larger than 30 days.
I'm using Microsoft SSMS.
You can identify where the new periods begin. For a general problem, I would go with not exists. Then you can assign a group using cumulative sums:
-- Assigns a group number to each date range of table t: ranges of the same
-- id that overlap, or are separated by a gap of less than 30 days, share a
-- grp value.
select id, date1, date2,
       -- cumulative sum of the start flags numbers the groups within each id
       sum(is_start) over (partition by id order by date1) as grp
from (select t.*,
             -- a row starts a new group when no earlier-starting row of the
             -- same id ends later than 30 days before this row's date1
             (case when not exists (select 1
                                    from t t2
                                    where t2.id = t.id and
                                          t2.date1 < t.date1 and
                                          t2.date2 > dateadd(day, -30, t.date1)
                                   )
                   then 1 else 0
              end) as is_start
      from t
     ) t;
The final step is aggregation:
-- Combines the date ranges of table t per id when they overlap or are
-- separated by a gap of less than 30 days.
with g as (
      -- date1/date2 must be carried through so the final aggregation can read them
      select id, date1, date2,
             -- cumulative sum of the start flags numbers the groups within each id
             sum(is_start) over (partition by id order by date1) as grp
      from (select t.*,
                   -- a row starts a new group when no earlier-starting row of
                   -- the same id ends later than 30 days before this row's date1
                   (case when not exists (select 1
                                          from t t2
                                          where t2.id = t.id and
                                                t2.date1 < t.date1 and
                                                t2.date2 > dateadd(day, -30, t.date1)
                                         )
                         then 1 else 0
                    end) as is_start
            from t
           ) t
     )
select id, min(date1) as date1, max(date2) as date2
from g
group by id, grp;

get max date when sum of a field equals a value

I have a problem with writing a query.
Row data is as follow :
DATE CUSTOMER_ID AMOUNT
20170101 1 150
20170201 1 50
20170203 1 200
20170204 1 250
20170101 2 300
20170201 2 70
I want to know when(which date) the sum of amount for each customer_id becomes more than 350,
How can I write this query to have such a result ?
CUSTOMER_ID MAX_DATE
1 20170203
2 20170201
Thanks,
Simply use ANSI/ISO standard window functions to calculate the running sum:
-- Returns, per customer, the row on which the running total of amount
-- (ordered by date) first becomes more than 350: the total is above 350 on
-- this row but was not before this row's amount was added.
select t.*
from (select t.*,
             sum(t.amount) over (partition by t.customer_id order by t.date) as running_amount
      from t
     ) t
where running_amount - amount <= 350 and
      running_amount > 350;  -- strict: the question asks for "more than 350"
If for some reason, your database doesn't support this functionality, you can use a correlated subquery:
-- Same result as the window-function form, for databases without window
-- functions: the running total is computed by a correlated subquery over
-- the customer's rows dated on or before the current row.
select t.*
from (select t.*,
             (select sum(t2.amount)
              from t t2
              where t2.customer_id = t.customer_id and
                    t2.date <= t.date
             ) as running_amount
      from t
     ) t
where running_amount - amount <= 350 and
      running_amount > 350;  -- strict: the question asks for "more than 350"
ANSI SQL
Used for the test: TSQL and MS SQL Server 2012
-- Per customer, the earliest DATE at which the sum of AMOUNT over that
-- customer's rows up to and including the date exceeds 350.
select
    totals."CUSTOMER_ID",
    min(totals."DATE")
from
(
    select
        base."CUSTOMER_ID",
        base."DATE",
        (
            -- running total for this customer up to the current row's date
            select sum(seen."AMOUNT")
            from "TABLE01" seen
            where seen."DATE" <= base."DATE"
              and seen."CUSTOMER_ID" = base."CUSTOMER_ID"
        ) "RUNNING_AMOUNT"
    from "TABLE01" base
) totals
where totals."RUNNING_AMOUNT" > 350
group by totals."CUSTOMER_ID"
GO
CUSTOMER_ID | (No column name)
----------: | :------------------
1 | 03/02/2017 00:00:00
2 | 01/02/2017 00:00:00
db<>fiddle here
DB-Fiddle
-- Per customer, the earliest DATE whose running SUM of AMOUNT (all of the
-- customer's rows up to and including that DATE) is above 350.
SELECT
    running.`CUSTOMER_ID`,
    MIN(running.`DATE`) AS MAX_DATE
FROM (
    SELECT
        cur.`DATE`,
        cur.`CUSTOMER_ID`,
        cur.`AMOUNT`,
        (
            SELECT SUM(earlier.`AMOUNT`)
            FROM tbl earlier
            WHERE earlier.`CUSTOMER_ID` = cur.`CUSTOMER_ID`
              AND earlier.`DATE` <= cur.`DATE`
        ) AS SUM_UP
    FROM `tbl` cur
) running
WHERE running.`SUM_UP` > 350
GROUP BY running.`CUSTOMER_ID`
Explanation:
First, for each row, a correlated subquery sums the amounts of all rows of the same customer whose DATE is earlier than or equal to the current row's DATE. From this table I select the MIN date that has a running sum of > 350.
I don't think this is an easy calculation, so I want to work through it step by step, even if the result looks a little convoluted. Once a first version works for your scenario, I believe its performance can be improved. If anybody can improve my query, please edit my post.
Unfortunately, I could not try the solution below on a computer, but I expect it to give you the expected result:
-- [TABLE] is the placeholder name used for the source table; it is
-- bracket-quoted because TABLE is a reserved word.

-- Get the start date of each customer
SELECT MIN([DATE]) AS [DATE]
      ,CUSTOMER_ID
INTO #table
FROM [TABLE]
GROUP BY CUSTOMER_ID;

-- For every date of a customer, total that customer's amounts from the start
-- date up to that date, and keep the dates where the total is greater than 350.
-- The start date itself is included so a first amount above 350 is not missed.
SELECT s.CUSTOMER_ID
      ,r.total
      ,t2.[DATE] AS [DATE]
INTO #tableCalculated
FROM #table s
INNER JOIN [TABLE] t2 ON t2.CUSTOMER_ID = s.CUSTOMER_ID
CROSS APPLY (SELECT SUM(t3.AMOUNT) AS total
             FROM [TABLE] t3
             WHERE t3.CUSTOMER_ID = s.CUSTOMER_ID
               AND t3.[DATE] BETWEEN s.[DATE] AND t2.[DATE]) r
WHERE r.total > 350;

-- Select the earliest qualifying date per CUSTOMER_ID
SELECT CUSTOMER_ID, MIN([DATE]) AS [DATE]
FROM #tableCalculated
GROUP BY CUSTOMER_ID;
-- Per customer, the earliest DATE at which the running total of AMOUNT
-- (all of the customer's rows up to and including that DATE) is more than 350.
SELECT CUSTOMER_ID, MIN(DATE) AS GOALDATE
FROM ( SELECT cd1.*, (SELECT SUM(cd2.AMOUNT)
                      FROM CustData cd2
                      WHERE cd2.CUSTOMER_ID = cd1.CUSTOMER_ID
                        AND cd2.DATE <= cd1.DATE) AS RUNNINGTOTAL
       FROM CustData cd1) AS custdata2
WHERE RUNNINGTOTAL > 350  -- strict: the question asks for "more than 350"
GROUP BY CUSTOMER_ID
DB Fiddle

SQL Server- find all records within a certain date (not that straightforward!)

Ok. My SQL is pretty pants so I'm struggling to get my head around this.
I have a table that stores records complete with a time stamp.
What I want, is a list of uids where there are 2 or more records for that user within a time frame of 1 second of each other. Maybe I've made it more complicated in my head, just cannot figure it out.
Shortened version of table (pk ignored)
uid date
1 2015-01-01 10:00:30.020*
1 2015-01-01 10:00:30.300*
1 2015-01-01 10:00:30.500*
1 2015-01-01 10:00:39.000
1 2015-01-01 10:00:35.000
1 2015-01-01 10:00:37.800
2 2015-02-02 12:00:30.000
2 2015-02-02 14:00:30.000
2 2015-02-02 15:00:30.000
2 2015-02-02 18:00:30.000
3 2015-03-02 15:00:24.000
3 2015-03-02 15:00:20.000 *
3 2015-03-02 15:00:20.300 *
I've marked * next to the records I'd expect to match.
The results list I'd like is just a list of uid, so the result I'd want would just be
1
3
You can do this with exists:
-- uids having at least one pair of rows where the later row's date is no
-- more than one second after the earlier row's date.
select distinct t.uid
from t
where exists (
    select 1
    from t later
    where later.uid = t.uid
      and later.date <= t.date + interval 1 second
      and later.date > t.date
);
Note: The syntax for adding 1 second varies by database. But the above gives the idea for the logic.
In SQL Server, the syntax is:
-- SQL Server form: uids having at least one pair of rows where the later
-- row's date is no more than one second after the earlier row's date.
select distinct t.uid
from t
where exists (
    select 1
    from t later
    where later.uid = t.uid
      and later.date <= dateadd(second, 1, t.date)
      and later.date > t.date
);
EDIT:
Or, in SQL Server 2012+, a faster alternative is to use lead() or lag():
-- uids where some row's next row (same uid, ordered by date) falls within
-- one second of it. The bound is inclusive (<=) to match the EXISTS form
-- of this query, which accepts a row exactly one second later.
select distinct uid
from (select t.*, lead(date) over (partition by uid order by date) as next_date
      from t
     ) t
where next_date <= dateadd(second, 1, date);
If you want the records, not just the uids, then you need to get both:
-- Returns the rows themselves (plus prev_date / next_date): a row qualifies
-- when its previous or its next row for the same uid lies within one second.
with neighbours as (
    select t.*,
           lag(date) over (partition by uid order by date) as prev_date,
           lead(date) over (partition by uid order by date) as next_date
    from t
)
select n.*
from neighbours n
where n.prev_date >= dateadd(second, -1, n.date)
   or n.next_date <= dateadd(second, 1, n.date);

Combine Two Rows into One with Similar fields (DateTime) and NULL Values in SQL

Could anyone help me with the request below?
I have one row for the login DateTime and another row for the logout DateTime; the rest of the fields are the same. I need to combine both rows into one with Login (DateTime) and Logout (DateTime).
Sample Data
ID Code DateTime User Status
35 100 1/1/2014 14:50 a IN
35 100 1/1/2014 15:45 a OUT
35 100 1/1/2014 18:20 a IN
35 100 1/1/2014 19:10 a OUT
Result should look like below
ID Code Datetime1 Datetime2 User
35 100 2014-01-01 14:50 2014-01-01 15:45 a
35 100 2014-01-01 18:20 2014-01-01 19:10 a
Thank you.
Use the ROW_NUMBER() windowing function to determine the closest 'OUT' status for each 'IN' iteration:
-- Pairs each 'IN' row with every later 'OUT' row sharing ID, Code and User,
-- numbers those candidates by OUT time, and keeps the earliest one.
WITH candidates AS (
    SELECT
        t1.ID,
        t1.Code,
        t1.[Datetime] AS Datetime1,
        tNext.[Datetime] AS Datetime2,
        t1.[User],
        ROW_NUMBER() OVER (
            PARTITION BY t1.ID, t1.Code, t1.[User], t1.[Datetime]
            ORDER BY tNext.[Datetime]
        ) AS rowNum
    FROM myTable t1
    INNER JOIN myTable tNext
        ON tNext.ID = t1.ID
        AND tNext.Code = t1.Code
        AND tNext.[User] = t1.[User]
        AND tNext.Status = 'OUT'
        AND tNext.[Datetime] > t1.[Datetime]
    WHERE t1.Status = 'IN'
)
SELECT *
FROM candidates
WHERE rowNum = 1
ORDER BY ID, Code, [User], Datetime1
SQLFiddle here
This finds the next date/time with an 'OUT' after each 'IN' :
(simplified to match small data sample, extra code required)
-- Finds, for each 'IN' row, the earliest later 'OUT' row with the same ID,
-- Code and user.
-- The sample timestamps are typed as datetime: compared as strings,
-- '1/1/2014 9:10' would sort after '1/1/2014 14:50'.
With YourData as (
    SELECT 35 as ID, 100 as Code, CAST('2014-01-01T14:50:00' AS datetime) as yDatetime,
           'a' as yUser, 'IN' AS status UNION ALL
    SELECT 35, 100, CAST('2014-01-01T15:45:00' AS datetime), 'a', 'OUT' UNION ALL
    SELECT 35, 100, CAST('2014-01-01T18:20:00' AS datetime), 'a', 'IN' UNION ALL
    SELECT 35, 100, CAST('2014-01-01T19:10:00' AS datetime), 'a', 'OUT'
)
SELECT
    yd.ID,
    yd.Code,
    yd.yDatetime AS When_IN,
    (SELECT Min(yd2.yDatetime)
     FROM YourData yd2
     WHERE yd2.yDatetime > yd.yDatetime
       AND yd2.status = 'OUT'
       -- restrict to the same ID / Code / user so sessions are not mixed
       AND yd2.ID = yd.ID
       AND yd2.Code = yd.Code
       AND yd2.yUser = yd.yUser
    ) AS When_OUT,
    yd.yUser as _User
FROM YourData yd
WHERE yd.status = 'IN'
Results :
35 100 1/1/2014 14:50 1/1/2014 15:45 a
35 100 1/1/2014 18:20 1/1/2014 19:10 a
Try
-- One row per login: each 'IN' row is matched to the earliest later 'OUT'
-- row with the same user, id and code. Without the time condition every IN
-- row would pair with every OUT row of that user/id/code.
-- myTable stands for the login/logout table (the original placeholder
-- "table" is a reserved word); [datetime] and [user] are bracket-quoted for
-- the same reason.
select
    a.id,
    a.code,
    a.[datetime] as datetime1,
    min(b.[datetime]) as datetime2,
    a.[user]
from myTable a
inner join myTable b
    on b.[user] = a.[user]
    and b.id = a.id
    and b.code = a.code
    and b.status = 'OUT'
    and b.[datetime] > a.[datetime]
where a.status = 'IN'
group by
    a.id,
    a.code,
    a.[datetime],
    a.[user]
try this
-- For each 'IN' row, looks up the earliest later 'OUT' row with the same id,
-- user and code.
-- myTable stands for the login/logout table (the original placeholder
-- "TABLE" is a reserved word). The time comparison uses the [DateTime]
-- column; the sample data has no "data" column.
SELECT lin.ID, lin.CODE, lin.[USER], lin.[DateTime] as LoginDate,
       (select top 1 lout.[DateTime]
        from myTable lout
        where lout.[DateTime] > lin.[DateTime]
          and lout.id = lin.id
          and lout.[user] = lin.[user]
          and lout.code = lin.code
          and lout.status = 'OUT'
        order by lout.[DateTime]
       ) as LogOutDate
FROM myTable lin
where lin.status = 'IN'