Related
I am trying to solve the following problem. This is `table1`, and I am trying to produce the output shown below. I have not been able to work out the logic for fetching the start time and end time from the same `timestemp` column in SQL.
-- Sample readings for the question: one row per reading of a batch.
-- `mo` is the mode in effect at `timestemp`.
CREATE TABLE table1 (
    `batch`     INTEGER,
    `timestemp` TIME,      -- a real TIME (not VARCHAR) so MIN/MAX and time arithmetic are type-safe
    `mo`        INTEGER,
    `speed`     INTEGER
);

-- Integer columns receive numeric literals, so nothing relies on implicit
-- string-to-integer coercion.
INSERT INTO table1
    (`batch`, `timestemp`, `mo`, `speed`)
VALUES
    (1, '00:18:00', 0, 0),
    (1, '01:18:00', 0, 0),
    (1, '02:18:00', 0, 0),
    (1, '03:18:00', 1, 5),
    (1, '04:18:00', 1, 6),
    (1, '05:18:00', 1, 7),
    (1, '06:18:00', 2, 10),
    (1, '07:18:00', 2, 9),
    (1, '08:18:00', 2, 8),
    (1, '09:18:00', 3, 12),
    (1, '10:18:00', 3, 23),
    (1, '11:18:00', 3, 21),
    (1, '12:18:00', 4, 20),
    (1, '13:18:00', 4, 22);
mo=mode
batch
timestemp
mo
speed
1
00:18:00
0
0
1
01:18:00
0
0
1
02:18:00
0
0
1
03:18:00
1
5
1
04:18:00
1
6
1
05:18:00
1
7
1
06:18:00
2
10
1
07:18:00
2
9
1
08:18:00
2
8
1
09:18:00
3
12
1
10:18:00
3
23
1
11:18:00
3
21
1
12:18:00
4
20
1
13:18:00
4
22
Output:
batch
start time
end time
mode
1
00:18:00
03:17:00
0
1
03:18:00
06:17:00
1
1
06:18:00
09:17:00
2
1
09:18:00
12:17:00
3
1
12:18:00
13:18:00
4
Schema (MySQL v8.0)
-- Fixture table for the answer's query; `timestemp` is typed as TIME.
CREATE TABLE table1 (
    `batch`     INTEGER,
    `timestemp` TIME,
    `mo`        INTEGER,
    `speed`     INTEGER
);

-- Fourteen readings for batch 1, three per mode for modes 0-3 and two for mode 4.
INSERT INTO table1 (`batch`, `timestemp`, `mo`, `speed`)
VALUES
    ('1', '00:18:00', '0', '0'),
    ('1', '01:18:00', '0', '0'),
    ('1', '02:18:00', '0', '0'),
    ('1', '03:18:00', '1', '5'),
    ('1', '04:18:00', '1', '6'),
    ('1', '05:18:00', '1', '7'),
    ('1', '06:18:00', '2', '10'),
    ('1', '07:18:00', '2', '9'),
    ('1', '08:18:00', '2', '8'),
    ('1', '09:18:00', '3', '12'),
    ('1', '10:18:00', '3', '23'),
    ('1', '11:18:00', '3', '21'),
    ('1', '12:18:00', '4', '20'),
    ('1', '13:18:00', '4', '22');
Query
-- One row per (batch, mode): the first reading is the start, and the end is
-- one minute before the next mode of the same batch begins.
-- NOTE(review): grouping by (batch, mo) assumes each mode occurs in a single
-- contiguous run per batch, as in the sample data -- confirm for real data.
SELECT
    batch,
    mode,
    start_time,
    -- LEAD is partitioned by batch so a batch never borrows the start time of
    -- another batch. The last mode of a batch has no successor (LEAD is NULL),
    -- so it falls back to its own last reading.
    COALESCE(
        SUBTIME(
            LEAD(start_time) OVER (PARTITION BY batch ORDER BY start_time),
            '00:01:00'
        ),
        end_time
    ) AS end_time
FROM (
    SELECT
        batch,
        MIN(timestemp) AS start_time,
        MAX(timestemp) AS end_time,
        mo             AS mode
    FROM table1
    GROUP BY batch, mo
) AS min_max
ORDER BY batch, start_time;
batch
mode
start_time
end_time
1
0
00:18:00
03:17:00
1
1
03:18:00
06:17:00
1
2
06:18:00
09:17:00
1
3
09:18:00
12:17:00
1
4
12:18:00
13:18:00
View on DB Fiddle
I'm trying to return the common date periods (per id) from the data below, but I cannot find a way to handle the case where there is a gap between common periods. Can anyone help?
|id|code_id|code|date_from|date_to|
|--|--|--|--|--|
|10|100| 1000 |02/02/2022 |03/02/2022 23:57:00|
|10|100| 1000 |07/02/2022 01:00:00 |08/02/2022 |
|10|100| 2000 |02/02/2022 |02/02/2022 23:00:00|
|10|100| 2000 |07/02/2022 03:00:00 |08/02/2022 |
|10|200| 2000 |02/02/2022 02:14:00 |04/02/2022 21:37:00|
|20|100| 1000 |01/02/2022 05:00:00 |03/02/2022 |
|30|100| 2000 |02/02/2022 |02/02/2022 23:00:00|
|30|200| 2000 |02/02/2022 02:14:00 |04/02/2022 |
|40|100| 2000 |07/02/2022 03:00:00 |08/02/2022 23:10:00|
|50|200| 2000 |04/02/2022 |04/02/2022 21:37:00|
|50|200| 3000 |04/02/2022 02:12:00 |05/02/2022 23:31:00|
Below simple query works fine, but only for ids which have one common period (with no gaps).
I would expect for id = 10 to return two rows (as there is a gap between dates) for periods which are:
I) 02/02/2022 00:00:00 <-> 04/02/2022 21:37:00
II) 07/02/2022 01:00:00 <-> 08/02/2022 00:00:00
-- Naive envelope per id: the earliest date_from and the latest date_to over
-- all of the id's rows. Gaps between an id's periods are not detected.
SELECT
    g.id,
    MIN(g.date_from) AS date_from,
    MAX(g.date_to)   AS date_to
FROM my_gtt g
GROUP BY g.id
ORDER BY g.id
Current results (but id = 10 is incorrect):
|id|date_from|date_to|
|--|--|--|
|10| 02/02/2022 |08/02/2022 |
|20| 01/02/2022 05:00:00 |03/02/2022 |
|30| 02/02/2022 |04/02/2022 |
|40| 07/02/2022 03:00:00 |08/02/2022 23:10:00|
|50| 04/02/2022 |05/02/2022 23:31:00|
Data and table creation:
-- Scratch table for the sample periods; ON COMMIT PRESERVE ROWS keeps the rows
-- after a commit.
CREATE GLOBAL TEMPORARY TABLE my_gtt (
    id        NUMBER(10),
    code_id   NUMBER(10),
    code      NUMBER(10),
    date_from DATE,
    date_to   DATE
) ON COMMIT PRESERVE ROWS;

-- id 10: two clusters of overlapping periods separated by a gap.
INSERT INTO my_gtt (id, code_id, code, date_from, date_to)
VALUES (10, 100, 1000, TO_DATE('02-02-2022', 'dd-mm-yyyy'), TO_DATE('03-02-2022 23:57:00', 'dd-mm-yyyy hh24:mi:ss'));
INSERT INTO my_gtt (id, code_id, code, date_from, date_to)
VALUES (10, 100, 1000, TO_DATE('07-02-2022 01:00:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('08-02-2022', 'dd-mm-yyyy'));
INSERT INTO my_gtt (id, code_id, code, date_from, date_to)
VALUES (10, 100, 2000, TO_DATE('02-02-2022', 'dd-mm-yyyy'), TO_DATE('02-02-2022 23:00:00', 'dd-mm-yyyy hh24:mi:ss'));
INSERT INTO my_gtt (id, code_id, code, date_from, date_to)
VALUES (10, 100, 2000, TO_DATE('07-02-2022 03:00:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('08-02-2022', 'dd-mm-yyyy'));
INSERT INTO my_gtt (id, code_id, code, date_from, date_to)
VALUES (10, 200, 2000, TO_DATE('02-02-2022 02:14:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('04-02-2022 21:37:00', 'dd-mm-yyyy hh24:mi:ss'));

-- ids 20-50: a single period or one cluster of overlapping periods each.
INSERT INTO my_gtt (id, code_id, code, date_from, date_to)
VALUES (20, 100, 1000, TO_DATE('01-02-2022 05:00:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('03-02-2022', 'dd-mm-yyyy'));
INSERT INTO my_gtt (id, code_id, code, date_from, date_to)
VALUES (30, 100, 2000, TO_DATE('02-02-2022', 'dd-mm-yyyy'), TO_DATE('02-02-2022 23:00:00', 'dd-mm-yyyy hh24:mi:ss'));
INSERT INTO my_gtt (id, code_id, code, date_from, date_to)
VALUES (30, 200, 2000, TO_DATE('02-02-2022 02:14:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('04-02-2022', 'dd-mm-yyyy'));
INSERT INTO my_gtt (id, code_id, code, date_from, date_to)
VALUES (40, 100, 2000, TO_DATE('07-02-2022 03:00:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('08-02-2022 23:10:00', 'dd-mm-yyyy hh24:mi:ss'));
INSERT INTO my_gtt (id, code_id, code, date_from, date_to)
VALUES (50, 200, 2000, TO_DATE('04-02-2022', 'dd-mm-yyyy'), TO_DATE('04-02-2022 21:37:00', 'dd-mm-yyyy hh24:mi:ss'));
INSERT INTO my_gtt (id, code_id, code, date_from, date_to)
VALUES (50, 200, 3000, TO_DATE('04-02-2022 02:12:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('05-02-2022 23:31:00', 'dd-mm-yyyy hh24:mi:ss'));
From Oracle 12, MATCH_RECOGNIZE is the simplest solution:
-- Merge each id's overlapping periods into one row per cluster.
-- A match is zero or more rows that reach the row after them, followed by one
-- closing row. The closing variable has no DEFINE entry, so any row matches it.
SELECT
    id,
    start_date,
    end_date
FROM my_gtt
MATCH_RECOGNIZE (
    PARTITION BY id
    ORDER BY date_from, date_to
    MEASURES
        MIN(date_from) AS start_date,
        MAX(date_to)   AS end_date
    ONE ROW PER MATCH
    PATTERN (reaches_next* closing_row)
    DEFINE
        -- The next period starts no later than the latest end seen so far in
        -- this match, so it belongs to the same cluster.
        reaches_next AS NEXT(date_from) <= MAX(date_to)
);
However, if you are on an earlier version you can find the output using:
-- Merge overlapping periods per id without MATCH_RECOGNIZE.
-- Outer level: one row per (id, match_no) cluster, spanning its first and last event.
SELECT id,
MIN(dt) AS date_from,
MAX(dt) AS date_to
FROM (
-- Middle level: value is 1 on a cluster-opening start and 0 on a cluster-closing
-- end, so its running sum gives both events of a cluster the same match_no.
SELECT id,
dt,
SUM(value) OVER (PARTITION BY id ORDER BY dt, ROWNUM) AS match_no
FROM (
-- Inner level: each period becomes a start event (type 1) and an end event
-- (type -1). The running SUM(type) is the number of periods open after the event.
-- Multiplying by type makes value 1 only for a start that opens the first period
-- and 0 only for an end that closes the last one.
SELECT id,
dt,
type * SUM(type) OVER (PARTITION BY id ORDER BY dt, ROWNUM) AS value
FROM my_gtt
UNPIVOT (dt FOR type IN (date_from AS 1, date_to AS -1))
)
-- Keep only the events that open or close a cluster.
WHERE value IN (1,0)
)
GROUP BY id, match_no
Which, for the sample data:
-- Sample periods for ids 10-50.
INSERT INTO my_gtt VALUES (10, 100, 1000, TO_DATE('02-02-2022', 'dd-mm-yyyy'), TO_DATE('03-02-2022 23:57:00', 'dd-mm-yyyy hh24:mi:ss'));
INSERT INTO my_gtt VALUES (10, 100, 1000, TO_DATE('07-02-2022 01:00:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('08-02-2022', 'dd-mm-yyyy'));
INSERT INTO my_gtt VALUES (10, 100, 2000, TO_DATE('02-02-2022', 'dd-mm-yyyy'), TO_DATE('02-02-2022 23:00:00', 'dd-mm-yyyy hh24:mi:ss'));
INSERT INTO my_gtt VALUES (10, 100, 2000, TO_DATE('07-02-2022 03:00:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('08-02-2022', 'dd-mm-yyyy'));
INSERT INTO my_gtt VALUES (10, 200, 2000, TO_DATE('02-02-2022 02:14:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('04-02-2022 21:37:00', 'dd-mm-yyyy hh24:mi:ss'));
INSERT INTO my_gtt VALUES (20, 100, 1000, TO_DATE('01-02-2022 05:00:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('03-02-2022', 'dd-mm-yyyy'));
INSERT INTO my_gtt VALUES (30, 100, 2000, TO_DATE('02-02-2022', 'dd-mm-yyyy'), TO_DATE('02-02-2022 23:00:00', 'dd-mm-yyyy hh24:mi:ss'));
INSERT INTO my_gtt VALUES (30, 200, 2000, TO_DATE('02-02-2022 02:14:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('04-02-2022', 'dd-mm-yyyy'));
INSERT INTO my_gtt VALUES (40, 100, 2000, TO_DATE('07-02-2022 03:00:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('08-02-2022 23:10:00', 'dd-mm-yyyy hh24:mi:ss'));
INSERT INTO my_gtt VALUES (50, 200, 2000, TO_DATE('04-02-2022', 'dd-mm-yyyy'), TO_DATE('04-02-2022 21:37:00', 'dd-mm-yyyy hh24:mi:ss'));
INSERT INTO my_gtt VALUES (50, 200, 3000, TO_DATE('04-02-2022 02:12:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('05-02-2022 23:31:00', 'dd-mm-yyyy hh24:mi:ss'));
-- Extra rows for id 60: a period nested inside a longer one (01-02 to 01-04
-- within 01-01 to 01-10), a later period that extends the cluster to 01-11,
-- and a second cluster after a gap (01-13 to 01-16).
INSERT INTO my_gtt VALUES (60, 200, 3000, DATE '2022-01-01', DATE '2022-01-10');
INSERT INTO my_gtt VALUES (60, 200, 3000, DATE '2022-01-02', DATE '2022-01-04');
INSERT INTO my_gtt VALUES (60, 200, 3000, DATE '2022-01-06', DATE '2022-01-11');
INSERT INTO my_gtt VALUES (60, 200, 3000, DATE '2022-01-13', DATE '2022-01-16');
INSERT INTO my_gtt VALUES (60, 200, 3000, DATE '2022-01-14', DATE '2022-01-15');
Both output:
ID
START_DATE
END_DATE
10
2022-02-02 00:00:00
2022-02-04 21:37:00
10
2022-02-07 01:00:00
2022-02-08 00:00:00
20
2022-02-01 05:00:00
2022-02-03 00:00:00
30
2022-02-02 00:00:00
2022-02-04 00:00:00
40
2022-02-07 03:00:00
2022-02-08 23:10:00
50
2022-02-04 00:00:00
2022-02-05 23:31:00
60
2022-01-01 00:00:00
2022-01-11 00:00:00
60
2022-01-13 00:00:00
2022-01-16 00:00:00
db<>fiddle here
SQL pattern matching can help:
-- Collapse each id's overlapping periods into a single start/end row.
select
    id,
    start_date,
    end_date
from my_gtt
match_recognize (
    partition by id
    order by date_from, date_to
    measures
        min ( date_from ) as start_date,
        max ( date_to )   as end_date
    -- "before_gap" has no definition, so it matches any row and closes the match.
    pattern ( merging* before_gap )
    define
        -- Keep extending while the latest end seen so far reaches the next start.
        merging as max ( date_to ) >= next ( date_from )
);
ID START_DATE END_DATE
---------- -------------------- --------------------
10 02-FEB-2022 00:00:00 04-FEB-2022 21:37:00
10 07-FEB-2022 01:00:00 08-FEB-2022 00:00:00
20 01-FEB-2022 05:00:00 03-FEB-2022 00:00:00
30 02-FEB-2022 00:00:00 04-FEB-2022 00:00:00
40 07-FEB-2022 03:00:00 08-FEB-2022 23:10:00
50 04-FEB-2022 00:00:00 05-FEB-2022 23:31:00
I discuss how this works in more detail in pattern matching use cases
I have the following data:
-- Sample data as a table variable (table variables take the @ prefix).
-- The trailing comment on each row is that user's running total of amount.
DECLARE @t TABLE (usr VARCHAR(100), dt DATE, amount INT);
INSERT INTO @t (usr, dt, amount) VALUES
('a', '2018-01-01', 100), -- 100
('a', '2018-02-01', 100), -- 200
('a', '2018-03-01', 100), -- 300
('a', '2018-04-01', 100), -- 400
('a', '2018-05-01', 100), -- 500
('b', '2018-01-01', 150), -- 150
('b', '2018-02-01', 150), -- 300
('b', '2018-03-01', 150), -- 450
('b', '2018-04-01', 150), -- 600
('b', '2018-05-01', 150); -- 750
And a value such as 300 or 301 (a user variable or column). I want to select rows until running total of amount reaches the specified value, with the following twist:
For 300 I want to select first 3 rows for a and first 2 rows for b
For 301 I want to select first 4 rows for a and first 3 rows for b
This is supposed to be simple but the solutions I found do not handle the second case.
-- Sample data; the trailing comment on each row is that user's running total.
DECLARE @t TABLE (usr VARCHAR(100), dt DATE, amount INT);
INSERT INTO @t (usr, dt, amount) VALUES
('a', '2018-01-01', 100), -- 100
('a', '2018-02-01', 100), -- 200
('a', '2018-03-01', 100), -- 300
('a', '2018-04-01', 100), -- 400
('a', '2018-05-01', 100), -- 500
('b', '2018-01-01', 150), -- 150
('b', '2018-02-01', 150), -- 300
('b', '2018-03-01', 150), -- 450
('b', '2018-04-01', 150), -- 600
('b', '2018-05-01', 150); -- 750

-- Target amount: return rows until the running total reaches this value.
DECLARE @Total INT = 301;

WITH cte AS
(
    SELECT usr,
           dt,
           amount,
           -- An explicit ROWS frame adds one row at a time; the default RANGE
           -- frame would lump rows with equal dt into the same total.
           SUM(amount) OVER (PARTITION BY usr ORDER BY dt ROWS UNBOUNDED PRECEDING) AS RunTotal
    FROM @t
)
SELECT usr, dt, amount, RunTotal
FROM cte
WHERE cte.RunTotal - cte.amount < @Total -- running total for previous row is less
                                         -- than @Total then include current row
ORDER BY usr, dt;
-- Sample data; the trailing comment on each row is that user's running total.
DECLARE @t TABLE (usr VARCHAR(100), dt DATE, amount INT);
INSERT INTO @t (usr, dt, amount) VALUES
('a', '2018-01-01', 100), -- 100
('a', '2018-02-01', 100), -- 200
('a', '2018-03-01', 100), -- 300
('a', '2018-04-01', 100), -- 400
('a', '2018-05-01', 100), -- 500
('b', '2018-01-01', 150), -- 150
('b', '2018-02-01', 150), -- 300
('b', '2018-03-01', 150), -- 450
('b', '2018-04-01', 150), -- 600
('b', '2018-05-01', 150); -- 750

-- Target amount: return rows until the running total reaches this value.
declare @target int = 300;

with cte_RunningTotal as
(
    select
        usr,
        dt,
        amount,
        sum(amount) over (partition by usr order by dt rows unbounded preceding) as runningTotal
    from @t
)
select *
from cte_RunningTotal
-- Same as (runningTotal - amount) < @target: keep a row while the total
-- before it is still short of the target.
where runningTotal < @target + amount
order by usr, dt
I am working with a blood pressure database in SQL Server which contains patient_id, timestamp (per minute) and systolicBloodPressure.
My goals are to find:
the number of episodes in which a patient is under a certain blood pressure threshold
An episode runs from the timestamp where the patient drops below a certain threshold until the timestamp where the patient rises back above the threshold.
the mean blood pressure per episode per patient
the duration of the episode per episode per patient
What I have tried so far:
I am able to identify episodes by just making a new column which sets to 1 if threshold is reached.
-- Flag each reading below the threshold with '1'; other rows get NULL.
-- NOTE(review): `sys` and `threshold` are the question's placeholder names --
-- substitute the real column and threshold value.
select *
     , CASE
           when sys < threshold THEN '1'
       END AS belowThreshold
from BPDATA
However, I am not able to identify the separate episodes within a patient (episode 1, episode 2, …) together with their respective timestamps.
Could someone help me with this? Or is there someone with a better different solution?
EDIT: Sample data with example threshold 100
ID Timestamp SysBP below Threshold
----------------------------------------------------
1 9:38 110 Null
1 9:39 105 Null
1 9:40 96 1
1 9:41 92 1
1 9:42 102 Null
2 12:23 95 1
2 12:24 98 1
2 12:25 102 Null
2 12:26 104 Null
2 12:27 94 1
2 12:28 88 1
2 12:29 104 Null
Thanks for the sample data.
This should work:
-- Sample readings as a table variable (table variables take the @ prefix).
-- belowThreshold is 1 where SysBP is under 100 and NULL elsewhere.
declare @t table (ID int, Timestamp time, SysBP int, belowThreshold bit)
insert @t (ID, Timestamp, SysBP, belowThreshold)
values
(1, '9:38', 110, null),
(1, '9:39', 105, null),
(1, '9:40', 96, 1),
(1, '9:41', 92, 1),
(1, '9:42', 102, null),
(2, '12:23', 95, 1),
(2, '12:24', 98, 1),
(2, '12:25', 102, null),
(2, '12:26', 104, null),
(2, '12:27', 94, 1),
(2, '12:28', 88, 1),
(2, '12:29', 104, null)

declare @threshold int = 100

-- y: epg is 0 only when a row's flag equals the previous row's flag for the
--    same patient; any change, or a NULL on either side, gives 1.
;with y as (
    select *,
           case
               when lag(belowThreshold, 1, 0) over (partition by id order by timestamp) = belowThreshold then 0
               else 1
           end as epg
    from @t
),
-- z: only below-threshold readings remain, so the running sum of epg gives
--    each consecutive run of them its own episode number.
z as (
    select *,
           sum(epg) over (partition by id order by timestamp) as episode
    from y
    where sysbp < @threshold
)
select id,
       episode,
       -- Counted after grouping, so this is the number of episodes per patient.
       count(episode) over (partition by id) as number_of_episodes_per_id,
       -- Multiply by 1.0 so the mean is not truncated by integer AVG.
       avg(sysbp * 1.0) as avg_sysbp,
       -- +1 so that a one-reading episode lasts one minute, not zero.
       datediff(minute, min(timestamp), max(timestamp)) + 1 as episode_duration
from z
group by id, episode
order by id, episode
This answer relies on LEAD() and LAG() functions so only works on 2012 or later:
Setup:
-- Temp table of per-minute systolic readings for three patients.
CREATE TABLE #bloodpressure
(
    Patient_id            INT,
    [TimeStamp]           SMALLDATETIME,
    SystolicBloodPressure INT
);

INSERT INTO #bloodpressure (Patient_id, [TimeStamp], SystolicBloodPressure)
VALUES
    -- Patient 1: already low at the first reading, with a second dip at 09:07.
    (1, '2017-01-01 09:01', 60),
    (1, '2017-01-01 09:02', 55),
    (1, '2017-01-01 09:03', 60),
    (1, '2017-01-01 09:04', 70),
    (1, '2017-01-01 09:05', 72),
    (1, '2017-01-01 09:06', 75),
    (1, '2017-01-01 09:07', 60),
    (1, '2017-01-01 09:08', 50),
    (1, '2017-01-01 09:09', 52),
    (1, '2017-01-01 09:10', 53),
    (1, '2017-01-01 09:11', 65),
    (1, '2017-01-01 09:12', 71),
    (1, '2017-01-01 09:13', 73),
    (1, '2017-01-01 09:14', 74),
    -- Patient 2: a single dip from 09:07 to 09:11.
    (2, '2017-01-01 09:01', 70),
    (2, '2017-01-01 09:02', 75),
    (2, '2017-01-01 09:03', 80),
    (2, '2017-01-01 09:04', 70),
    (2, '2017-01-01 09:05', 72),
    (2, '2017-01-01 09:06', 75),
    (2, '2017-01-01 09:07', 60),
    (2, '2017-01-01 09:08', 50),
    (2, '2017-01-01 09:09', 52),
    (2, '2017-01-01 09:10', 53),
    (2, '2017-01-01 09:11', 65),
    (2, '2017-01-01 09:12', 71),
    (2, '2017-01-01 09:13', 73),
    (2, '2017-01-01 09:14', 74),
    -- Patient 3: a one-minute dip at 09:13.
    (3, '2017-01-01 09:12', 71),
    (3, '2017-01-01 09:13', 60),
    (3, '2017-01-01 09:14', 74);
Now using Lead And Lag to find the previous rows values, to find whether this is the beginning or end of a sequence of low blood pressures, in combination with a common table expression. Using a UNION of start and end events ensures that an event which covers just one minute is recorded as both a start and an end event.
-- Readings strictly below this value count as part of a low-pressure episode.
DECLARE @Threshold INT = 70;

WITH CTE
AS
(
    -- Each reading with the previous and next reading of the same patient.
    SELECT Patient_id,
           [TimeStamp],
           SystolicBloodPressure,
           LAG(SystolicBloodPressure, 1)
               OVER (PARTITION BY Patient_id ORDER BY [TimeStamp]) AS PrevValue,
           LEAD(SystolicBloodPressure, 1)
               OVER (PARTITION BY Patient_id ORDER BY [TimeStamp]) AS NextValue
    FROM #bloodpressure
),
CTE2
AS
(
    -- Start events (EventType 1): a low reading with no previous reading, or
    -- whose previous reading was at or above the threshold.
    SELECT 1 AS [EventType], Patient_id, [TimeStamp],
           ROW_NUMBER() OVER (ORDER BY Patient_id, [TimeStamp]) AS RN
    FROM CTE
    WHERE SystolicBloodPressure < @Threshold
      AND (PrevValue IS NULL OR PrevValue >= @Threshold)

    -- UNION ALL: the two branches differ in EventType and RN is unique within
    -- each branch, so there are no duplicates to remove.
    UNION ALL

    -- End events (EventType 2): a low reading with no next reading, or whose
    -- next reading is at or above the threshold.
    SELECT 2 AS [EventType], Patient_id, [TimeStamp],
           ROW_NUMBER() OVER (ORDER BY Patient_id, [TimeStamp]) AS RN
    FROM CTE
    WHERE SystolicBloodPressure < @Threshold
      AND (NextValue IS NULL OR NextValue >= @Threshold)
)
-- The n-th start pairs with the n-th end. A one-minute episode appears in both
-- branches with the same timestamp.
SELECT C1.Patient_id, C1.[TimeStamp] AS EventStart, C2.[TimeStamp] AS EventEnd
FROM CTE2 AS C1
INNER JOIN CTE2 AS C2
    ON C1.Patient_id = C2.Patient_id AND C1.RN = C2.RN
WHERE C1.EventType = 1 AND C2.EventType = 2
ORDER BY C1.Patient_id, C1.[TimeStamp]
I'm trying to combine multiple date ranges from two identically structured tables that hold the same or different data (PostgreSQL 9.*).
Tables structure:
-- First source of activity intervals. The id default draws from
-- first_activities_id_seq, which this script does not create.
CREATE TABLE first_activities (
    id            integer NOT NULL DEFAULT nextval('first_activities_id_seq'::regclass),
    start_time    timestamptz,
    end_time      timestamptz,
    activity_type smallint,
    user_id       integer
)
WITH (OIDS = FALSE);

ALTER TABLE first_activities
    ADD PRIMARY KEY (id) NOT DEFERRABLE INITIALLY IMMEDIATE;

-- Second source, same shape. Its id default draws from
-- second_activities_id_seq, which this script does not create either.
CREATE TABLE second_activities (
    id            integer NOT NULL DEFAULT nextval('second_activities_id_seq'::regclass),
    start_time    timestamptz,
    end_time      timestamptz,
    activity_type smallint,
    user_id       integer
)
WITH (OIDS = FALSE);

ALTER TABLE second_activities
    ADD PRIMARY KEY (id) NOT DEFERRABLE INITIALLY IMMEDIATE;
Data in First table:
-- id is left out so its column default (nextval) applies; an explicit NULL
-- would violate the NOT NULL constraint on id.
INSERT INTO first_activities (start_time, end_time, activity_type, user_id) VALUES
    ('2014-10-31 01:00:00', '2014-10-31 02:00:00', 3, 1),
    ('2014-10-31 02:00:00', '2014-10-31 03:00:00', 4, 1),
    ('2014-10-31 03:00:00', '2014-10-31 04:00:00', 2, 1),
    ('2014-10-31 04:30:00', '2014-10-31 05:00:00', 3, 1),
    ('2014-10-31 05:30:00', '2014-11-01 06:00:00', 4, 1),
    ('2014-11-01 06:30:00', '2014-11-01 07:00:00', 2, 1),
    ('2014-11-01 07:30:00', '2014-11-01 08:00:00', 1, 1),
    ('2014-11-01 08:00:00', '2014-11-01 09:00:00', 3, 1),
    ('2014-11-01 09:00:00', '2014-11-02 10:00:00', 4, 1),
    ('2014-08-27 10:00:00', '2014-08-27 11:00:00', 2, 1),
    ('2014-08-27 11:00:00', '2014-08-27 12:00:00', 1, 1);
Data in Second table:
-- id is left out so its column default (nextval) applies; an explicit NULL
-- would violate the NOT NULL constraint on id.
INSERT INTO second_activities (start_time, end_time, activity_type, user_id) VALUES
    ('2014-10-31 01:00:00', '2014-10-31 02:00:00', 3, 1),
    ('2014-10-31 02:00:00', '2014-10-31 03:00:00', 4, 1),
    -- Difference from first table
    ('2014-10-31 03:30:00', '2014-10-31 04:00:00', 1, 1),
    ('2014-10-31 04:25:00', '2014-10-31 04:35:00', 3, 1),
    ('2014-10-31 04:45:00', '2014-10-31 05:35:00', 3, 1),
    -- End of difference from first table
    ('2014-08-27 10:00:00', '2014-08-27 11:00:00', 2, 1),
    ('2014-08-27 11:00:00', '2014-08-27 12:00:00', 1, 1);
How can I filter result set that starting from query:
-- Every row of both activity tables (duplicates kept), earliest start first.
SELECT id, start_time, end_time, activity_type, user_id
FROM first_activities
UNION ALL
SELECT id, start_time, end_time, activity_type, user_id
FROM second_activities
ORDER BY start_time;
to get final result set.
Final Result:
-- merge same data by user_id and activity_type and combine with
-- and split data with range intersection but not same user_id and acitvity_type
-- start_time end_time type user_id
'2014-10-31 01:00:00', '2014-10-31 02:00:00', '3', '1');
'2014-10-31 02:00:00', '2014-10-31 03:00:00', '4', '1');
--data dont merge. Splitting with range intersection
'2014-10-31 03:00:00', '2014-10-31 03:30:00', '2', '1'); -- from first table
'2014-10-31 03:30:00', '2014-10-31 04:00:00', '1', '1'); -- from second table
-- data merged by same user_id and activity_type
'2014-10-31 04:25:00', '2014-10-31 05:35:00', '3', '1');
'2014-10-31 05:30:00', '2014-11-01 06:00:00', '4', '1');
'2014-11-01 06:30:00', '2014-11-01 07:00:00', '2', '1');
'2014-11-01 07:30:00', '2014-11-01 08:00:00', '1', '1');
'2014-11-01 08:00:00', '2014-11-01 09:00:00', '3', '1');
'2014-11-01 09:00:00', '2014-11-02 10:00:00', '4', '1');
'2014-08-27 10:00:00', '2014-08-27 11:00:00', '2', '1');
'2014-08-27 11:00:00', '2014-08-27 12:00:00', '1', '1');
The issue can be reduced to the question of how to combine (compact) a group of adjacent (overlapping) ranges into one. I had to deal with this some time ago and found it a bit complicated in plain SQL. There is a simple solution using loop in a plpgsql code, but I found also a general solution with the use of custom aggregate.
The function compact_ranges(anyrange, anyrange) returns the sum of ranges if they are adjacent (overlapping) or the second range otherwise:
-- State-transition function for compact_ranges_agg.
--   $1: the range accumulated so far (the aggregate's state)
--   $2: the next input range
-- Returns the union of both when they overlap (&&) or are adjacent (-|-);
-- otherwise returns $2, so the state restarts at the new range. If the
-- condition is not true (including when $1 is NULL), the ELSE branch returns $2.
create or replace function compact_ranges(anyrange, anyrange)
    returns anyrange
    language sql
as $body$
    select
        case
            when ($1 && $2) or ($1 -|- $2) then $1 + $2
            else $2
        end
$body$;

-- Aggregate wrapper; it declares no initial state value.
create aggregate compact_ranges_agg (anyrange) (
    sfunc = compact_ranges,
    stype = anyrange
);
The aggregate has a narrow scope of usage, it should be called as a progressive window function like in the example:
-- Demo: run the aggregate as a running window over ranges sorted by rng, then
-- keep the widest accumulated range for each distinct lower bound.
with sample_ranges(rng) as (
    values
        ('[ 1, 2)'::int4range),
        ('[ 3, 7)'),   -- group 1
        ('[ 5, 10)'),  -- group 1
        ('[ 6, 8)'),   -- group 1
        ('[11, 17)'),  -- group 2
        ('[12, 16)'),  -- group 2
        ('[15, 16)'),  -- group 2
        ('[18, 19)')
)
select distinct on (lower(running.new_rng))
    running.new_rng
from (
    select
        rng,
        compact_ranges_agg(rng) over (order by rng) as new_rng
    from sample_ranges
) as running
-- new_rng desc puts the widest range first within each lower bound, and that
-- is the row DISTINCT ON keeps.
order by lower(running.new_rng), running.new_rng desc;
new_rng
---------
[1,2)
[3,10)
[11,17)
[18,19)
(4 rows)
In the same way you can use it for your tables:
-- merged: the intervals of both tables as ranges. UNION (not UNION ALL) so a
-- row present in both tables appears once.
with merged as (
    select tstzrange(start_time, end_time) as rng, activity_type, user_id
    from first_activities
    union
    select tstzrange(start_time, end_time) as rng, activity_type, user_id
    from second_activities
),
-- compacted: overlapping or adjacent ranges of the same user and activity type
-- merged into one; the widest accumulated range per lower bound is kept.
compacted as (
    select distinct on (user_id, activity_type, lower(new_rng))
        lower(new_rng) as start_time,
        upper(new_rng) as end_time,
        activity_type,
        user_id
    from (
        select
            user_id,
            activity_type,
            compact_ranges_agg(rng) over (partition by user_id, activity_type order by rng) as new_rng
        from merged
    ) s
    order by user_id, activity_type, lower(new_rng), new_rng desc
)
select
    start_time,
    -- Clip a range that runs into the next range of the same user.
    case
        when end_time > lead(start_time) over w then lead(start_time) over w
        else end_time
    end as end_time,
    activity_type,
    user_id
from compacted
-- Partitioned by user so one user's ranges never clip another user's.
window w as (partition by user_id order by start_time)
order by start_time;
The result:
start_time | end_time | activity_type | user_id
------------------------+------------------------+---------------+---------
2014-08-27 10:00:00+02 | 2014-08-27 11:00:00+02 | 2 | 1
2014-08-27 11:00:00+02 | 2014-08-27 12:00:00+02 | 1 | 1
2014-10-31 01:00:00+01 | 2014-10-31 02:00:00+01 | 3 | 1
2014-10-31 02:00:00+01 | 2014-10-31 03:00:00+01 | 4 | 1
2014-10-31 03:00:00+01 | 2014-10-31 03:30:00+01 | 2 | 1
2014-10-31 03:30:00+01 | 2014-10-31 04:00:00+01 | 1 | 1
2014-10-31 04:25:00+01 | 2014-10-31 05:30:00+01 | 3 | 1
2014-10-31 05:30:00+01 | 2014-11-01 06:00:00+01 | 4 | 1
2014-11-01 06:30:00+01 | 2014-11-01 07:00:00+01 | 2 | 1
2014-11-01 07:30:00+01 | 2014-11-01 08:00:00+01 | 1 | 1
2014-11-01 08:00:00+01 | 2014-11-01 09:00:00+01 | 3 | 1
2014-11-01 09:00:00+01 | 2014-11-02 10:00:00+01 | 4 | 1
(12 rows)