SQL intersect two timestamp pairs and group up by hours - sql

I have a little problem.
I have a table, let's call it "events", with columns like: type (1 or 0), timestamp start, timestamp end.
I want to group them by hours (60 minutes periods)
Into 4 columns each calculating
How many minutes per hour there was neither a type 1 nor a type 0 event.
How many minutes per hour there was a type 1 event and, at the same time, no type 2 event.
How many minutes per hour there was a type 2 event and, at the same time, no type 1 event.
How many minutes per hour there were a type 2 event and a type 1 event at the same time.
Result should look like this:
hour 00 10 01 11
12 10 20 20 10
13 5 15 25 15
Each row should always sum to 60 minutes.
Is it possible to do it in SQL? I need it in Vertica, so I can use Vertica's functions too.

Interesting Question! Here is a query which gets you what you need. I mocked up the following table and some dummy data, and then showed the results from the query at the end. As you required - the totals always add up to 60 minutes within each hour.
SETUP:
-- Mock-up table: one row per point-in-time event, tagged with its event type.
CREATE TABLE public.time_event_test (
    event_timestamp TIMESTAMPTZ,
    event_type      INT
);

-- Dummy rows; every timestamp is an offset in minutes from GETUTCDATE().
INSERT INTO public.time_event_test (event_timestamp, event_type)
SELECT GETUTCDATE() AS event_timestamp, 1 AS event_type;                        -- type 1, +0 min

INSERT INTO public.time_event_test (event_timestamp, event_type)
SELECT TIMESTAMPADD('minute', 5, GETUTCDATE()) AS event_timestamp, 1 AS event_type;   -- type 1, +5 min

INSERT INTO public.time_event_test (event_timestamp, event_type)
SELECT TIMESTAMPADD('minute', 1, GETUTCDATE()) AS event_timestamp, 1 AS event_type;   -- type 1, +1 min

INSERT INTO public.time_event_test (event_timestamp, event_type)
SELECT TIMESTAMPADD('minute', 1, GETUTCDATE()) AS event_timestamp, 2 AS event_type;   -- type 2, +1 min

INSERT INTO public.time_event_test (event_timestamp, event_type)
SELECT TIMESTAMPADD('minute', 3, GETUTCDATE()) AS event_timestamp, 2 AS event_type;   -- type 2, +3 min

INSERT INTO public.time_event_test (event_timestamp, event_type)
SELECT TIMESTAMPADD('minute', 6, GETUTCDATE()) AS event_timestamp, 2 AS event_type;   -- type 2, +6 min

INSERT INTO public.time_event_test (event_timestamp, event_type)
SELECT TIMESTAMPADD('minute', 90, GETUTCDATE()) AS event_timestamp, 2 AS event_type;  -- type 2, +90 min
QUERY:
-- Per hour, count the minutes in which type 1 and/or type 2 events occurred.
-- Column suffix reads <type 1 flag><type 2 flag>:
--   type_00 = neither, type_01 = type 2 only, type_10 = type 1 only, type_11 = both.
-- The four columns always add up to 60. Hours without any event produce no row.
WITH minute_flags AS (
    -- One row per minute that contains at least one event; each flag is 1
    -- when an event of that type falls inside the minute.
    SELECT
        date_trunc('minute', event_timestamp)           AS dat,
        max(CASE WHEN event_type = 1 THEN 1 ELSE 0 END) AS event_type1,
        max(CASE WHEN event_type = 2 THEN 1 ELSE 0 END) AS event_type2
    FROM public.time_event_test
    GROUP BY date_trunc('minute', event_timestamp)
)
SELECT
    date_trunc('hour', dat) AS hr,
    -- Minutes missing from minute_flags had no event at all, so the empty
    -- minutes are 60 minus the minutes that carry any flag.
    60 - sum(CASE WHEN event_type1 = 1 OR event_type2 = 1 THEN 1 ELSE 0 END) AS type_00,
    sum(CASE WHEN event_type1 = 0 AND event_type2 = 1 THEN 1 ELSE 0 END)     AS type_01,
    sum(CASE WHEN event_type1 = 1 AND event_type2 = 0 THEN 1 ELSE 0 END)     AS type_10,
    sum(CASE WHEN event_type1 = 1 AND event_type2 = 1 THEN 1 ELSE 0 END)     AS type_11
FROM minute_flags
GROUP BY date_trunc('hour', dat)
ORDER BY hr;
RESULTS:
hr | type_00 | type_01 | type_10 | type_11
------------------------+---------+---------+---------+---------
2016-12-21 01:00:00+00 | 52 | 3 | 2 | 3
2016-12-21 02:00:00+00 | 59 | 1 | 0 | 0

Related

Count records by Grouping a period

I have a table that stores per day if a user worked or if he was on vacation based on a value.
Example table, Value = 1 -> WorkDay, Value = 2 -> Vacation:
User | Day | Value
--------|------------|-------
user-1 | 2021-01-01 | 1
user-1 | 2021-01-02 | 1
user-1 | 2021-01-03 | 1
user-1 | 2021-01-04 | 1
user-1 | 2021-01-05 | 2
user-1 | 2021-01-06 | 2
...
I'd like to convert this table to this (using the simple example above):
User | Year | Month | WorkDay | Vacation
--------|------|-------|---------|---------
user-1 | 2021 | 01 | 4 | 2
...
I tried using group by, subqueries and case, but the whole thing became a mess.
SELECT
YEAR(Day),
MONTH(DAY),
User,
...
From Table1
Group By YEAR(Day), MONTH(DAY), User
Just use conditional aggregation:
-- One row per user and calendar month: number of work days (value = 1)
-- and vacation days (value = 2).
-- [User] is bracket-quoted because USER is a reserved word in SQL Server.
-- Without the user column, the days of all users would be merged per month.
select [User],
       year(day)  as [Year],
       month(day) as [Month],
       sum(case when value = 1 then 1 else 0 end) as workday,
       sum(case when value = 2 then 1 else 0 end) as vacation
from table1
group by [User], year(day), month(day)
You can use conditional aggregation as below:
-- One row per user and calendar month: number of work days (value = 1)
-- and vacation days (value = 2).
-- The column must be written [User]: unquoted, USER is a reserved word in
-- SQL Server and does not refer to the table column.
select [User],
       year(day),
       month(day),
       sum(case when value = 1 then 1 else 0 end) as WorkDay,
       sum(case when value = 2 then 1 else 0 end) as Vacation
from table1
group by [User], year(day), month(day)
DB-Fiddle:
Schema and insert statements:
-- Sample table: one row per user and day; Value 1 marks a work day, 2 a vacation day.
create table table1 (
    [User] varchar(50),
    Day    date,
    Value  int
);

-- Six days for user-1: four work days followed by two vacation days.
insert into table1 ([User], Day, Value)
values
    ('user-1', '2021-01-01', 1),
    ('user-1', '2021-01-02', 1),
    ('user-1', '2021-01-03', 1),
    ('user-1', '2021-01-04', 1),
    ('user-1', '2021-01-05', 2),
    ('user-1', '2021-01-06', 2);
Query:
-- Work-day and vacation-day totals per user and calendar month.
select
    d.[User] as "user",
    d.yr     as "year",
    d.mon    as "month",
    -- count() skips the NULL produced when the CASE does not match
    count(case when d.Value = 1 then 1 end) as WorkDay,
    count(case when d.Value = 2 then 1 end) as Vacation
from (
    -- derive the year and month once per row
    select [User], year(day) as yr, month(day) as mon, Value
    from table1
) as d
group by d.[User], d.yr, d.mon
GO
Output:
user
year
month
WorkDay
Vacation
user-1
2021
1
4
2
db<>fiddle here

SQL Conditional Counting

I am working with a dataset that contains information about train delays. The dataset contains an arrival delay column and departing delay column. Each delay column is measured in minutes. I need to calculate the number of total delays for each day of the week to determine which day has the most train delays. If the delay is equal to or more than 1 minute, it needs to be counted as a delay. How can I complete this in SQL? I have tried the following code.
select dayofweek
count(case when arrivaldelay>=1 then 1 end)+
count(case when departuredelay>=1 then 1 end)
group by dayofweek;
dayofweek arrivaldelay departuredelay
2 12 5
4 7 10
4 6 -3
6 5 4
dayofweek delays
2 1
4 1
6 1
Assuming dayofweek is a stored column and not a function, then you can use either count or sum
-- Variant 1: COUNT ignores the NULL that CASE yields when a leg is not delayed,
-- so each COUNT is the number of delayed arrivals / departures.
SELECT
    t.dayofweek,
    COUNT(CASE WHEN t.arrivaldelay >= 1 THEN 1 END)
      + COUNT(CASE WHEN t.departuredelay >= 1 THEN 1 END) AS delays
FROM mytable AS t
GROUP BY t.dayofweek;

-- Variant 2: SUM adds 1 for every delayed leg and 0 otherwise.
SELECT
    t.dayofweek,
    SUM(CASE WHEN t.arrivaldelay >= 1 THEN 1 ELSE 0 END)
      + SUM(CASE WHEN t.departuredelay >= 1 THEN 1 ELSE 0 END) AS delays
FROM mytable AS t
GROUP BY t.dayofweek;
both give the following result from the sample data in the question
+-----------+--------+
| dayofweek | delays |
+-----------+--------+
| 2 | 2 |
| 4 | 3 |
| 6 | 2 |
+-----------+--------+
IF dayofweek is NOT a stored column then you can extract the day of week from a date or timestamp, BUT there are differences in how this is achieved in different databases
demonstrated #db<>fiddle here
You can use sum() like this:
-- Total number of delays (arrival or departure delayed by at least 1 minute)
-- per day of week. The comma after dayofweek is required: without it the
-- parenthesised sum is parsed as an argument list, i.e. a call dayofweek(...).
select dayofweek,
       ( sum(case when arrivaldelay >= 1 then 1 else 0 end) +
         sum(case when departuredelay >= 1 then 1 else 0 end)
       ) as delays
from t
group by dayofweek;

Time series group by day and kind

I create a table using the command below:
CREATE TABLE IF NOT EXISTS stats (
id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
session_kind INTEGER NOT NULL,
ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
)
I insert some time series data using the command below:
INSERT INTO stats (session_kind) values (?1)
Some time after having executed several times the insert command, I have some time series data as below:
id session_kind ts
-----------------------------------------
1 0 2020-04-18 12:59:51 // day 1
2 1 2020-04-19 12:59:52 // day 2
3 0 2020-04-19 12:59:53
4 1 2020-04-19 12:59:54
5 0 2020-04-19 12:59:55
6 2 2020-04-19 12:59:56
7 2 2020-04-19 12:59:57
8 2 2020-04-19 12:59:58
9 2 2020-04-19 12:59:59
10 0 2020-04-20 12:59:51 // day 3
11 1 2020-04-20 12:59:52
12 0 2020-04-20 12:59:53
13 1 2020-04-20 12:59:54
14 0 2020-04-20 12:59:55
15 2 2020-04-20 12:59:56
16 2 2020-04-20 12:59:57
17 2 2020-04-20 12:59:58
18 2 2020-04-21 12:59:59 // day 4
I would like a command that groups my data by date, from the most recent day to the oldest, and shows the count of each session_kind, as below (I don't want to pass any parameters to this command):
0 1 2 ts
-------------------------
0 0 1 2020-04-21 // day 4
3 2 3 2020-04-20 // day 3
2 2 4 2020-04-19 // day 2
1 0 0 2020-04-18 // day 1
How can I group my data as above?
You can do conditional aggregation:
-- Sessions of each kind per calendar day, newest day first.
-- A comparison evaluates to 1 or 0 here, so SUM counts the matching rows.
SELECT
    SUM(session_kind = 0) AS session_kind_0,
    SUM(session_kind = 1) AS session_kind_1,
    SUM(session_kind = 2) AS session_kind_2,
    DATE(ts)              AS ts_day
FROM mytable
GROUP BY ts_day
ORDER BY DATE(ts) DESC
If you want something dynamic, then it might be simpler to put the results in rows rather than columns:
-- One row per (day, session_kind) pair, so new kinds need no query change.
SELECT
    DATE(ts)     AS ts_day,
    session_kind,
    COUNT(*)     AS cnt
FROM mytable
GROUP BY ts_day, session_kind
ORDER BY DATE(ts) DESC, session_kind
If I understand correctly, you just want to sum the values:
-- Sessions of each kind per calendar day, newest day first.
-- The stats table stores the time in column ts, and the sample data uses
-- session_kind values 0, 1 and 2.
select date(ts) as ts_day,
       sum(case when session_kind = 0 then 1 else 0 end) as cnt_0,
       sum(case when session_kind = 1 then 1 else 0 end) as cnt_1,
       sum(case when session_kind = 2 then 1 else 0 end) as cnt_2
from stats
group by date(ts)
order by ts_day desc;
You can also simplify this:
-- Sessions of each kind per calendar day, newest day first.
-- Relies on a comparison evaluating to 1 or 0 (true in SQLite and MySQL),
-- so SUM counts the matching rows. The stats table stores the time in
-- column ts, and the sample data uses session_kind values 0, 1 and 2.
select date(ts) as ts_day,
       sum( session_kind = 0 ) as cnt_0,
       sum( session_kind = 1 ) as cnt_1,
       sum( session_kind = 2 ) as cnt_2
from stats
group by date(ts)
order by ts_day desc;

How to write a query to attach rownumber(1 to n) to each records for each group

I have a dataset something like below
|date|flag|
|20190503|0|
|20190504|1|
|20190505|1|
|20190506|1|
|20190507|1|
|20190508|0|
|20190509|0|
|20190510|0|
|20190511|1|
|20190512|1|
|20190513|0|
|20190514|0|
|20190515|1|
What I want to achieve is to group the consecutive dates where flag=1 and add a counter column: 1 for the first day of a run of consecutive flag=1 days, 2 for the second day, and so on; rows with flag=0 get 0.
|date|flag|counter|
|20190503|0|0|
|20190504|1|1|
|20190505|1|2|
|20190506|1|3|
|20190507|1|4|
|20190508|0|0|
|20190509|0|0|
|20190510|0|0|
|20190511|1|1|
|20190512|1|2|
|20190513|0|0|
|20190514|0|0|
|20190515|1|1|
I tried analytical function and hierarchy query, but still haven't found a solution, seeking help, any hint is appreciated!
Thanks,
Hong
You can define the groups using a cumulative sum of the zeros. Then use row_number():
-- Number the days inside each run of consecutive flag = 1 rows (1, 2, 3, ...);
-- rows with flag = 0 get counter 0.
select t.*,
       (case when flag = 0 then 0
             -- flag is part of the partition so that the flag = 0 row which
             -- opens each grp is not numbered together with the flag = 1 rows
             -- that follow it (otherwise the first flagged day would get 2)
             else row_number() over (partition by grp, flag order by date)
        end) as counter
from (select t.*,
             -- running count of flag = 0 rows: the value changes on every
             -- flag = 0 row, so each run of flag = 1 rows shares one grp value
             sum(case when flag = 0 then 1 else 0 end) over (order by date) as grp
      from t
     ) t;
A very different approach is to take the difference between the current date and a cumulative max of the flag = 0 date:
-- counter = days elapsed since the most recent flag = 0 row, which is 0 on a
-- flag = 0 row itself and 1, 2, 3, ... on the flag = 1 rows after it.
SELECT
    t.*,
    DATEDIFF(
        day,
        -- latest flag = 0 date seen so far in date order
        MAX(CASE WHEN flag = 0 THEN date ELSE NULL END) OVER (ORDER BY date),
        date
    ) AS counter
FROM t;
Note that the logic of these two approaches is different -- although they should produce the same results for the data you have provided. For missing dates, the first just ignores missing dates. The second will increment the counter for missing dates.
Well - Vertica has a very nice CONDITIONAL_CHANGE_EVENT() function that could help you there ...
Every time the expression between the brackets changes, an integer is incremented by 1. This gives you a new group identifier, or a criterion to PARTITION BY, every time the flag changes. So one SELECT to get the grouping info, and then partition by the obtained grouping info. Here goes:
WITH
-- Inline copy of the sample data from the question.
input(dt,flag) AS (
          SELECT '2019-05-03'::DATE,0
UNION ALL SELECT '2019-05-04'::DATE,1
UNION ALL SELECT '2019-05-05'::DATE,1
UNION ALL SELECT '2019-05-06'::DATE,1
UNION ALL SELECT '2019-05-07'::DATE,1
UNION ALL SELECT '2019-05-08'::DATE,0
UNION ALL SELECT '2019-05-09'::DATE,0
UNION ALL SELECT '2019-05-10'::DATE,0
UNION ALL SELECT '2019-05-11'::DATE,1
UNION ALL SELECT '2019-05-12'::DATE,1
UNION ALL SELECT '2019-05-13'::DATE,0
UNION ALL SELECT '2019-05-14'::DATE,0
UNION ALL SELECT '2019-05-15'::DATE,1
)
,
-- grp is incremented every time flag differs from the previous row (in dt
-- order), so each run of equal flags gets its own group number.
grp_input AS (
SELECT
  *
, CONDITIONAL_CHANGE_EVENT(flag) OVER(ORDER BY dt) AS grp
FROM input
)
SELECT
  dt
, flag
, CASE FLAG
    WHEN 0 THEN 0
    -- position of the day inside its run of flag = 1 rows
    ELSE ROW_NUMBER() OVER(PARTITION BY grp ORDER BY dt)
  END AS counter
FROM grp_input
-- explicit ordering: without it the row order of the result is not guaranteed
ORDER BY dt;
-- out dt | flag | counter
-- out ------------+------+---------
-- out 2019-05-03 | 0 | 0
-- out 2019-05-04 | 1 | 1
-- out 2019-05-05 | 1 | 2
-- out 2019-05-06 | 1 | 3
-- out 2019-05-07 | 1 | 4
-- out 2019-05-08 | 0 | 0
-- out 2019-05-09 | 0 | 0
-- out 2019-05-10 | 0 | 0
-- out 2019-05-11 | 1 | 1
-- out 2019-05-12 | 1 | 2
-- out 2019-05-13 | 0 | 0
-- out 2019-05-14 | 0 | 0
-- out 2019-05-15 | 1 | 1
-- out (13 rows)
-- out

Count parts of total value as columns per row (pivot table)

I'm stuck with a seemingly easy query, but couldn't manage to get it working the last hours.
I have a table files that holds file names and some values like records in this file, DATE of creation (create_date), DATE of processing (processing_date) and so on. There can be multiple files for a create date in different hours, and it is likely that they will not get processed on the day of creation; in fact, it can even take up to three days or longer for them to get processed.
So let's assume I have these rows, as an example:
create_date | processing_date
------------------------------
2012-09-10 11:10:55.0 | 2012-09-11 18:00:18.0
2012-09-10 15:20:18.0 | 2012-09-11 13:38:19.0
2012-09-10 19:30:48.0 | 2012-09-12 10:59:00.0
2012-09-11 08:19:11.0 | 2012-09-11 18:14:44.0
2012-09-11 22:31:42.0 | 2012-09-21 03:51:09.0
What I want in a single query is to get a grouped column truncated to the day create_date with 11 additional columns for the differences between the processing_date and the create_date, so that the result should roughly look like this:
create_date | diff0days | diff1days | diff2days | ... | diff10days
------------------------------------------------------------------------
2012-09-10 | 0 2 1 ... 0
2012-09-11 | 1 0 0 ... 1
and so on, I hope you get the point :)
I have tried this and so far it works getting a single aggregated column for a create_date with a difference of - for example - 3:
SELECT TRUNC(f.create_date, 'DD') as created, count(1) FROM files f WHERE TRUNC(f.process_date, 'DD') - trunc(f.create_date, 'DD') = 3 GROUP BY TRUNC(f.create_date, 'DD')
I tried combining the single queries and I tried sub-queries, but that didn't help or at least my knowledge about SQL is not sufficient.
What I need is a hint so that I can include the various differences as columns, like shown above. How could I possibly achieve this?
That's basically the pivoting problem:
-- Files per creation day, broken down by the number of calendar days between
-- creation and processing (0 to 10). COUNT is used instead of SUM so that an
-- empty bucket shows 0 rather than NULL. Files whose difference falls outside
-- 0..10 are not counted in any column.
SELECT d.created
     , count(case d.days_to_process when 0  then 1 end) as diff0days
     , count(case d.days_to_process when 1  then 1 end) as diff1days
     , count(case d.days_to_process when 2  then 1 end) as diff2days
     , count(case d.days_to_process when 3  then 1 end) as diff3days
     , count(case d.days_to_process when 4  then 1 end) as diff4days
     , count(case d.days_to_process when 5  then 1 end) as diff5days
     , count(case d.days_to_process when 6  then 1 end) as diff6days
     , count(case d.days_to_process when 7  then 1 end) as diff7days
     , count(case d.days_to_process when 8  then 1 end) as diff8days
     , count(case d.days_to_process when 9  then 1 end) as diff9days
     , count(case d.days_to_process when 10 then 1 end) as diff10days
  FROM (
        -- compute the day and the day difference once per file
        SELECT TRUNC(f.create_date, 'DD') as created
             , TRUNC(f.process_date, 'DD') - TRUNC(f.create_date, 'DD') as days_to_process
          FROM files f
       ) d
 GROUP BY d.created
 ORDER BY d.created
-- Files per creation day, broken down by the number of day boundaries between
-- creation and processing (0 to 10). The creation timestamp is cast to a date
-- so that files created at different times of the same day share one row.
-- [table] is a placeholder name, bracket-quoted because TABLE is a reserved word.
SELECT d.CreateDate,
       sum(CASE WHEN d.DaysToProcess = 0  THEN 1 ELSE 0 END) AS Diff0,
       sum(CASE WHEN d.DaysToProcess = 1  THEN 1 ELSE 0 END) AS Diff1,
       sum(CASE WHEN d.DaysToProcess = 2  THEN 1 ELSE 0 END) AS Diff2,
       sum(CASE WHEN d.DaysToProcess = 3  THEN 1 ELSE 0 END) AS Diff3,
       sum(CASE WHEN d.DaysToProcess = 4  THEN 1 ELSE 0 END) AS Diff4,
       sum(CASE WHEN d.DaysToProcess = 5  THEN 1 ELSE 0 END) AS Diff5,
       sum(CASE WHEN d.DaysToProcess = 6  THEN 1 ELSE 0 END) AS Diff6,
       sum(CASE WHEN d.DaysToProcess = 7  THEN 1 ELSE 0 END) AS Diff7,
       sum(CASE WHEN d.DaysToProcess = 8  THEN 1 ELSE 0 END) AS Diff8,
       sum(CASE WHEN d.DaysToProcess = 9  THEN 1 ELSE 0 END) AS Diff9,
       sum(CASE WHEN d.DaysToProcess = 10 THEN 1 ELSE 0 END) AS Diff10
FROM (
      -- compute the day and the day difference once per file
      SELECT CAST(CreateDate AS date)               AS CreateDate,
             DateDiff(day, CreateDate, ProcessDate) AS DaysToProcess
      FROM [table]
     ) AS d
GROUP BY d.CreateDate
ORDER BY d.CreateDate
As you are using Oracle 11g you can also get desired result by using pivot query.
Here is an example:
-- sample of data from your question
SQL> create table Your_table(create_date, processing_date) as
2 (
3 select '2012-09-10', '2012-09-11' from dual union all
4 select '2012-09-10', '2012-09-11' from dual union all
5 select '2012-09-10', '2012-09-12' from dual union all
6 select '2012-09-11', '2012-09-11' from dual union all
7 select '2012-09-11', '2012-09-21' from dual
8 )
9 ;
Table created
SQL> with t2 as(
2 select create_date
3 , processing_date
4 , to_date(processing_date, 'YYYY-MM-DD')
- To_Date(create_date, 'YYYY-MM-DD') dif
5 from your_table
6 )
7 select create_date
8 , max(diff0) diff0
9 , max(diff1) diff1
10 , max(diff2) diff2
11 , max(diff3) diff3
12 , max(diff4) diff4
13 , max(diff5) diff5
14 , max(diff6) diff6
15 , max(diff7) diff7
16 , max(diff8) diff8
17 , max(diff9) diff9
18 , max(diff10) diff10
19 from (select *
20 from t2
21 pivot(
22 count(dif)
23 for dif in ( 0 diff0
24 , 1 diff1
25 , 2 diff2
26 , 3 diff3
27 , 4 diff4
28 , 5 diff5
29 , 6 diff6
30 , 7 diff7
31 , 8 diff8
32 , 9 diff9
33 , 10 diff10
34 )
35 ) pd
36 ) res
37 group by create_date
38 ;
Result:
Create_Date Diff0 Diff1 Diff2 Diff3 Diff4 Diff5 Diff6 Diff7 Diff8 Diff9 Diff10
--------------------------------------------------------------------------------
2012-09-10 0 2 1 0 0 0 0 0 0 0 0
2012-09-11 1 0 0 0 0 0 0 0 0 0 1