Generating multiple rows from a single row based on dates - sql

I have a database table with a start date and a number of months. How can I transform that into multiple rows based on the number of months?
I want to transform this
Into this:
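(The example tables are not shown here; based on the sample data reproduced in one of the answers below, the source table presumably looks like this:

  id  | start_date | month_count | amount | amount_monthly
------+------------+-------------+--------+----------------
 1382 | 2017-09-01 |           3 |     38 |           1267
 1383 | 2018-02-01 |           6 |     50 |            833

and the desired output is one row per covered month, carrying the id, the month's start date, and amount_monthly, i.e. nine rows for the data above.)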

We can try using a calendar table here, which includes all possible start of month dates which might appear in the expected output:
with calendar as (
select '2017-09-01'::date as dt union all
select '2017-10-01'::date union all
select '2017-11-01'::date union all
select '2017-12-01'::date union all
select '2018-01-01'::date union all
select '2018-02-01'::date union all
select '2018-03-01'::date union all
select '2018-04-01'::date union all
select '2018-05-01'::date union all
select '2018-06-01'::date union all
select '2018-07-01'::date union all
select '2018-08-01'::date
)
select
t.id as subscription_id,
c.dt,
t.amount_monthly
from calendar c
inner join your_table t
on c.dt >= t.start_date and
c.dt < t.start_date + (t.month_count::text || ' month')::interval
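-- half-open check: keeps every month from start_date up to, but not including, start_date + month_count months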
order by
t.id,
c.dt;

This can easily be done using generate_series() in Postgres
select t.id,
g.dt::date,
t.amount_monthly
from the_table t
cross join generate_series(t.start_date,
t.start_date + interval '1' month * (t.month_count - 1),
interval '1' month) as g(dt);

OK, it's very easy to implement this in PostgreSQL, just use generate_series, as below:
select * from month_table ;
id | start_date | month_count | amount | amount_monthly
------+------------+-------------+--------+----------------
1382 | 2017-09-01 | 3 | 38 | 1267
1383 | 2018-02-01 | 6 | 50 | 833
(2 rows)
select
id,
generate_series(
    start_date,
    start_date + (month_count || ' month')::interval - '1 month'::interval,
    '1 month'::interval
)::date as date,
amount_monthly
from
month_table ;
id | date | amount_monthly
------+------------+----------------
1382 | 2017-09-01 | 1267
1382 | 2017-10-01 | 1267
1382 | 2017-11-01 | 1267
1383 | 2018-02-01 | 833
1383 | 2018-03-01 | 833
1383 | 2018-04-01 | 833
1383 | 2018-05-01 | 833
1383 | 2018-06-01 | 833
1383 | 2018-07-01 | 833
(9 rows)

You may not need so many subqueries but this should help you understand how it can be broken down
WITH date_minmax AS(
SELECT
min(start_date) as date_first,
(max(start_date) + (month_count::text || ' months')::interval)::date AS date_last
FROM "your_table"
GROUP BY month_count
), series AS (
SELECT generate_series(
date_first,
date_last,
'1 month'::interval
)::date as list_date
FROM date_minmax
)
SELECT
id as subscription_id,
list_date as date,
amount_monthly as amount
FROM series
JOIN "your_table"
ON list_date <@ daterange(
start_date,
(start_date + (month_count::text || ' months')::interval)::date
)
ORDER BY list_date
This should achieve the desired result http://www.sqlfiddle.com/#!17/7d943/1


Pivot two columns and keep the values same in sql

I have created a query to get different time types and hours
SELECT calc_time.hours measure,
calc_time.payroll_time_type elements,
calc_time.person_id,
calc_time.start_time
FROM hwm_tm_rep_work_hours_sum_v calc_time,
per_all_people_f papf
WHERE grp_type_id = 200
AND payroll_time_type IN ( 'Afternoon shift',
'TL',
'Evening shift',
'Regular Pay ',
'OT' )
AND (To_date(To_char(calc_time.start_time, 'YYYY-MM-DD') , 'YYYY-MM-DD') BETWEEN To_date(To_char(:From_Date, 'YYYY-MM-DD'), 'YYYY-MM-DD')
AND To_date( To_char(:To_Date, 'YYYY-MM-DD'), 'YYYY-MM-DD' ))
AND papf.person_id = calc_time.person_id
I get the output like -
Start_time person_id elements measure
01-Jan-2021 198 Regular Pay 10
01-Jan-2021 198 OT 2
01-jAN-2021 198 Afternoon shift 2
16-JAN-2021 198 Regular Pay 10
17-JAN-2021 198 OT 3
20-JAN-2021 198 EVENING SHIFT 8
08-JAN-2021 11 Regular Pay 8
09-JAN-2021 11 OT 1
08-JAN-2021 11 tl 2
10-JAN-2021 12 Evening shift 9
11-JAN-2021 12 Evening shift 9
I want this output to be displayed as follows, within two dates that I pass as parameters - for example a from date of 01-JAN-2021 and a to date of 31-JAN-2021:
person_id | Regular_pay | OT | OTHER_MEASURE | OTHER_CODE
--------- | ----------- | -- | ------------- | ---------------
      198 |          20 |  5 |             2 | Afternoon shift
      198 |          20 |  5 |             8 | EVENING SHIFT
       11 |           8 |  1 |             2 | TL
       12 |             |    |            18 | Evening shift
So: the sums of Regular Pay and OT in separate columns, and everything else in other_measure and other_code.
How can I tweak the main query to achieve this?
You can use:
SELECT *
FROM (
SELECT c.person_id,
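-- the inner SUM(c.hours) is the regular GROUP BY aggregate; the CASE keeps it only for the 'Regular Pay' group,
-- and the outer SUM(...) OVER (PARTITION BY person_id) spreads that person's total across all of their rows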
SUM(CASE c.payroll_time_type WHEN 'Regular Pay' THEN SUM(c.hours) END)
OVER (PARTITION BY c.person_id) AS regular_pay,
SUM(CASE c.payroll_time_type WHEN 'OT' THEN SUM(c.hours) END)
OVER (PARTITION BY c.person_id) AS OT,
SUM(c.hours) AS other_measure,
c.payroll_time_type AS Other_code
FROM hwm_tm_rep_work_hours_sum_v c
INNER JOIN per_all_people_f p
ON (p.person_id = c.person_id)
WHERE grp_type_id = 200
AND payroll_time_type IN (
'Afternoon shift',
'TL',
'Evening shift',
'Regular Pay',
'OT'
)
AND c.start_time >= TRUNC(:from_date)
AND c.start_time < TRUNC(:to_date) + INTERVAL '1' DAY
GROUP BY
c.person_id,
c.payroll_time_type
)
WHERE other_code NOT IN ('Regular Pay', 'OT');
Which, for the sample data:
CREATE TABLE hwm_tm_rep_work_hours_sum_v (start_time, person_id, payroll_time_type, hours) AS
SELECT DATE '2021-01-01', 198, 'Regular Pay', 10 FROM DUAL UNION ALL
SELECT DATE '2021-01-01', 198, 'OT', 2 FROM DUAL UNION ALL
SELECT DATE '2021-01-01', 198, 'Afternoon shift', 2 FROM DUAL UNION ALL
SELECT DATE '2021-01-16', 198, 'Regular Pay', 10 FROM DUAL UNION ALL
SELECT DATE '2021-01-17', 198, 'OT', 3 FROM DUAL UNION ALL
SELECT DATE '2021-01-20', 198, 'Evening shift', 8 FROM DUAL UNION ALL
SELECT DATE '2021-01-08', 11, 'Regular Pay', 8 FROM DUAL UNION ALL
SELECT DATE '2021-01-09', 11, 'OT', 1 FROM DUAL UNION ALL
SELECT DATE '2021-01-08', 11, 'TL', 2 FROM DUAL UNION ALL
SELECT DATE '2021-01-10', 12, 'Evening shift', 9 FROM DUAL UNION ALL
SELECT DATE '2021-01-11', 12, 'Evening shift', 9 FROM DUAL;
CREATE TABLE per_all_people_f (person_id, grp_type_id) AS
SELECT 198, 200 FROM DUAL UNION ALL
SELECT 11, 200 FROM DUAL UNION ALL
SELECT 12, 200 FROM DUAL;
Outputs:
PERSON_ID | REGULAR_PAY |   OT | OTHER_MEASURE | OTHER_CODE
--------: | ----------: | ---: | ------------: | :--------------
       11 |           8 |    1 |             2 | TL
       12 |        null | null |            18 | Evening shift
      198 |          20 |    5 |             2 | Afternoon shift
      198 |          20 |    5 |             8 | Evening shift
db<>fiddle here
You could try something like this - in your question, unfortunately, it is not clear which columns/values are available in which table.
SELECT
calc_time.person_id,
(select sum(calc_time.hours) FROM hwm_tm_rep_work_hours_sum_v calc_time where papf.person_id = calc_time.person_id and calc_time.payroll_time_type = 'Regular Pay') as Regular_Pay,
...
FROM hwm_tm_rep_work_hours_sum_v calc_time,
per_all_people_f papf
WHERE grp_type_id = 200
AND payroll_time_type IN ( 'Afternoon shift',
'TL',
'Evening shift',
'Regular Pay ',
'OT' )
AND (
To_date(To_char(calc_time.start_time, 'YYYY-MM-DD') , 'YYYY-MM-DD') BETWEEN To_date(To_char(:From_Date, 'YYYY-MM-DD'), 'YYYY-MM-DD')
AND To_date( To_char(:To_Date, 'YYYY-MM-DD'), 'YYYY-MM-DD' ) )
and papf.person_id = calc_time.person_id
-- use a group by
GROUP BY
calc_time.person_id
You may use aggregation and then apply the model clause to calculate the required columns. Below is the code with comments, assuming you can manage the filtering by dates.
select *
from t
PERSON_ID | ELEMENTS | MEASURE
--------: | :-------------- | ------:
198 | Regular Pay | 1
198 | Regular Pay | 2
198 | Afternoon shift | 3
198 | Afternoon shift | 4
198 | OT | 5
198 | OT | 6
198 | EVENING SHIFT | 7
198 | EVENING SHIFT | 8
11 | Regular Pay | 11
11 | Regular Pay | 12
11 | TL | 13
11 | TL | 14
11 | EVENING SHIFT | 15
11 | EVENING SHIFT | 16
12 | TL | 21
12 | TL | 22
12 | EVENING SHIFT | 23
12 | EVENING SHIFT | 24
select
person_id,
ot,
regular_pay,
elements as other_code,
mes as other_measure
from (
/*First you need to aggregate all the measures by person_id and code*/
select
person_id,
elements,
sum(measure) as mes
from t
/*Date filter goes here*/
group by
person_id,
elements
)
model
/*RETURN UPDATED ROWS
will do the trick,
because we'll update only "other"
measures, so OT and Regular pay will not go
to the output*/
return updated rows
/*Where to break the calculation*/
partition by (person_id)
/*To be able to reference by code*/
dimension by (elements)
measures (
mes,
0 as ot,
0 as regular_pay
)
rules upsert (
ot[
elements not in ('OT', 'Regular Pay')
] = sum(mes)['OT'],
regular_pay[
elements not in ('OT', 'Regular Pay')
] = sum(mes)['Regular Pay']
)
PERSON_ID | OT | REGULAR_PAY | OTHER_CODE | OTHER_MEASURE
--------: | ---: | ----------: | :-------------- | ------------:
198 | 11 | 3 | EVENING SHIFT | 15
198 | 11 | 3 | Afternoon shift | 7
11 | null | 23 | TL | 27
11 | null | 23 | EVENING SHIFT | 31
12 | null | null | TL | 43
12 | null | null | EVENING SHIFT | 47
db<>fiddle here

SQL statement to return the Min and Max amount of stock per article for a given Month

I have a table from which I am trying to return the quantity per day that an article was in the system.
For example, in the table Bestand there are multiple pallets of different articles, each with a booking-in and booking-out date; I am trying to find the minimum and maximum amount of stock that was in the system per article and month.
My thinking is that if I can return the stock quantity for each day, I can then read the min and max values from that.
The timespan would be set at the time of running the SQL and the articles would be fixed.
To find out the quantity for each day I have used the following SQL:
SELECT DISTINCT
a.artbez1 AS Artikelbezeichnung,
b.artikelnr AS Artikelnummer,
SUM(CASE WHEN TO_DATE('2019-11-01 00:00:00', 'YYYY-MM-DD HH24:MI:SS') BETWEEN b.neu_datum AND b.aender_datum THEN 1 * b.menge_ist ELSE 0 END) AS "01 Nov 2019"
FROM
artikel a, bestand b
WHERE
b.artikelnr IN ('273632002', .... (huge long list of numbers) ....)
AND b.artikelnr = a.artikelnr
GROUP BY
a.artbez1, b.artikelnr;
This returns for example:
ARTIKELBEZEICHNUNG | ARTIKELNUMMER | 01 Nov 2019
------------------ | ------------- | -----------
SC-4400.CW         |     220450002 |          39
S-320.FK120        |     220502004 |           0
H-595.FK120        |     220800004 |          35
AC-548.FK209       |     220948032 |           0
AS-6800.CW         |     221355002 |          20
I would like to return this for each day of the month and then, from that, return the min and max value for each article.
I have the following SQL to return the days of a given month and was wondering if anyone had any ideas on how the two could be combined (if at all possible):
SELECT to_date('01.11.2019','dd.mm.yyyy')+LEVEL-1
FROM dual
CONNECT BY LEVEL <= TO_CHAR(LAST_DAY(to_date('01.11.2019','dd.mm.yyyy')),'DD')
DATES
2019-11-01 00:00:00
2019-11-02 00:00:00
2019-11-03 00:00:00
2019-11-04 00:00:00
2019-11-05 00:00:00
2019-11-06 00:00:00
2019-11-07 00:00:00
The result I am trying to get would be something like:
ARTIKELBEZEICHNUNG | ARTIKELNUMMER | Nov 19 Min | Nov 19 Max
------------------ | ------------- | ---------- | ----------
SC-4400.CW         |     220450002 |          5 |         39
S-320.FK120        |     220502004 |          0 |         15
H-595.FK120        |     220800004 |          2 |         35
AC-548.FK209       |     220948032 |          0 |          0
AS-6800.CW         |     221355002 |         10 |         20
Is this at all possible in SQL?
Thanks for taking the time to read my post.
JeRi
You can use a partitioned outer join:
WITH calendar ( day ) AS (
SELECT DATE '2019-11-01'
FROM DUAL
UNION ALL
SELECT day + INTERVAL '1' DAY
FROM calendar
WHERE day < LAST_DAY( DATE '2019-11-01' )
),
daily_totals ( artbez1, Artikelnr, Day, total_menge_ist ) AS (
SELECT MAX( ab.artbez1 ),
ab.artikelnr,
c.day,
COALESCE( SUM( ab.menge_ist ), 0 )
FROM calendar c
LEFT OUTER JOIN
( SELECT a.artikelnr,
a.artbez1,
b.neu_datum,
b.aender_datum,
b.menge_ist
FROM artikel a
LEFT JOIN bestand b
ON ( a.artikelnr = b.artikelnr )
-- WHERE b.artikelnr IN ('273632002', .... (huge long list of numbers) ....)
) ab
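-- partitioned outer join: the outer join is repeated for every (artikelnr, artbez1) partition,
-- so each article keeps a calendar row even on days with no matching stock record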
PARTITION BY ( ab.artikelnr, ab.artbez1 )
ON ( c.day BETWEEN ab.neu_datum AND ab.aender_datum )
GROUP BY ab.artikelnr, c.day
)
SELECT MAX( artbez1 ) AS Artikelbezeichnung,
artikelnr AS Artikelnummer,
TRUNC( day, 'MM' ) AS month,
MIN( total_menge_ist ) AS min_total_menge_ist,
MAX( total_menge_ist ) AS max_total_menge_ist
FROM daily_totals
GROUP BY artikelnr, TRUNC( day, 'MM' );
Which, for the sample data:
CREATE TABLE artikel ( artikelnr, artbez1 ) AS
SELECT 220450002, 'SC-4400.CW' FROM DUAL UNION ALL
SELECT 220502004, 'S-320.FK120' FROM DUAL UNION ALL
SELECT 220800004, 'H-595.FK120' FROM DUAL UNION ALL
SELECT 220948032, 'AC-548.FK209' FROM DUAL UNION ALL
SELECT 221355002, 'AS-6800.CW' FROM DUAL;
CREATE TABLE bestand ( artikelnr, neu_datum, aender_datum, menge_ist ) AS
SELECT 220450002, DATE '2019-10-30', DATE '2019-11-01', 20 FROM DUAL UNION ALL
SELECT 220450002, DATE '2019-11-01', DATE '2019-11-05', 19 FROM DUAL UNION ALL
SELECT 220502004, DATE '2019-11-05', DATE '2019-11-03', 5 FROM DUAL UNION ALL
SELECT 220800004, DATE '2019-11-01', DATE '2019-11-15', 35 FROM DUAL UNION ALL
SELECT 221355002, DATE '2019-10-20', DATE '2019-11-05', 5 FROM DUAL UNION ALL
SELECT 221355002, DATE '2019-10-25', DATE '2019-11-10', 5 FROM DUAL UNION ALL
SELECT 221355002, DATE '2019-10-28', DATE '2019-11-13', 5 FROM DUAL UNION ALL
SELECT 221355002, DATE '2019-10-30', DATE '2019-11-15', 5 FROM DUAL UNION ALL
SELECT 221355002, DATE '2019-11-05', DATE '2019-11-20', 5 FROM DUAL;
Outputs:
ARTIKELBEZEICHNUNG | ARTIKELNUMMER | MONTH | MIN_TOTAL_MENGE_IST | MAX_TOTAL_MENGE_IST
:----------------- | ------------: | :------------------ | ------------------: | ------------------:
SC-4400.CW | 220450002 | 2019-11-01 00:00:00 | 0 | 39
S-320.FK120 | 220502004 | 2019-11-01 00:00:00 | 0 | 0
AC-548.FK209 | 220948032 | 2019-11-01 00:00:00 | 0 | 0
H-595.FK120 | 220800004 | 2019-11-01 00:00:00 | 0 | 35
AS-6800.CW | 221355002 | 2019-11-01 00:00:00 | 0 | 25
db<>fiddle here

Get data from multiple tables by two timestamp

PostgreSQL 10.12
I have a table with calculated data grouped by date with hour, e.g.:
hourly_stats
clicks_count | visitors_count | product_id | promoter_id | bundle_id | date_time
------------------------------------------------------------------------------------------
15 | 6 | 123 | 456 | 789 | 2018-11-02 12:00:00
8 | 3 | 123 | 456 | 789 | 2018-11-02 16:00:00
2 | 1 | 123 | 456 | 789 | 2018-11-13 10:00:00
5 | 2 | 123 | 456 | 789 | 2018-11-13 21:00:00
Every new hour I collect statistics for the previous hour and insert it into the table.
In addition, to always display fresh data, I use a materialized view, which stores the calculated data from the beginning of the current hour to the current moment (refreshed every 5 minutes).
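A rough sketch of that setup, purely for illustration (the raw event table and its columns - raw_clicks, visitor_id, created_at - are assumptions, not something given in the question):
-- hourly job: finalize the hour that just ended
INSERT INTO hourly_stats (clicks_count, visitors_count, product_id, promoter_id, bundle_id, date_time)
SELECT count(*),
       count(DISTINCT visitor_id),
       product_id, promoter_id, bundle_id,
       date_trunc('hour', now() - interval '1 hour')
FROM raw_clicks
WHERE created_at >= date_trunc('hour', now() - interval '1 hour')
  AND created_at <  date_trunc('hour', now())
GROUP BY product_id, promoter_id, bundle_id;
-- materialized view covering only the still-open current hour; refreshed every 5 minutes
CREATE MATERIALIZED VIEW materialized_stats AS
SELECT count(*)                   AS clicks_count,
       count(DISTINCT visitor_id) AS visitors_count,
       product_id, promoter_id, bundle_id,
       date_trunc('hour', created_at) AS date_time
FROM raw_clicks
WHERE created_at >= date_trunc('hour', now())
GROUP BY product_id, promoter_id, bundle_id, date_trunc('hour', created_at);
-- REFRESH MATERIALIZED VIEW materialized_stats;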
The core part of the query is always based on two timestamp values and looks like this:
SELECT *
FROM (
SELECT
clicks_count,
visitors_count,
product_id,
promoter_id,
bundle_id,
date_time
FROM hourly_stats
UNION ALL (
SELECT
clicks_count,
visitors_count,
product_id,
promoter_id,
bundle_id,
date_time
FROM materialized_stats
)
)
WHERE (date_time > start_date AND date_time <= end_date)
This core part is used in multiple really complex queries, which are too slow. For example, in one of the cases it takes more than 1.5 minutes to complete the query (even when no row is filtered out by start_date and end_date) once the table has more than 20 million records.
I decided to add two more tables with calculated data, grouped by year-month-day:
daily_stats
clicks_count | visitors_count | product_id | promoter_id | bundle_id | date_time
------------------------------------------------------------------------------------------
23 | 9 | 123 | 456 | 789 | 2018-11-02
7 | 3 | 123 | 456 | 789 | 2018-11-13
and by year-month:
monthly_stats
clicks_count | visitors_count | product_id | promoter_id | bundle_id | date_time
------------------------------------------------------------------------------------------
30 | 12 | 123 | 456 | 789 | 2018-11
So, if I have start_date = '2019-01-01 00:00:00' and end_date = '2020-08-12 16:00:00' I will be able to collect data like this
(SELECT
clicks_count,
visitors_count,
product_id,
promoter_id,
bundle_id,
date_time
FROM monthly_stats
WHERE 'monthly_condition')
UNION ALL
(SELECT
clicks_count,
visitors_count,
product_id,
promoter_id,
bundle_id,
date_time
FROM daily_stats
WHERE 'daily_condition')
UNION ALL
(SELECT
clicks_count,
visitors_count,
product_id,
promoter_id,
bundle_id,
date_time
FROM hourly_stats
WHERE 'hourly_condition')
UNION ALL (
SELECT
clicks_count,
visitors_count,
product_id,
promoter_id,
bundle_id,
date_time
FROM materialized_stats
)
Each calculated row is added to the corresponding table only after the base time period (month, day, or hour) is over. So for a specific set of product_id | promoter_id | bundle_id I should get:
19 rows from monthly_stats +
11 rows from daily_stats +
16 rows from hourly_stats +
1 row from materialized_stats
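For example, with start_date = '2019-01-01 00:00:00' and end_date = '2020-08-12 16:00:00', and assuming the current time falls inside the 16:00 hour, the split works out as: full months 2019-01 through 2020-07 (19 rows from monthly_stats), full days 2020-08-01 through 2020-08-11 (11 rows from daily_stats), full hours 00:00 through 15:00 on 2020-08-12 (16 rows from hourly_stats), plus the still-open 16:00 hour (1 row from materialized_stats).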
Already implemented restrictions (on the application layer):
max end_date value may be equal to the end of the current day
start_date is always less than end_date
start_date and end_date values are specified with hour precision
Question: how do I implement the 'monthly_condition', 'daily_condition' and 'hourly_condition' above? They should be based on parts of start_date and end_date, but I don't quite understand how to do this.
Thanks for any help.
This is an interesting problem. I had to solve this once before for SQL Server. PostgreSQL makes it much easier. Everything down to the fullness cte has been tested. The allstats cte is a best guess since I do not have your tables or data.
with invars as (
select '2016-08-15 12:35:00'::timestamptz as start_date,
'2020-08-12 19:00:00'::timestamptz as end_date
), days as (
select c.dhour,
tstzrange(
date_trunc('hour', i.start_date),
date_trunc('hour', i.end_date), '[)') as qrange
from invars i
cross join lateral generate_series(
date_trunc('hour', i.start_date),
date_trunc('hour', i.end_date),
interval '1 hour'
) as c(dhour)
), calendar as (
select dhour,
date_trunc('day', dhour) as dday,
date_trunc('month', dhour) as dmonth,
qrange
from days
), fullness as (
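-- #> is the range "contains" operator: a day or month counts as full when the query range covers it entirely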
select dhour, dday, dmonth, qrange,
qrange #> tstzrange(dday, dday + interval '1 day', '[)') as full_day,
qrange #> tstzrange(dmonth, dmonth + interval '1 month', '[)') as full_month
from calendar
), allstats as (
select clicks_count, visitors_count, product_id, promoter_id, bundle_id
from monthly_stats
where date_time in (select distinct to_char(dmonth, 'YYYY-MM')
from fullness where full_month)
union all
select clicks_count, visitors_count, product_id, promoter_id, bundle_id
from daily_stats
where date_time in (select distinct to_char(dday, 'YYYY-MM-DD')
from fullness where full_day and not full_month)
union all
select clicks_count, visitors_count, product_id, promoter_id, bundle_id
from hourly_stats
where date_time in (select dhour from fullness
where not full_day and not full_month
and dhour < date_trunc('hour', now()))
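-- only already-finished partial hours; the still-open current hour comes from materialized_stats below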
union all
select clicks_count, visitors_count, product_id, promoter_id, bundle_id
from materialized_stats
)
select * from allstats;
I think your problem description leaves off the fact that the start_date can begin in the middle of a month or even a day. This query covers that.

SQL: Getting all dates between a set of date pairs

I have a table with some data and a time period i.e. start date and end date
------------------------------
| id | start_date | end_date |
|------------------------------|
| 0 | 1-1-2019 | 3-1-2019 |
|------------------------------|
| 1 | 6-1-2019 | 8-1-2019 |
|------------------------------|
I want to run a query that will return the id and all the dates that are within those time periods. For instance, the result of the query for the above table would be:
------------------
| id | date |
|------------------|
| 0 | 1-1-2019 |
|------------------|
| 0 | 2-1-2019 |
|------------------|
| 0 | 3-1-2019 |
|------------------|
| 1 | 6-1-2019 |
|------------------|
| 1 | 7-1-2019 |
|------------------|
| 1 | 8-1-2019 |
------------------
I am using Redshift, therefore I need something that is supported in Postgres; please take this into consideration.
Your help will be greatly appreciated.
The common way this is done is to create a calendar table with a list of dates. In fact, a calendar table can be extended to include columns like:
Day number (in year)
Week number
First day of month
Last day of month
Weekday / Weekend
Public holiday
Simply create the table in Excel, save as CSV and then COPY it into Redshift.
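For illustration, a minimal sketch of that approach (the column list, the S3 path, and the IAM role below are placeholders, not from the question):
CREATE TABLE calendar (
    date        DATE NOT NULL,
    day_of_year SMALLINT,
    week_number SMALLINT,
    month_start DATE,
    month_end   DATE,
    is_weekend  BOOLEAN,
    is_holiday  BOOLEAN
);
COPY calendar
FROM 's3://my-bucket/calendar.csv'                           -- placeholder path
IAM_ROLE 'arn:aws:iam::123456789012:role/my-redshift-role'   -- placeholder role
CSV
IGNOREHEADER 1
DATEFORMAT 'auto';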
You could then just JOIN to the table, like:
SELECT
your_table.id,
calendar.date
FROM your_table
JOIN calendar
ON calendar.date BETWEEN your_table.start_date AND your_table.end_date
This question was originally tagged Postgres.
Use generate_series():
select t.id, gs.dte
from t cross join lateral
generate_series(t.start_date, t.end_date, interval '1 day') as gs(dte);
OK, it took me a while to get there, but this is what I did (though I'm not really proud of it):
I created a query that generates a calendar for the last 6 years, cross joined it with my table and then selected the relevant dates from my calendar table.
WITH
days AS (select 0 as num UNION select 1 as num UNION select 2 UNION select 3 UNION select 4 UNION
         select 5 UNION select 6 UNION select 7 UNION select 8 UNION select 9 UNION select 10 UNION
         select 11 UNION select 12 UNION select 13 UNION select 14 UNION select 15 UNION select 16 UNION
         select 17 UNION select 18 UNION select 19 UNION select 20 UNION select 21 UNION select 22 UNION
         select 23 UNION select 24 UNION select 25 UNION select 26 UNION select 27 UNION select 28 UNION
         select 29 UNION select 30 UNION select 31),
month AS (select num from days where num <= 12),
years AS (select num from days where num <= 6),
rightnow AS (select CAST( TO_CHAR(GETDATE(), 'yyyy-mm-dd hh24') || ':' || trim(TO_CHAR((ROUND((DATEPART (MINUTE, GETDATE()) / 5), 1) * 5 ),'09')) AS TIMESTAMP) as start),
calendar as
(
select
DATEADD(years, -y.num, DATEADD( month, -m.num, DATEADD( days, -d.num, n.start ) ) ) AS period_date
from days d, month m, years y, rightnow n
)
select u.id, calendar.period_date
from periods u
cross join calendar
where date_part(DAY, u.finishedat) >= date_part(DAY, u.startedat) + 1
  and date_part(DAY, calendar.period_date) < date_part(DAY, u.finishedat)
  and date_part(DAY, calendar.period_date) > date_part(DAY, u.startedat)
  and calendar.period_date < u.finishedat
  and calendar.period_date > u.startedat
This was based on the answer here: Using sql function generate_series() in redshift

How to count ratio hourly?

I'm a bit stuck on how to proceed with my queries.
I have two tables "A"(date, response, b_id) and "B"(id, country). I need to compute, for each hour, the ratio of the number of entries where a response exists to the total number of entries on a specific date. The final selection should consist of the columns "hour" and "ratio".
SELECT COUNT(*) FROM A WHERE RESPONSE IS NOT NULL -- counting entries with a response
SELECT COUNT(*) FROM A -- counting the total number of entries
How do I compute the ratio? Should I create a separate variable for it?
How do I count for each hour of a day? Should I make something like a loop? And how can I get the "hour" part of a date?
What is the best way to select the hours and the computed ratio? Should I make a separate table for it?
I'm rather new to writing complex queries, so I would be happy for any kind of help.
You can do this as:
select to_char(datecol, 'HH24') as hour,
count(response) as has_response, count(*) as total,
count(response) / count(*) as ratio
from a
where datecol >= date '2018-09-18' and datecol < date '2018-09-19'
group by to_char(datecol, 'HH24');
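One thing to watch: on databases that do integer division (PostgreSQL, for example), count(response) / count(*) would truncate to 0, so you would write count(response) * 1.0 / count(*) there; Oracle's NUMBER division already returns a decimal.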
You can also do this using avg() -- which is also fun:
select to_char(datecol, 'HH24'),
avg(case when response is not null then 1.0 else 0 end) as ratio
from a
where datecol >= date '2018-09-18' and datecol < date '2018-09-19'
group by to_char(datecol, 'HH24')
In this case, that requires more typing, though.
SQL Fiddle
Oracle 11g R2 Schema Setup:
CREATE TABLE A ( dt, response, b_id ) AS
SELECT DATE '2018-09-18' + INTERVAL '00:00' HOUR TO MINUTE, NULL, 1 FROM DUAL UNION ALL
SELECT DATE '2018-09-18' + INTERVAL '00:10' HOUR TO MINUTE, 'A', 1 FROM DUAL UNION ALL
SELECT DATE '2018-09-18' + INTERVAL '00:20' HOUR TO MINUTE, 'B', 1 FROM DUAL UNION ALL
SELECT DATE '2018-09-18' + INTERVAL '01:00' HOUR TO MINUTE, 'C', 1 FROM DUAL UNION ALL
SELECT DATE '2018-09-18' + INTERVAL '01:10' HOUR TO MINUTE, 'D', 1 FROM DUAL UNION ALL
SELECT DATE '2018-09-18' + INTERVAL '02:00' HOUR TO MINUTE, NULL, 1 FROM DUAL UNION ALL
SELECT DATE '2018-09-18' + INTERVAL '03:00' HOUR TO MINUTE, 'E', 1 FROM DUAL UNION ALL
SELECT DATE '2018-09-18' + INTERVAL '05:10' HOUR TO MINUTE, 'F', 1 FROM DUAL;
Query 1:
SELECT b_id,
TO_CHAR( TRUNC( dt, 'HH' ), 'YYYY-MM-DD HH24:MI:SS' ) AS hour,
COUNT(RESPONSE) AS total_response_per_hour,
COUNT(*) AS total_per_hour,
total_response_per_day,
total_per_day,
COUNT(response) / total_response_per_day AS ratio_for_responses,
COUNT(*) / total_per_day AS ratio
FROM (
SELECT A.*,
COUNT(RESPONSE) OVER ( PARTITION BY b_id, TRUNC( dt ) ) AS total_response_per_day,
COUNT(*) OVER ( PARTITION BY b_id, TRUNC( dt ) ) AS total_per_day
FROM A
)
GROUP BY
b_id,
total_per_day,
total_response_per_day,
TRUNC( dt, 'HH' )
ORDER BY
TRUNC( dt, 'HH' )
Results:
| B_ID | HOUR | TOTAL_RESPONSE_PER_HOUR | TOTAL_PER_HOUR | TOTAL_RESPONSE_PER_DAY | TOTAL_PER_DAY | RATIO_FOR_RESPONSES | RATIO |
|------|---------------------|-------------------------|----------------|------------------------|---------------|---------------------|-------|
| 1 | 2018-09-18 00:00:00 | 2 | 3 | 6 | 8 | 0.3333333333333333 | 0.375 |
| 1 | 2018-09-18 01:00:00 | 2 | 2 | 6 | 8 | 0.3333333333333333 | 0.25 |
| 1 | 2018-09-18 02:00:00 | 0 | 1 | 6 | 8 | 0 | 0.125 |
| 1 | 2018-09-18 03:00:00 | 1 | 1 | 6 | 8 | 0.16666666666666666 | 0.125 |
| 1 | 2018-09-18 05:00:00 | 1 | 1 | 6 | 8 | 0.16666666666666666 | 0.125 |
SELECT withResponses.hour,
withResponses.cnt AS withResponse,
alls.cnt AS AllEntries,
(withResponses.cnt / alls.cnt) AS ratio
FROM
( SELECT to_char(d, 'DD-MM-YY - HH24') || ':00 to :59 ' hour,
count(*) AS cnt
FROM A
WHERE RESPONSE IS NOT NULL
GROUP BY to_char(d, 'DD-MM-YY - HH24') || ':00 to :59 ' ) withResponses,
( SELECT to_char(d, 'DD-MM-YY - HH24') || ':00 to :59 ' hour,
count(*) AS cnt
FROM A
GROUP BY to_char(d, 'DD-MM-YY - HH24') || ':00 to :59 ' ) alls
WHERE alls.hour = withResponses.hour ;
SQLFiddle: http://sqlfiddle.com/#!4/c09b9/2