Calculate all days, group by week and include empty weeks? - sql

I need a query that sums all the values for each day in a given week and groups by week including empty weeks.
This query groups by week and includes empty weeks but it isn't summing all days in the week as expected:
Expected Output:
[
...
{"week"=>"2019-02-28", "amount_net"=>"0"},
{"week"=>"2019-03-07", "amount_net"=>"300"}
]
Actual Output:
[
...
{"week"=>"2019-02-28", "amount_net"=>"0"},
{"week"=>"2019-03-07", "amount_net"=>"0"}
]
Here is the query I came up with:
-- Left-joins a generated weekly calendar (d) to per-week net amounts (t) so that
-- weeks without matching cost_items rows still appear, with amount_net defaulted to 0.
SELECT
week,
COALESCE (amount_net, 0) as amount_net
FROM
(
-- Calendar side: one 'YYYY-MM-DD' string every 7 days, stepping from the series
-- start 2018-12-13 (the ::date cast drops the time of day).
SELECT
to_char(
generate_series(
timestamp '2018-12-13 22:34:31 UTC',
timestamp '2019-03-14', interval '1 week'
):: date,
'YYYY-MM-DD'
) as week
) d
LEFT JOIN (
-- Data side: net amount per week, keyed by the week start that date_trunc assigns.
-- NOTE(review): date_trunc('week', ...) returns the Monday of the week, whereas the
-- calendar keys above step from 2018-12-13, a Thursday. The two sets of week strings
-- therefore never coincide, so USING (week) finds no match and every amount_net
-- falls back to 0 -- this looks like the cause of the "Actual Output" in the question.
SELECT
to_char(
date_trunc('week', created_at),
'YYYY-MM-DD'
) AS week,
-- Each row's net amount is rounded to 2 decimals before being summed.
SUM(
ROUND(
(
coalesce(cost_items.base_price, 0) - coalesce(cost_items.base_discount, 0) + coalesce(cost_items.base_fee, 0) + coalesce(cost_items.base_taxes_total, 0) + coalesce(
cost_items.base_commission_included,
0
) - coalesce(cost_items.base_voided_price, 0) + coalesce(
cost_items.base_voided_discount,
0
) - coalesce(cost_items.base_voided_fee, 0) - coalesce(
cost_items.base_voided_taxes_total,
0
) - coalesce(
cost_items.base_voided_commission_included,
0
)
):: numeric,
2
)
) as amount_net
FROM
cost_items
WHERE
id IN ('0', '1', '2')
GROUP BY
1
) t USING (week)
ORDER BY
week;
How do I adjust this query to properly sum all values for each day in the week?

Figured it out:
-- Net amount per calendar week, including weeks without any cost_items rows.
-- A daily calendar is left-joined to the cost items created on each day and the
-- result is rolled up to the week start returned by date_trunc('week', ...).
-- Days without a match still contribute 0 because every term is coalesced.
with calendar_days as (
    -- one row per day of the reporting window
    select generate_series(
               timestamp '2018-12-01',
               timestamp '2019-04-01',
               interval '1 day'
           )::date as day
)
select
    date_trunc('week', cd.day)::date as week,
    -- each joined row's net amount is rounded to 2 decimals, then summed per week
    sum(
        round(
            (
                coalesce(ci.base_price, 0)
              - coalesce(ci.base_discount, 0)
              + coalesce(ci.base_fee, 0)
              + coalesce(ci.base_taxes_total, 0)
              + coalesce(ci.base_commission_included, 0)
              - coalesce(ci.base_voided_price, 0)
              + coalesce(ci.base_voided_discount, 0)
              - coalesce(ci.base_voided_fee, 0)
              - coalesce(ci.base_voided_taxes_total, 0)
              - coalesce(ci.base_voided_commission_included, 0)
            )::numeric,
            2
        )
    )
from calendar_days cd
left join cost_items ci
    on ci.created_at::date = cd.day
   -- the id list is elided in the original answer; fill in the ids to report on
   and ci.id in (....)
group by 1
order by 1;

Related

Getting a period index from a date in PostgreSQL

Here is some Postgres code I wrote; it works. Is there a more efficient way to write it? My goal is to find how many periods a given date is away from 2014-03-01. One period is a half-year starting in March or September.
I updated this code below on 2022-05-18 at 10:19 UTC+2
-- Labels every day from 2014-03-01 through today with the index of its half-year
-- period; periods start on 1 March and 1 September.
with days as (
    select generate_series(date '2014-03-01', CURRENT_DATE, interval '1 day')::date as date
),
days_with_period_start as (
    select date,
           -- start date of the half-year the day belongs to:
           --   Jan, Feb  -> 1 September of the previous year
           --   Mar..Aug  -> 1 March of the same year
           --   Sep..Dec  -> 1 September of the same year
           case
               when extract(month from date) in (1, 2) then (extract(year from date) - 1 || '-09-01')
               when extract(month from date) in (3, 4, 5, 6, 7, 8) then (extract(year from date) || '-03-01')
               else extract(year from date) || '-09-01'
           end::date as half_year_mar_sep
    from days
)
select date,
       -- consecutive period numbers in order of the period start date
       dense_rank() over (order by half_year_mar_sep) as period_index
from days_with_period_start
If I wrap the code above in select min(date), period_index from (<code above>) s3 group by 2 order by 1, then I get the result I need:
-- Forward-fill approach: mark the days on which a half-year period starts, then
-- count the marks seen so far to obtain each day's period number (ct_str).
WITH period_starts AS (
    -- one row every 6 months from 2014-03-01, ranked in date order
    SELECT
        date1::date,
        rank() OVER (ORDER BY date1)
    FROM generate_series(date '2014-03-01', CURRENT_DATE + interval '1' month, interval '6 month') g (date1)
),
all_days AS (
    -- one row per day over the same span
    SELECT
        all_date::date
    FROM generate_series(date '2014-03-01', CURRENT_DATE + interval '1' month, interval ' 1 day') s (all_date)
),
days_marked AS (
    -- rank is non-NULL only on days that start a period
    SELECT
        *
    FROM all_days c1
    LEFT JOIN period_starts c2
        ON date1 = all_date
),
days_counted AS (
    -- running count of non-NULL ranks up to and including each day
    SELECT
        *,
        count(rank) OVER (ORDER BY all_date) AS ct_str
    FROM days_marked
)
SELECT
    *,
    -- position of the day within its period
    rank() OVER (PARTITION BY ct_str ORDER BY all_date) AS rank1,
    -- position of the day within the whole series
    dense_rank() OVER (ORDER BY all_date) AS dense_rank1
FROM days_counted;
I hope it's not intimidating. Personally, I find CTEs a good tool, since they make the logic clearer.
demo
useful link: How to do forward fill as a PL/PGSQL function
If you don't need some of the columns, you can simply replace * with the columns you want.
Based on @Mark's answer, I wrote the code below, but it's not simpler than the original code.
-- Assigns every day from 2014-03-01 through today the index of the half-year
-- period (starting 1 March / 1 September) that contains it.
--
-- Each period is the half-open range [half_year_start, following_half_year_start).
-- A closed range (BETWEEN) would match a boundary day such as 2014-09-01 to both
-- the period that ends on it and the period that starts on it, duplicating that day.
select s.date,
       m.period_index
from
(
    -- one row per day to be labelled
    select generate_series(date '2014-03-01', CURRENT_DATE, interval '1 day')::date as date
) s
join
(
    select period_start::date as half_year_start,
           rank() over (order by period_start) as period_index,
           -- explicit ordering so that "next" always means the next period in time;
           -- NULL for the latest period, which is treated as open-ended below
           lead(period_start::date) over (order by period_start) as following_half_year_start
    from generate_series(date '2014-03-01', CURRENT_DATE, interval '6 month') as g (period_start)
) m
  on s.date >= m.half_year_start
 and (m.following_half_year_start is null or s.date < m.following_half_year_start)
order by s.date
;

Conditional Aggregation for multiple days of data

I am able to do conditional aggregation for a single day using below SQL but wondering how can I accomplish it within a single query for multiple days. I am trying to do a cartesian product between logs_20190715 and dates but not able to think through further to solve this. Any inputs would be appreciated.
-- 1: per-city distinct-user counts relative to the anchor day 2018-11-19
--    A: users logged on the anchor day
--    B: users logged in the 7 days starting at the anchor day
--    C: users logged in the 7 days before the anchor day
--    D: users logged in the 28 days before the anchor day
SELECT
    TIMESTAMP '2018-11-19' AS time_id,
    city_id,
    COUNT(DISTINCT CASE
        WHEN DATE_TRUNC('DAY', logged_at) = TIMESTAMP '2018-11-19'
        THEN user_id
    END) AS A,
    COUNT(DISTINCT CASE
        WHEN logged_at >= TIMESTAMP '2018-11-19'
         AND logged_at <  TIMESTAMP '2018-11-19' + interval '7' DAY
        THEN user_id
    END) AS B,
    COUNT(DISTINCT CASE
        WHEN logged_at >= TIMESTAMP '2018-11-19' - interval '7' DAY
         AND logged_at <  TIMESTAMP '2018-11-19'
        THEN user_id
    END) AS C,
    COUNT(DISTINCT CASE
        WHEN logged_at >= TIMESTAMP '2018-11-19' - interval '28' DAY
         AND logged_at <  TIMESTAMP '2018-11-19'
        THEN user_id
    END) AS D,
    '2018-11-19'
FROM logs_20190715
-- pre-filter: from 40 days before to 10 days after the anchor, both ends inclusive
WHERE logged_at >= TIMESTAMP '2018-11-19' - interval '40' DAY
  AND logged_at <= TIMESTAMP '2018-11-19' + interval '10' DAY
GROUP BY 1, 2;
-- 2: per-city distinct-user counts relative to the anchor day 2018-11-18
--    A: users logged on the anchor day
--    B: users logged in the 7 days starting at the anchor day
--    C: users logged in the 7 days before the anchor day
--    D: users logged in the 28 days before the anchor day
SELECT
    TIMESTAMP '2018-11-18' AS time_id,
    city_id,
    COUNT(DISTINCT CASE
        WHEN DATE_TRUNC('DAY', logged_at) = TIMESTAMP '2018-11-18'
        THEN user_id
    END) AS A,
    COUNT(DISTINCT CASE
        WHEN logged_at >= TIMESTAMP '2018-11-18'
         AND logged_at <  TIMESTAMP '2018-11-18' + interval '7' DAY
        THEN user_id
    END) AS B,
    COUNT(DISTINCT CASE
        WHEN logged_at >= TIMESTAMP '2018-11-18' - interval '7' DAY
         AND logged_at <  TIMESTAMP '2018-11-18'
        THEN user_id
    END) AS C,
    COUNT(DISTINCT CASE
        WHEN logged_at >= TIMESTAMP '2018-11-18' - interval '28' DAY
         AND logged_at <  TIMESTAMP '2018-11-18'
        THEN user_id
    END) AS D,
    '2018-11-18'
FROM logs_20190715
-- pre-filter: from 40 days before to 10 days after the anchor, both ends inclusive
WHERE logged_at >= TIMESTAMP '2018-11-18' - interval '40' DAY
  AND logged_at <= TIMESTAMP '2018-11-18' + interval '10' DAY
GROUP BY 1, 2;
How can I combine the above two queries into a single query and have the same results produced?( Have date dimension which has all dates in it. )

how to calculate number of working days in pure sql [duplicate]

I have two date columns and trying to measure days between the two dates excluding weekends. I'm getting a negative number and need help solving.
Table
CalendarDate DayNumber FirstAssgn FirstCnt DayNumber2 Id BusinessDays
5/21/2017 Sunday 5/21/17 5/21/17 Sunday 1 -1
Query:
-- Select-list expression: business days from FIRST_ASSGN_DT to FIRST_CONTACT_DT,
-- built as calendar days minus weekend days minus edge corrections minus holidays.
-- NOTE(review): TO_DATE is applied to the two columns here, while the lines below
-- pass the same columns to TRUNC/TO_CHAR as dates. If they are DATE columns, TO_DATE
-- forces an implicit date -> string -> date round trip that depends on the session's
-- NLS_DATE_FORMAT -- confirm the column types.
TRUNC(TO_DATE(A.FIRST_CONTACT_DT, 'DD/MM/YYYY')) - TRUNC(TO_DATE(A.FIRST_ASSGN_DT, 'DD/MM/YYYY'))
-- Two weekend days for every week between the week starts of the two dates.
-- NOTE(review): which day TRUNC(date, 'D') treats as the start of the week depends on
-- the session's NLS_TERRITORY.
- ((((TRUNC(A.FIRST_CONTACT_DT,'D'))-(TRUNC(A.FIRST_ASSGN_DT,'D')))/7)*2)
-- Edge corrections: one day off when the start is a Sunday, one when the end is a Saturday.
-- When both dates are the same Sunday the terms above are 0, so this line alone
-- produces 0 - 0 - 1 = -1, matching the negative result shown in the question.
- (CASE WHEN TO_CHAR(A.FIRST_ASSGN_DT,'DY','nls_date_language=english') ='SUN' THEN 1 ELSE 0 END)
- (CASE WHEN TO_CHAR(A.FIRST_CONTACT_DT,'DY','nls_date_language=english')='SAT' THEN 1 ELSE 0 END)
-- Holidays flagged in HUM.CALENDAR on or after the assignment date and before the contact date.
- (SELECT COUNT(1) FROM HUM.CALENDAR CAL
WHERE 1=1
AND CAL.CALENDAR_DATE >= A.FIRST_ASSGN_DT
AND CAL.CALENDAR_DATE < A.FIRST_CONTACT_DT
--BETWEEN A.FIRST_ASSGN_DT AND A.FIRST_CONTACT_DT
AND CAL.GRH_HOLIDAY_IND = 'Y'
) AS Business_Days
Looks like below piece needs editing...
- (CASE WHEN TO_CHAR(A.FIRST_ASSGN_DT,'DY','nls_date_language=english')='SUN' THEN 1 ELSE 0 END)
Adapted from my answer here:
Get the number of days between the Mondays of both weeks (using TRUNC( datevalue, 'IW' ) as an NLS_LANGUAGE independent method of finding the Monday of the week) then add the day of the week (Monday = 1, Tuesday = 2, etc., to a maximum of 5 to ignore weekends) for the end date and subtract the day of the week for the start date. Like this:
-- Weekday (Monday-Friday) difference between start_date and end_date.
-- The inline view computes the Monday of each date's ISO week once; the outer query
-- converts whole weeks to 5 weekdays each and then adjusts by each date's position
-- within its week, capped at 5 so that Saturday and Sunday count like Friday.
SELECT ( end_monday - start_monday ) * 5 / 7
       + LEAST( end_date - end_monday + 1, 5 )
       - LEAST( start_date - start_monday + 1, 5 )
          AS WeekDaysDifference
FROM   (
         SELECT start_date,
                end_date,
                TRUNC( start_date, 'IW' ) AS start_monday,
                TRUNC( end_date, 'IW' )   AS end_monday
         FROM   your_table
       )
-- Counts, over all (STARTPERIOD, ENDPERIOD] ranges in TABLE_DATA, how many of the
-- generated days fall on Monday-Friday (WORK_DATE) and how many fall on
-- Saturday/Sunday (BUSINESS_DATE -- the name is kept for existing callers, but it
-- holds the weekend-day count).
WITH RANGE_TEMP AS (
    SELECT
        STARTPERIOD AS start_date,
        ENDPERIOD   AS end_date
    FROM
        TABLE_DATA -- YOUR TABLE WITH ALL DATA DATE
), MAX_RANGE AS (
    -- Always exactly one row, so the CONNECT BY below cannot multiply rows.
    SELECT
        MAX(end_date - start_date) AS max_days
    FROM
        RANGE_TEMP
), DAY_OFFSETS AS (
    -- 1 .. length of the longest range. Running CONNECT BY LEVEL directly on a
    -- multi-row source would connect every row to every other row at each level.
    SELECT
        LEVEL AS day_offset
    FROM
        MAX_RANGE
    CONNECT BY LEVEL <= max_days
), DATE_TEMP AS (
    -- start_date + 1 .. end_date for each range; a range with no days yields no rows.
    SELECT
        r.start_date + o.day_offset AS DATE_ALL
    FROM
        RANGE_TEMP r
        INNER JOIN DAY_OFFSETS o
            ON o.day_offset <= r.end_date - r.start_date
)
SELECT
    -- TRUNC(d) - TRUNC(d, 'IW') is 0 for Monday .. 6 for Sunday regardless of NLS
    -- settings, unlike TO_CHAR(d, 'D'), whose numbering follows NLS_TERRITORY.
    COUNT(CASE WHEN TRUNC(DATE_ALL) - TRUNC(DATE_ALL, 'IW') <  5 THEN 1 END) AS WORK_DATE,
    COUNT(CASE WHEN TRUNC(DATE_ALL) - TRUNC(DATE_ALL, 'IW') >= 5 THEN 1 END) AS BUSINESS_DATE
FROM
    DATE_TEMP
;

PostgreSQL generate month and year series based on table field and fill with nulls if no data for a given month

I want to generate series of month and year from the next month of current year(say, start_month) to 12 months from start_month along with the corresponding data (if any, else return nulls) from another table in PostgreSQL.
-- Attempt at a 12-month series (2019-03 .. 2020-02) with the summed price of
-- item type 3 for each month.
-- NOTE(review): generate_series() is called in the SELECT list, so the month values
-- are not related to the rows being summed; SUM(price) covers the whole WHERE range
-- and the same total is shown against every month.
SELECT ( ( DATE '2019-03-01' + ( interval '1' month * generate_series(0, 11) ) )
:: DATE ) dd,
extract(year FROM ( DATE '2019-03-01' + ( interval '1' month *
generate_series(0, 11) )
)),
coalesce(SUM(price), 0)
FROM items
-- NOTE(review): the alias "s" is not declared in the FROM clause above (only "items").
WHERE s.date_added >= '2019-03-01'
AND s.date_added < '2020-03-01'
AND item_type_id = 3
GROUP BY 1,
2
ORDER BY 2;
The problem with the above query is that it is giving me the same value for price for all the months. The requirement is that the price column be filled with nulls or zeros if no price data is available for a given month.
Put the generate_series() in the FROM clause. You are summarizing the data -- i.e. calculating the price over the entire range -- and then projecting this on all months. Instead:
-- Total price of item type 3 per month for 2019-03 .. 2020-02, with 0 for months
-- that have no items. The month series drives the query and items are left-joined
-- to it, so every month appears exactly once.
SELECT gs.yyyymm,
       coalesce(SUM(i.price), 0)
FROM generate_series('2019-03-01'::date, '2020-02-01', INTERVAL '1 MONTH'
     ) gs(yyyymm) LEFT JOIN
     items i
     -- items is aliased "i"; the join must reference i.date_added (no alias "s" exists).
     -- The type filter stays in ON so that months without matches are kept.
     ON gs.yyyymm = DATE_TRUNC('month', i.date_added) AND
        i.item_type_id = 3
GROUP BY gs.yyyymm
ORDER BY gs.yyyymm;
You want generate_series in the FROM clause and join with it, somewhat like
-- Twelve months starting at start_month (a placeholder for the first day of the
-- first month to report), each with the total price of item type 3, or 0 if none.
SELECT months.m::date,
       coalesce(sum(items.price), 0)
FROM generate_series(
        start_month,
        start_month + INTERVAL '11 months',
        INTERVAL '1 month'
     ) AS months(m)
LEFT JOIN items
    -- compare on the month: matching date_added to months.m directly would only
    -- pick up items added on the first day of a month
    ON date_trunc('month', items.date_added) = months.m
   -- filter in ON, not WHERE, so that months without items are kept
   AND items.item_type_id = 3
GROUP BY months.m
ORDER BY months.m;

Get data for last 12 weeks oracle

Hello, I need to get the transaction count and total amount for transactions in the last quarter.
I use the following to get the data for a quarter (the last 90 days):
-- Transaction count and total amount per day for the last 90 days; days without
-- transactions are kept by left-joining a generated calendar to the daily totals.
WITH date_range AS (
    -- 90 consecutive days: TRUNC(sysdate) - 89 up to TRUNC(sysdate)
    SELECT TRUNC(sysdate) - 90 + level AS week_day
    FROM dual
    CONNECT BY ROWNUM <= 90
),
the_data AS (
    SELECT
        TRUNC(systemdate) AS log_date,
        count(*) AS num_obj,
        status AS log_status,
        -- amounts of VERSION '1.1' rows are divided by 100 before being summed
        nvl(
            sum(
                CASE
                    WHEN VERSION = '1.1' THEN nvl(amount/100,'0.0')
                    ELSE nvl(amount,'0.0')
                END
            ),
            0
        ) AS totalamount
    FROM transactionlog
    WHERE (
              -- the comma-separated merchant list is split into one code per row
              merchantcode IN (
                  SELECT regexp_substr('MERC0003','[^,]+', 1, LEVEL)
                  FROM dual
                  CONNECT BY regexp_substr('MERC0003', '[^,]+', 1, level) IS NOT NULL
              )
              OR 'MERC0003' IS NULL
          )
      AND status = 'xxxx'
    GROUP BY TRUNC(systemdate), status
)
SELECT
    TO_CHAR(cal.week_day,'DD/MM/YYYY HH:MI AM') AS TXNDATE,
    NVL(daily.num_obj,0) AS TXNCOUNT,
    daily.log_status,
    daily.totalamount
FROM date_range cal
LEFT JOIN the_data daily
    ON daily.log_date = cal.week_day
ORDER BY cal.week_day DESC ;
With the above I get 90 records, i.e. 90 rows containing the transaction count and amount for each of the last 90 days.
I need the data per week of the quarter instead: 12 rows, one per week, each containing the transaction count and amount for that week over the last 12 weeks.
You can use TRUNC( date_value ,'IW' ) to truncate a date to the start of the ISO week (Monday 00:00) and add multiples of a week in your date range and perform the same truncation in your query:
-- Transaction count and total amount per ISO week (Monday start) from the week
-- containing sysdate - 90 up to the current week. Weeks without transactions are
-- kept and report 0 for both figures.
WITH date_range ( week_day ) AS (
  -- one row per Monday, stepping 7 days at a time until SYSDATE is passed
  SELECT TRUNC( sysdate - 90, 'IW' ) + ( level - 1 ) * INTERVAL '7' DAY
  FROM   dual
  CONNECT BY TRUNC( sysdate - 90, 'IW' ) + ( level - 1 ) * INTERVAL '7' DAY <= SYSDATE
),
the_data ( log_date, num_obj, log_status, totalamount ) AS (
  -- same filter and amount rule as the daily query in the question, but bucketed
  -- by the Monday of each transaction's ISO week
  SELECT TRUNC( systemdate, 'IW' ),
         COUNT(*),
         status,
         NVL(
           SUM(
             CASE
               WHEN version = '1.1' THEN NVL( amount / 100, 0 )
               ELSE NVL( amount, 0 )
             END
           ),
           0
         )
  FROM   transactionlog
  WHERE  (
           merchantcode IN (
             SELECT REGEXP_SUBSTR( 'MERC0003', '[^,]+', 1, LEVEL )
             FROM   dual
             CONNECT BY REGEXP_SUBSTR( 'MERC0003', '[^,]+', 1, LEVEL ) IS NOT NULL
           )
           OR 'MERC0003' IS NULL
         )
  AND    status = 'xxxx'
  GROUP BY TRUNC( systemdate, 'IW' ),
           status
)
SELECT dr.week_day,
       trans_log.log_date,
       -- the outer join leaves these NULL for weeks without transactions
       NVL( trans_log.num_obj, 0 )     AS num_obj,
       trans_log.log_status,
       NVL( trans_log.totalamount, 0 ) AS totalamount
FROM   date_range dr
       LEFT JOIN the_data trans_log
       ON trans_log.log_date = dr.week_day
ORDER BY dr.week_day DESC;