SQL query to get values by the calendar month - sql

I have the following schema:
-- One row per phone-click record, tagged with its industry and start date.
CREATE TABLE phone_clicks (
    id          integer NOT NULL,
    start_date  date    NOT NULL,
    industry_id integer NOT NULL,
    clicks      integer NOT NULL DEFAULT 0  -- falls back to 0 when an insert omits it
);
-- Sample rows: four dated October 2021 and four dated November 2021.
INSERT INTO phone_clicks (id, start_date, industry_id)
VALUES
    (1, '2021-10-03', 1),
    (2, '2021-10-03', 2),
    (3, '2021-10-02', 3),
    (4, '2021-10-01', 1),
    (5, '2021-11-01', 3),
    (6, '2021-11-01', 4),
    (7, '2021-11-02', 1),
    (8, '2021-11-02', 2);
and I want to get a result that has the industry id along with the number of items, counted per calendar month for the previous two months, not including the current one. So I want the number of items in October and the number in November.
What I have, which is not working:
-- Asker's attempt: returns 0 in both columns for every industry.
-- Every bound is derived from the row's own start_date rather than from the
-- current date, and each upper bound lies before its lower bound, so neither
-- WHEN condition can ever be true.
SELECT industry_id,
sum(
CASE
-- lower bound = month start - 1 month, upper bound = month start - 2 months
WHEN start_date >= (date_trunc('month', start_date) - interval '1 month')::date AND start_date < (date_trunc('month', start_date) - interval '2 month')::date THEN 1
ELSE 0
END) AS last_month,
sum(
CASE
-- lower bound = month start - 2 months, upper bound = month start - 3 months
WHEN start_date >= (date_trunc('month', start_date) - interval '2 month')::date AND start_date <= (date_trunc('month', start_date) - interval '3 month')::date THEN 1
ELSE 0
END) AS prev_month
FROM phone_clicks
Group by industry_id
What i am currently getting from the above :
industry_id last_month prev_month
4 0 0
1 0 0
3 0 0
2 0 0
What i am expecting:
industry_id last_month prev_month
4 1 0
1 1 2
3 1 1
2 1 1
So industry 1, it has two items with start_date in October and 1 in November.

date_trunc('month', ...) always returns the first of the month, so you don't really need the >= .. <= in your CASE expression.
It's enough to compare the month start of "start_date" with the month start of last month, which is date_trunc('month', current_date - interval '1 month')
The query can also be simplified a bit by using the FILTER clause on the aggregates.
-- Per industry, count the rows whose start_date falls in each of the two
-- calendar months before the current one. Both sides of each comparison are
-- truncated to the first day of their month.
SELECT
    industry_id,
    COUNT(*) FILTER (
        WHERE date_trunc('month', start_date) = date_trunc('month', current_date - interval '1 month')
    ) AS last_month,
    COUNT(*) FILTER (
        WHERE date_trunc('month', start_date) = date_trunc('month', current_date - interval '2 month')
    ) AS prev_month
FROM phone_clicks
GROUP BY industry_id
ORDER BY industry_id

Related

Getting a Monthly Date Range

How can you make a date range in BigQuery? A date range starts on the 29th of a month and ends on the 28th of the next month. It should look like this:
Date | Starting Date | Ending Date
03-13-2020 | 02-29-2020 | 03-28-2020
06-30-2020 | 06-29-2020 | 07-28-2020
01-01-2021 | 12-29-2020 | 01-28-2021
11-11-2021 | 10-29-2021 | 11-28-2021
Actually, I wrote an article about it.
Check this out:
https://www.theaccountingtactics.com/2021/12/BigQueryBQ-DateProblems-DateSituations-that-are-Hard-to-Analyze-and-Takes-Time-ToCrack%20.html?m=1
Consider below approach
-- Moves `date` to day-of-month `day` within the same year and month.
-- safe.date yields NULL instead of an error when that day does not exist in
-- the month; in that case the month's last day is returned.
create temp function set_day(date date, day int64) as (
  coalesce(
    safe.date(extract(year from date), extract(month from date), day),
    last_day(date)
  )
);
-- For each Date, report the 29th-to-28th window that contains it.
select Date,
set_day(Starting_Date, 29) as Starting_Date,
set_day(Ending_Date, 28) as Ending_Date
from (
-- Days 1-28 fall in the window that started in the previous month;
-- days 29 and later fall in the window that ends in the next month.
-- The chosen struct is expanded into Starting_Date / Ending_Date columns.
select *, if(extract(day from Date) < 29,
struct(date_sub(Date, interval 1 month) as Starting_Date, Date as Ending_Date),
struct(Date as Starting_Date, date_add(Date, interval 1 month) as Ending_Date)
).*
from your_table
)
if applied to sample data as in your question
-- Inline sample rows standing in for your_table (CTE prefix only; a SELECT must follow).
with your_table as (
select date '2020-03-13' Date union all
select '2021-03-13' union all
select '2020-06-30' union all
select '2021-01-01' union all
select '2021-11-11'
)
output is
You can test the whole thing using the script below:
-- Self-contained script: helper function, inline sample rows, and the window query.

-- Moves `date` to day-of-month `day` within the same year and month, or to the
-- month's last day when safe.date yields NULL for a day that does not exist.
create temp function set_day(date date, day int64) as (
  coalesce(
    safe.date(extract(year from date), extract(month from date), day),
    last_day(date)
  )
);

with your_table as (
  select date '2020-03-13' as Date
  union all select '2021-03-13'
  union all select '2020-06-30'
  union all select '2021-01-01'
  union all select '2021-11-11'
)
select
  Date,
  set_day(Starting_Date, 29) as Starting_Date,
  set_day(Ending_Date, 28) as Ending_Date
from (
  -- Days 1-28 fall in the window that started in the previous month;
  -- days 29 and later fall in the window that ends in the next month.
  select
    *,
    if(
      extract(day from Date) < 29,
      struct(date_sub(Date, interval 1 month) as Starting_Date, Date as Ending_Date),
      struct(Date as Starting_Date, date_add(Date, interval 1 month) as Ending_Date)
    ).*
  from your_table
)

Numbering dates from Sunday to Saturday

Does anyone have an easy solution to number weeks running from Sunday to Saturday and generate their dates in PostgreSQL (version 11)? I have the solution below, but it is limited to only 5 weeks and I need something that is flexible.
I have dates as a column in my source table, and I want those dates to be numbered by week, Saturday back to Sunday, as shown below.
Current Query
-- Asker's current approach: week 1 is generated directly, and each later CTE
-- adds the seven days that precede the earliest date collected so far, tagged
-- with the next week number. Five weeks are hard-coded as five CTEs.
WITH CTE AS
(
-- Week 1: date_trunc('week', ...) is a Monday, so -1 is the Sunday before it and +5 the Saturday after it.
SELECT 1 as rno,generate_series( date_trunc('week', current_date)::date - 1
, date_trunc('week', current_date)::date + 5
, interval '1 day') current_week
)
,CTE_1 AS
(
-- Week 2: the 7 days immediately before the earliest date in CTE.
SELECT rno,current_week FROM CTE
UNION
select 2,dt::date d from generate_series( (SELECT MIN(current_week)::DATE FROM CTE)- interval '7 days', (SELECT MIN(current_week)::DATE FROM CTE)- interval '1 days', interval '1 days') dt
)
,CTE_2 AS
(
-- Week 3: same step applied to CTE_1.
SELECT rno,current_week FROM CTE_1
UNION
select 3,dt::date d from generate_series( (SELECT MIN(current_week)::DATE FROM CTE_1)- interval '7 days', (SELECT MIN(current_week)::DATE FROM CTE_1)- interval '1 days', interval '1 days') dt
)
,CTE_3 AS
(
-- Week 4: same step applied to CTE_2.
SELECT rno,current_week FROM CTE_2
UNION
select 4,dt::date d from generate_series( (SELECT MIN(current_week)::DATE FROM CTE_2)- interval '7 days', (SELECT MIN(current_week)::DATE FROM CTE_2)- interval '1 days', interval '1 days') dt
)
,last_5_weeks as
(
-- Week 5: same step applied to CTE_3.
SELECT rno,current_week FROM CTE_3
UNION
select 5,dt::date d from generate_series( (SELECT MIN(current_week)::DATE FROM CTE_3)- interval '7 days', (SELECT MIN(current_week)::DATE FROM CTE_3)- interval '1 days', interval '1 days') dt
)
-- Newest date first.
SELECT rno,current_week::DATE as selected_date FROM last_5_weeks order by selected_date DESC
Current output
rno Date
1 "2020-10-24"
1 "2020-10-23"
1 "2020-10-22"
1 "2020-10-21"
1 "2020-10-20"
1 "2020-10-19"
1 "2020-10-18"
2 "2020-10-17"
2 "2020-10-16"
2 "2020-10-15"
2 "2020-10-14"
2 "2020-10-13"
2 "2020-10-12"
2 "2020-10-11"
3 "2020-10-10"
3 "2020-10-09"
3 "2020-10-08"
3 "2020-10-07"
3 "2020-10-06"
3 "2020-10-05"
3 "2020-10-04"
4 "2020-10-03"
4 "2020-10-02"
4 "2020-10-01"
4 "2020-09-30"
4 "2020-09-29"
4 "2020-09-28"
4 "2020-09-27"
5 "2020-09-26"
5 "2020-09-25"
5 "2020-09-24"
5 "2020-09-23"
5 "2020-09-22"
5 "2020-09-21"
5 "2020-09-20"
How about using arithmetics?
-- Generate all the dates in one series, then number the weeks arithmetically:
-- ordered newest first, each run of seven consecutive rows shares one number.
-- The '5 week' part of the interval literal sets how many weeks are produced.
select
    1 + (row_number() over (order by dt desc) - 1) / 7 as rn,
    dt::date as dt
from generate_series(
    -- 34 days before the end date: the Sunday that opens the oldest week
    date_trunc('week', current_date)::date + 5 - interval '5 week -1 day',
    -- Monday of the current ISO week + 5 days: its Saturday
    date_trunc('week', current_date)::date + 5,
    '1 day'
) as s(dt)
order by dt desc
generate_series() produces all dates at once. You control the number of weeks that are generated with the value given to week in the literal interval. Then, in the outer query, we use row_number() to enumerate the week numbers.
Demo on DB Fiddle
One sequence for weeks and another one for days. It is flexible, 5 is a parameter. date_trunc('week',now()+'P1W'::interval)::date-2 is this week's saturday.
-- Numbers the last 5 Sunday-to-Saturday weeks (1 = most recent), one row per day.
-- w steps back whole weeks and d steps back days from the anchor Saturday;
-- the upper bound 5 of the first series is the number of weeks to produce.
select
    w as rno,
    date(
        -- Monday of next week minus 2 days = Saturday of the current ISO week
        date_trunc('week', now() + 'P1W'::interval)::date - 2
        + make_interval(weeks => 1 - w, days => 1 - d)
    ) as "Date"
from generate_series(1, 5, 1) as w
cross join generate_series(1, 7, 1) as d
-- Row order is unspecified without ORDER BY; sort newest first so the result
-- matches the expected output.
order by "Date" desc;

Date SQL to get 31st August

I am trying to get the last 31st August every year dynamically.
E.g. if the current date is today, I would like to get 31st August 2019;
next year I want the same query to return 31st August 2020, without changing it.
I have tried Date_Sub and Date_Trunc and they are not working. Any ideas would be really helpful?
-- Asker's attempt: yields the date five days before today, not an August 31.
SELECT DATE_SUB(current_date(), INTERVAL 5 DAY) as five_days_ago
Below will always return last /latest August 31st
#standardSQL
-- Build August 31 of the current year; if today is still before it, step back
-- one year, otherwise keep it.
SELECT
  IF(
    CURRENT_DATE() < last_august_31,
    DATE_SUB(last_august_31, INTERVAL 1 YEAR),
    last_august_31
  ) AS last_august_31
FROM UNNEST([DATE(EXTRACT(YEAR FROM CURRENT_DATE()), 8, 31)]) AS last_august_31
In case if you need to use this within the query with date field - consider below example
#standardSQL
-- Same rule applied per row: compare each dt with August 31 of dt's own year
-- and step back one year when dt is still before it.
WITH `project.dataset.table` AS (
  SELECT DATE '2019-01-01' AS dt
  UNION ALL SELECT '2019-12-31'
  UNION ALL SELECT CURRENT_DATE()
)
SELECT
  dt,
  IF(
    dt < last_august_31,
    DATE_SUB(last_august_31, INTERVAL 1 YEAR),
    last_august_31
  ) AS last_august_31
FROM `project.dataset.table`
CROSS JOIN UNNEST([DATE(EXTRACT(YEAR FROM dt), 8, 31)]) AS last_august_31
-- ORDER BY dt
with result
Row dt last_august_31
1 2019-01-01 2018-08-31
2 2019-12-31 2019-08-31
3 2020-02-25 2019-08-31
-- For each date, compute an August 31 before it and an August 31 on or after it,
-- decided purely by the date's month.
with dates as (
  select cast('2019-01-01' as date) as my_date
  union all select '2019-12-31'
  union all select current_date()
)
select
  my_date,
  -- January-August: previous year's August 31; September-December: this year's.
  date(extract(year from my_date) - if(extract(month from my_date) < 9, 1, 0), 8, 31) as prev_aug_31,
  -- January-August: this year's August 31; September-December: next year's.
  date(extract(year from my_date) + if(extract(month from my_date) >= 9, 1, 0), 8, 31) as next_aug_31
from dates

Calculate total time without vacations in postgres

I have a database table that represents activities and for each activity, how long it took.
It looks something like this :
activity_id | name | status | start_date | end_date
=================================================================
1 | name1 | WIP | 2019-07-24 ... | 2019-07-24 ...
start_date and end_date are timestamps. I use a view with a column total_time that is described like that:
-- View column total_time, expressed in days: the days field of the elapsed
-- interval plus its hours field divided by 24 (minutes and seconds are not used).
-- A NULL end_date is replaced by CURRENT_TIMESTAMP.
date_part('day'::text,
COALESCE(sprint_activity.end_date::timestamp with time zone, CURRENT_TIMESTAMP)
- sprint_activity.start_date::timestamp with time zone
) + date_part('hour'::text,
COALESCE(sprint_activity.end_date::timestamp with time zone, CURRENT_TIMESTAMP)
- sprint_activity.start_date::timestamp with time zone
) / 24::double precision AS total_time
I would like to create a table for vacation or half day vacations that looks like:
date | work_percentage
=================================================
2019-07-24 | 0.4
2019-07-23 | 0.7
And then, I would like to calculate total_time in a way that uses this vacations table such that:
If a date is not in the column it's considered to have work_percentage==1
For every date that is in the table, reduce the relative percentage from the total_time query.
So let's take an example:
Activity - "Write report" started at 11-July-2019 14:00 and ended at 15-July-2019 19:00 - so the time diff is 4 days and 5 hours.
The 13th and 14th were weekend so I'd like to have a column in the vacations table that holds 2019-07-13 with work_percentage == 1 and the same for the 14th.
Deducting those vacations, the time diff would be 2 days and 5 hours as the 13th and 14th are not workdays.
Hope this example explains it better.
I think you can take this example and add some modifications based on your database
Just ddl statements to test script
-- Test fixture: one row per activity with its owner and time span.
create table activities (
    user_id     int,
    activity_id int,
    name        text,
    status      text,
    start_date  timestamp,
    end_date    timestamp
);

-- Test fixture: one row per user and calendar day with a reduced workload.
create table vacations (
    user_id         int,
    date            date,
    work_percentage numeric
);
-- Sample activities. Columns are listed explicitly so the inserts do not depend
-- on the physical column order of the tables.
insert into activities (user_id, activity_id, name, status, start_date, end_date)
values
    (1, 1, 'name1', 'WIP', timestamp '2019-07-20 10:00:00', timestamp '2019-07-25 8:00:00'),
    (2, 2, 'name2', 'DONE', timestamp '2019-07-28 19:00:00', timestamp '2019-08-01 7:00:00'),
    (1, 3, 'name3', 'DONE', timestamp '2019-07-21 12:00:00', timestamp '2019-07-21 15:00:00'),
    (-1, 4, 'Write report', 'DONE', timestamp '2019-07-11 14:00:00', timestamp '2019-07-15 19:00:00');

-- Sample vacation days; user -1 reproduces the weekend from the question's example.
insert into vacations (user_id, date, work_percentage)
values
    (1, date '2019-07-21', 0.5),
    (1, date '2019-07-22', 0),
    (1, date '2019-07-23', 0.25),
    (2, date '2019-07-29', 0),
    (2, date '2019-07-30', 0),
    (-1, date '2019-07-13', 0),
    (-1, date '2019-07-14', 0);
sql script
-- Total time per activity after scaling each calendar day by the owner's
-- work_percentage for that day (1 when the day has no vacation row).
-- An activity with a NULL end_date is treated as running until now, as the
-- question's view does; without that, generate_series returns no rows for it
-- and the activity would be missing from the result.
with
    -- One row per activity and per calendar day the activity touches.
    daily_activity as (
        select
            a.user_id,
            a.activity_id,
            a.name,
            a.status,
            a.start_date,
            a.end_date,
            -- localtimestamp keeps the type "timestamp without time zone", like end_date
            coalesce(a.end_date, localtimestamp) as effective_end,
            date(
                generate_series(
                    date(a.start_date),
                    date(coalesce(a.end_date, localtimestamp)),
                    interval '1 day'
                )
            ) as date_key
        from
            activities as a
    ),
    -- Time covered on each of those days, scaled by the day's work percentage.
    raw_data as (
        select
            da.user_id,
            da.activity_id,
            da.name,
            da.status,
            da.start_date,
            da.end_date,
            da.date_key,
            v.work_percentage,
            case
                -- starts and ends on the same day: the whole span
                when date(da.start_date) = date(da.effective_end)
                    then (da.effective_end - da.start_date) * coalesce(v.work_percentage, 1)
                -- first day: from the start time to the following midnight
                when date(da.start_date) = da.date_key
                    then (date(da.start_date) + 1 - da.start_date) * coalesce(v.work_percentage, 1)
                -- last day: from midnight to the end time
                when date(da.effective_end) = da.date_key
                    then (da.effective_end - date(da.effective_end)) * coalesce(v.work_percentage, 1)
                -- any day in between counts as a full 24 hours
                else interval '24 hours' * coalesce(v.work_percentage, 1)
            end as activity_coverage
        from
            daily_activity as da
            left join vacations as v
                on da.user_id = v.user_id
                and da.date_key = v.date
    )
select
    user_id,
    activity_id,
    name,
    status,
    start_date,
    end_date,
    justify_interval(sum(activity_coverage)) as total_activity_time
from
    raw_data
group by
    user_id,
    activity_id,
    name,
    status,
    start_date,
    end_date

Query to return month-wise count in PostgreSQL

I am trying to write a query in PostgreSQL that should return "month" wise results when two dates are given.
-- Asker's starting query: one row of totals for the whole date range, with no
-- per-month breakdown.
SELECT count(overall_sl) as total_sales, count(CASE WHEN overall_sl < value_1 THEN 1 END) failed_sales
FROM (
-- The GROUP BY collapses rows that share every listed column, so each distinct
-- combination is counted once by the outer query.
SELECT overall_sl, value_1
FROM salecombined c where c.date_updated between '2018-01-01' and '2018-12-31'
GROUP BY dept_name, date_updated, date, overall_sl, no_addons,
value_1, category_id, subcategory_id, branch_name
) sales;
I expect results to be like this (The above query doesn't do that)
start_dt end_dt total_sales failed_sales
-------- ------ ----------- -------------
2018-01-01 2018-01-31 0 0
2018-02-01 2018-02-28 589 154
2018-03-01 2018-03-31 107 14
2018-04-01 2018-04-30 375 114
-- and so on
I wrote below query but it is taking longer to execute; how can I optimise this?
-- Asker's per-month version: generate_series yields one timestamp per month
-- (2018-01-01 through 2018-12-01), and for every month two correlated scalar
-- subqueries each query salecombined again.
SELECT date_trunc('month', dd):: date start_dt,
(date_trunc('month', dd::DATE) + interval '1 month' - interval '1 day')::date as end_dt,
-- total_sales: rows updated within the month, de-duplicated by the GROUP BY.
-- NOTE(review): this GROUP BY omits value_1, unlike the failed_sales subquery.
(select count(overall_sl) from (
SELECT overall_sl
FROM salecombined c
WHERE c.date_updated between date_trunc('month', dd):: date and (date_trunc('month', dd::DATE) + interval '1 month' - interval '1 day')::date
GROUP BY dept_name, date_updated, date, overall_sl, no_addons,
category_id, subcategory_id, branch_name
) jobs
) total_sales,
-- failed_sales: same month filter, counting only rows with overall_sl < value_1.
(select count(CASE WHEN overall_sl < value_1 THEN 1 END) from (
SELECT overall_sl, value_1
FROM salecombined c
WHERE c.date_updated between date_trunc('month', dd):: date and (date_trunc('month', dd::DATE) + interval '1 month' - interval '1 day')::date
GROUP BY dept_name, date_updated, date, overall_sl, no_addons,
value_1, category_id, subcategory_id, branch_name
) jobs
) failed_sales
FROM generate_series
( '2018-01-01'::timestamp
, '2018-12-11'::timestamp
, '1 month'::interval) dd
I am not expert in SQL or PostgreSQL.
I am not quite sure, but maybe you are looking for GROUP BY date_trunc():
demo:db<>fiddle
-- Template: replace <start> and <end> with the bounds of the date range.
-- Grouping on the month start puts every date of one calendar month in the
-- same group, giving one output row per month that has data.
SELECT
date_trunc('month', sdate)::date as month_begin,
-- first of the month + 1 month - 1 day = last day of that month
(date_trunc('month', sdate) + interval '1 month -1 day')::date as month_end,
SUM(value)
FROM sales
WHERE sdate BETWEEN <start> and <end>
GROUP BY date_trunc('month', sdate)
date_trunc('month', date) converts the date to the first of the month. So you can group all dates within one month (because all dates are the first of a month in that group)
date_trunc('month', sdate) + interval '1 month -1 day' calculates the last day of a month: date_trunc goes to the first of the month, adding one month goes to the first of the next month, and subtracting one day gives the day before that, which is the last day of the current month.