Obtain Name Column Based on Value - sql

I have a table that calculates the number of associated records that fit a criteria for each parent record. See example below:
note - morning, afternoon and evening are only weekdays
| id | morning | afternoon | evening | weekend |
| -- | ------- | --------- | ------- | ------- |
| 1 | 0 | 2 | 3 | 1 |
| 2 | 2 | 9 | 4 | 6 |
What I am trying to achieve is to determine which columns have the lowest value and get their column name as such:
| id | time_of_day |
| -- | ----------- |
| 1 | morning |
| 2 | afternoon |
Here is my current SQL code to result in the first table:
SELECT
leads.id,
COALESCE(morning, 0) morning,
COALESCE(afternoon, 0) afternoon,
COALESCE(evening, 0) evening,
COALESCE(weekend, 0) weekend
FROM leads
LEFT OUTER JOIN (
SELECT DISTINCT ON (lead_id) lead_id, COUNT(*) AS morning
FROM lead_activities
WHERE lead_activities.modality = 'Call' AND lead_activities.bound_type = 'outbound' AND extract('dow' from created_at) IN (0,1,2,3,4,5) AND (extract('hour' from created_at) >= 0 AND extract('hour' from created_at) < 12)
GROUP BY lead_id
) morning ON morning.lead_id = leads.id
LEFT OUTER JOIN (
SELECT DISTINCT ON (lead_id) lead_id, COUNT(*) AS afternoon
FROM lead_activities
WHERE lead_activities.modality = 'Call' AND lead_activities.bound_type = 'outbound' AND extract('dow' from created_at) IN (0,1,2,3,4,5) AND (extract('hour' from created_at) >= 12 AND extract('hour' from created_at) < 17)
GROUP BY lead_id
) afternoon ON afternoon.lead_id = leads.id
LEFT OUTER JOIN (
SELECT DISTINCT ON (lead_id) lead_id, COUNT(*) AS evening
FROM lead_activities
WHERE lead_activities.modality = 'Call' AND lead_activities.bound_type = 'outbound' AND extract('dow' from created_at) IN (0,1,2,3,4,5) AND (extract('hour' from created_at) >= 17 AND extract('hour' from created_at) < 25)
GROUP BY lead_id
) evening ON evening.lead_id = leads.id
LEFT OUTER JOIN (
SELECT DISTINCT ON (lead_id) lead_id, COUNT(*) AS weekend
FROM lead_activities
WHERE lead_activities.modality = 'Call' AND lead_activities.bound_type = 'outbound' AND extract('dow' from created_at) IN (6,7)
GROUP BY lead_id
) weekend ON weekend.lead_id = leads.id

You can use CASE/WHEN/ELSE to check for the specific conditions and produce different values. For example:
with
q as (
-- your query here
)
select
id,
case
when morning <= least(afternoon, evening, weekend) then 'morning'
when afternoon <= least(morning, evening, weekend) then 'afternoon'
when evening <= least(morning, afternoon, weekend) then 'evening'
else 'weekend'
end as time_of_day
from q

Related

Count if previous month data exists postgres

i'm stuck with a query to count id where if it exists in previous month than 1
my table look like this
date | id |
2020-02-02| 1 |
2020-03-04| 1 |
2020-03-04| 2 |
2020-04-05| 1 |
2020-04-05| 3 |
2020-05-06| 2 |
2020-05-06| 3 |
2020-06-07| 2 |
2020-06-07| 3 |
i'm stuck with this query
SELECT date_trunc('month',date), id
FROM table
WHERE id IN
(SELECT DISTINCT id FROM table WHERE date
BETWEEN date_trunc('month', current_date) - interval '1 month' AND date_trunc('month', current_date)
the main problem is that i stuck with current_date function. is there any dynamic ways change current_date? thanks
What i expected to be my result is
date | count |
2020-02-01| 0 |
2020-03-01| 1 |
2020-04-01| 1 |
2020-05-01| 1 |
2020-06-01| 2 |
Solution 1 with SELF JOIN
SELECT date_trunc('month', c.date) :: date AS date
, count(DISTINCT c.id) FILTER (WHERE p.date IS NOT NULL)
FROM test AS c
LEFT JOIN test AS p
ON c.id = p.id
AND date_trunc('month', c.date) = date_trunc('month', p.date) + interval '1 month'
GROUP BY date_trunc('month', c.date)
ORDER BY date_trunc('month', c.date)
Result :
date count
2020-02-01 0
2020-03-01 1
2020-04-01 1
2020-05-01 1
2020-06-01 2
Solution 2 with WINDOW FUNCTIONS
SELECT DISTINCT ON (date) date
, count(*) FILTER (WHERE count > 0 AND previous_month) OVER (PARTITION BY date)
FROM
( SELECT DISTINCT ON (id, date_trunc('month', date))
id
, date_trunc('month', date) AS date
, count(*) OVER w AS count
, first_value(date_trunc('month', date)) OVER w = date_trunc('month', date) - interval '1 month' AS previous_month
FROM test
WINDOW w AS (PARTITION BY id ORDER BY date_trunc('month', date) GROUPS BETWEEN 1 PRECEDING AND 1 PRECEDING)
) AS a
Result :
date count
2020-02-01 0
2020-03-01 1
2020-04-01 1
2020-05-01 1
2020-06-01 2
see dbfiddle

Converting PostgreSQL recursive CTE to SQL Server

I'm having trouble adapting some recursive CTE code from PostgreSQL to SQL Server, from the book "Fighting Churn with Data"
This is the working PostgreSQL code:
with recursive
active_period_params as (
select interval '30 days' as allowed_gap,
'2021-09-30'::date as calc_date
),
active as (
-- anchor
select distinct account_id, min(start_date) as start_date
from subscription inner join active_period_params
on start_date <= calc_date
and (end_date > calc_date or end_date is null)
group by account_id
UNION
-- recursive
select s.account_id, s.start_date
from subscription s
cross join active_period_params
inner join active e on s.account_id=e.account_id
and s.start_date < e.start_date
and s.end_date >= (e.start_date-allowed_gap)::date
)
select account_id, min(start_date) as start_date
from active
group by account_id
This is my attempt at converting to SQL Server. It gets stuck in a loop. I believe the issue has to do with the UNION ALL required by SQL Server.
with
active_period_params as (
select 30 as allowed_gap,
cast('2021-09-30' as date) as calc_date
),
active as (
-- anchor
select distinct account_id, min(start_date) as start_date
from subscription inner join active_period_params
on start_date <= calc_date
and (end_date > calc_date or end_date is null)
group by account_id
UNION ALL
-- recursive
select s.account_id, s.start_date
from subscription s
cross join active_period_params
inner join active e on s.account_id=e.account_id
and s.start_date < e.start_date
and s.end_date >= dateadd(day, -allowed_gap, e.start_date)
)
select account_id, min(start_date) as start_date
from active
group by account_id
The subscription table is a list of subscriptions belonging to customers. A customer can have multiple subscriptions with overlapping dates or gaps between dates. null end_date means the subscription is currently active and has no defined end_date. Example data for a single customer (account_id = 15) below:
subscription
---------------------------------------------------
| id | account_id | start_date | end_date |
---------------------------------------------------
| 6 | 15 | 01/06/2021 | null |
| 5 | 15 | 01/01/2021 | null |
| 4 | 15 | 01/06/2020 | 01/02/2021 |
| 3 | 15 | 01/04/2020 | 15/05/2020 |
| 2 | 15 | 01/03/2020 | 15/05/2020 |
| 1 | 15 | 01/06/2019 | 01/01/2020 |
Expected query result (as produced by PostgreSQL code):
------------------------------
| account_id | start_date |
------------------------------
| 15 | 01/03/2020 |
Issue:
The SQL Server code above gets stuck in a loop and doesn't produce a result.
Description of the PostgreSQL code:
anchor block finds subs that are active as at the calc_date (30/09/2021) (id 5 & 6), and returns the min start_date (01/01/2021)
the recursion block then looks for any earlier subs that existed within the allowed_gap, which is 30 days prior to the min_start date found in 1). id 4 meets this criteria, so the new min start_date is 01/06/2020
recursion repeats and finds two subs within the allowed_gap (01/06/2020 - 30 days). Of these subs (id 2 & 3), the new min start_date is 01/03/2020
recursion fails to find an earlier sub within the allowed_gap (01/03/2020 - 30 days)
query returns a start date of 01/03/2020 for account_id 15
Any help appreciated!
It seems the issue is related to the way SQL Server deals with recursive CTEs.
This is a type of gaps-and-islands problem, and does not actually require recursion.
There are a number of solutions, here is one. Given your requirement, there may be more efficient methods, but this should get you started.
Using LAG we identify rows which are within the specified gap of the next row
We use a running COUNT to give each consecutive set of rows an ID
We group by that ID, and take the minimum start_date, filtering out non-qualifying groups
Group again to get the minimum per account
DECLARE #allowed_gap int = 30,
#calc_date datetime = cast('2021-09-30' as date);
WITH PrevValues AS (
SELECT *,
IsStart = CASE WHEN ISNULL(LAG(end_date) OVER (PARTITION BY account_id
ORDER BY start_date), '2099-01-01') < DATEADD(day, -#allowed_gap, start_date)
THEN 1 END
FROM subscription
),
Groups AS (
SELECT *,
GroupId = COUNT(IsStart) OVER (PARTITION BY account_id
ORDER BY start_date ROWS UNBOUNDED PRECEDING)
FROM PrevValues
),
ByGroup AS (
SELECT
account_id,
GroupId,
start_date = MIN(start_date)
FROM Groups
GROUP BY account_id, GroupId
HAVING COUNT(CASE WHEN start_date <= #calc_date
and (end_date > #calc_date or end_date is null) THEN 1 END) > 0
)
SELECT
account_id,
start_date = MIN(start_date)
FROM ByGroup
GROUP BY account_id;
db<>fiddle

Group by month and name SQL

I need some help with SQL.
I have
Table1 with columns Id, Date1 and Date2
Table2 with columns Table1Id and Table2Id
Table3 with columns Id and Name
Here is my try:
with tmp_tab as (
select
v."Name" as name
, date_part('month', cv."OfferAcceptedDate") as MonthAcceptedName
, date_part('month', cv."OfferSentDate") as MonthSentName
, 1 as cntAcc
, 1 as cntSent
from hr_metrics."CvInfo" as cv
join hr_metrics."CvInfoVacancy" as civ
on civ."CvInfosId" = cv."Id"
join hr_metrics."Vacancy" as v
on civ."VacanciesId" = v."Id"
where cv."OfferSentDate" is not null
and date_part('year', cv."OfferSentDate") = date_part('year', CURRENT_DATE)
group by v."Name" , date_part('month', cv."OfferAcceptedDate"),
date_part('month', cv."OfferSentDate")
)
select distinct
tmp_tab."name" as name,
tmp_tab.MonthSentName as mSent,
tmp_tab.MonthAcceptedName as mAcc,
Sum(tmp_tab.cntSent) as sented,
Sum(tmp_tab.cntacc) as accepted
from tmp_tab as tmp_tab
group by tmp_tab.name, tmp_tab.MonthSentName, tmp_tab.MonthAcceptedName;
I need to take Count(date2)/Count(date1) grouped by monthes and name.
I have no idea how to do that, as there is no table with monthes.
DB - Postgres
sample data from comment:
t1
1 | 01/01/2021 | 31/03/2021
2 | 05/01/2021 | 18/01/2021
3 | 12/01/2021 | 31/01/2021
4 | 13/03/2021 | 22/03/2021
t2
1 | 1
2 | 1
3 | 2
4 | 1
t3
1 | SomeName1
2 | someName2
Desired result:
Name | month | value
SomeName1 | 1 | 1\2
SomeName1 | 3 | 2
SomeName2 | 1 | 1
Update: if count(date2) == 0, than count(date2) = -1
Source answer
Here code for my question thats work. And yeah, i've asked it on ru too.
select name, month, sum((SRC=1)::int) as AcceptedCount, sum((SRC=2)::int) as SentCount,
case when sum((SRC=1)::int) = 0 then -1
else sum((SRC=2)::int)::float / sum((SRC=1)::int) end as Result
from (
select v.name, SRC,
extract('month' from case SRC when 1 then OfferAcceptedDate else OfferSentDate end) as month
from (select (date_part('year', CURRENT_DATE)::char(4) || '-01-01')::timestamptz as from_date) x
cross join (select 1 as SRC union all select 2) s
join CvInfo as cv on (SRC=1 and cv.OfferAcceptedDate >= from_date and cv.OfferAcceptedDate < from_date + interval '1 year')
or (SRC=2 and cv.OfferSentDate >= from_date and cv.OfferSentDate < from_date + interval '1 year')
join CvInfoVacancy as civ on civ.CvInfosId = cv.Id
join Vacancy as v on civ.VacanciesId = v.Id
where case SRC when 1 then OfferAcceptedDate else OfferSentDate end is not null
) x
group by name, month

How to force zero values in Redshift?

If this is my query:
select
(min(timestamp))::date as date,
(count(distinct(user_id)) as user_id_count
(row_number() over (order by signup_day desc)-1) as days_since
from
data.table
where
timestamp >= current_date - 3
group by
timestamp
order by
timestamp asc;
And these are my results
date | user_id_count | days_since
------------+-----------------+-------------
2018-01-22 | 3 | 1
2018-01-23 | 5 | 0
How can I get it the table to show (where the user ID count is 0?):
date | user_id_count | days_since
------------+-----------------+-------------
2018-01-21 | 0 | 0
2018-01-22 | 3 | 1
2018-01-23 | 5 | 0
You need to generate the dates. In Postgres, generate_series() is the way to go:
select g.ts as dte,
count(distinct t.user_id) as user_id_count
row_number() over (order by signup_day desc) - 1) as days_since
from generate_series(current_date::timestamp - interval '3 day', current_date::timestamp, interval '1 day') g(ts) left join
data.table t
on t.timestamp::date = g.ts
group by t.ts
order by t.ts;
You have to create a "calendar" table with dates and left join your aggregated result like that:
with
aggregated_result as (
select ...
)
select
t1.date
,coalesce(t2.user_id_count,0) as user_id_count
,coalesce(t2. days_since,0) as days_since
from calendar t1
left join aggregated_result t2
using (date)
more on creating calendar table: How do I create a dates table in Redshift?

SQL - Conditional column selection in join

I am not sure if this scenario can be achieved using TSQL. I have a table called WorkingDays, which have this info
ID | EmployeeId | Monday | Tuesday | Wednesday | Thursday | Friday
----------------------------------------------------------------------
1 | 1 | 2 | 2 | 3 | 6 | 5
2 | 2 | 1 | 7 | 5 | 2 | 3
The days columns store Ids of WorkingSchedule table, which has this columns:
ID int Primary Key
StartTime time
EndTime time
So what I need id get the StartTime and EndTime of an employee depending on the current date.
What I need to get from query is the start and end time depending on the day. The day I want to filter is de current date (using getdate() function)
So need to select the correct day column name to make the join.
How can I achieve this scenario?
The dynamic sql version:
declare #sql nvarchar(max) ='
select
t.EmployeeId
, StarTime = max(case when t.rn=1 then '+quotename(datename(weekday,getdate()))+' end)
, EndTime = max(case when t.rn=2 then '+quotename(datename(weekday,getdate()))+' end)
from (
select *
, rn = row_number() over (partition by t.EmployeeId order by t.Id)
from t
) t
group by t.EmployeeId;'
exec sp_executesql #sql;
rextester demo: http://rextester.com/WNH34961
returns:
+------------+----------+---------+
| EmployeeId | StarTime | EndTime |
+------------+----------+---------+
| 1 | 5 | 3 |
+------------+----------+---------+
Depending on how you want the output, here are two other ways that do not use dynamic sql:
Both use cross apply() to unpivot the data, and WorkDay = datename(weekday,getdate()) to get the current WorkDay column.
For one row output we add some conditional aggregation:
/* one row per employeeId */
select
t.EmployeeId
, x.WorkDay
, StarTime = max(case when t.rn=1 then x.Time end)
, EndTime = max(case when t.rn=2 then x.Time end)
from (
select *
, rn = row_number() over (partition by t.EmployeeId order by t.Id)
from t
) t
cross apply (values
('Monday',Monday),('Tuesday',Tuesday),('Wednesday',Wednesday)
,('Thursday',Thursday),('Friday',Friday)
) x (WorkDay,Time)
where WorkDay = datename(weekday,getdate())
group by t.EmployeeId, x.WorkDay
returns:
+------------+---------+----------+---------+
| EmployeeId | WorkDay | StarTime | EndTime |
+------------+---------+----------+---------+
| 1 | Friday | 5 | 3 |
+------------+---------+----------+---------+
If you want the output on two rows, like your current output:
/* two rows per employeeId */
select
t.Id
, t.EmployeeId
, x.WorkDay
, t.StartEnd
, x.Time
from (
select *
, StartEnd = case
when row_number() over (partition by t.EmployeeId order by t.Id) = 1
then 'StartTime'
else 'EndTime'
end
from t
) t
cross apply (values
('Monday',Monday),('Tuesday',Tuesday),('Wednesday',Wednesday)
,('Thursday',Thursday),('Friday',Friday)
) x (WorkDay,Time)
where WorkDay = datename(weekday,getdate());
returns:
+----+------------+---------+-----------+------+
| Id | EmployeeId | WorkDay | StartEnd | Time |
+----+------------+---------+-----------+------+
| 1 | 1 | Friday | StartTime | 5 |
| 2 | 1 | Friday | EndTime | 3 |
+----+------------+---------+-----------+------+
select wd.Employee, ws.StartTime, ws.EndTime
from WorkingDays wd
join WorkingSchedule ws on ws.Id = case datename(weekday, getdate())
when 'Monday' then ws.Monday
when 'Tuesday' then ws.Tuesday
when 'Wednesday' then ws.Wednesday
when 'Thursday' then ws.Thursday
when 'Friday' then ws.Friday
else 0
end
Hint: datename(weekday, getdate()) returns you the weekday name in your current locale! This might be better:
select wd.Employee, ws.StartTime, ws.EndTime
from WorkingDays wd
join WorkingSchedule ws on ws.Id = case datepart(weekday, getdate())
when 1 then wd.Monday
when 2 then wd.Tuesday
when 3 then wd.Wednesday
when 4 then wd.Thursday
when 5 then wd.Friday
else 0
end
But then you have to check which day is the first of week (0, 1), depending on your settings.