SQL SUMs in where clause with conditionals - sql

I want to get a "totals" report for business XYZ. They want the season, term, distinct count of employees, and the employees' total dropped hours, counting an employee's dropped hours only when they are not exactly offset by adds equal to the drops.
trying to do something like this:
-- Attempted totals report: one row per year/season with a distinct employee
-- count and the summed hours of 'Drop%' rows.
select year,
season,
-- Distinct employees in the outer row's season and year.
(select count(distinct empID)
from tableA
where a.season = season
and a.year = year) "Employees",
-- Summed hours over the DISTINCT (year, season, empID, hours) 'Drop%' rows,
-- so two drop rows with identical values are counted once.
(select sum(hours)
from(
select distinct year,season,empID,hours
from tableA
where code like 'Drop%'
)
where a.season = season
and a.year = year) "Dropped"
from tableA a
-- need help below
-- Intended filter: keep rows where the employee's summed drop hours differ
-- from the summed add hours for the same season and year.
-- NOTE(review): the predicates below reference emplID, while the select
-- lists use empID -- confirm the actual column name.
where (select sum(hours)
from(
select distinct year,season,empID,hours
from tableA
where code like 'Drop%'
)
where a.season = season
and a.year = year
and a.emplID = emplID)
!=
-- Same shape as above, restricted to 'Add%' rows.
(select sum(hours)
from(
select distinct year,season,empID,hours
from tableA
where code like 'Add%'
)
where a.season = season
and a.year = year
and a.emplID = emplID)
group by year,season
It appears I am not writing my where clause correctly. I don't believe I am matching each emplID to itself correctly, so that I keep only those employees whose "drops" <> "adds".
EDIT:
sample data:
year,season,EmplID,hours,code
2015, FALL, 001,10,Drop
2015, FALL, 001,10,Add
2015,FALL,002,5,Drop
2015,FALL,003,10,Drop
The total hours should be 15. EmplID 001 should be removed from the total because he has drops that are exactly equal to his adds.

I managed to work it out with a bit of analytics .. ;)
-- Stand-in rows for tableA; column names come from the first UNION ALL branch.
WITH tableA AS (
    SELECT 2015 AS year, 1 AS season, 1234 AS empID, 2 AS hours, 'Add' AS code FROM dual UNION ALL
    SELECT 2015, 1, 1234, 3,   'Add'  FROM dual UNION ALL
    SELECT 2015, 1, 1234, 4,   'Add'  FROM dual UNION ALL
    SELECT 2015, 1, 1234, 2,   'Drop' FROM dual UNION ALL
    SELECT 2015, 1, 2345, 5,   'Add'  FROM dual UNION ALL
    SELECT 2015, 1, 2345, 3.5, 'Add'  FROM dual UNION ALL
    SELECT 2015, 2, 1234, 7,   'Add'  FROM dual UNION ALL
    SELECT 2015, 2, 1234, 5,   'Add'  FROM dual UNION ALL
    SELECT 2015, 2, 2345, 5,   'Add'  FROM dual UNION ALL
    SELECT 2015, 2, 7890, 3,   'Add'  FROM dual UNION ALL
    SELECT 2014, 1, 1234, 1,   'Add'  FROM dual UNION ALL
    SELECT 2014, 1, 1234, 2,   'Add'  FROM dual UNION ALL
    SELECT 2014, 1, 1234, 4,   'Add'  FROM dual
),
-- For each row, fetch the hours of the next row that shares the same
-- year, season, employee and hours value. 'ADD' sorts before 'DROP', so an
-- Add row sees the hours of an equal-sized Drop row when one exists.
paired AS (
    SELECT
        year,
        season,
        empID,
        hours,
        code,
        LEAD(hours) OVER (
            PARTITION BY year, season, empID, hours
            ORDER BY CASE
                         WHEN code LIKE 'Drop%' THEN 'DROP'
                         WHEN code LIKE 'Add%'  THEN 'ADD'
                     END
        ) AS matched_hours
    FROM tableA
)
-- Added hours per year/season, net of any matched row's hours.
SELECT
    year,
    season,
    COUNT(DISTINCT empID),
    SUM(hours - COALESCE(matched_hours, 0)) total_hours
FROM paired
WHERE code LIKE 'Add%'
GROUP BY year, season
/
YEAR SEASON COUNT(DISTINCTEMPID) TOTAL_HOURS
---------- ---------- -------------------- -----------
2015 1 2 15.5
2014 1 1 7
2015 2 3 20
(the first part "with tableA" is just faking some data, since you didn't provide any) :)
[edit]
corrected based on your data, and your explanation - in short, you're counting the DROPs, (minus the ADDs), I was doing the reverse
[edit2] replaced below query with minor tweak based on comment/feedback: don't count an empID if their DROP-ADD zero out)
-- Stand-in rows matching the asker's sample data; column names come from
-- the first UNION ALL branch.
WITH tableA AS (
    SELECT 2015 AS year, 'FALL' AS season, '001' AS empID, 10 AS hours, 'Drop' AS code FROM dual UNION ALL
    SELECT 2015, 'FALL', '001', 10, 'Add'  FROM dual UNION ALL
    SELECT 2015, 'FALL', '002', 5,  'Drop' FROM dual UNION ALL
    SELECT 2015, 'FALL', '003', 10, 'Drop' FROM dual
),
-- For each row, fetch the hours of the previous row that shares the same
-- year, season, employee and hours value. 'ADD' sorts before 'DROP', so a
-- Drop row sees the hours of an equal-sized Add row when one exists.
paired AS (
    SELECT
        year,
        season,
        empID,
        hours,
        code,
        LAG(hours) OVER (
            PARTITION BY year, season, empID, hours
            ORDER BY CASE
                         WHEN code LIKE 'Drop%' THEN 'DROP'
                         WHEN code LIKE 'Add%'  THEN 'ADD'
                     END
        ) AS matched_hours
    FROM tableA
)
-- Dropped hours per year/season; Drop rows whose net hours are not
-- positive are skipped, so those rows do not contribute to the employee count.
SELECT
    year,
    season,
    COUNT(DISTINCT empID),
    SUM(hours - COALESCE(matched_hours, 0)) total_hours
FROM paired
WHERE code LIKE 'Drop%'
  AND hours - COALESCE(matched_hours, 0) > 0
GROUP BY year, season
/
YEAR SEAS COUNT(DISTINCTEMPID) TOTAL_HOURS
---------- ---- -------------------- -----------
2015 FALL 2 15
[/edit]

I think you can do what you want with just conditional aggregation. Something like this:
-- One row per year/season: distinct employees and the summed hours of the
-- 'Drop%' rows (NULL when the group has no such rows).
SELECT
    year,
    season,
    COUNT(DISTINCT empID) AS Employees,
    SUM(CASE WHEN code LIKE 'Drop%' THEN hours END) AS Dropped
FROM tableA
GROUP BY
    year,
    season;
It is hard to tell exactly what you want, because you do not have sample data and desired results (or better yet, a SQL Fiddle). You might also want a having clause:
-- Keep only groups whose dropped hours differ from their added hours.
-- ELSE 0 matters: without it a group that has no 'Add%' rows (or no
-- 'Drop%' rows) produces a NULL sum, the <> comparison is UNKNOWN, and the
-- group is silently filtered out instead of being compared against 0.
having (sum(case when code like 'Drop%' then hours else 0 end) <>
        sum(case when code like 'Add%' then hours else 0 end)
       )

Are you wanting the result of something like this?
-- Per year/season: distinct employees and total dropped hours, keeping only
-- the groups where total dropped hours and total added hours differ.
select
    year,
    season,
    count(distinct empID) as Employees,
    sum(case when code like 'Drop%' then hours else 0 end) as Dropped
from TableA
group by
    year,
    season
having
    sum(case when code like 'Drop%' then hours else 0 end)
    <> sum(case when code like 'Add%' then hours else 0 end)

Related

What can I use in place of UNION in the query I wrote, or is there a more optimized query than mine that avoids UNION and UNION ALL?

I am counting the birthdays, sales, and orders in each of the 12 months from the customers table in SQL Server, like this.
In the customers table, birth_date, sale_date, and order_date are columns.
-- Monthly row counts from customers, once per date column, tagged with a
-- ranking and a type label.
-- UNION ALL replaces UNION: every branch carries a different ranking
-- constant and is already grouped by month, so no duplicate rows can exist
-- and the implicit DISTINCT of UNION only adds a dedup pass.
SELECT
    1 AS ranking,
    'Birthdays' AS Type,
    DATENAME(month, birth_date) AS [MONTH],
    COUNT(*) AS TOTAL
FROM customers
GROUP BY DATENAME(month, birth_date)

UNION ALL

SELECT
    2 AS ranking,
    'sales' AS Type,
    DATENAME(month, sale_date) AS [MONTH],
    COUNT(*) AS TOTAL
FROM customers
GROUP BY DATENAME(month, sale_date)

UNION ALL

SELECT
    3 AS ranking,
    'Orders' AS Type,
    DATENAME(month, order_date) AS [MONTH],
    COUNT(*) AS TOTAL
FROM customers
GROUP BY DATENAME(month, order_date)
And the output is like these(just dummy data)
ranking
Type
MONTH
TOTAL
1
Birthdays
January
12
1
Birthdays
April
6
1
Birthdays
May
10
2
Sales
Febrary
8
2
Sales
April
14
2
Sales
May
10
3
Orders
June
4
3
Orders
July
3
3
Orders
October
6
3
Orders
December
17
I want to get the counts for all three of these types without using UNION or UNION ALL; that is, I want this data from a single query statement (or a more optimized version of this query).
Another approach is to create a CTE with all available ranking values and use CROSS APPLY with it, as shown below.
-- One row per ranking value; each customer row is repeated once per
-- ranking so a single pass can bucket all three date columns.
WITH ranks (ranking) AS (
    SELECT v.r
    FROM (VALUES (1), (2), (3)) AS v (r)
)
SELECT
    r.ranking,
    CASE r.ranking
        WHEN 1 THEN 'Birthdays'
        WHEN 2 THEN 'Sales'
        WHEN 3 THEN 'Orders'
    END AS [Type],
    -- The ranking selects which date column supplies the month name.
    DATENAME(month, CASE r.ranking
                        WHEN 1 THEN c.birth_date
                        WHEN 2 THEN c.sale_date
                        WHEN 3 THEN c.order_date
                    END) AS [MONTH],
    COUNT(*) AS TOTAL
FROM customers AS c
CROSS JOIN ranks AS r
GROUP BY
    r.ranking,
    DATENAME(month, CASE r.ranking
                        WHEN 1 THEN c.birth_date
                        WHEN 2 THEN c.sale_date
                        WHEN 3 THEN c.order_date
                    END)
ORDER BY
    r.ranking,
    [MONTH]

Subtracting the revenue value for the earliest date minus the latest date in redshift sql

account name
year
revenue
abc
2006
1000
abc
2007
2000
abc
2008
5000
Hello everyone,
So I am trying to find a way to subtract the revenue for the earliest year found in the dataset from the revenue for the latest year, for a given account name.
For example in the above table
the latest year for abc -> 2008
the earliest year for abc -> 2006,
I can't hardcode the years in the code, I don't know what the years would be.
So, I want to get something like this
account name
subtracted revenue
abc
4000
I wish I could share some code but I have no idea how to proceed. I was thinking of using windowing function, but don't know how to apply it in this scenario.
Here is something that I tried, just check if this is any helpful
-- Sample rows for the revenue table. An explicit column list keeps the
-- insert independent of the table's column order; UNION ALL is enough
-- because every sample row is different.
insert into revenue (accountname, year, revenue)
select 'abc' as accountname, 2006 as year, 1000 as revenue union all
select 'abc', 2007, 2000 union all
select 'abc', 2008, 5000 union all
select 'def', 2004, 1000 union all
select 'def', 2006, 3000 union all
select 'xyz', 2005, 5000;

-- Revenue of the latest year minus revenue of the earliest year, per account.
-- An account with a single year is labelled 'maxyear' only, so its result
-- is that year's revenue rather than 0.
select
    accountname,
    sum(case when yearname = 'maxyear' then revenue else 0 end)
      - sum(case when yearname = 'minyear' then revenue else 0 end) as subtractedrevenue
from (
    select
        accountname,
        year,
        revenue,
        -- No ORDER BY inside the windows: min/max over the whole partition
        -- need none, and Redshift rejects an aggregate window function that
        -- has ORDER BY without an explicit frame clause.
        case
            when min(year) over (partition by accountname)
               = max(year) over (partition by accountname) then 'maxyear'
            when year = min(year) over (partition by accountname) then 'minyear'
            when year = max(year) over (partition by accountname) then 'maxyear'
            else ''
        end as yearname
    from revenue
) as a
where yearname <> ''
group by accountname;
You can use window functions:
-- Number each account's rows by year in both directions so the earliest
-- and latest rows can be picked out without knowing the years.
with numbered as (
    select
        t.*,
        row_number() over (partition by account_name order by year) as seqnum_asc,
        row_number() over (partition by account_name order by year desc) as seqnum_desc
    from t
)
-- The latest row counts positively and every other kept row negatively;
-- only the first and last rows of each account pass the filter.
select
    account_name,
    sum(case when seqnum_desc = 1 then revenue else -revenue end)
from numbered
where seqnum_asc = 1
   or seqnum_desc = 1
group by account_name;
Here is a db<>fiddle.

Selecting min count('x') by year

I'm trying to create a table that displays the song(s) with the minimum number of plays by year.
For instance if Song1 and Song2 both only had 1 play and Song3 had 2 plays in 2018 and Song1 had 1 play in 2017 and Song2 had 2 plays in 2017, I want a table that would return 3 rows:
Song1 - 2018 - 1 play Song2 - 2018 - 1 play
Song1 - 2017 - 1 play
Is there a way to display the songs where min(count('x')) = count('x').
I'm sure that isn't the proper syntax but it's essentially what I'm trying to find.
-- Joins music to a per-year, per-song play count.
-- NATURAL JOIN matches on whatever column names the two sides share.
SELECT * FROM music
-- COUNT('x') counts every row in the group, since the literal is never NULL.
NATURAL JOIN (SELECT extract(year from date) AS yr, song_code, COUNT('x')
FROM singles NATURAL JOIN plays
GROUP BY extract(year from date), song_code
-- Sorted newest year first, then most-played first; no minimum filter is applied yet.
ORDER BY yr desc, COUNT('x') desc);
Currently I have the songs grouped by number of plays a year, but I'm not sure how to only show those that have played the minimum amount of times.
-- You can prefer using analytic functions such as dense_rank() rather than joins or in-subqueries.
-- Sample plays: one row per (song id, year, play).
with songs (id, year, play_id) as (
    select 1, 2018, 1 from dual union all
    select 2, 2018, 1 from dual union all
    select 3, 2018, 1 from dual union all
    select 3, 2018, 2 from dual union all
    select 1, 2017, 1 from dual union all
    select 2, 2017, 1 from dual union all
    select 2, 2017, 2 from dual
),
-- Number of plays per song per year.
plays_per_song as (
    select id, year, count(play_id) as play_cnt
    from songs
    group by id, year
),
-- Rank songs within each year by ascending play count; ties share a rank.
ranked as (
    select
        p.id,
        p.year,
        p.play_cnt,
        dense_rank() over (partition by p.year order by p.play_cnt) as dr
    from plays_per_song p
)
-- Rank 1 = the least-played song(s) of each year.
select id, year, play_cnt
from ranked
where dr = 1;
ID YEAR PLAY_CNT
---------- ---------- ----------
1 2017 1
2 2018 1
1 2018 1
This should work. It is also a good example of why we should use WITH (subquery factoring) instead of an inline view: we create a song_count factored subquery with the year, song_code and play count, and then reference it twice.
Also, I would explicitly define the joins for clarity
-- Play count per song per year. The body of a WITH clause must be wrapped
-- in parentheses; without them the statement does not parse.
WITH song_count AS (
    SELECT
        extract(year from date) AS song_year,
        song_code,
        count(*) AS play_count
    FROM singles NATURAL JOIN plays
    GROUP BY extract(year from date), song_code
)
-- Keep the songs whose play count equals the minimum for their year;
-- ties are all returned.
SELECT *
FROM song_count
WHERE (song_year, play_count) IN (
    SELECT song_year, min(play_count)
    FROM song_count
    GROUP BY song_year
);
You can try with row_number():
-- Songs with the fewest plays in each year, joined back to music.
SELECT *
FROM music
NATURAL JOIN (
    SELECT yr, song_code, play_count
    FROM (
        SELECT
            extract(year from date) AS yr,
            song_code,
            COUNT('x') AS play_count,
            -- Partition by year only: partitioning by (year, song_code)
            -- puts every grouped row alone in its partition, so every row
            -- would get rn = 1 and nothing would be filtered.
            -- RANK keeps all songs tied for the minimum in a year.
            rank() OVER (
                PARTITION BY extract(year from date)
                ORDER BY COUNT('x')
            ) AS rn
        FROM singles NATURAL JOIN plays
        GROUP BY extract(year from date), song_code
    ) ranked
    WHERE rn = 1
) least_played;

Moving average of 2 columns

Hello, I have a problem. I know how to calculate a moving average over the last 3 months using Oracle analytic functions... but my situation is a little different.
Month-----ProductType-----Sales----------Average(HAVE TO FIND THIS)
1---------A---------------10
1---------B---------------12
1---------C---------------17
2---------A---------------21
3---------C---------------2
3---------B---------------21
4---------B---------------23
5
6
7
8
9
So we have sales for each month and each product type... I need to calculate the moving average of the last 3 months and the particular product.
example:
For month 4 and Product B it would be (21+0+12)/3
Any ideas ?
Another option is to use the windowing clause of analytic functions
-- Sample sales; column names come from the first UNION ALL branch.
with my_data as (
    select 1 as month, 'A' as product, 10 as sales from dual union all
    select 1, 'B', 12 from dual union all
    select 1, 'C', 17 from dual union all
    select 2, 'A', 21 from dual union all
    select 3, 'C', 2  from dual union all
    select 3, 'B', 21 from dual union all
    select 4, 'B', 23 from dual
)
select
    month,
    product,
    sales,
    -- RANGE works on month values, not row positions: the frame covers the
    -- three months before the current one, and months with no row simply
    -- add nothing. An empty frame sums to NULL, which is turned into 0.
    coalesce(
        sum(sales) over (
            partition by product
            order by month
            range between 3 preceding and 1 preceding
        ),
        0
    ) / 3 as average_sales
from my_data
order by
    month,
    product
-- Average of the three previous rows' sales for the same product type.
-- Fixes over the original: the misspelled partition column (produtType),
-- the unclosed parenthesis, the /3 that divided only the last LAG term,
-- and NULL lags (fewer than three earlier rows) turning the whole sum NULL.
-- NOTE(review): LAG steps back by rows, not by month values; if a product
-- can skip months, confirm this matches the intended 3-month window.
SELECT
    month,
    productType,
    sales,
    (
        COALESCE(LAG(sales, 3) OVER (PARTITION BY productType ORDER BY month), 0)
      + COALESCE(LAG(sales, 2) OVER (PARTITION BY productType ORDER BY month), 0)
      + COALESCE(LAG(sales, 1) OVER (PARTITION BY productType ORDER BY month), 0)
    ) / 3 AS moving_avg
FROM your_table_name

Combining Union results

I have the below SQL Query
-- Daily view counts for email 110197 from the two view tables, stacked
-- without merging: a date present in both tables produces two rows.
SELECT
    COUNT(emailID)    AS ViewsThatMonth,
    DAY(entry_date)   AS day,
    MONTH(entry_date) AS month,
    YEAR(entry_date)  AS year
FROM email_views
WHERE emailID = 110197
GROUP BY
    DAY(entry_date),
    MONTH(entry_date),
    YEAR(entry_date)

UNION ALL

SELECT
    COUNT(emailID)      AS ViewsThatMonth,
    DAY(Record_Entry)   AS day,
    MONTH(Record_Entry) AS month,
    YEAR(Record_Entry)  AS year
FROM dbo.tblOnlineEmail_Views
WHERE emailID = 110197
GROUP BY
    DAY(Record_Entry),
    MONTH(Record_Entry),
    YEAR(Record_Entry)

-- Sort the combined result chronologically.
ORDER BY
    [year],
    [month],
    [day]
The results are showing as below. I need the results on the same date to be combined. I.e. the total for the 23/8/2010 should be 800.
ViewsThatMonth day month year
---------------------------------
799 23 8 2010
1 23 8 2010
281 24 8 2010
88 25 8 2010
1 25 8 2010
You only need to group by once:
-- Stack the raw rows from both tables first, then group once, so views on
-- the same date are counted together.
-- The date columns follow the question's schema: email_views has
-- entry_date and dbo.tblOnlineEmail_Views has Record_Entry (the original
-- answer had them swapped).
SELECT
    COUNT(emailID)    AS ViewsThatMonth,
    DAY(entry_date)   AS day,
    MONTH(entry_date) AS month,
    YEAR(entry_date)  AS year
FROM (
    SELECT emailID, entry_date
    FROM email_views
    WHERE emailID = 110197

    UNION ALL

    -- Aliased so both branches expose the same column name.
    SELECT emailID, Record_Entry AS entry_date
    FROM dbo.tblOnlineEmail_Views
    WHERE emailID = 110197
) AS t
GROUP BY
    DAY(entry_date),
    MONTH(entry_date),
    YEAR(entry_date)
ORDER BY
    [year],
    [month],
    [day]
Basically the easiest way is to make your union a derived table or CTE and then group them by date.
IE.
-- Per-table daily counts for email 110197, stacked; a date present in
-- both tables appears twice here.
with daily_views as (
    select
        count(emailID)    as ViewsThatMonth,
        day(entry_date)   as [day],
        month(entry_date) as [month],
        year(entry_date)  as [year]
    from email_views
    where emailID = 110197
    group by day(entry_date), month(entry_date), year(entry_date)

    union all

    select
        count(Record_Entry) as ViewsThatMonth,
        day(Record_Entry)   as [day],
        month(Record_Entry) as [month],
        year(Record_Entry)  as [year]
    from dbo.tblOnlineEmail_Views
    where emailID = 110197
    group by day(Record_Entry), month(Record_Entry), year(Record_Entry)
)
-- Add the per-table counts together so each date yields a single row.
select
    sum(dv.ViewsThatMonth) as ViewsThatMonth,
    dv.[day],
    dv.[month],
    dv.[year]
from daily_views as dv
group by
    dv.[day],
    dv.[month],
    dv.[year]
order by
    dv.[year],
    dv.[month],
    dv.[day]
Keeping UNIONed code to a minimum:
-- Counts views per calendar day of sort_date for email 110197.
select Count(emailID) as ViewsThatMonth,
Day(sort_date) as day,
Month(sort_date) as month,
Year(sort_date) as year
from (select v.*,
-- caseid 1 takes entry_date; any other caseid takes record_entry.
case c.caseid when 1 then entry_date else record_entry end sort_date
from email_views v
-- The two-row derived table repeats every email_views row once per caseid.
-- NOTE(review): both date columns are read from email_views here, whereas
-- the question's queries read Record_Entry from dbo.tblOnlineEmail_Views --
-- confirm that email_views really has both columns before using this.
cross join (select 1 caseid union all select 2 caseid) c
where v.emailID = 110197) sq
Group By Day(sort_date), Month(sort_date), Year(sort_date)
EDIT: Added alias to subquery