sql sum by month and year - sql

I tried writing a correlated query, but it fails to produce the expected output. I am now trying to do it using only joins.
-- Asker's attempt at a running total of acc per month within each year.
-- The IN subquery returns every mon of the same yr that is <= the outer row's
-- mon; that list always contains the outer row's own mon (when mon and yr are
-- not NULL), so the filter removes nothing and the result is a plain
-- per-(mon, yr) sum rather than a cumulative one.
SELECT sum(t1.acc)
,t1.mon
,t1.yr
FROM gl t1
WHERE mon IN (
SELECT mon
FROM gl t2
WHERE t2.mon <= t1.mon
AND t2.yr = t1.yr
)
GROUP BY t1.mon
,t1.yr

You should join your data with itself and apply the sum condition.
-- Rebuild the sample data: one acc value per (mon, yr).
drop table if exists #input_data;

select src.acc, src.mon, src.yr
into #input_data
from (values
    (1, 5, 2020),
    (2, 6, 2020),
    (3, 4, 2021),
    (4, 3, 2021)
) as src (acc, mon, yr);

-- Running total per year: pair every month with itself and each earlier
-- month of the same year, then add up the paired acc values.
select
    sum(upto.acc) as acc_sum,
    cur.mon,
    cur.yr
from #input_data as cur
inner join #input_data as upto
    on upto.yr = cur.yr
   and upto.mon <= cur.mon
group by cur.yr, cur.mon
order by cur.yr, cur.mon;
If you have multiple records for same mon/yr then you have to add additional grouping.
-- Rebuild the sample data; (mon 5, yr 2020) now appears twice.
drop table if exists #input_data;

select src.acc, src.mon, src.yr
into #input_data
from (values
    (1, 5, 2020),
    (2, 6, 2020),
    (3, 4, 2021),
    (4, 3, 2021),
    (0, 5, 2020)
) as src (acc, mon, yr);

-- Collapse the input to one row per (yr, mon) first, so the self-join below
-- pairs each month with each earlier month exactly once.
with monthly_totals as
(
    select
        sum(raw.acc) as acc,
        raw.mon,
        raw.yr
    from #input_data as raw
    group by raw.yr, raw.mon
)
select
    sum(upto.acc) as acc_sum,
    cur.mon,
    cur.yr
from monthly_totals as cur
inner join monthly_totals as upto
    on upto.yr = cur.yr
   and upto.mon <= cur.mon
group by cur.yr, cur.mon
order by cur.yr, cur.mon;

You seem to want a cumulative sum within a year:
-- Cumulative sum of acc within each year, written against the question's
-- table (gl) and column names (yr, mon). Rows that share the same mon are
-- peers in the window, so each of them shows the total up to and including
-- that month.
select yr, mon,
       sum(acc) over (partition by yr order by mon) as acc_sum
from gl
order by yr, acc;
Getting the data back in the original order is a bit tricky but just requires an explicit order by.

Related

SQL query group by with null values is returning duplicates

I have following query
My #dates table has following records:
month year saledate
9 2020 2020-09-01
10 2020 2020-10-01
11 2020 2020-11-01
-- Asker's query as posted; it is not runnable as written (see the notes below).
-- monthlysalesdata: sales records joined to #dates on the sale date, grouped
-- by sales rep and sale date.
-- NOTE(review): salespercentage is selected without an aggregate and is not
-- in the GROUP BY.
with monthlysalesdata as(
select month(salesdate) as salemonth, year(salesdate) as saleyear,salesrepid, salespercentage
from salesrecords r
join #dates d on d.saledate = r.salesdate
group by salesrepid, salesdate),
-- averagefor3months: intended as one summary row per sales rep, marked with
-- month 0 / year 0.
-- NOTE(review): salespercentage is again not aggregated, which presumably is
-- why more than one summary row per rep shows up in the posted results.
averagefor3months as(
select 0 as salemonth, 0 as saleyear, salesrepid, salespercentage
from monthlysalesdata
group by salesrepid)
-- NOTE(review): there is no comma before finallist, its parenthesis is never
-- closed, and no final SELECT is shown.
finallist as(
select * from monthlysalesdata
union
select * from averagefor3months
This query returns the following records. The averagefor3months result set contains a duplicate row whenever there is a null record in the monthlysalesdata result set. How can I get the three-month average as a single record instead of duplicates?
salesrepid salemonth saleyear percentage
232 0 0 null -------------this is the duplicate record
232 0 0 90
232 9 2020 80
232 10 2020 null
232 11 2020 100
My first cte has this result:
salerepid month year percentage
---------------------------------------------
232 9 2020 80
232 10 2020 null
232 11 2020 100
My second cte has this result:
salerepid month year percentage
---------------------------------------------
232 0 0 null
232 0 0 90
How can I avoid the duplicate record in my second CTE?
I suspect that you want a summary row per sales rep based on some aggregation. Your question is not clear on what is needed for the aggregation, but something like this:
-- Monthly figure per sales rep, followed by one summary row per rep.
-- <something> is a placeholder for the value to aggregate.
with ym as (
      select r.salesrepid, d.year, d.month, sum(<something>) as whatever
      from salesrecords r join
           #dates d
           on d.saledate = r.salesdate
      group by r.salesrepid, d.year, d.month
     )
select ym.*
from ym
union all
-- Summary row: year and month are NULL; the value is the average of the
-- monthly figures computed in ym.
select salesrepid, null, null, avg(whatever)
from ym
group by salesrepid;
I updated the query to select and group directly from the table instead of from the previous CTE, and got my results. Thank you all for helping.
-- Asker's follow-up, posted as a sketch rather than runnable SQL.
-- ym: one aggregated row per sales rep, year and month.
with ym as (
select r.salesrepid, d.year, d.month, sum(<something>) as whatever
from salesrecords r join
#dates d
on d.saledate = r.salesdate
group by r.salesrepid, d.year, d.month
),
-- threemonthsaverage: aggregates straight from salesrecords, grouped by rep only.
-- NOTE(review): r.year and r.month are selected but not grouped or aggregated;
-- presumably constants or NULLs were intended here -- confirm.
threemonthsaverage as(
select r.salesrepid, r.year, r.month, sum(something) as whatever
from salesrecords as r
group by salesrepid)
-- NOTE(review): the two selects below have no FROM clause; presumably they
-- stand for "select * from ym" and "select * from threemonthsaverage".
select ym *
union
select threemonthsaverage*

Left Join returns NULL on matching data

I perform simple LEFT JOIN between two tables:
A:
YR QTR MTH DAY DEPT SALES
2017 2 04 2017-04-01 B xxxxxx
2017 1 03 2017-03-31 A xxxxxxxx
2017 1 03 2017-03-31 B xxxxx
2017 1 03 2017-03-30 A xxxx
Second table (B) I use to bring QTR_ALT number
YEAR MONTH QTR QTR_ALT
2016 12 4 12
2017 01 1 12
2017 02 1 12
2017 03 1 11
2017 04 2 11
Following LEFT JOIN B ON A.YR = B.YEAR AND A.QTR = B.QTR AND A.MTH=B.MONTH returns NULL for QTR_ALT for A.DAY BETWEEN '2016-12-01' AND '2017-03-31'
YR QTR QTR_ALT MTH DAY DEPT SALES
2017 2 11 04 2017-04-02 A xxxxxx
2017 2 11 04 2017-04-01 A xxxxxx
2017 2 11 04 2017-04-01 B xxxxxx
2017 1 NULL 03 2017-03-31 A xxxxxxxx
2017 1 NULL 03 2017-03-31 B xxxxx
2017 1 NULL 03 2017-03-30 A xxxx
I tried moving WHERE condition to JOIN but no luck. How is it possible these dates don't get join even though corresponding record exists in table B?
Full code:
-- Asker's full query: daily sales per department, with QTR_ALT looked up from
-- a generated month calendar (derived table B).
SELECT YEAR(A.DAY) as YR,
QUARTER(A.DAY) as QTR,
B.QTR_ALT,
-- MTH: month number of A.DAY, left-padded with '0' to two characters.
(REPEAT(0, 2-LENGTH(MONTH(A.DAY))) || MONTH(A.DAY)) MTH,
A.DAY,
A.DEPT,
SUM(A.VAL) as SALES
FROM A
-- B: 36 generated rows, one per value of Y; each is the first day of the
-- current month shifted back Y months. YEAR and MONTH are TO_CHAR strings,
-- QTR is CEIL(month / 3), QTR_ALT is CEIL(Y / 3).
-- NOTE(review): the calendar only reaches 36 months back from CURRENT_DATE,
-- so whether 2016-12 .. 2017-03 is covered depends on when the query runs.
LEFT JOIN (SELECT TO_CHAR(ADD_MONTHS(a.DT, - b.Y), 'YYYY') as YEAR,
TO_CHAR(ADD_MONTHS(a.DT, - b.Y), 'MM') as MONTH,
CEIL(TO_NUMBER(TO_CHAR(add_months(a.dt, -b.y), 'MM')) / 3) as QTR,
CEIL(b.y/3) as QTR_ALT
FROM (SELECT TRUNC(CURRENT_DATE, 'MONTH') as DT) a
CROSS JOIN (SELECT SEQ8()+1 as Y FROM TABLE(GENERATOR(ROWCOUNT => 36)) ORDER BY 1) b
ORDER BY YEAR, MONTH) B
-- The join matches on quarter and zero-padded month only; the year comparison
-- sits in the WHERE clause below.
ON QUARTER(A.DAY) = B.QTR
AND (REPEAT(0, 2-LENGTH(MONTH(A.DAY))) || MONTH(A.DAY)) = B.MONTH
-- NOTE(review): this predicate filters on the outer-joined side (B.YEAR is
-- NULL for unmatched rows) and compares a number with a TO_CHAR string;
-- presumably it belongs in the ON clause -- confirm.
WHERE (YEAR(A.DAY) = B.YEAR)
AND (A.DAY BETWEEN '2016-12-01' AND '2017-03-31')
AND A.DEPT in ('A', 'B')
GROUP BY A.DAY, YEAR(A.DAY),QUARTER(A.DAY),B.QTR_ALT,(REPEAT(0, 2-LENGTH(MONTH(A.DAY))) || MONTH(A.DAY)), DEPT
ORDER BY A.DAY DESC
-- Sample data mirroring table A from the question.
CREATE TEMP TABLE A (yr number, qtr number, mth text, day date, dept text, sales number);
INSERT INTO A (yr, qtr, mth, day, dept, sales) VALUES
    (2017, 2, '04', '2017-04-01', 'B', 10),
    (2017, 1, '03', '2017-03-31', 'A', 11),
    (2017, 1, '03', '2017-03-31', 'B', 20),
    (2017, 1, '03', '2017-03-30', 'A', 6);  -- qtr is 1 here, as in the question's table A

-- sub_b: 36 generated month rows counting back from the current month.
-- ROW_NUMBER() gives a gap-free 1..36 offset regardless of the SEQ8() values.
-- The calendar only reaches 36 months back from CURRENT_DATE; a.day values
-- older than that find no match and keep a NULL qtr_alt.
WITH
sub_b AS (
    SELECT
        TRUNC(CURRENT_DATE, 'MONTH') AS dt,
        SEQ8() AS s,
        ROW_NUMBER() OVER (ORDER BY s) AS y,
        ADD_MONTHS(dt, - y) as tmp_date,
        TO_CHAR(tmp_date, 'YYYY') AS year,
        --TO_CHAR(tmp_date, 'MM') AS month, -- NOT USED
        --QUARTER(tmp_date) as QTR, -- NOT USED
        CEIL(y/3) as qtr_alt -- this value seems broken
    FROM TABLE(GENERATOR(ROWCOUNT => 36))
)
SELECT a.yr,
    a.qtr,
    b.qtr_alt,
    a.mth,
    a.day,
    a.dept,
    SUM(a.sales) AS sales
FROM a
-- Match each sale to its calendar month by truncating the day to month start.
LEFT JOIN sub_b AS b
    ON a.yr = b.year AND date_trunc('month',a.day) = b.tmp_date
WHERE a.day BETWEEN '2016-12-01' AND '2017-03-31'
    AND a.dept in ('A', 'B')
GROUP BY a.yr, a.qtr, b.qtr_alt, a.mth, a.day, a.dept
ORDER BY a.day DESC;
This seems to work, given how I read your code and intent.

Selecting min count('x') by year

I'm trying to create a table that displays the song(s) with the minimum number of plays by year.
For instance if Song1 and Song2 both only had 1 play and Song3 had 2 plays in 2018 and Song1 had 1 play in 2017 and Song2 had 2 plays in 2017, I want a table that would return 3 rows:
Song1 - 2018 - 1 play
Song2 - 2018 - 1 play
Song1 - 2017 - 1 play
Is there a way to display the songs where min(count('x')) = count('x').
I'm sure that isn't the proper syntax but it's essentially what I'm trying to find.
-- Asker's current query: every music row joined (on the shared column names)
-- to the play count of each song per year.
SELECT *
FROM music
NATURAL JOIN (
    SELECT
        extract(year from date) AS yr,
        song_code,
        COUNT('x')
    FROM singles
    NATURAL JOIN plays
    GROUP BY
        extract(year from date),
        song_code
    ORDER BY
        yr desc,
        COUNT('x') desc
);
Currently I have the songs grouped by number of plays a year, but I'm not sure how to only show those that have played the minimum amount of times.
-- You can prefer using analytic functions such as dense_rank() rather than joins or in-subqueries.
-- Sample data: one row per (song id, year, play).
with songs( id, year, play_id ) as
(
 select 1, 2018, 1 from dual union all
 select 2, 2018, 1 from dual union all
 select 3, 2018, 1 from dual union all
 select 3, 2018, 2 from dual union all
 select 1, 2017, 1 from dual union all
 select 2, 2017, 1 from dual union all
 select 2, 2017, 2 from dual
),
-- Number of plays per song and year.
play_counts as
(
 select id, year, count(play_id) as play_cnt
   from songs
  group by id, year
),
-- Rank songs within each year by play count; ties share the same rank.
ranked as
(
 select pc.*, dense_rank() over (partition by year order by play_cnt) dr
   from play_counts pc
)
-- Keep every song that has the lowest play count of its year.
select id, year, play_cnt
  from ranked
 where dr = 1;
ID YEAR PLAY_CNT
---------- ---------- ----------
1 2017 1
2 2018 1
1 2018 1
This should work. It is also a good example of why we should use WITH (subquery factoring) instead of an inline view: here we create a song_count factored subquery with the year, song_code and count.
Also, I would explicitly define the joins for clarity
-- song_count: number of plays per song and year. The CTE body must be wrapped
-- in parentheses.
WITH song_count AS (
    SELECT extract(year from date) AS song_year, song_code, count(*) AS play_count
    FROM singles NATURAL JOIN plays
    GROUP BY extract(year from date), song_code
)
-- Keep every song whose play count equals the minimum of its year.
SELECT *
FROM song_count
WHERE (song_year, play_count) IN (
    SELECT song_year, min(play_count)
    FROM song_count
    GROUP BY song_year
);
You can try with row_number():
-- Songs with the fewest plays in each year, joined back to music.
SELECT *
FROM music
NATURAL JOIN (
    SELECT yr, song_code, play_count
    FROM (
        SELECT extract(year from date) AS yr,
               song_code,
               COUNT('x') AS play_count,
               -- Partition by year only: the rows are already one per
               -- (year, song), so partitioning by song_code as well would
               -- rank every row first. rank() rather than row_number() so
               -- that all songs tied for the minimum are kept.
               rank() OVER (PARTITION BY extract(year from date)
                            ORDER BY COUNT('x')) AS rnk
        FROM singles NATURAL JOIN plays
        GROUP BY extract(year from date), song_code
    ) ranked
    WHERE rnk = 1
) least_played;

Getting Average based on 2 conditions (columns and rows)

My data is looking like this:
PRODUCT DEPT DATE PERCENTAGE
1 A JAN 2
1 B FEB 4
1 A MAR 1
1 B JAN 5
1 A FEB 3
1 B MAR 7
1 A JAN 3
1 B FEB 4
1 A MAR 2
1 B JAN 8
1 A FEB 9
1 B MAR 6
... ... ... ...
With thousands of different products and dozens of departments.
The calculation I have to go through is:
1 - Sum the percentages as follow: by product, dept and date (so Product 1 / DEPT A / JAN => SUM(PERCENTAGE). For each PRODUCT, DEPT and DATE.
2 - When I have my sums, get the average of the 3 months for each product and dept (product 1 dept A: JAN / FEB / MAR, and so on)
3 - Get the max average (for each product, which dept has the highest average).
I have something which works but it's so long I am sure I can learn and make something better:
-- Highest three-month average of summed percentages per TFC.
-- Each t_MonthN derived table sums percentage per TFC and department for one
-- month ([…] marks filters elided in the original post). The three are joined
-- on (TFC, DEPT), averaged, and the maximum average per TFC is returned.
-- The derived tables expose the department as DEPT and the sums as
-- Percentage1/2/3, which are the names the outer levels reference.
Select
    Verylong_q.TFC,
    Round(MAX(verylong_q.average),2) AS HIGHEST_AVERAGE
FROM
(
    SELECT
        Long_Q.TFC,
        Long_Q.DEPT,
        Long_Q.Percentage1,
        Long_Q.Percentage2,
        Long_Q.Percentage3,
        ((Percentage1 + Percentage2 + Percentage3)/3) AS Average
    FROM
    (
        SELECT
            t_Month1.TFC,
            t_Month1.DEPT,
            t_Month1.Percentage1,
            t_Month2.Percentage2,
            t_Month3.Percentage3
        From
        (
            Select
                pos.TFC,
                mv.Dept AS DEPT,
                sum(pos.percentage) AS Percentage1
            FROM
                TBO_POS pos,
                TBL_MV mv
            Where
                pos.IV_ID = mv.IV_ID
                and Date = […]
                and TFC in […]
            group by pos.TFC, mv.Dept, pos.Date
            order by 1 DESC ) t_Month1
        LEFT JOIN
        (
            Select
                pos.TFC,
                mv.Dept AS DEPT,
                sum(pos.percentage) AS Percentage2
            FROM
                TBO_POS pos,
                TBL_MV mv
            Where
                pos.IV_ID = mv.IV_ID
                and Date = […]
                and TFC in […]
            group by pos.TFC, mv.Dept, pos.Date
            order by 1 DESC ) t_Month2
        On t_month1.DEPT = t_month2.DEPT and t_month1.TFC = t_month2.TFC
        LEFT JOIN
        (
            Select
                pos.TFC,
                mv.Dept AS DEPT,
                sum(pos.percentage) AS Percentage3
            FROM
                TBO_POS pos,
                TBL_MV mv
            Where
                pos.IV_ID = mv.IV_ID
                and Date = […]
                and TFC in […]
            group by pos.TFC, mv.Dept, pos.Date
            order by 1 DESC ) t_Month3
        on t_month1.DEPT = t_month3.DEPT and t_month1.TFC = t_month3.TFC
    ) Long_Q
) VeryLong_Q
Group by verylong_q.TFC
How could I do this in a better way? Thanks!
Isn't that simply:
Sum the percentages by product, dept and date in the innermost subquery
Get the average of the months for each product and dept in the next subquery
Get the max average for each product in the main query.
Query:
-- Three aggregation levels, innermost first:
--   1. monthly_sums:  sum of percentage per product, dept and date
--   2. dept_averages: average of those sums per product and dept
--   3. outer query:   highest average per product
SELECT
    dept_averages.product,
    MAX(dept_averages.avg_sum_percentage)
FROM (
    SELECT
        monthly_sums.product,
        monthly_sums.dept,
        AVG(monthly_sums.sum_percentage) AS avg_sum_percentage
    FROM (
        SELECT
            product,
            dept,
            date,
            SUM(percentage) AS sum_percentage
        FROM mytable
        GROUP BY product, dept, date
    ) monthly_sums
    GROUP BY monthly_sums.product, monthly_sums.dept
) dept_averages
GROUP BY dept_averages.product;
From what you describe, lag() seems like the appropriate method, along with aggregation and selection of the best:
-- For each product, pick the dept row with the highest three-month average.
-- Innermost query: sum of percentage per product, dept and date, plus the
-- sums of the previous row (sump_1) and the row before that (sump_2) within
-- the same product and dept. […] marks filters elided in the original post.
-- Where fewer than two earlier rows exist, the lag values are NULL and so is
-- the average.
select *
from (select product, dept, (sump + sump_1 + sump_2) / 3 as avg_max,
             row_number() over (partition by product
                                order by (sump + sump_1 + sump_2) / 3 desc) as seqnum
      from (select product, dept, date, sum(percentage) as sump,
                   lag(sum(percentage)) over (partition by product, dept order by date) as sump_1,
                   lag(sum(percentage), 2) over (partition by product, dept order by date) as sump_2
            from TBO_POS pos join
                 TBL_MV mv
                 on pos.IV_ID = mv.IV_ID
            where Date = […] and TFC in […]
            group by product, dept, date
           ) monthly
     ) ranked
where seqnum = 1;
This solution follows the description of the problem. It produces one row for each month and product. This version does not take into account missing values and other issues. I think this is the logic you want, but without expected results the question might be ambiguous.

pivot tables in sql server

Have a table data as:
d1 d2 MON REPORT_DATE
67 46 Dec 2014-12-19 06:19:05.337
69 46 Dec 2014-12-22 06:21:47.430
67 85 Jan 2015-01-23 06:08:09.030
I need a result set as
DEC JAN
D1 69 67
D2 46 85
So far
-- Asker's attempt: pivots the latest ReportDate into one column per month
-- abbreviation.
SELECT *
FROM (
    SELECT
        PerValueStreamOnAutoDiags,
        PerProdSupportOnAutoDiags,
        LEFT(Datename(Month, ReportDate), 3) AS [Month],
        ReportDate
    FROM Temp_AutoApprovalMembercnt
) AS src
PIVOT (
    Max(ReportDate)
    FOR [month] IN (dec, jan)
) AS pvt
This should work:
-- Step 1 (cte): unpivot the two measure columns into rows, giving
-- (Month, val, cols), where cols holds the source column name.
-- Both measure columns must be listed; naming the same column twice would
-- leave PerProdSupportOnAutoDiags out of the result.
;with cte as
(select * from
    (select PerValueStreamOnAutoDiags,
            PerProdSupportOnAutoDiags,
            LEFT(Datename(Month, ReportDate), 3)[Month]
     from Temp_AutoApprovalMembercnt) as s
 unpivot
 (val
  for cols in (PerValueStreamOnAutoDiags,PerProdSupportOnAutoDiags)) as u)
-- Step 2: pivot the month abbreviations into columns, keeping the maximum
-- value per measure and month.
select * from
(select * from cte) as s
pivot
(max(val)
 for [Month] in (DEC,JAN)) as p
Demo
First unpivot the data to bring PerValueStreamOnAutoDiags and PerProdSupportOnAutoDiags into rows from columns, and then pivot to get the data summarised by the month.
You need to pivot the data and reaggregate it, because you are swapping rows and columns. Here is a method using aggregation and union all:
-- One output row per measure; each month column holds the largest value of
-- that measure among the rows for that month.
select
    'd1' as col,
    max(case when src.mon = 'Dec' then src.d1 end) as dec,
    max(case when src.mon = 'Jan' then src.d1 end) as Jan
from Temp_AutoApprovalMembercnt src

union all

select
    'd2' as col,
    max(case when src.mon = 'Dec' then src.d2 end) as dec,
    max(case when src.mon = 'Jan' then src.d2 end) as Jan
from Temp_AutoApprovalMembercnt src;