Calculating missing months - sql

I use the following query in ordre to filling missing months
Declare #Sample Table(year int, month int,product as nvarchar(50), qty_ytd int);
Insert #Sample(year, month, qty_ytd) Values
(2017, 01,'book', 20),
(2017, 02, 'pc', 30),
(2018, 01, 'book', 50);
;With Months As
(Select 1 As month
Union All
Select month + 1 From Months Where month < 12)
, YearsAndMonths As
(Select distinct year,m.month from #Sample cross join Months m)
select ym.*, coalesce(s.qty_ytd, s2.qty_ytd) qty_ytd, coalesce(s.qty_ytd, 0) QTY from YearsAndMonths ym
left join #sample s on ym.year = s.year and ym.month = s.month
left join (select qty_ytd, year,
row_number() over (partition by year order by month desc) rn
from #Sample) s2 on ym.year = s2.year and rn = 1
How could I add 'product ' as well ?

Firstly, I would recommend creating a calendar table since this pops up as a use case every once in a while. A quick example can be found here
Now, once you have the calendar table (let's call it static.calendar) ready, the code is fairly simple as follows:
with Products
as
(
SELECT distinct product
FROM #Sample
),
TimeRange
as
(
SELECT DISTINCT year,
month
FROM static.calendar
)
ProductTimeRange
as
(
SELECT p.products,
tr.year,
tr.month
FROM Products as p
CROSS JOIN TimeRange as tr
)
SELECT ptr.products,
ptr.year,
ptr.month,
s.qty_ytd
FROM ProductTimeRange as ptr
LEFT JOIN #sample as s
ON ptr.products = s.products
AND ptr.year = s.year
AND ptr.month = s.month
ORDER BY ptr.products,
ptr.year,
ptr.month

Use a cross join to generate the rows that you want -- all the years, months, and products.
Then use left join to bring in the data you want:
With Months As (
Select 1 As month
Union All
Select month + 1
From Months
Where month < 12
)
select y.year, m.month, s.product, coalesce(qty_ytd, 0) as qty_ytd
from (select distinct year from #sample) y cross join
months m cross join
(select distinct product from #sample) p left join
#sample s
on s.year = y.year and s.month = m.month and s.product = p.product;
Here is a db<>fiddle.

Related

How to calculate value based on average of previous month and average of same month last year in SQL

I would like to calculate targets for opened rates and clicked rates based on actuals of the last month and the same month last year.
My table is aggregated at daily level and I have grouped it by month and year to get the monthly averages. I have then created a self-join to join my current dates on the results of the previous months. This works fine for all months except for January because SQL can't know that it's supposed to join 1 on 12. Is there a way to specify this in my join clause?
Essentially, the results for January 2021 shouldn't be null because I have December 2020 data.
This is my data and my query:
CREATE TABLE exasol_last_year_avg(
date_col date,
country text,
brand text,
category text,
delivered integer,
opened integer,
clicked integer
)
INSERT INTO exasol_last_year_avg
(date_col,country,brand,category,delivered,opened,clicked) VALUES
(2021-01-01,'AT','brand1','cat1',100,60,23),
(2021-01-01,'AT','brand1','cat2',200,50,45),
(2021-01-01,'AT','brand2','cat1',300,49,35),
(2021-01-01,'AT','brand2','cat2',400,79,57),
(2021-02-02,'AT','brand1','cat1',130,78,30),
(2021-02-02,'AT','brand1','cat2',260,65,59),
(2021-02-02,'AT','brand2','cat1',390,64,46),
(2021-02-02,'AT','brand2','cat2',520,103,74),
(2020-12-02,'AT','brand1','cat1',130,78,30),
(2020-12-02,'AT','brand1','cat2',260,65,59),
(2020-12-02,'AT','brand2','cat1',390,64,46),
(2020-12-02,'AT','brand2','cat2',520,103,74),
(2020-02-02,'AT','brand1','cat2',236,59,53),
(2020-02-02,'AT','brand2','cat1',355,58,41),
(2020-02-02,'AT','brand2','cat2',473,93,67),
(2020-02-02,'AT','brand1','cat1',118,71,27)
This is written in PostgresSQL because I think it's more accessible to most people, but my production database is Exasol!
select *
from
(Select month_col,
year_col,
t_campaign_cmcategory,
t_country,
t_brand,
(t2_clicktoopenrate + t3_clicktoopenrate)/2 as target_clicktoopenrate,
(t2_openrate + t3_openrate)/2 as target_openrate
from (
with CTE as (
select extract(month from date_col) as month_col,
extract(year from date_col) as year_col,
category as t_campaign_cmcategory,
country as t_country,
brand as t_brand,
round(sum(opened)/nullif(sum(delivered),0),3) as OpenRate,
round(sum(clicked)/nullif(sum(opened),0),3) as ClickToOpenRate
from public.exasol_last_year_avg
group by 1, 2, 3, 4, 5)
select t1.month_col,
t1.year_col,
t2.month_col as t2_month_col,
t2.year_col as t2_year_col,
t3.month_col as t3_month_col,
t3.year_col as t3_year_col,
t1.t_campaign_cmcategory,
t1.t_country,
t1.t_brand,
t1.OpenRate,
t1.ClickToOpenRate,
t2.OpenRate as t2_OpenRate,
t2.ClickToOpenRate as t2_ClickToOpenRate,
t3.OpenRate as t3_OpenRate,
t3.ClickToOpenRate as t3_ClickToOpenRate
from CTE t1
left join CTE t2
on t1.month_col = t2.month_col + 1
and t1.year_col = t2.year_col
and t1.t_campaign_cmcategory = t2.t_campaign_cmcategory
and t1.t_country = t2.t_country
and t1.t_brand = t2.t_brand
left join CTE t3
on t1.month_col = t3.month_col
and t1.year_col = t3.year_col + 1
and t1.t_campaign_cmcategory = t3.t_campaign_cmcategory
and t1.t_country = t3.t_country
and t1.t_brand = t3.t_brand) as target_base) as final_tbl
Start with an aggregation query:
select date_trunc('month', date_col), country, brand,
sum(opened) * 1.0 / nullif(sum(delivered), 0) as OpenRate,
sum(clicked) * 1.0 / nullif(sum(opened), 0) as ClickToOpenRate
from exasol_last_year_avg
group by 1, 2, 3;
Then, use window functions. Assuming you have a value for every month (with no gaps). you can just use lag(). I'm not sure what your final calculation is, but this brings in the data:
with mcb as (
select date_trunc('month', date_col) as yyyymm, country, brand,
sum(opened) * 1.0 / nullif(sum(delivered), 0) as OpenRate,
sum(clicked) * 1.0 / nullif(sum(opened), 0) as ClickToOpenRate
from exasol_last_year_avg
group by 1, 2, 3
)
select mcb.*,
lag(openrate, 1) over (partition by country, brand order by yyyymm) as prev_month_openrate,
lag(ClickToOpenRate, 1) over (partition by country, brand order by yyyymm) as prev_month_ClickToOpenRate,
lag(openrate, 12) over (partition by country, brand order by yyyymm) as prev_year_openrate,
lag(ClickToOpenRate, 12) over (partition by country, brand order by yyyymm) as prev_year_ClickToOpenRate
from mcb;
This works with a different join condition:
select *
from
(Select month_col,
year_col,
t_campaign_cmcategory,
t_country,
t_brand,
(t2_clicktoopenrate + t3_clicktoopenrate)/2 as target_clicktoopenrate,
(t2_openrate + t3_openrate)/2 as target_openrate
from (
with CTE as (
select extract(month from date_col) as month_col,
extract(year from date_col) as year_col,
category as t_campaign_cmcategory,
country as t_country,
brand as t_brand,
round(sum(opened)/nullif(sum(delivered),0),3) as OpenRate,
round(sum(clicked)/nullif(sum(opened),0),3) as ClickToOpenRate
from public.exasol_last_year_avg
group by 1, 2, 3, 4, 5)
select t1.month_col,
t1.year_col,
t2.month_col as t2_month_col,
t2.year_col as t2_year_col,
t3.month_col as t3_month_col,
t3.year_col as t3_year_col,
t1.t_campaign_cmcategory,
t1.t_country,
t1.t_brand,
t1.OpenRate,
t1.ClickToOpenRate,
t2.OpenRate as t2_OpenRate,
t2.ClickToOpenRate as t2_ClickToOpenRate,
t3.OpenRate as t3_OpenRate,
t3.ClickToOpenRate as t3_ClickToOpenRate
from CTE t1
left join CTE t2
-- adjusted join condition
on ((t1.month_col = (CASE WHEN t1.month_col = 1 then t2.month_col - 11 END) and t1.year_col = t2.year_col + 1)
or (t1.month_col = (CASE WHEN t1.month_col != 1 then t2.month_col + 1 END) and t1.year_col = t2.year_col))
and t1.t_campaign_cmcategory = t2.t_campaign_cmcategory
and t1.t_country = t2.t_country
and t1.t_brand = t2.t_brand
left join CTE t3
on t1.month_col = t3.month_col
and t1.year_col = t3.year_col + 1
and t1.t_campaign_cmcategory = t3.t_campaign_cmcategory
and t1.t_country = t3.t_country
and t1.t_brand = t3.t_brand) as target_base) as final_tbl

Group by join date + add month in where sql

I have the following table:
PERSON
ID
Name
date_created
date_left
What I want is a list of all months and the amount of users joined and the amount of users that left.
I already have the following query: it returns the amount of new users that joined in the month that I pass:
select MONTH(date_created) 'Month', YEAR(date_created) 'Year', count(*) as 'New Users'
from person p
where YEAR(date_created) = 2018 and MONTH( p.date_created) = 5
group by MONTH(date_created), YEAR(date_created)
It returns what I want:
How would I edit this to include a year report and add the column 'Users left' next to the 'new users' one?
My result would be:
MONTH YEAR NEW USERS USERS LEFT
1 2019 10 5
I would "unpivot" the data using cross apply:
select v.[year], v.[month], sum(v.iscreated) as num_created,
sum(v.isleft) as num_left
from person p cross apply
(values (year(p.date_created), month(p.date_created), 1, 0),
(year(p.date_left), month(p.date_left), 0, 1)
) v([year], [month], iscreated, isleft)
group by v.[year], v.[month]
order by v.[year], v.[month];
The straight-forward approach is probably to full outer join all entries and all leaves. SQL Server makes this a bit awkward by not featuring USING, so we must use ON and COALESCE on month and year instead.
select
coalesce(pin.year, pout.year) as year,
coalesce(pin.month, pout.month) as month,
coalesce(pin.cnt, 0) as count_in,
coalesce(pout.cnt, 0) as count_out
from
(
select year(date_created) as year, month(date_created) as month, count(*) as cnt
from person
group by year(date_created), month(date_created)
) pin
full outer join
(
select year(date_left) as year, month(date_left) as month, count(*) as cnt
from person
group by month(date_left), year(date_left)
) pout on pout.year = pin.year and pout.month = pin.month
order by year, month;
Maybe you could do it with a SubSelect? Tried it right now with ORACLE Syntax, I'm not sure if it works in SQL-Server.
SELECT * FROM
(
select MONTH(date_created) 'Month_C', YEAR(date_created) 'Year_C', count(*) as 'New Users'
from person p
where YEAR(date_created) = 2018 and MONTH( p.date_created) = 5
group by MONTH(date_created), YEAR(date_created)
)created_user,
(
select MONTH(date_left) 'Month_L', YEAR(date_left) 'Year_L', count(*) as 'New Users'
from person p
where YEAR(date_left) = 2018 and MONTH( p.date_left) = 5
group by MONTH(date_left), YEAR(date_left)
) left_user
where created_user.Year_C = left_user.Year_L
and created_user.Month_C = left_user.Month_L

How can I find DATEDIFF for records in the same field?

I have a reporting table that looks like this - BEFORE:
The FREQ_CALC is the number of months between EFFECTIVEDATE and EXPIRY_DATE, divided by the noMonths field, FREQ_CODE, after the M.
I need to get everything into this shape - AFTER.
I am trying to figure out how to calculate the 'FREQUENCY' as well as the fields in blue, green, and pink (pink is very easy). Basically, 'FREQ_CODE' has an 'M' character and after that I have months and days in a month. If noMonths is 3, I need to start mxDays with 90, and then find the difference in the number of days from the maturityDate field, so it's not the DATEDIFF() between two fields, but the DATEDIFF between increasing dates in the same field, grouped by Credit_Line_NO. So, the three cells in yellow start mxDays. Also, mxFactor is 1 when mxDays is 30 or 90, and it is 365/360, when mxDays is 365. Finally, the Calc is the mxDays * Amount. This is super-easy. I just can't figure out how to get the mxDays and mxFactor setup.
For additional clarity, 91 days = 6/30/2018 - 3/31/2018 and 92 days = 9/30/2018 - 6/30/2018. Also, 1.0111 = 91/90 and 1.0222 = 92/90. Similarly, 0.8111 = 73/90. Finally, 1.0139 = 365/360 because noMonths = 12.
Maybe this requires a CTE and a couple Case...When...Then statements. Not sure...
I am using SQL Server 2008.
-- Here is my DDL
-- Drop table Reporting_Table
CREATE TABLE Reporting_Table (
Credit_Line_NO Varchar(10),
noMonths INT,
EFFECTIVEDATE Date,
EXPIRY_DATE Date,
Amount Money,
mxDays INT,
mxFactor decimal(5,4),
Calc Money)
INSERT INTO Reporting_Table (Credit_Line_NO, noMonths, EFFECTIVEDATE, EXPIRY_DATE, Amount, mxDays, mxFactor, Calc)
Values('9938810','3','3/31/2018','6/12/2020','11718.75','90','1','11718.75')
INSERT INTO Reporting_Table (Credit_Line_NO, noMonths, EFFECTIVEDATE, EXPIRY_DATE, Amount, mxDays, mxFactor, Calc)
Values('2235461','1','6/30/2018','6/6/2019','12345','30','1','12345')
INSERT INTO Reporting_Table (Credit_Line_NO, noMonths, EFFECTIVEDATE, EXPIRY_DATE, Amount, mxDays, mxFactor, Calc)
Values('3365434','12','6/30/2018','6/30/2019','298523.36085','365','1.01388888888889','302669.518639583')
For SQL 2008 you need to order your table with row_number and join each row with previous one. Then make calculations
with cte as (
select
*, rn = row_number() over (partition by Credit_Line_NO order by maturityDate)
from
Reporting_Table
)
select
a.*, mxDay = isnull(q.dayDiff, q.mDay), z.mxFactor
, Calc = z.mxFactor * a.Amount
from
cte a
left join cte b on a.Credit_Line_NO = b.Credit_Line_NO and a.rn - 1 = b.rn
cross apply (select
mDay = case
when a.noMonths = 1 then 30
when a.noMonths = 3 then 90
when a.noMonths = 12 then 365
end, dayDiff = datediff(dd, b.maturityDate, a.maturityDate)) q
cross apply (select mxFactor = cast(1.0 * isnull(q.dayDiff, q.mDay) / q.mDay as decimal(10,4))) z
Edit:
This is update query:
with cte as (
select
*, rn = row_number() over (partition by Credit_Line_NO order by maturityDate)
from
Reporting_Table
)
, cte2 as (
select
a.Credit_Line_NO, a.noMonths, a.maturityDate, a.Amount, mxDay = isnull(q.dayDiff, q.mDay), z.mxFactor
, Calc = z.mxFactor * a.Amount
from
cte a
left join cte b on a.Credit_Line_NO = b.Credit_Line_NO and a.rn - 1 = b.rn
cross apply (select
mDay = case
when a.noMonths = 1 then 30
when a.noMonths = 3 then 90
when a.noMonths = 12 then 365
end, dayDiff = datediff(dd, b.maturityDate, a.maturityDate)) q
cross apply (select mxFactor = cast(1.0 * isnull(q.dayDiff, q.mDay) / q.mDay as decimal(10,4))) z
)
update r
set r.mxDay = c.mxDay, r.mxFactor = c.mxFactor, r.Calc = c.Calc
from
Reporting_Table r
join cte2 c on r.Credit_Line_NO = c.Credit_Line_NO and r.noMonths = c.noMonths and r.maturityDate = c.maturityDate

Group a query by every month

I have the following query :
select
(select Sum(Stores) from XYZ where Year = '2013' and Month = '8' )
-
(select Sum(SalesStores) from ABC where Year = '2013' and Month = '8') as difference
Here in the above query Year and Month are also columns of a table.
I would like to know if there is a way to run the same query so that , it is run against every month of the year ?
If there are months without data/rows within XYZ or ABC tables then I would use FULL OUTER JOIN:
SELECT ISNULL(x.[Month], y.[Month]) AS [Month],
ISNULL(x.Sum_Stores, 0) - ISNULL(y.Sum_SalesStores, 0) AS Difference
FROM
(
SELECT [Month], Sum(Stores) AS Sum_Stores
FROM XYZ
WHERE [Year] = '2013'
GROUP BY [Month]
) AS x
FULL OUTER JOIN
(
SELECT [Month], Sum(SalesStores) AS Sum_SalesStores
FROM ABC
WHERE [Year] = '2013'
GROUP BY [Month]
) AS y ON x.[Month] = y.[Month]
;WITH Months(Month) AS
(
SELECT 1
UNION ALL
SELECT Month + 1
FROM Months
where Month < 12
)
SELECT '2013' [Year], m.Month, COALESCE(SUM(Stores), 0) - COALESCE(SUM(SalesStores), 0) [Difference]
FROM months m
LEFT JOIN XYZ x ON m.Month = x.Month
LEFT JOIN ABC a ON a.Month = m.Month
GROUP BY m.Month
You could use GROUP BY in your inner trades, and then run a join, like this:
SELECT left.Month, (left.sum - COALESCE(right.sum, 0)) as difference
FROM (
SELECT Month, SUM(Stores) as sum
FROM XYZ WHERE Year = '2013'
GROUP BY Month
) left
LEFT OUTER JOIN (
SELECT Month, SUM(Stores) as sum
FROM ABC WHERE Year = '2013'
GROUP BY Month
) right ON left.Month = right.Months
Note the use of COALESCE. It lets you preserve the value of the first SUM in case when there are no records for the month in the ABC table.
In the following example uses the UNION ALL operator with CTE
;WITH cte AS
(SELECT SUM(Stores) AS Stores, [Month]
FROM dbo.XYZ
WHERE [Year] = '2013'
GROUP BY [Month]
UNION ALL
SELECT -1.00 * SUM(SalesStores), [Month]
FROM dbo.ABC
WHERE [Year] = '2013'
GROUP BY [Month]
)
SELECT [Month], SUM(Stores) AS Difference
FROM cte
GROUP BY [Month]
Demo on SQLFiddle
;WITH Months(Month) AS
(
SELECT 1
UNION ALL
SELECT Month + 1
FROM Months
where Month < 12
)
SELECT Months. Month ,
(select isnull(Sum(Stores),0) from XYZ where Year = '2013' and Month = Months.Month) - (select isnull(Sum(SalesStores),0) from ABC where Year = '2013' and Month =Months.Month) as difference
FROM Months

How to count open records, grouped by hour and day in SQL-server-2008-r2

I have hospital patient admission data in Microsoft SQL Server r2 that looks something like this:
PatientID, AdmitDate, DischargeDate
Jones. 1-jan-13 01:37. 1-jan-13 17:45
Smith 1-jan-13 02:12. 2-jan-13 02:14
Brooks. 4-jan-13 13:54. 5-jan-13 06:14
I would like count the number of patients in the hospital day by day and hour by hour (ie at
1-jan-13 00:00. 0
1-jan-13 01:00. 0
1-jan-13 02:00. 1
1-jan-13 03:00. 2
And I need to include the hours when there are no patients admitted in the result.
I can't create tables so making a reference table listing all the hours and days is out, though.
Any suggestions?
To solve this problem, you need a list of date-hours. The following gets this from the admit date cross joined to a table with 24 hours. The table of 24 hours is calculating from information_schema.columns -- a trick for getting small sequences of numbers in SQL Server.
The rest is just a join between this table and the hours. This version counts the patients at the hour, so someone admitted and discharged in the same hour, for instance is not counted. And in general someone is not counted until the next hour after they are admitted:
with dh as (
select DATEADD(hour, seqnum - 1, thedatehour ) as DateHour
from (select distinct cast(cast(AdmitDate as DATE) as datetime) as thedatehour
from Admission a
) a cross join
(select ROW_NUMBER() over (order by (select NULL)) as seqnum
from INFORMATION_SCHEMA.COLUMNS
) hours
where hours <= 24
)
select dh.DateHour, COUNT(*) as NumPatients
from dh join
Admissions a
on dh.DateHour between a.AdmitDate and a.DischargeDate
group by dh.DateHour
order by 1
This also assumes that there are admissions on every day. That seems like a reasonable assumption. If not, a calendar table would be a big help.
Here is one (ugly) way:
;WITH DayHours AS
(
SELECT 0 DayHour
UNION ALL
SELECT DayHour+1
FROM DayHours
WHERE DayHour+1 <= 23
)
SELECT B.AdmitDate, A.DayHour, COUNT(DISTINCT PatientID) Patients
FROM DayHours A
CROSS JOIN (SELECT DISTINCT CONVERT(DATE,AdmitDate) AdmitDate
FROM YourTable) B
LEFT JOIN YourTable C
ON B.AdmitDate = CONVERT(DATE,C.AdmitDate)
AND A.DayHour = DATEPART(HOUR,C.AdmitDate)
GROUP BY B.AdmitDate, A.DayHour
This is a bit messy and includes a temp table with the test data you provided but
CREATE TABLE #HospitalPatientData (PatientId NVARCHAR(MAX), AdmitDate DATETIME, DischargeDate DATETIME)
INSERT INTO #HospitalPatientData
SELECT 'Jones.', '1-jan-13 01:37:00.000', '1-jan-13 17:45:00.000' UNION
SELECT 'Smith', '1-jan-13 02:12:00.000', '2-jan-13 02:14:00.000' UNION
SELECT 'Brooks.', '4-jan-13 13:54:00.000', '5-jan-13 06:14:00.000'
;WITH DayHours AS
(
SELECT 0 DayHour
UNION ALL
SELECT DayHour+1
FROM DayHours
WHERE DayHour+1 <= 23
),
HospitalPatientData AS
(
SELECT CONVERT(nvarchar(max),AdmitDate,103) as AdmitDate ,DATEPART(hour,(AdmitDate)) as AdmitHour, COUNT(PatientID) as CountOfPatients
FROM #HospitalPatientData
GROUP BY CONVERT(nvarchar(max),AdmitDate,103), DATEPART(hour,(AdmitDate))
),
Results AS
(
SELECT MAX(h.AdmitDate) as Date, d.DayHour
FROM HospitalPatientData h
INNER JOIN DayHours d ON d.DayHour=d.DayHour
GROUP BY AdmitDate, CountOfPatients, DayHour
)
SELECT r.*, COUNT(h.PatientId) as CountOfPatients
FROM Results r
LEFT JOIN #HospitalPatientData h ON CONVERT(nvarchar(max),AdmitDate,103)=r.Date AND DATEPART(HOUR,h.AdmitDate)=r.DayHour
GROUP BY r.Date, r.DayHour
ORDER BY r.Date, r.DayHour
DROP TABLE #HospitalPatientData
This may get you started:
BEGIN TRAN
DECLARE #pt TABLE
(
PatientID VARCHAR(10)
, AdmitDate DATETIME
, DischargeDate DATETIME
)
INSERT INTO #pt
( PatientID, AdmitDate, DischargeDate )
VALUES ( 'Jones', '1-jan-13 01:37', '1-jan-13 17:45' ),
( 'Smith', '1-jan-13 02:12', '2-jan-13 02:14' )
, ( 'Brooks', '4-jan-13 13:54', '5-jan-13 06:14' )
DECLARE #StartDate DATETIME = '20130101'
, #FutureDays INT = 7
;
WITH dy
AS ( SELECT TOP (#FutureDays)
ROW_NUMBER() OVER ( ORDER BY name ) dy
FROM sys.columns c
) ,
hr
AS ( SELECT TOP 24
ROW_NUMBER() OVER ( ORDER BY name ) hr
FROM sys.columns c
)
SELECT refDate, COUNT(p.PatientID) AS PtCount
FROM ( SELECT DATEADD(HOUR, hr.hr - 1,
DATEADD(DAY, dy.dy - 1, #StartDate)) AS refDate
FROM dy
CROSS JOIN hr
) ref
LEFT JOIN #pt p ON ref.refDate BETWEEN p.AdmitDate AND p.DischargeDate
GROUP BY refDate
ORDER BY refDate
ROLLBACK