Creating Crosstab/Matrix KPI Report Showing Counts of Open Records Over Time - sql

I'm using MS Report Builder 3.0 / SQL Server 2012 and I have a database of "ticket" records. Each ticket has a status (simplified to open / closed), an origination date, and a completion date. I've been asked to build a cross-tab report that returns the number of open records as of the last day of the month for the last 12 months.
I could easily provide a report that shows open items NOW. I can also fairly easily calculate the number of open items on any given date (origination date <= #DATE, comp date > #date or comp date is null). Using that logic, I could even define a dataset for each of the 12 periods for the given scope, but since each of those periods would be defined explicitly, they wouldn't be in the same field to use as the column group for the cross tab, so I don't know how I would actually be able to construct a single crosstab table that would summarize those results.
Anyone ever done anything like this and can share their method?
My most recent thought is to select each date period explicitly and combine them using unions and then use that as the basis for the report, but I'm having a tough time forcing my brain to congeal that concept into something I can execute.

Wouldn't something like this work? This query takes all the cases with status 'open' from last year then groups them by months.
-- Count tickets still flagged 'open', bucketed by the month end of their completion date.
-- Only rows whose completionDate is later than the month end one year before today are kept.
select EOMONTH(completionDate), count(*)
from data
where ticketStatus = 'open' and completionDate > EOMONTH(DateAdd(Year, -1, GETDATE()))
group by EOMONTH(completionDate)
You can adjust the completionDate > EOMONTH(DateAdd(Year, -1, GETDATE())) condition depending on what exactly you need.

Creating the query from the perspective of the time period was the solution.
-- Number of activities open at each month end, for the current month and the 12 months
-- before it. PERIODS supplies one row per month, so a month with no matching activity is
-- still returned (COUNT skips the NULL ACTIVITY_ID produced by the LEFT JOIN, giving 0).
-- @FAC is the facility parameter supplied by the report.
SELECT
PERIOD_START
,COUNT (ACOUNT.ACTIVITY_ID) OPEN_ACT_COUNT
FROM
(
-- One row per month offset (0 = current month ... 12 = twelve months back).
-- PERIOD_START is the first day of that month, PERIOD_END its last day.
SELECT
DATEFROMPARTS(DATEPART(yy,DATEADD(mm,-MONTH_OFFSETS.MONTHS_BACK,GETDATE())),DATEPART(mm,DATEADD(mm,-MONTH_OFFSETS.MONTHS_BACK,GETDATE())),1) AS PERIOD_START
,EOMONTH(GETDATE(),-MONTH_OFFSETS.MONTHS_BACK) AS PERIOD_END
FROM
(VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12)) AS MONTH_OFFSETS (MONTHS_BACK)
) PERIODS
LEFT JOIN (
-- One row per activity (times any party / date rows joined below) with its
-- origination date, completion date and a derived OPEN / COMPLETE status.
SELECT
AROOT.ACTIVITY_ID
,DACT_PARTY.ACT_OWNER_DEPT_GRP
,DACT_ORI.ACT_ORI_DATE
,DACT_COM.ACT_COM_DATE
-- Lifecycle codes outside both lists leave STATUS NULL.
,CASE WHEN AROOT.LCYCLE_CD IN ('01','02','03','04','60','63') THEN 'OPEN'
WHEN AROOT.LCYCLE_CD IN ('06','07','09') THEN 'COMPLETE' END STATUS
FROM
DM_IAM_D_ACT_ROOT AROOT
-- Parent issue must be a QIM issue for the requested facility; 'MT' / 'PI' / 'HU' are
-- translated to facility codes, any other @FAC value is compared as-is.
JOIN DM_IAM_D_I_ROOT IROOT ON IROOT.DB_KEY = AROOT.PAR_ISSUE_UUID AND IROOT.APPLICATION = 'QIM' AND IROOT.LCYCLE_CD NOT IN ('10', '64') AND IROOT.ZZCAP_FACILITY = CASE WHEN @FAC = 'MT' THEN 'N200' WHEN @FAC = 'PI' THEN 'N202' WHEN @FAC = 'HU' THEN 'N204' ELSE @FAC END
-- Owning party details, matched to personnel by the first 40 characters of the e-mail address.
-- NOTE(review): an activity with several matching party rows is returned several times and
-- would be counted more than once by the outer COUNT - confirm, or count DISTINCT ids.
OUTER APPLY (
SELECT
DM_IAM_D_ACT_PARTY.PARENT_KEY
,DM_BUT000.FIRST_LAST_NAME
,NPDA.FLEET_ID
,NPDA.DEPARTMENT
,DEPTS.ZZCAP_OWED_TO_SUB_DEPT_DESC AS DEPT_DESC
,DEPTS.ZZCAP_OWED_DEPT AS ACT_OWNER_DEPT_GRP
,NPDA.SUPERVISOR
FROM
DM_IAM_D_ACT_PARTY
JOIN DM_BUT000 ON DM_IAM_D_ACT_PARTY.PARTNER_ID = DM_BUT000.PARTNER
JOIN DM_ADR6 ON DM_BUT000.PERSNUMBER = DM_ADR6.PERSNUMBER
JOIN NMC_PERSONNEL_DATA NPDA ON LEFT(DM_ADR6.SMTP_ADDR,40) = LEFT(NPDA.WORK_EMAIL,40)
JOIN DM_ZCAP_OWED_DEPT DEPTS ON NPDA.DEPARTMENT = DEPTS.ZZCAP_OWED_TO_SUB_DEPT
WHERE
AROOT.DB_KEY = DM_IAM_D_ACT_PARTY.PARENT_KEY AND AROOT.MANDT = DM_IAM_D_ACT_PARTY.MANDT
AND DM_IAM_D_ACT_PARTY.PARTY_ROLE_CODE IN ('ACTDRIVR', 'ZASSIGN', 'ZACTAPP')
) AS DACT_PARTY
-- Origination date: year, month and day are cut from the first 8 characters of DATE_TIME.
LEFT JOIN (
SELECT
DM_IAM_D_ACT_DATE.PARENT_KEY
,DATEFROMPARTS(LEFT(DM_IAM_D_ACT_DATE.DATE_TIME,4),RIGHT(LEFT(DM_IAM_D_ACT_DATE.DATE_TIME,6),2),RIGHT(LEFT(DM_IAM_D_ACT_DATE.DATE_TIME,8),2)) AS ACT_ORI_DATE
FROM
DM_IAM_D_ACT_DATE
WHERE
DM_IAM_D_ACT_DATE.ROLE_CD = 'ORI'
AND DM_IAM_D_ACT_DATE.DATE_TIME > 19000000000000
) DACT_ORI ON AROOT.DB_KEY = DACT_ORI.PARENT_KEY
-- Completion date, extracted the same way from the 'COM' date row.
LEFT JOIN (
SELECT
DM_IAM_D_ACT_DATE.PARENT_KEY
,DATEFROMPARTS(LEFT(DM_IAM_D_ACT_DATE.DATE_TIME,4),RIGHT(LEFT(DM_IAM_D_ACT_DATE.DATE_TIME,6),2),RIGHT(LEFT(DM_IAM_D_ACT_DATE.DATE_TIME,8),2)) AS ACT_COM_DATE
FROM
DM_IAM_D_ACT_DATE
WHERE
DM_IAM_D_ACT_DATE.ROLE_CD = 'COM'
AND DM_IAM_D_ACT_DATE.DATE_TIME > 19000000000000
) DACT_COM ON AROOT.DB_KEY = DACT_COM.PARENT_KEY
WHERE
AROOT.ACT_TEMPLATE IN (' ', 'CA', 'CAPR', 'CCA', 'FBC-TEMP', 'FBD-TEMP', 'MRA', 'OBD-TEMP', 'OBN_OBD_CO', 'OBN_OBD_FB', 'OBN-TEMP', 'IA')
AND AROOT.LCYCLE_CD NOT IN ('10','64')
AND AROOT.LONG_TERM <> 'X'
-- An activity counts for a period when it originated on or before the period end and is
-- either still OPEN now or was completed on or after the period end.
) ACOUNT ON ACOUNT.ACT_ORI_DATE <= PERIOD_END AND (ACOUNT.STATUS = 'OPEN' OR ACOUNT.ACT_COM_DATE >= PERIOD_END)
GROUP BY PERIOD_START
ORDER BY PERIOD_START

Related

How to solve a nested aggregate function in SQL?

I'm trying to use a nested aggregate function. I know that SQL does not support it, but I really need to do something like the below query. Basically, I want to count the number of users for each day. But I want to only count the users that haven't completed an order within a 15 days window (relative to a specific day) and that have completed any order within a 30 days window (relative to a specific day). I already know that it is not possible to solve this problem using a regular subquery (it does not allow to change subquery values for each date). The "id" and the "state" attributes are related to the orders. Also, I'm using Fivetran with Snowflake.
-- Illustration of the desired logic only: an aggregate (count) is nested inside another
-- aggregate, which the question itself notes SQL does not support.
-- Intent: per day, count users with no finished order in the preceding 15 days but at
-- least one finished order in the 30-to-16-day window before that day.
SELECT
db.created_at::date as Date,
count(case when
-- inner count #1: finished orders between 15 days and 1 day before Date
(count(case when (db.state = 'finished')
and (db.created_at::date between dateadd(day,-15,Date) and dateadd(day,-1,Date)) then db.id end)
= 0) and
-- inner count #2: finished orders between 30 and 16 days before Date
(count(case when (db.state = 'finished')
and (db.created_at::date between dateadd(day,-30,Date) and dateadd(day,-16,Date)) then db.id end)
> 0) then db.user end)
FROM
data_base as db
WHERE
db.created_at::date between '2020-01-01' and dateadd(day,-1,current_date)
GROUP BY Date
In other words, I want to transform the below query in a way that the "current_date" changes for each date.
-- Finished-order count per user over the 15 days before today (yesterday inclusive).
WITH completed_15_days_before AS (
select
db.user as User,
count(case when db.state = 'finished' then db.id end) as Completed
from
data_base as db
where
db.created_at::date between dateadd(day,-15,current_date) and dateadd(day,-1,current_date)
group by User
),
-- Finished-order count per user over the window 30 to 16 days before today.
completed_16_days_before AS (
select
db.user as User,
count(case when db.state = 'finished' then db.id end) as Completed
from
data_base as db
where
db.created_at::date between dateadd(day,-30,current_date) and dateadd(day,-16,current_date)
group by User
)
-- Per order date: distinct users that churned (nothing finished in the last 15 days but
-- something in the 15 days before that) and distinct users that are active.
-- The joins and column references use the CTE and column names exactly as declared above.
SELECT
date(db.created_at) as Date,
count(distinct case when comp_15.Completed = 0 and comp_16.Completed > 0 then comp_15.user end) as "Total Users Churn",
count(distinct case when comp_15.Completed > 0 then comp_15.user end) as "Total Users Active",
week(Date) as Week
FROM
data_base as db
left join completed_15_days_before as comp_15 on comp_15.user = db.user
left join completed_16_days_before as comp_16 on comp_16.user = db.user
WHERE
db.created_at::date between '2020-01-01' and dateadd(day,-1,current_date)
GROUP BY Date
Does anyone have a clue on how to solve this puzzle? Thank you very much!
The following should give you roughly what you want - difficult to test without sample data but should be a good enough starting point for you to then amend it to give you exactly what you want.
I've commented the code to hopefully explain what each section is doing.
-- set parameter for the first date you want to generate the resultset for
set start_date = TO_DATE('2020-01-01','YYYY-MM-DD');
-- calculate the number of days from the start_date to the current date, both inclusive
set num_days = (Select datediff(day, $start_date , current_date()+1));
--generate a list of all the dates from the start date to the current date
-- i.e. every date that needs to appear in the resultset
-- row_number() runs 1..num_days, so date_item runs from current_date back to start_date
WITH date_list as (
select
dateadd(day, 1 - row_number() over (order by null), current_date()) as date_item
from table (generator(rowcount => ($num_days)))
)
--Create a list of all the orders that are in scope
-- i.e. 30 days before the start_date up to the current date
-- amend WHERE clause to in/exclude records as appropriate
-- created_at is reduced to a calendar date so that an order placed at any time of day
-- falls inside the day-based windows used below
,order_list as (
SELECT created_at::date as created_date, rt_id
from data_base
where created_at::date between dateadd(day,-30,$start_date) and current_date()
and state = 'finished'
)
SELECT dl.date_item
,COUNT (DISTINCT ol30.RT_ID) AS USER_COUNT
,COUNT (ol30.RT_ID) as ORDER_COUNT
FROM date_list dl
-- get all orders between -30 and -16 days of each date in date_list
left outer join order_list ol30 on ol30.created_date between dateadd(day,-30,dl.date_item) and dateadd(day,-16,dl.date_item)
-- exclude records that have the same RT_ID as in the ol30 dataset but have a date between 0 and -15 of the date in date_list
-- (dates with no order in the -30..-16 window keep their NULL row, so they still appear with counts of 0)
WHERE NOT EXISTS (SELECT ol15.RT_ID
FROM order_list ol15
WHERE ol30.RT_ID = ol15.RT_ID
AND ol15.created_date between dateadd(day,-15,dl.date_item) and dl.date_item)
GROUP BY dl.date_item
ORDER BY dl.date_item;

List all months with a total regardless of null

I have a very small SQL table that lists courses attended and the date of attendance. I can use the code below to count the attendees for each month
-- Attendance rows per calendar month (YYYY/MM) for the 'Fire Safety' course.
-- A month without any attendance produces no row at all.
SELECT
    TO_CHAR(tca.date_attended, 'YYYY/MM'),
    COUNT(*)
FROM training_course_attended tca
WHERE tca.course_attended = 'Fire Safety'
GROUP BY TO_CHAR(tca.date_attended, 'YYYY/MM')
ORDER BY TO_CHAR(tca.date_attended, 'YYYY/MM')
This returns a list as expected for each month that has attendees. However I would like to list it as
January 2
February 0
March 5
How do I show the count results along with the nulls? My table is very basic
1234 01-JAN-15 Fire Safety
108 01-JAN-15 Fire Safety
1443 02-DEC-15 Healthcare
1388 03-FEB-15 Emergency
1355 06-MAR-15 Fire Safety
1322 09-SEP-15 Fire Safety
1234 11-DEC-15 Fire Safety
I just need to display each month and the total attendees for Fire Safety only. Not used SQL developer for a while so any help appreciated.
You would need a calendar table to select a period you want to display. Simplified code would look like this:
-- Attendee count per month across a calendar range, keeping months with no attendance.
-- :period_start_dt and :period_end_dt are bind variables delimiting the reporting period.
-- The course filter lives in the ON clause: in WHERE it would discard the NULL-extended
-- calendar rows and silently turn the LEFT JOIN into an inner join.
-- COUNT(tca.DATE_ATTENDED) counts matched rows only, so empty months report 0 instead of 1.
-- NOTE(review): assumes calendar holds one row per day and DATE_ATTENDED carries no time part - confirm.
select to_char(c.Date_dt,'YYYY/MM')
, COUNT (tca.DATE_ATTENDED)
FROM calendar c
left join TRAINING_COURSE_ATTENDED tca
on tca.DATE_ATTENDED = c.Date_dt
and tca.COURSE_ATTENDED = 'Fire Safety'
WHERE c.Date_dt between :period_start_dt and :period_end_dt
GROUP BY to_char(c.Date_dt,'YYYY/MM')
ORDER BY to_char(c.Date_dt,'YYYY/MM')
You can generate the required set of year-months on the fly, each with a count of 0, and combine it with the real counts using a query like the one below.
-- Monthly 'Fire Safety' attendance for 2015, including months with no attendance.
-- The first branch holds the real counts per YYYYMM; the second adds a 0 row for each of
-- the twelve months of 2015, so the outer SUM yields 0 where nothing was attended.
Select yrmth,sum(counter) from
(
select to_char(date_attended,'YYYYMM') yrmth,
COUNT (1) counter
From TRAINING_COURSE_ATTENDED Where COURSE_ATTENDED = 'Fire Safety'
Group By to_char(date_attended,'YYYYMM')
Union All
Select To_Char(2015||Lpad(Rownum,2,0)),0 from Dual Connect By Rownum <= 12
)
group by yrmth
order by yrmth
If you want to show multiple years, just change the second query to
-- Zero-count placeholder row for every month (1..12) of the three years starting at 2015,
-- formatted as YYYYMM.
select to_char(yrs.Year || lpad(mths.Month, 2, 0)), 0
from (select rownum Month from dual connect by rownum <= 12) mths
cross join (select 2015 + rownum - 1 Year from dual connect by rownum <= 3) yrs
Try this :
-- Per month present in the table: how many rows are 'Fire Safety' attendances.
-- Months with rows for other courses only show 0; months with no rows at all are absent.
SELECT TRUNC(tca.date_attended, 'MM') Month,
       COUNT(CASE WHEN tca.course_attended = 'Fire Safety' THEN 1 END) Fire_Safety
FROM training_course_attended tca
GROUP BY TRUNC(tca.date_attended, 'MM')
ORDER BY TRUNC(tca.date_attended, 'MM')
Another way to generate a calendar table inline:
-- Inline calendar: twelve rows, one per month from 2015-01 to 2015-12, each carrying the
-- first instant of the month and the last second of the month.
with calendar (month_start, month_end) as (
    select
        add_months(date '2014-12-01', rownum),
        add_months(date '2014-12-01', rownum + 1) - interval '1' second
    from dual
    connect by rownum <= 12
)
-- The course filter sits in the join condition so months without attendance survive
-- the outer join and are counted as 0.
select
    to_char(c.month_start, 'YYYY/MM') as course_month,
    count(tca.course_attended)        as attended
from calendar c
left join training_course_attended tca
    on  tca.course_attended = 'Fire Safety'
    and tca.date_attended between c.month_start and c.month_end
group by to_char(c.month_start, 'YYYY/MM')
order by course_month;
(You could also have only the month start in the calendar table, and join on trunc(tca.date_attended,'MONTH') = c.month_start, though if you had indexes or partitioning on tca.date_attended that might be less efficient.)

Getting rid of grouping field

Is there a safe way to not have to group by a field when using an aggregate in another field? Here is my example
-- Previous-year gross amount per customer and industry code, with a derived business unit.
-- Because A.InvoiceDateID is in the GROUP BY, the result has one row per
-- customer / industry code / invoice date rather than one row per customer.
SELECT
C.CustomerName
,D.INDUSTRY_CODE
-- BU: listed industry codes map to 'PM', the customer 'ULINE INC' to 'ULINE', everything else to 'DR'
,CASE WHEN D.INDUSTRY_CODE IN ('003','004','005','006','007','008','009','010','017','029')
THEN 'PM'
WHEN UPPER(CustomerName) = 'ULINE INC'
THEN 'ULINE'
ELSE 'DR'
END AS BU
-- The scalar subquery has no FROM clause, so its columns come from the outer row source.
-- It sums GrossAmount when the invoice date (InvoiceDateID read as a yyyymmdd value) lies
-- between 1 January of last year and today's date one year ago; otherwise ISNULL yields 0.
,ISNULL((SELECT SUM(GrossAmount)
where CONVERT(date,convert(char(8),InvoiceDateID )) between DATEADD(yy, DATEDIFF(yy, 0, GETDATE()) - 1, 0) and DATEADD(year, -1, GETDATE())),0) [PREVIOUS YEAR GROSS]
FROM factMargins A
LEFT OUTER JOIN dimDate B ON A.InvoiceDateID = B.DateId
LEFT OUTER JOIN dimCustomer C ON A.CustomerID = C.CustomerId
LEFT OUTER JOIN CRCDATA.DBO.CU10 D ON D.CUST_NUMB = C.CustomerNumber
GROUP BY
C.CustomerName,D.INDUSTRY_CODE
,A.InvoiceDateID
order by CustomerName
before grouping I was only getting 984 rows but after grouping by the A.InvoiceDateId field I am getting over 11k rows. The rows blow up since there are multiple invoices per customer. Min and Max wont work since then it will pull data incorrectly. Would it be best to let my application (crystal) get rid of the extra lines? Usually I like to have my base data be as close as possible to how the report will layout if possible.
Try moving the reference to InvoiceDateID to within an aggregate function, rather than within a selected subquery's WHERE clause.
In Oracle, here's an example:
-- Self-contained Oracle example: six sample revenue rows for customers A and B, dated
-- today and each of the five preceding days.
with TheData as (
select 'A' customerID, 25 AMOUNT , trunc(sysdate) THEDATE from dual union
select 'B' customerID, 35 AMOUNT , trunc(sysdate-1) THEDATE from dual union
select 'A' customerID, 45 AMOUNT , trunc(sysdate-2) THEDATE from dual union
select 'A' customerID, 11000 AMOUNT , trunc(sysdate-3) THEDATE from dual union
select 'B' customerID, 12000 AMOUNT , trunc(sysdate-4) THEDATE from dual union
select 'A' customerID, 15000 AMOUNT , trunc(sysdate-5) THEDATE from dual)
-- The date condition lives inside the aggregate, so THEDATE does not have to appear in
-- the GROUP BY and each customer stays on a single row.
select
CustomerID,
sum(amount) as "AllRevenue",
sum(case when thedate<sysdate-3 then amount else 0 end) as "OlderRevenue"
from thedata
group by customerID;
Output:
CustomerID | AllRevenue | OlderRevenue
A | 26070 | 26000
B | 12035 | 12000
This says:
For each customerID
I want the sum of all amounts
and I want the sum of amounts earlier than 3 days ago

SQL moving average

How do you create a moving average in SQL?
Current table:
Date Clicks
2012-05-01 2,230
2012-05-02 3,150
2012-05-03 5,520
2012-05-04 1,330
2012-05-05 2,260
2012-05-06 3,540
2012-05-07 2,330
Desired table or output:
Date Clicks 3 day Moving Average
2012-05-01 2,230
2012-05-02 3,150
2012-05-03 5,520 4,360
2012-05-04 1,330 3,330
2012-05-05 2,260 3,120
2012-05-06 3,540 3,320
2012-05-07 2,330 3,010
This is an Evergreen Joe Celko question.
I don't know which DBMS platform is being used, but in any case Joe was able to answer this more than 10 years ago with standard SQL.
Joe Celko SQL Puzzles and Answers citation:
"That last update attempt suggests that we could use the predicate to
construct a query that would give us a moving average:"
-- For each sample time in S1, average the load of every S2 sample taken within the hour
-- ending at that time (both ends of the BETWEEN range are inclusive).
SELECT S1.sample_time, AVG(S2.load) AS avg_prev_hour_load
FROM Samples AS S1, Samples AS S2
WHERE S2.sample_time
BETWEEN (S1.sample_time - INTERVAL 1 HOUR)
AND S1.sample_time
GROUP BY S1.sample_time;
Is the extra column or the query approach better? The query is
technically better because the UPDATE approach will denormalize the
database. However, if the historical data being recorded is not going
to change and computing the moving average is expensive, you might
consider using the column approach.
MS SQL Example:
-- Session temp table with six daily load readings (2012-06-04 to 2012-06-09).
CREATE TABLE #TestDW
(
    Date1     datetime,
    LoadValue numeric(13,6)
);

INSERT INTO #TestDW (Date1, LoadValue)
VALUES
    ('2012-06-09', '3.540'),
    ('2012-06-08', '2.260'),
    ('2012-06-07', '1.330'),
    ('2012-06-06', '5.520'),
    ('2012-06-05', '3.150'),
    ('2012-06-04', '2.230');
SQL Puzzle query:
-- For every date, average LoadValue over that date and the two days before it.
SELECT
    anchor.date1,
    AVG(win.LoadValue) AS avg_prev_3_days
FROM #TestDW AS anchor
INNER JOIN #TestDW AS win
    ON  win.date1 >= DATEADD(d, -2, anchor.date1)
    AND win.date1 <= anchor.date1
GROUP BY anchor.date1
ORDER BY anchor.date1;
One way to do this is to join on the same table a few times.
-- Moving average built from fixed self-joins: the current row plus the rows dated one,
-- two and three days earlier. A missing day contributes 0 and the divisor stays 4.
-- The base alias is "cur" because CURRENT is a reserved word in T-SQL.
select
(cur.Clicks
+ isnull(P1.Clicks, 0)
+ isnull(P2.Clicks, 0)
+ isnull(P3.Clicks, 0)) / 4 as MovingAvg3
from
MyTable as cur
left join MyTable as P1 on P1.[Date] = DateAdd(day, -1, cur.[Date])
left join MyTable as P2 on P2.[Date] = DateAdd(day, -2, cur.[Date])
left join MyTable as P3 on P3.[Date] = DateAdd(day, -3, cur.[Date])
Adjust the DateAdd component of the ON-Clauses to match whether you want your moving average to be strictly from the past-through-now or days-ago through days-ahead.
This works nicely for situations where you need a moving average over only a few data points.
This is not an optimal solution for moving averages with more than a few data points.
-- For each date, sum the clicks of that date and the two preceding days, divide by 3
-- and round. The divisor is fixed, so the first two dates are averaged over fewer rows.
select
    anchor.date,
    round(sum(win.clicks) / 3) as avg_clicks
from clickstable as anchor
inner join clickstable as win
    on datediff(anchor.date, win.date) between 0 and 2
group by anchor.date
Example here.
Obviously you can change the interval to whatever you need. You could also use count() instead of a magic number to make it easier to change, but that will also slow it down.
General template for rolling averages that scales well for large data sets
-- Rolling-average template: every data row is repeated once per lag and shifted forward
-- by that many days, so each reference_date collects its own row plus the rows of the
-- preceding days; a plain GROUP BY then averages them.
WITH moving_avg AS (
    SELECT offsets.[lag]
    FROM (VALUES (0), (1), (2), (3)) AS offsets ([lag]) --ETC
)
SELECT
    DATEADD(day, [lag], [date]) AS [reference_date],
    [otherkey1],
    [otherkey2],
    [otherkey3],
    AVG([value1]) AS [avg_value1],
    AVG([value2]) AS [avg_value2]
FROM [data_table]
CROSS JOIN moving_avg
GROUP BY
    [otherkey1],
    [otherkey2],
    [otherkey3],
    DATEADD(day, [lag], [date])
ORDER BY
    [otherkey1],
    [otherkey2],
    [otherkey3],
    [reference_date];
And for weighted rolling averages:
-- Weighted rolling-average template: each data row is repeated once per lag, shifted
-- forward by that many days and paired with the weight for that lag.
WITH weighted_avg AS (
SELECT 0 AS [lag], 1.0 AS [weight] UNION ALL
SELECT 1 AS [lag], 0.6 AS [weight] UNION ALL
SELECT 2 AS [lag], 0.3 AS [weight] UNION ALL
SELECT 3 AS [lag], 0.1 AS [weight] --ETC
)
SELECT
DATEADD(day,[lag],[date]) AS [reference_date],
[otherkey1],[otherkey2],[otherkey3],
-- Weighted mean = SUM(value * weight) / SUM(weight of the rows that have a value).
-- Only weights of non-NULL values enter the denominator, so a NULL value no longer drags
-- the result down; NULLIF keeps an all-NULL or zero-weight group at NULL instead of
-- raising a divide-by-zero error.
SUM([value1] * [weight]) / NULLIF(SUM(CASE WHEN [value1] IS NOT NULL THEN [weight] END), 0) AS [wavg_value1],
SUM([value2] * [weight]) / NULLIF(SUM(CASE WHEN [value2] IS NOT NULL THEN [weight] END), 0) AS [wavg_value2]
FROM [data_table]
CROSS JOIN weighted_avg
GROUP BY [otherkey1],[otherkey2],[otherkey3],DATEADD(day,[lag],[date])
ORDER BY [otherkey1],[otherkey2],[otherkey3],[reference_date];
-- Every row of #clicks_table plus the average clicks over its own date and the two
-- preceding days. The applied aggregate always returns exactly one row per outer row.
select
    c1.*,
    trailing.mov_avg
from #clicks_table c1
outer apply (
    select avg(c2.clicks) as mov_avg
    from #clicks_table c2
    where c2.date between dateadd(dd, -2, c1.date) and c1.date
) trailing
Use a different join predicate:
-- Trailing 3-day average per date, returned only for dates that have at least three rows
-- in their window.
-- The table name is bracketed because CURRENT is a reserved word in T-SQL, and the join
-- now looks backwards: periods rows dated from two days before the anchor up to the anchor.
SELECT cur.[date]
,avg(periods.clicks)
FROM [current] AS cur left outer join [current] as periods
ON periods.[date] BETWEEN dateadd(d,-2, cur.[date]) AND cur.[date]
GROUP BY cur.[date] HAVING COUNT(*) >= 3
The having statement will prevent any dates without at least N values from being returned.
assume x is the value to be averaged and xDate is the date value:
-- Trailing 3-day average of x for every xDate. The window has to be measured against a
-- separate anchor row: comparing xDate with itself is true for every row and would simply
-- return the average of the whole table.
SELECT anchor.xDate, avg(win.x)
FROM myTable anchor
INNER JOIN myTable win ON win.xDate BETWEEN dateadd(d, -2, anchor.xDate) AND anchor.xDate
GROUP BY anchor.xDate
In hive, maybe you could try
-- Average of clicks over the current row and the two rows before it, in date order.
select
    date,
    clicks,
    avg(clicks) over (
        order by date
        rows between 2 preceding and current row
    ) as moving_avg
from clicktable;
For the purpose, I'd like to create an auxiliary/dimensional date table like
-- Auxiliary date dimension keyed by date; continue the date_N pattern with one more
-- column per additional look-back step that is needed.
create table date_dim(date date, date_1 date, date_2 date, date_3 date)
while date is the key, date_1 for this day, date_2 contains this day and the day before; date_3...
Then you can do the equal join in hive.
Using a view like:
-- One row per (anchor date, date inside its look-back window): the anchor itself and each
-- of the three preceding days. The second column is aliased so the two output columns do
-- not share the name "date", which a view definition would not accept.
select date, date as window_date from date_dim
union all
select date, date_add(date, -1) from date_dim
union all
select date, date_add(date, -2) from date_dim
union all
select date, date_add(date, -3) from date_dim
NOTE: THIS IS NOT AN ANSWER but an enhanced code sample of Diego Scaravaggi's answer. I am posting it as an answer because the comment section is insufficient. Note that I have parameterized the period of the moving average.
-- @p is the moving-average period (number of consecutive d values per window).
declare @p int = 3
-- Sample data: d is a sequential day number, bal the value to average.
declare @t table(d int, bal float)
insert into @t values
(1,94),
(2,99),
(3,76),
(4,74),
(5,48),
(6,55),
(7,90),
(8,77),
(9,16),
(10,19),
(11,66),
(12,47)
-- For each d, average bal over d and the (@p - 1) values before it; the first rows are
-- averaged over however many earlier rows exist.
select a.d, avg(b.bal)
from
@t a
left join @t b on b.d between a.d-(@p-1) and a.d
group by a.d
--@p1 is period of moving average, @o1 is offset
declare @p1 as int
declare @o1 as int
set @p1 = 5;
set @o1 = 3;
-- np: every price row ranked by markdt within its (cmdty, tenor) series.
with np as(
select *, rank() over(partition by cmdty, tenor order by markdt) as r
from p_prices p1
where
1=1
)
-- x1: for each row, the average val over the @p1 ranks that end @o1 ranks before it,
-- within the same series. Rows without any rank in that window are dropped by the inner join.
, x1 as (
select s1.*, avg(s2.val) as avgval from np s1
inner join np s2
on s1.cmdty = s2.cmdty and s1.tenor = s2.tenor
and s2.r between s1.r - (@p1 - 1) - (@o1) and s1.r - (@o1)
group by s1.cmdty, s1.tenor, s1.markdt, s1.val, s1.r
)
-- A CTE chain must end in a statement that consumes it.
select cmdty, tenor, markdt, val, r, avgval
from x1
order by cmdty, tenor, markdt;
I'm not sure that your expected result (output) shows classic "simple moving (rolling) average" for 3 days. Because, for example, the first triple of numbers by definition gives:
ThreeDaysMovingAverage = (2.230 + 3.150 + 5.520) / 3 = 3.6333333
but you expect 4.360 and it's confusing.
Nevertheless, I suggest the following solution, which uses window-function AVG. This approach is much more efficient (clear and less resource-intensive) than SELF-JOIN introduced in other answers (and I'm surprised that no one has given a better solution).
-- Oracle-SQL dialect
-- Trailing average of clicks over the last N rows in date order, with N read from the
-- single-row PARAM subquery. The first N-1 rows are left NULL via the rownum guard.
with
data_table (dt, clicks) as (
    select date '2012-05-01', 2.230 from dual union all
    select date '2012-05-02', 3.150 from dual union all
    select date '2012-05-03', 5.520 from dual union all
    select date '2012-05-04', 1.330 from dual union all
    select date '2012-05-05', 2.260 from dual union all
    select date '2012-05-06', 3.540 from dual union all
    select date '2012-05-07', 2.330 from dual
),
param as (
    select 3 days from dual
)
select
    t.dt     as "Date",
    t.clicks as "Clicks",
    case
        when rownum >= p.days
        then avg(t.clicks) over (
                 order by t.dt
                 rows between p.days - 1 preceding and current row
             )
    end      as "3 day Moving Average"
from data_table t
cross join param p;
You see that AVG is wrapped with case when rownum >= p.days then to force NULLs in first rows, where "3 day Moving Average" is meaningless.
We can apply Joe Celko's "dirty" left outer join method (as cited above by Diego Scaravaggi) to answer the question as it was asked.
-- Sample data for the question: one click total per day, 2012-05-01 to 2012-05-07.
-- Created as a session temp table, which is what the # prefix denotes in T-SQL, so any
-- statement in the same session can read #ClicksTable.
create table #ClicksTable ([Date] date, Clicks int)
insert into #ClicksTable ([Date], Clicks)
select '2012-05-01', 2230 union all
select '2012-05-02', 3150 union all
select '2012-05-03', 5520 union all
select '2012-05-04', 1330 union all
select '2012-05-05', 2260 union all
select '2012-05-06', 3540 union all
select '2012-05-07', 2330
This query:
-- Per day: the day's clicks and the average over that day and the two days before it.
SELECT
T1.[Date],
T1.Clicks,
-- AVG ignores NULL values so we have to explicitly NULLify
-- the days when we don't have a full 3-day sample
CASE WHEN count(T2.[Date]) < 3 THEN NULL
ELSE AVG(T2.Clicks)
END AS [3-Day Moving Average]
FROM #ClicksTable T1
LEFT OUTER JOIN #ClicksTable T2
ON T2.[Date] BETWEEN DATEADD(d, -2, T1.[Date]) AND T1.[Date]
-- T1.Clicks is selected without an aggregate, so it has to be part of the grouping key
GROUP BY T1.[Date], T1.Clicks
ORDER BY T1.[Date]
Generates the requested output:
Date Clicks 3-Day Moving Average
2012-05-01 2,230
2012-05-02 3,150
2012-05-03 5,520 4,360
2012-05-04 1,330 3,330
2012-05-05 2,260 3,120
2012-05-06 3,540 3,320
2012-05-07 2,330 3,010

Finding overlapping dates

I have a set of Meeting rooms and meetings in that having start date and end Date. A set of meeting rooms belong to a building.
The meeting details are kept in MeetingDetail table having a startDate and endDate.
Now I want to fire a report between two time period say reportStartDate and reportEndDate, which finds me the time slots in which all the meeting rooms are booked for a given building
Table structure
MEETING_ROOM - ID, ROOMNAME, BUILDING_NO
MEETING_DETAIL - ID, MEETING_ROOM_ID, START_DATE, END_DATE
The query has to be fired for reportStartDate and REportEndDate
Just to clarify further, the aim is to find all the time slots in which all the meeting rooms were booked in a given time period of reportStartDate and reportEndDate
For SQL Server 2005+ you could try the following (see note at the end for mysql)
-- Time slices inside [@reportStartDate, @reportEndDate] during which every meeting room
-- of a building is booked. A statement takes a single WITH; further CTEs are comma-separated.
-- 1. Every instant strictly inside the report window at which a meeting starts or ends,
--    plus the two window boundaries (UNION removes duplicates).
WITH TIME_POINTS (POINT_P) AS
(SELECT START_DATE FROM MEETING_DETAIL
WHERE START_DATE > @reportStartDate AND START_DATE < @reportEndDate
UNION SELECT END_DATE FROM MEETING_DETAIL
WHERE END_DATE > @reportStartDate AND END_DATE < @reportEndDate
UNION SELECT @reportEndDate
UNION SELECT @reportStartDate),
-- 2. Each time point paired with the next later one: the smallest intervals that contain
--    no other marker. The last point has no successor and so starts no slice.
TIME_SLICE (START_T, END_T) AS
(SELECT A.POINT_P, MIN(B.POINT_P) FROM
TIME_POINTS A
INNER JOIN TIME_POINTS B ON B.POINT_P > A.POINT_P
GROUP BY A.POINT_P),
-- 3. Meetings that cover a slice completely, with the room's building.
SLICE_MEETINGS (START_T, END_T, MEETING_ROOM_ID, BUILDING_NO) AS
(SELECT A.START_T, A.END_T, B.MEETING_ROOM_ID, C.BUILDING_NO FROM
TIME_SLICE A
INNER JOIN MEETING_DETAIL B ON B.START_DATE <= A.START_T AND B.END_DATE >= A.END_T
INNER JOIN MEETING_ROOM C ON B.MEETING_ROOM_ID = C.ID),
-- 4. Number of booked rooms per building per slice; DISTINCT so that a room holding two
--    overlapping meetings in the same slice is counted once.
SLICE_COUNT (START_T, END_T, BUILDING_NO, ROOMS_C) AS
(SELECT START_T, END_T, BUILDING_NO, COUNT(DISTINCT MEETING_ROOM_ID) FROM
SLICE_MEETINGS
GROUP BY START_T, END_T, BUILDING_NO),
-- 5. Total number of rooms per building.
ROOMS_BUILDING (BUILDING_NO, ROOMS_C) AS
(SELECT BUILDING_NO, COUNT(ID) FROM
MEETING_ROOM
GROUP BY BUILDING_NO)
-- Keep the slices where the booked-room count equals the building's room count.
SELECT B.BUILDING_NO, A.START_T, A.END_T
FROM SLICE_COUNT A
INNER JOIN ROOMS_BUILDING B ON A.BUILDING_NO = B.BUILDING_NO AND B.ROOMS_C = A.ROOMS_C;
what it does is (each step corresponds to each CTE definition above)
Get all the time markers, i.e. end or start times
Get all time slices i.e. the smallest unit of time between which there is no other time marker (i.e. no meetings start in a time slice, it's either at the beginning or at the end of a time slice)
Get meetings for each time slice, so now you get something like
10.30 11.00 Room1 BuildingA
10.30 11.00 Room2 BuildingA
11.00 12.00 Room1 BuildingA
Get counts of rooms booked per building per time slice
Filter out timeslice-building combinations that match the number of rooms in each building
Edit
Since MySQL doesn't support the WITH clause, you'll have to construct a view for each of the five WITH clauses above. Everything else would remain the same.
After reading your comment, I think I understand the problem a bit better. As a first step I would generate a matrix of meeting rooms and time slots using cross join:
-- Matrix step: every distinct (start_date, end_date) pair found in the meeting details,
-- combined with every meeting room.
select *
from (
select distinct start_date
, end_date
from #meeting_detail
) ts
cross join
#meeting_room mr
Then, for each cell in the matrix, add meetings in that timeslot:
-- Continuation of the matrix query: attach any meeting in the same room whose interval
-- overlaps the time slot (each interval starts before the other one ends).
left join
#meeting_detail md
on mr.id = md.meeting_room_id
and ts.start_date < md.end_date
and md.start_date < ts.end_date
And then demand that there are no free rooms. For example, by saying that the left join must succeed for all rooms and time slots. A left join succeeds if any field is not null:
-- Continuation of the matrix query: per building and time slot, the CASE flags a room
-- with no matching meeting as 1; a maximum of 0 means no room in the group was free.
group by
mr.building_no
, ts.start_date
, ts.end_date
having max(case when md.meeting_room_id is null
then 1 else 0 end) = 0
Here's a complete working example. It's written for SQL Server, and the table variables (#meeting_detail) won't work in MySQL. But the report generating query should work in most databases:
set nocount on
-- Sample data lives in session temp tables, which is what the # prefix denotes in T-SQL.
create table #meeting_room (id int, roomname varchar(50),
building_no int)
create table #meeting_detail (meeting_room_id int,
start_date datetime, end_date datetime)
-- Building 6 has two rooms, building 7 has two rooms.
insert #meeting_room (id, roomname, building_no)
select 1, 'Kitchen', 6
union all select 2, 'Ballroom', 6
union all select 3, 'Conservatory', 7
union all select 4, 'Dining Room', 7
-- Room 4 is never booked; rooms 1, 2 and 3 are all booked from 10:00 to 11:00.
insert #meeting_detail (meeting_room_id, start_date, end_date)
select 1, '2010-08-01 9:00', '2010-08-01 10:00'
union all select 1, '2010-08-01 10:00', '2010-08-01 11:00'
union all select 2, '2010-08-01 10:00', '2010-08-01 11:00'
union all select 3, '2010-08-01 10:00', '2010-08-01 11:00'
-- Report: buildings and time slots in which every room has an overlapping meeting.
select mr.building_no
, ts.start_date
, ts.end_date
from (
-- candidate time slots: the distinct intervals that appear in the bookings
select distinct start_date
, end_date
from #meeting_detail
) ts
cross join
#meeting_room mr
-- a meeting in the same room whose interval overlaps the slot
left join
#meeting_detail md
on mr.id = md.meeting_room_id
and ts.start_date < md.end_date
and md.start_date < ts.end_date
group by
mr.building_no
, ts.start_date
, ts.end_date
-- keep the group only when no room is left without a matching meeting
having max(case when md.meeting_room_id is null
then 1 else 0 end) = 0
This prints:
building_no start end
6 2010-08-01 10:00:00.000 2010-08-01 11:00:00.000