Average sum with start and end date - sql

I'm working with SQL Teradata and I have a table as such:
cust_id start_dt end_dt amount is_current_y_n
12345 1/8/2018 7/8/2018 7044 N
12345 7/9/2018 7/10/2018 8142 N
12345 7/11/2018 7/13/2018 7643 N
12345 7/14/2018 7/14/2018 8630 N
12345 7/14/2018 7/19/2018 5597 N
12345 7/20/2018 12/31/9999 5680 Y
Another case that I've seen:
cust_id start_dt end_dt amount is_current_y_n
54321 1/1/2015 12/31/9999 8650 Y
I need to calculate with SQL the average amount for the past:
7 days
30 days
90 days
180 days
"Average", meaning that if during the past 7 days the amount changed from 1000 to 2000 in the 3rd day, the average should be:
(1000x3 + 2000x4)/7
I tried to join the table with a date table but it's not very efficient.
Is there any efficient way to achieve that?

It can probably be done via Recursive Common Table Expression Query.
To unfold those date ranges.
With the amounts for each date, the CTE can be joined back to the table to get those averages.
I couldn't test the SQL on a TeraData (don't have it).
But it should almost work on that RDBMS (probably)
-- Unfold every (start_dt, end_dt) range into one row per calendar date, then
-- average those daily amounts over trailing windows ending on each row's start_dt.
WITH RECURSIVE daily (cust_id, dt, amount, start_dt, end_dt) AS
(
    -- Seed: the first day of each range. A range longer than 4200 days (such as
    -- the open-ended 9999-12-31 rows) is capped at its own start date, so the
    -- recursion emits a single day for it.
    SELECT
        cust_id,
        start_dt AS dt,
        amount,
        start_dt,
        CASE
            WHEN end_dt - start_dt > 4200 THEN start_dt
            ELSE end_dt
        END
    FROM table_as_such

    UNION ALL

    -- Step: advance one day at a time until the (possibly capped) end date.
    SELECT
        cust_id,
        dt + 1,
        amount,
        start_dt,
        end_dt
    FROM daily
    WHERE dt < end_dt
)
SELECT
    t.cust_id,
    t.start_dt,
    -- BETWEEN is inclusive on both ends, so "start_dt - 7 .. start_dt" covers
    -- 8 calendar dates (likewise 31 / 91 / 181 for the wider windows).
    ROUND(AVG(CASE WHEN d.dt BETWEEN t.start_dt - 7   AND t.start_dt THEN d.amount END), 2) AS avg7,
    ROUND(AVG(CASE WHEN d.dt BETWEEN t.start_dt - 30  AND t.start_dt THEN d.amount END), 2) AS avg30,
    ROUND(AVG(CASE WHEN d.dt BETWEEN t.start_dt - 90  AND t.start_dt THEN d.amount END), 2) AS avg90,
    ROUND(AVG(CASE WHEN d.dt BETWEEN t.start_dt - 180 AND t.start_dt THEN d.amount END), 2) AS avg180
FROM table_as_such t
INNER JOIN daily d
    ON  d.cust_id = t.cust_id
    AND d.dt BETWEEN t.start_dt - 180 AND t.start_dt
GROUP BY t.cust_id, t.start_dt
ORDER BY t.cust_id, t.start_dt;
Sample Data Used:
-- Sample data for the recursive-CTE query: one row per (customer, validity range).
create table table_as_such (
    id int not null primary key,
    cust_id int,
    start_dt date,
    end_dt date,
    amount int,
    is_current_y_n char(1)
);
-- Column lists are spelled out so the inserts keep working if the table gains
-- or reorders columns.
insert into table_as_such (id, cust_id, start_dt, end_dt, amount, is_current_y_n) values (1,12345,'2018-01-08','2018-07-08',7044,'N');
insert into table_as_such (id, cust_id, start_dt, end_dt, amount, is_current_y_n) values (2,12345,'2018-07-09','2018-07-10',8142,'N');
insert into table_as_such (id, cust_id, start_dt, end_dt, amount, is_current_y_n) values (3,12345,'2018-07-11','2018-07-13',7643,'N');
insert into table_as_such (id, cust_id, start_dt, end_dt, amount, is_current_y_n) values (4,12345,'2018-07-14','2018-07-14',8630,'N');
insert into table_as_such (id, cust_id, start_dt, end_dt, amount, is_current_y_n) values (5,12345,'2018-07-14','2018-07-19',5597,'N');
-- 9999-12-31 marks the open-ended (current) row.
insert into table_as_such (id, cust_id, start_dt, end_dt, amount, is_current_y_n) values (6,12345,'2018-07-20','9999-12-31',5680,'Y');

Maybe the temporal features of Teradata can help you in this case, thanks to the PERIOD data type and the EXPAND ON clause.
check this example for this feature and your intention:
-- Demo fixture: amounts per user, each valid from startDT to endDT.
database demo;
create table demoDateExpand (
myID integer
,myUser VARCHAR(100)
,myAmount DECIMAL(10,2)
,startDT DATE
,endDT DATE
) no primary index;
-- Explicit column lists keep the inserts independent of column order.
insert into demoDateExpand (myID, myUser, myAmount, startDT, endDT) values (1, 'User01', 2.5, '2018-01-01', '2018-01-05');
insert into demoDateExpand (myID, myUser, myAmount, startDT, endDT) values (2, 'User01', 3.0, '2018-01-08', '2018-01-15');
insert into demoDateExpand (myID, myUser, myAmount, startDT, endDT) values (3, 'User01', 1.5, '2018-01-11', '2018-01-25');
insert into demoDateExpand (myID, myUser, myAmount, startDT, endDT) values (4, 'User02', 2.0, '2018-01-01', '2018-01-15');
insert into demoDateExpand (myID, myUser, myAmount, startDT, endDT) values (5, 'User02', 2.5, '2018-01-05', '2018-01-25');
insert into demoDateExpand (myID, myUser, myAmount, startDT, endDT) values (6, 'User02', 4.5, '2018-01-26', '2018-01-27');
insert into demoDateExpand (myID, myUser, myAmount, startDT, endDT) values (7, 'User03', 1.0, '2018-01-10', '2018-01-15');
insert into demoDateExpand (myID, myUser, myAmount, startDT, endDT) values (8, 'User03', 3.5, '2018-01-16', '2018-01-25');
-- Show each row together with the PERIOD value built from its two date columns.
SELECT
    myID,
    myUser,
    myAmount,
    startDT,
    endDT,
    PERIOD(startDT, endDT)
FROM demoDateExpand
;
-- Expand each row's period into one output row per day; BEGIN(myDate) is the
-- day that the expanded row stands for.
-- NOTE(review): Teradata PERIOD values exclude their ending bound, so endDT
-- itself is presumably not emitted — confirm if endDT is meant to be inclusive.
SELECT
    myID,
    myUser,
    myAmount,
    BEGIN(myDate)
FROM demoDateExpand
EXPAND ON PERIOD(startDT, endDT) AS myDate BY ANCHOR DAY
ORDER BY myID, myDate
;

I managed to create my own query with the help of a table with dates:
2017-07-11
2017-07-12
...
My query is:
-- Day-weighted average amount per customer: every calendar day in the window
-- is matched to the range row that was valid on that day, and the daily
-- amounts are averaged over several sub-windows.
-- NOTE(review): the literal window bounds are kept as posted; for example
-- 2017-12-24..2018-01-01 spans 9 calendar dates, not 7 — confirm the bounds
-- against the intended 7/30/90/180-day windows.
SELECT
    c.cust_id,
    AVG(CASE WHEN c.cal_dt BETWEEN '2017-07-01' AND '2018-01-01' THEN c.amount END) AS avg_180,
    AVG(CASE WHEN c.cal_dt BETWEEN '2017-10-01' AND '2018-01-01' THEN c.amount END) AS avg_90,
    AVG(CASE WHEN c.cal_dt BETWEEN '2017-12-01' AND '2018-01-01' THEN c.amount END) AS avg_30,
    AVG(CASE WHEN c.cal_dt BETWEEN '2017-12-24' AND '2018-01-01' THEN c.amount END) AS avg_7
FROM
(
    SELECT
        b.cust_id,
        a.cal_dt,
        b.amount
    FROM
    (
        -- Only the calendar days inside the widest window are needed.
        SELECT cal_dt
        FROM CALENDAR_DAILY_TABLE
        WHERE cal_dt BETWEEN '2017-07-01' AND '2018-01-01'
    ) AS a
    INNER JOIN
    (
        -- Keep every range that overlaps the window. The earlier filter
        -- (start_dt > window start OR end_dt = '9999-12-31') dropped closed
        -- ranges that began before the window but were still valid inside it.
        SELECT cust_id, start_dt, end_dt, amount
        FROM MY_TABLE
        WHERE end_dt >= '2017-07-01'
          AND start_dt <= '2018-01-01'
    ) AS b
        ON b.start_dt <= a.cal_dt AND a.cal_dt <= b.end_dt
) AS c
WHERE c.cust_id = '12345'
GROUP BY c.cust_id
The result is:
cust_id avg_180 avg_90 avg_30 avg_7
12345 1.34 1.34 1.34 1.34
Thanks!

Related

Select the first row returned by CASE WHEN

This is for SQL Server. I have a query that's trying to find the total balance in an account at a certain point in time (30 days before most recent transaction date, 90 days before most recent transaction date, etc). The table I'm querying keeps 'snapshots' of account balances over time with the amount and the time in which the transaction occurred.
PERSON
TOTALBALANCE
RCNTTRANS
Sarah
$5000
6/1/2021
Sarah
$4500
9/29/2021
Sarah
$7000
11/30/2021
Joe
$90
1/5/2020
Joe
$8000
1/17/2020
Joe
$2100
2/28/2021
I figured I could use a case statement to get the total balance at any date less than the most recent transaction date minus however many days away I'm looking for. However, this returns a row for every previous RCNTTRANS date. Is there a way to select only the first row that's returned?
-- Asker's attempt: per row, emit the balance when the row is at least
-- 30 / 90 / 180 days older than RCNTTRANS, otherwise 0.
-- The stray comma that followed SELECT (a syntax error) has been removed.
-- NOTE(review): T.TRANSACTIONDATE is not among the columns listed for the
-- table in the question — confirm where it comes from.
SELECT
T.PERSON
,CASE WHEN T.TRANSACTIONDATE <= DATEADD(DAY, -30, T.RCNTTRANS) THEN T.TOTALBALANCE ELSE 0 END AS TEST
,CASE WHEN T.TRANSACTIONDATE <= DATEADD(DAY, -90, T.RCNTTRANS) THEN T.TOTALBALANCE ELSE 0 END AS TEST2
,CASE WHEN T.TRANSACTIONDATE <= DATEADD(DAY, -180, T.RCNTTRANS) THEN T.TOTALBALANCE ELSE 0 END AS TEST3
FROM #TEMP T
I tried COALESCE, but that didn't seem to work. I also tried FIRST_VALUE, but that didn't seem to work either. I could possibly have been using them incorrectly, though.
Expected Results
PERSON
TEST1
TEST2
TEST3
Sarah
$4500
$5000
NULL
Joe
$8000
NULL
NULL
Aggregate and SUM each CASE WHEN, and within date ranges.
-- Fixture: balance snapshots per person, keyed by the snapshot date.
CREATE TABLE #TEMP (
    PERSON       VARCHAR(30),
    TOTALBALANCE MONEY,
    RCNTTRANS    DATE
)

INSERT INTO #TEMP (PERSON, TOTALBALANCE, RCNTTRANS)
VALUES
    ('Sarah', $5000, '2021-11-29'),
    ('Sarah', $4500, '2021-12-01'),
    ('Sarah', $7000, '2021-12-30'),
    ('Joe',   $90,   '2020-08-28'),
    ('Joe',   $8000, '2021-02-01'),
    ('Joe',   $2100, '2021-02-28');
-- For each person, sum the balances whose snapshot date falls into one of three
-- look-back bands measured from that person's most recent snapshot:
--   BALANCE1: [latest - 30 days, latest)
--   BALANCE2: [latest - 60 days, latest - 30 days)
--   BALANCE3: [latest - 180 days, latest - 60 days)
SELECT
    bal.PERSON,
    SUM(CASE
            WHEN bal.RCNTTRANS >= DATEADD(DAY, -30, latest.TRANSACTIONDATE)
             AND bal.RCNTTRANS <  latest.TRANSACTIONDATE
            THEN bal.TOTALBALANCE
        END) AS BALANCE1,
    SUM(CASE
            WHEN bal.RCNTTRANS >= DATEADD(DAY, -60, latest.TRANSACTIONDATE)
             AND bal.RCNTTRANS <  DATEADD(DAY, -30, latest.TRANSACTIONDATE)
            THEN bal.TOTALBALANCE
        END) AS BALANCE2,
    SUM(CASE
            WHEN bal.RCNTTRANS >= DATEADD(DAY, -180, latest.TRANSACTIONDATE)
             AND bal.RCNTTRANS <  DATEADD(DAY, -60, latest.TRANSACTIONDATE)
            THEN bal.TOTALBALANCE
        END) AS BALANCE3
FROM #TEMP AS bal
LEFT JOIN (
    -- Most recent snapshot date per person.
    SELECT PERSON, MAX(RCNTTRANS) AS TRANSACTIONDATE
    FROM #TEMP
    GROUP BY PERSON
) AS latest
    ON latest.PERSON = bal.PERSON
GROUP BY bal.PERSON
ORDER BY bal.PERSON DESC;
PERSON
BALANCE1
BALANCE2
BALANCE3
Sarah
4500.0000
5000.0000
null
Joe
8000.0000
null
null
db<>fiddle here
-- Rebuild the #TMP fixture from scratch so the script can be re-run in one session.
IF OBJECT_ID('tempdb..#TMP') IS NOT NULL
    DROP TABLE #TMP;

CREATE TABLE #TMP (
    PERSON       VARCHAR(30),
    TOTALBALANCE MONEY,
    RCNTTRANS    DATE
)

INSERT INTO #TMP (PERSON, TOTALBALANCE, RCNTTRANS)
VALUES
    ('Sarah', $5000, '2021-01-06'),
    ('Sarah', $4500, '2021-09-29'),
    ('Sarah', $7000, '2021-11-30'),
    ('Joe',   $90,   '2020-01-05'),
    ('Joe',   $8000, '2020-01-17'),
    ('Joe',   $2100, '2021-02-28');
-- Reference date the look-back periods are measured from.
-- T-SQL local variables take the @ sigil; a # prefix denotes a temp table and
-- does not parse in DECLARE.
DECLARE @TRANSACTIONDATE DATE = DATEFROMPARTS(2022,1,29); -- FOR TESTING
-- Step 1: tag every snapshot with its (negative) day offset from the reference
-- date and with the oldest period threshold it satisfies.
WITH CTE_BasicData
AS(
SELECT
T.PERSON,
T.TOTALBALANCE,
T.RCNTTRANS,
DATEDIFF(day, @TRANSACTIONDATE, T.RCNTTRANS) AS [DAYS],
CASE
WHEN DATEDIFF(day, @TRANSACTIONDATE, T.RCNTTRANS) <= -180 THEN '180 days'
WHEN DATEDIFF(day, @TRANSACTIONDATE, T.RCNTTRANS) <= -90 THEN '90 days'
WHEN DATEDIFF(day, @TRANSACTIONDATE, T.RCNTTRANS) <= -30 THEN '30 days'
ELSE NULL
END AS [Period]
FROM #TMP AS T
)
-- Step 2: within each person/period, rank snapshots so that the one closest to
-- the reference date (largest DAYS value) gets rank 1.
,CTE_Data
AS(
SELECT
d.PERSON,
d.TOTALBALANCE,
d.RCNTTRANS,
d.[DAYS],
d.[Period],
DENSE_RANK() OVER(PARTITION BY d.PERSON, d.[Period] ORDER BY d.[DAYS] DESC) AS [Ranking]
FROM CTE_BasicData AS d
)
-- Step 3: keep only the rank-1 snapshots and pivot the period labels into columns.
,CTE_Pivot
AS(
SELECT
P.[PERSON]
,P.[30 days]
,P.[90 days]
,P.[180 days]
FROM (
SELECT
d.PERSON,
d.TOTALBALANCE,
d.RCNTTRANS,
d.[DAYS],
d.[Period]
FROM CTE_Data AS d
WHERE (1=1)
AND d.Ranking = 1
) AS D
PIVOT(SUM(D.TOTALBALANCE) FOR D.[Period] IN([30 days],[90 days],[180 days])) AS P
)
--SELECT * FROM CTE_Data ORDER BY PERSON, RCNTTRANS; RETURN;
--SELECT * FROM CTE_Pivot ORDER BY PERSON; RETURN;
-- Step 4: collapse the pivoted rows to one row per person.
SELECT
d.[PERSON]
,SUM(d.[30 days]) AS [30 days]
,SUM(d.[90 days]) AS [90 days]
,SUM(d.[180 days]) AS [180 days]
FROM CTE_Pivot AS d
GROUP BY
d.[PERSON]

get most frequent values in every month in 2021

Trying to get the most frequent values in every month
from tables
inspection table :
-- One inspection of restaurant licence `lno` on date `idate`, carried out by
-- inspector `iid`; `passed` is restricted to 0 or 1.
CREATE TABLE inspection (
    lno        INT,
    idate      DATE,
    iid        INT,
    stime      TIME,
    passed     INT,
    violations VARCHAR(100),
    CHECK (passed IN (0, 1)),
    PRIMARY KEY (lno, idate),
    FOREIGN KEY (lno) REFERENCES restaurant
);
The following constraint can be ignored: FOREIGN KEY (lno) REFERENCES restaurant
data :
-- Sample inspections. Dates are written as ISO yyyy-mm-dd so they load the same
-- way under any DateStyle; day.month.year literals such as '24.3.2021' are
-- rejected or misread unless the session uses a DMY style.
-- The explicit column list keeps the insert independent of column order.
INSERT INTO inspection (lno, idate, iid, stime, passed, violations) VALUES
(234, '2020-01-06', 333, '16:00', 1, NULL),
(123, '2020-02-07', 333, '12:15', 0, 'rats'),
(234, '2020-02-07', 333, '17:00', 0, 'Bugs'),
(456, '2021-03-01', 222, '20:00', 1, NULL),
(234, '2021-03-10', 333, '16:00', 1, NULL),
(567, '2021-03-24', 333, '17:00', 1, NULL),
(345, '2021-04-09', 222, '18:00', 0, 'Rats'),
(345, '2021-04-30', 222, '18:00', 1, NULL),
(123, '2021-05-11', 111, '19:40', 0, 'Mold'),
(567, '2021-05-15', 111, '19:00', 1, NULL),
(345, '2021-05-17', 222, '19:00', 1, NULL),
(456, '2021-05-19', 111, '17:00', 0, 'Bats'),
(123, '2021-06-13', 222, '13:00', 1, NULL),
(456, '2021-06-16', 333, '21:00', 0, 'Mold');
query :
-- Asker's attempt: for inspections from the current year onwards, return the
-- largest inspector id seen in each month. MAX(iid) picks the highest id, not
-- the most frequent one, which is the problem the question describes.
SELECT
    date_part('month', idate),
    max(iid)
FROM inspector
NATURAL JOIN inspection
WHERE date_part('year', idate) >= date_part('year', current_date)
GROUP BY
    date_part('month', idate)
output:
month
id
3
333
4
222
5
222
6
333
expected output -
month
id
3
333
4
222
5
111
6
222
6
333
IMHO you don't need the inspector table for this calculation. A query like this would do:
-- Most frequent inspector(s) per month of the current year; ties are all returned.
WITH per_inspector (month, iid, cnt) AS
(
    -- Number of inspections per (month, inspector).
    SELECT
        date_part('month', idate),
        iid,
        count(*)
    FROM inspection
    WHERE date_part('year', idate) = date_part('year', current_date)
    GROUP BY
        date_part('month', idate),
        iid
),
per_month (month, maxCnt) AS
(
    -- Highest per-inspector count reached in each month.
    SELECT
        month,
        max(cnt)
    FROM per_inspector
    GROUP BY month
)
SELECT
    pi.month,
    pi.iid
FROM per_inspector AS pi
INNER JOIN per_month AS pm
    ON  pi.month = pm.month
    AND pi.cnt = pm.maxCnt
ORDER BY
    pi.month,
    pi.iid;
Here is Dbfiddle demo link.
Here is an approach without using joins, with the assistance of DATE_PART and RANK:
-- Most frequent inspector(s) per month of 2021, using RANK instead of a self-join.
WITH monthly_counts AS (
    -- Inspections per (month, inspector) in 2021.
    SELECT
        DATE_PART('MONTH', idate) AS month,
        iid,
        COUNT(iid) AS cnt
    FROM inspection
    WHERE DATE_PART('YEAR', idate) = 2021
    GROUP BY
        DATE_PART('MONTH', idate),
        iid
),
monthly_rank AS (
    -- RANK gives every inspector tied for the top count rank 1.
    SELECT
        month,
        iid,
        RANK() OVER (PARTITION BY month ORDER BY cnt DESC) AS rnk
    FROM monthly_counts
)
SELECT
    month,
    iid
FROM monthly_rank
WHERE rnk = 1
DB Fiddle

SQL find average time difference between rows for a given category

I browsed SO but could not quite find the exact answer or maybe it was for a different language.
Let's say I have a table, where each row is a record of a trade:
trade_id customer trade_date
1 A 2013-05-01 00:00:00
2 B 2013-05-01 10:00:00
3 A 2013-05-02 00:00:00
4 A 2013-05-05 00:00:00
5 B 2013-05-06 12:00:00
I would like to have the average time between trades, in days or fraction of days, for each customer, and the number of days since last trade. So for instance for customer A, time between trades 1 and 3 is 1 day and between trades 3 and 4 is 3 days, for an average of 2. So the end table would look like something like this (assuming today it's the 2013-05-10):
customer avg_time_btw_trades time_since_last_trade
A 2.0 5.0
B 5.08 3.5
If a customer has only got 1 trade I guess NULL is fine as output.
Not even sure SQL is the best way to do this (I am working with SQL server), but any help is appreciated!
-- Per customer: mean gap between consecutive trades and time since the last
-- trade, both in (fractional) days.
SELECT
    customer,
    -- Mean gap = (last trade - first trade) / (number of trades - 1).
    -- Scaling to days with 86400.0 comes first so the division by the gap count
    -- is done in decimal arithmetic rather than truncating integer arithmetic.
    -- NULLIF turns a single-trade customer into NULL instead of dividing by zero.
    DATEDIFF(second, MIN(trade_date), MAX(trade_date)) / 86400.0
        / (NULLIF(COUNT(*), 1) - 1)                     AS avg_time_btw_trades,
    DATEDIFF(second, MAX(trade_date), GETDATE()) / 86400.0 AS time_since_last_trade
FROM
    yourTable
GROUP BY
    customer
http://sqlfiddle.com/#!6/eb46e/7
EDIT: Added final field that I didn't notice, apologies.
The following SQL script uses your data and gives the expected results.
-- Table variable holding the sample trades. T-SQL variables, including table
-- variables, take the @ sigil; DECLARE #name does not parse.
DECLARE @temp TABLE
( trade_id INT,
customer CHAR(1),
trade_date DATETIME );
INSERT INTO @temp (trade_id, customer, trade_date) VALUES (1, 'A', '20130501');
INSERT INTO @temp (trade_id, customer, trade_date) VALUES (2, 'B', '20130501 10:00');
INSERT INTO @temp (trade_id, customer, trade_date) VALUES (3, 'A', '20130502');
INSERT INTO @temp (trade_id, customer, trade_date) VALUES (4, 'A', '20130505');
INSERT INTO @temp (trade_id, customer, trade_date) VALUES (5, 'B', '20130506 12:00');
-- "Now" is pinned to the date assumed in the question so the output is reproducible.
DECLARE @getdate DATETIME
-- SET @getdate = getdate();
SET @getdate = '20130510';
SELECT s.customer
, AVG(s.days_btw_trades) AS avg_time_between_trades
, CAST(DATEDIFF(hour, MAX(s.trade_date), @getdate) AS float)
/ 24.0 AS time_since_last_trade
FROM (
-- Pair every trade with the same customer's immediately preceding trade; the
-- first trade has no predecessor, so its gap is NULL and AVG ignores it.
SELECT CAST(DATEDIFF(HOUR, t2.trade_date, t.trade_date) AS float)
/ 24.0 AS days_btw_trades
, t.customer
, t.trade_date
FROM @temp t
LEFT JOIN @temp t2 ON t2.customer = t.customer
AND t2.trade_date = ( SELECT MAX(t3.trade_date)
FROM @temp t3
WHERE t3.customer = t.customer
AND t3.trade_date < t.trade_date)
) s
GROUP BY s.customer
You need a date difference between every trade and average them.
-- MySQL: average whole-day gap between consecutive trades per customer, and
-- whole days since the customer's latest trade.
SELECT
    a.customer,
    AVG(DATEDIFF(a.trade_date, b.trade_date)) AS avg_days_btw_trades,
    DATEDIFF(NOW(), MAX(a.trade_date))        AS days_since_last_trade
FROM yourTable AS a
-- Explicit join instead of a comma join: the pairing condition stays attached
-- to the join it belongs to.
INNER JOIN yourTable AS b
    ON  b.customer = a.customer
    -- b is the one earlier trade for every trade a
    AND b.trade_date = (
        SELECT MAX(c.trade_date)
        FROM yourTable AS c
        WHERE c.customer = a.customer
          AND c.trade_date < a.trade_date
    )
GROUP BY a.customer
Just for grins I added a solution that uses CTEs. You could probably use a temp table if the first query is too large. I used @MatBailie's creation script for the table:
-- Fixture: one row per trade, indexed by (customer_id, trade_date).
CREATE TABLE customer_trades (
    id          INT IDENTITY(1,1),
    customer_id INT,
    trade_date  DATETIME,
    PRIMARY KEY (id),
    INDEX ix_user_trades (customer_id, trade_date)
)

INSERT INTO customer_trades (customer_id, trade_date)
VALUES
    (1, '2013-05-01 00:00:00'),
    (2, '2013-05-01 10:00:00'),
    (1, '2013-05-02 00:00:00'),
    (1, '2013-05-05 00:00:00'),
    (2, '2013-05-06 12:00:00')
;
-- Per customer: AV = average gap between consecutive trades, Curr_Trade = time
-- since the most recent trade, both in hours.
;WITH CTE AS (
    SELECT
        customer_id,
        trade_date,
        -- Hours until the customer's next trade. The latest trade has no
        -- successor, so LEAD yields NULL and the row is ignored by AVG; the gap
        -- to "now" is no longer mixed into the between-trades average.
        DATEDIFF(hour, trade_date,
                 LEAD(trade_date, 1) OVER (PARTITION BY customer_id ORDER BY trade_date)) AS Trade_diff
    FROM customer_trades
)
SELECT
    customer_id,
    -- Cast so AVG does not truncate to a whole number of hours.
    AVG(CAST(Trade_diff AS float)) AS AV,
    -- Computed from the latest trade directly. LAST_VALUE with the default
    -- window frame returns the current row's value, so MAX over it produced
    -- the largest gap rather than the most recent one.
    DATEDIFF(hour, MAX(trade_date), GETDATE()) AS Curr_Trade
FROM CTE
GROUP BY customer_id

Can anyone help me solve this? I tried my best but could not solve it.

ItemName Price CreatedDateTime
New Card 50.00 2014-05-26 19:17:09.987
Recharge 110.00 2014-05-26 19:17:12.427
Promo 90.00 2014-05-27 16:17:12.427
Membership 70.00 2014-05-27 16:17:12.427
New Card 50.00 2014-05-26 19:20:09.987
Output: I need a query that sums the sales of each hour and shows, in the
BreakDownOfSale column, the item that has the maximum sale in that
hour.
Hour SaleAmount BreakDownOfSale
19 210 Recharge
16 160 Promo
This should do it
-- Fixture: one row per sale.
create table #t
(
ItemName varchar(50),
Price decimal(18,2),
CreatedDateTime datetime
);
-- Force year-month-day parsing for the datetime literals below.
set dateformat ymd;
-- Explicit column lists keep the inserts independent of column order.
insert into #t (ItemName, Price, CreatedDateTime) values('New Card', 50.00, '2014-05-26 19:17:09.987');
insert into #t (ItemName, Price, CreatedDateTime) values('Recharge', 110.00, '2014-05-26 19:17:12.427');
insert into #t (ItemName, Price, CreatedDateTime) values('Promo', 90.00, '2014-05-27 16:17:12.427');
insert into #t (ItemName, Price, CreatedDateTime) values('Membership', 70.00, '2014-05-27 16:17:12.427');
insert into #t (ItemName, Price, CreatedDateTime) values('New Card', 50.00, '2014-05-26 19:20:09.987');
-- Per hour of day: total sale amount plus the highest-priced single sale.
with hourly as
(
    select
        datepart(hh, CreatedDateTime) as [Hour],
        ItemName,
        Price,
        -- Total of all sales falling in the same hour of day.
        sum(Price) over (partition by datepart(hh, CreatedDateTime)) as SaleAmount,
        -- 1 = most expensive sale within that hour of day.
        row_number() over (partition by datepart(hh, CreatedDateTime)
                           order by Price desc) as rn
    from #t
)
select
    Hour,
    SaleAmount,
    ItemName
from hourly
where rn = 1
Though I am not clear about the question, based on your desired output you may use the query below.
-- Total price per item and hour of day. "WHERE BY" was a syntax error; the
-- aggregate needs GROUP BY on the non-aggregated columns.
SELECT
    DATEPART(HOUR, CreatedDateTime) AS Hour,
    SUM(Price) AS Price,
    ItemName AS BreakDownOfSale
FROM TableName
GROUP BY ItemName, DATEPART(HOUR, CreatedDateTime)
Replace table name and column name with the actual one.
Hope this helps!
Here is the sample query.
You can use SQL Server Windows functions to get the result you need.
-- Table variable holding the sample sales. Table variables take the @ sigil;
-- DECLARE #Table does not parse.
DECLARE @Table TABLE
(
ItemName NVARCHAR(40),
Price DECIMAL(10,2),
CreatedDatetime DATETIME
)
-- Fill table.
INSERT INTO @Table
( ItemName, Price, CreatedDatetime )
VALUES
( N'New Card' , 50.00 , '2014-05-26 19:17:09.987' ),
( N'Recharge' , 110.00 , '2014-05-26 19:17:12.427' ) ,
( N'Promo' , 90.00 , '2014-05-27 16:17:12.427' ) ,
( N'Membership' , 70.00 , '2014-05-27 16:17:12.427' ) ,
( N'New Card' , 50.00 , '2014-05-26 19:20:09.987' )
-- Check record(s).
SELECT * FROM @Table
-- Get record(s) in required way.
;WITH T1 AS
(
SELECT
DATEPART(HOUR, T.CreatedDatetime) AS Hour,
CONVERT(DATE, T.CreatedDatetime) AS Date,
T.ItemName AS BreakDownOfSales,
-- Date and hour both will give unique record(s)
SUM(Price) OVER (PARTITION BY CONVERT(DATE, T.CreatedDatetime), DATEPART(HOUR, CreatedDateTime)) AS SaleAmount,
-- RN = 1 marks the highest-priced sale within each date/hour bucket.
ROW_NUMBER() OVER(PARTITION BY CONVERT(DATE, T.CreatedDatetime), DATEPART(HOUR, T.CreatedDatetime) ORDER BY T.Price DESC) AS RN
FROM
@Table T
)
SELECT
T1.Date ,
T1.Hour ,
T1.SaleAmount,
T1.BreakDownOfSales
FROM
T1
WHERE T1. RN = 1
ORDER BY
T1.Hour
Check this simple solution, Please convert it to SQL Server Query.
This will give you perfect result even if you have multiple date data.
-- MySQL: per calendar date and hour, the total sale amount and the item with
-- the highest total sales in that same date/hour.
SELECT
    HOUR(ot.CreatedDateTime) AS sale_hour,
    SUM(ot.Price)            AS sale_amount,
    (
        SELECT it.itemname
        FROM t AS it
        WHERE HOUR(it.CreatedDateTime) = HOUR(ot.CreatedDateTime)
          AND DATE(it.CreatedDateTime) = DATE(ot.CreatedDateTime)
        GROUP BY it.itemname
        -- Rank items by their summed sales; ordering by the bare price column
        -- is not defined once rows are grouped by item.
        ORDER BY SUM(it.price) DESC
        LIMIT 1
    ) AS g
FROM t AS ot
-- Group by date as well as hour so the same hour on different days is not
-- merged; the subquery already correlates on both.
GROUP BY DATE(ot.CreatedDateTime), HOUR(ot.CreatedDateTime);

How to group value within a certain date range or every certain days in SQL server

For example table would be:
Customer OrderDate OrderAmt
-------- ---------- ---------
A1 20140101 920.00
A2 20140111 301.00
A2 20140123 530.00
A1 20140109 800.00
A3 20140110 500.00
A1 20140115 783.00
A3 20140217 500.00
A1 20140219 1650.00
A1 20140225 780.00
...
A3 20140901 637.00
I want to group them and calculate the sum(OrderAmt) within every 20 days and group by customer start from 20140101.
For what it's worth, you can accomplish what you describe with a pretty simple DATEDIFF() / GROUP BY operation, as below: whether or not that is actually what you want might be another question. I suspect that the DateBucket calculation might actually be something else ...
-- Fixture: orders with the date stored as a yyyymmdd string.
CREATE TABLE #tmpCustomer (Customer VARCHAR(2), OrderDate VARCHAR(10), OrderAmt DECIMAL(6,2))
-- UNION ALL keeps every row; plain UNION would silently drop a repeated
-- (customer, date, amount) order. Dates are quoted because the column is VARCHAR.
INSERT INTO #tmpCustomer (Customer, OrderDate, OrderAmt)
SELECT 'A1', '20140101', 920.00 UNION ALL
SELECT 'A2', '20140111', 301.00 UNION ALL
SELECT 'A2', '20140123', 530.00 UNION ALL
SELECT 'A1', '20140109', 800.00 UNION ALL
SELECT 'A3', '20140110', 500.00 UNION ALL
SELECT 'A1', '20140115', 783.00 UNION ALL
SELECT 'A3', '20140217', 500.00 UNION ALL
SELECT 'A1', '20140219', 1650.00 UNION ALL
SELECT 'A1', '20140225', 780.00 UNION ALL
SELECT 'A3', '20140901', 637.00
-- Bucket n covers days [20*(n-1), 20*n) counted from 2014-01-01 (integer
-- division). The anchor is written as yyyymmdd so it does not depend on the
-- session's DATEFORMAT or language.
SELECT
Customer,
(DATEDIFF(day, '20140101', CAST(OrderDate AS DATE)) / 20) + 1 AS DateBucket,
SUM(OrderAmt) SumOrderAmt
FROM #tmpCustomer
GROUP BY Customer, (DATEDIFF(day, '20140101', CAST(OrderDate AS DATE)) / 20) + 1
ORDER BY Customer, DateBucket
You need to do two things:
(1) Create some sort of guide to hold the '20 day groups' information. A recursive CTE does this pretty well, and
(2) Recast that varchar date as an actual date for comparison purposes.
Then it's just joining the order data into that daterange grouping and summing the order amounts.
-------------------------
-- Here i'm just recreating your example
-------------------------
-- T-SQL variables (scalar and table) take the @ sigil; DECLARE #name does not parse.
DECLARE @customerOrder TABLE (Customer varchar(2), OrderDate varchar(8), OrderAmt decimal(8,2))
INSERT INTO @customerOrder (Customer, OrderDate, OrderAmt)
VALUES
('A1', '20140101', 920.00),
('A2', '20140111', 301.00),
('A2', '20140123', 530.00),
('A1', '20140109', 800.00),
('A3', '20140110', 500.00),
('A1', '20140115', 783.00),
('A3', '20140217', 500.00),
('A1', '20140219', 1650.00),
('A1', '20140225', 780.00),
('A3', '20140901', 637.00)
-------------------------
-- Set up a table that lists off 20 day periods starting from 1/1/2014
-------------------------
DECLARE @startDate datetime, @endDate datetime;
SET @startDate = {d N'2014-01-01'};
SET @endDate = {d N'2014-12-31'};
WITH [dates] ([Sequence], [startDate], [maxExcludedDate]) AS
(SELECT 1 AS [Sequence]
,@startDate AS [startDate]
,DATEADD(d, 20, @startDate) AS [maxExcludedDate]
UNION ALL
-- [startDate] here is the previous period's start, so +20 is the new start and
-- +40 is the new (exclusive) end.
SELECT Sequence + 1 AS Sequence
,DATEADD(d, 20, [startDate]) AS [startDate]
,DATEADD(d, 40, [startDate]) AS [maxExcludedDate]
FROM [dates]
WHERE [startDate] < @endDate
)
, dateFrame AS
(
SELECT
[startDate]
,[maxExcludedDate]
FROM [dates]
)
-------------------------
-- Set up a table that holds the orderDates as actual dates
-------------------------
, castData AS
(
SELECT
cast(orderdate as datetime) castDate
,OrderAmt
FROM @customerOrder
)
-------------------------
-- JOIN and sum.
-------------------------
-- LEFT JOIN keeps periods without orders (their sum is NULL); each period is
-- the half-open range [startDate, maxExcludedDate).
SELECT
[startDate]
, Sum(OrderAmt) perodAmt
FROM
dateFrame df
left join castData cd
on cd.castDate >= df.startDate
and cd.castDate < df.maxExcludedDate
GROUP BY
[startDate]
ORDER by
[startDate]
This assumes that OrderDate is a numeric field (not varchar). I'm also assuming that you don't need to go much more than a year into the future. If you want the gaps shown, keep the left join; if you don't want the gaps, make it an inner join. (You can also turn the hardcoded date into a variable for where to start; I just kept the 20140101 that you mentioned.)
-- Sum order amounts per customer in consecutive 20-day buckets that start on
-- 2014-01-01, with bucket bounds expressed as yyyymmdd integers.
with Numbers as
(
    -- 0..20 via recursion: 21 bucket indexes.
    select 0 as Num
    union all
    select Num + 1
    from Numbers
    where Num + 1 <= 20
)
, DateList as
(
    select
        Year(b.bucket_first) * 10000 + Month(b.bucket_first) * 100 + Day(b.bucket_first) as Groupingdatemin,
        Year(b.bucket_last)  * 10000 + Month(b.bucket_last)  * 100 + Day(b.bucket_last)  as Groupingdatemax
    from Numbers
    cross apply (
        -- First and last (inclusive) calendar day of bucket Num.
        select
            DateAdd(dd, 20 * (Num), Cast('2014-01-01' as date))         as bucket_first,
            DateAdd(dd, 20 * (Num + 1) - 1, Cast('2014-01-01' as date)) as bucket_last
    ) b
)
select
    Customer,
    sum(orderamt),
    Groupingdatemin,
    Groupingdatemax
from DateList d
left join <yourtable> t
    on t.orderdate between d.Groupingdatemin and d.Groupingdatemax
group by
    customer,
    Groupingdatemin,
    Groupingdatemax