Select running balance from table credit debit columns - sql

I have a SQL Server 2008 table, and I need to select a running balance from it
TransDate Credit Debit Datasource
------------------------------------------
2014-01-01 5000 NULL 3
2014-01-07 NULL 2000 3
2014-01-11 5000 NULL 3
2014-02-03 6000 NULL 4
2014-02-06 NULL 4000 4
2014-02-11 3000 NULL 4
2014-02-21 NULL 1000 3
2014-02-28 2000 NULL 3
2014-03-01 5000 NULL 3
I tried a correlated query
-- Attempted running balance using a correlated scalar subquery.
Select
t.TransDate,
t.Credit,
t.Debit,
-- The subquery is correlated on x.TransDate = t.TransDate, so it only sums
-- rows dated the same day as the outer row; it does not accumulate earlier rows.
-- Outer rows outside the hard-coded date range match nothing, and SUM over no
-- rows yields NULL.
(Select sum(coalesce(x.credit, 0) - coalesce(x.debit, 0))
From Transactions x
WHERE x.DataSource IN (3,4) AND (x.TransDate >= '2014/02/01' AND x.TransDate <= '2014/02/28' )
AND x.TransDate = t.TransDate) Balance
From
Transactions t
but I get all nulls for the Balance.
Expected output is
TransDate Credit Debit Balance
------------------------------------------
2014-01-11 NULL NULL 8000 <- opening balance
2014-02-03 6000 NULL 14000
2014-02-06 NULL 4000 10000
2014-02-11 3000 NULL 13000
2014-02-21 NULL 1000 12000
2014-02-28 2000 NULL 15000 <- closing balance

You need to self join table.
-- Sample data for the running-balance query. DataSource is part of the table
-- because the query below filters on it (values copied from the question).
CREATE TABLE Test
(
    TransDate  DATE,
    Credit     INT,
    Debit      INT,
    DataSource INT
);

INSERT INTO Test (TransDate, Credit, Debit, DataSource) VALUES
('2014-01-01', 5000, NULL, 3),
('2014-01-07', NULL, 2000, 3),
('2014-01-11', 5000, NULL, 3),
('2014-02-03', 6000, NULL, 4),
('2014-02-06', NULL, 4000, 4),
('2014-02-11', 3000, NULL, 4),
('2014-02-21', NULL, 1000, 3),
('2014-02-28', 2000, NULL, 3),
('2014-03-01', 5000, NULL, 3);  -- terminator is required before the WITH below

-- Running balance through a self join: every row t2 is paired with all rows t1
-- dated on or before it, and the net (credit - debit) of those rows is summed.
-- The balance is computed over the whole table first and the reporting window
-- is applied afterwards, so the first reported row carries the opening balance.
WITH CTE AS
(
    SELECT t2.TransDate,
           t2.Credit,
           t2.Debit,
           SUM(COALESCE(t1.Credit, 0) - COALESCE(t1.Debit, 0)) AS Balance
    FROM Test t1
    INNER JOIN Test t2
        ON t1.TransDate <= t2.TransDate
    WHERE t1.DataSource IN (3, 4)
    GROUP BY t2.TransDate, t2.Credit, t2.Debit
)
SELECT TransDate,
       Credit,
       Debit,
       Balance
FROM CTE
WHERE TransDate >= '2014-01-11'
  AND TransDate <= '2014-02-28'
ORDER BY TransDate;
OUTPUT
TransDate Credit Debit Balance
2014-01-11 5000 (null) 8000
2014-02-03 6000 (null) 14000
2014-02-06 (null) 4000 10000
2014-02-11 3000 (null) 13000
2014-02-21 (null) 1000 12000
2014-02-28 2000 (null) 14000
SQL FIDDLE

I would recommend doing this:
Data Set
-- Sample table: same transactions as in the question, plus a sequential Id.
CREATE TABLE Test1
(
    Id        INT,
    TransDate DATE,
    Credit    INT,
    Debit     INT
);

INSERT INTO Test1
VALUES (1, '2014-01-01', 5000, NULL),
       (2, '2014-01-07', NULL, 2000),
       (3, '2014-01-11', 5000, NULL),
       (4, '2014-02-03', 6000, NULL),
       (5, '2014-02-06', NULL, 4000),
       (6, '2014-02-11', 3000, NULL),
       (7, '2014-02-21', NULL, 1000),
       (8, '2014-02-28', 2000, NULL),
       (9, '2014-03-01', 5000, NULL)
Solution
-- Running balance with a window aggregate.
-- The window is ordered by TransDate (Id only breaks ties between rows on the
-- same date) so the accumulation order matches the order the rows are returned
-- in, even if Ids were not assigned chronologically.
-- NOTE: ORDER BY / ROWS inside an aggregate's OVER clause needs SQL Server 2012
-- or later; it is not available on SQL Server 2008.
SELECT TransDate,
       Credit,
       Debit,
       SUM(COALESCE(Credit, 0) - COALESCE(Debit, 0))
           OVER (ORDER BY TransDate, Id
                 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Balance
FROM Test1
ORDER BY TransDate, Id
OUTPUT
TransDate Credit Debit Balance
2014-01-01 5000 NULL 5000
2014-01-07 NULL 2000 3000
2014-01-11 5000 NULL 8000
2014-02-03 6000 NULL 14000
2014-02-06 NULL 4000 10000
2014-02-11 3000 NULL 13000
2014-02-21 NULL 1000 12000
2014-02-28 2000 NULL 14000
2014-03-01 5000 NULL 19000
Thank You!

Try this:
-- Running balance via a self join on Transactions: each row x is joined to
-- every row y dated on or before it, and y's net amounts are summed.
SELECT
    x.TransDate,
    x.Credit,
    x.Debit,
    SUM(COALESCE(y.Credit, 0) - COALESCE(y.Debit, 0)) AS Balance
FROM Transactions x
INNER JOIN Transactions y          -- table name was misspelled in the original
    ON y.TransDate <= x.TransDate
   AND y.DataSource IN (3, 4)
WHERE x.DataSource IN (3, 4)
GROUP BY
    x.TransDate,
    x.Credit,
    x.Debit
Note that for large datasets this self join gets slow very quickly, because every row is joined to all earlier rows; you might want to use a cursor or the newer window functions instead.
More here: https://brettwgreen.wordpress.com/2012/10/17/sql-cursors-are-slow-except-when-they-arent/

The following worked for me:
-- Account statement with a "Brought Forward" opening row and a running balance.
-- @Account_ID, @Query_Start_Date and @Query_End_Date are parameters supplied by
-- the caller (T-SQL variables use the @ prefix).
WITH tempDebitCredit AS (
    -- Opening row: net movement of everything before the start date.
    -- It is given Details_ID 0 so that it precedes the detail rows
    -- (assumes real Details_ID values are positive -- TODO confirm).
    SELECT 0                 AS Details_ID,
           NULL              AS Creation_Date,
           NULL              AS Reference_ID,
           'Brought Forward' AS Transaction_Kind,
           NULL              AS Amount_Debit,
           NULL              AS Amount_Credit,
           COALESCE(SUM(COALESCE(Amount_Debit, 0) - COALESCE(Amount_Credit, 0)), 0) AS diff
    FROM _YourTable_Name
    WHERE Account_ID = @Account_ID
      AND Creation_Date < @Query_Start_Date
    UNION ALL
    -- Detail rows inside the requested period. COALESCE keeps a row's amount in
    -- the balance when only one of debit/credit is filled in.
    SELECT a.Details_ID,
           a.Creation_Date,
           a.Reference_ID,
           a.Transaction_Kind,
           a.Amount_Debit,
           a.Amount_Credit,
           COALESCE(a.Amount_Debit, 0) - COALESCE(a.Amount_Credit, 0) AS diff
    FROM _YourTable_Name a
    WHERE a.Account_ID = @Account_ID
      AND a.Creation_Date >= @Query_Start_Date
      AND a.Creation_Date <= @Query_End_Date
)
-- Balance of a row = sum of diff over all rows with an equal or smaller Details_ID.
SELECT a.Details_ID,
       a.Creation_Date,
       a.Reference_ID,
       a.Transaction_Kind,
       a.Amount_Debit,
       a.Amount_Credit,
       SUM(b.diff) AS Balance
FROM tempDebitCredit a
INNER JOIN tempDebitCredit b
    ON b.Details_ID <= a.Details_ID
GROUP BY a.Details_ID,
         a.Creation_Date,
         a.Reference_ID,
         a.Transaction_Kind,
         a.Amount_Debit,
         a.Amount_Credit
ORDER BY a.Details_ID DESC
Tested on Microsoft SQL Server

Related

Sum over N days in SQL server

I have below table
AccountID
Date
Amount
123
07/06/2021
2000
123
07/12/2021
9000
123
07/16/2021
500
123
07/20/2021
500
123
07/28/2021
500
I am trying to sum the amount over 5 working days and get the output like below
AccountID
Date
Sum Amount
123
07/06/2021
11000
123
07/12/2021
9500
123
07/16/2021
1000
123
07/20/2021
500
123
07/28/2021
500
Also I am trying to ignore weekends(Saturday and Sunday)
I was able to add over 5 days using the below query. But not able to skip weekends.
-- Asker's attempt: for each July-2021 row of account '123' with a positive
-- amount, sum the amounts of that account's rows dated from the row's own date
-- up to (but not including) 5 calendar days later. Weekends are not excluded.
Select distinct
t1.accountid,
convert(datetime,t1.[date]),
t1.amount,
sum(t2.amount)
from [dbo].[HANMI_ABRIGO_TRANSACTIONS] t1
cross apply
(
SELECT *
FROM [dbo].[HANMI_ABRIGO_TRANSACTIONS] a
WHERE a.accountid= t1.accountid
AND
(
-- half-open window: [t1.date, t1.date + 5 calendar days)
convert(datetime,a.[date]) < DATEADD(DAY,5,convert(datetime,t1.[date]))
AND
convert(datetime,a.[date]) >= convert(datetime,t1.[date])
)
And a.accountid = '123'
-- the date column is matched as text here (LIKE pattern)
And a.date like '2021-07%'
and a.amount > 0
)t2
where t1.accountid = '123'
And t1.date like '2021-07%'
and t1.amount > 0
group by
t1.accountid,
convert(datetime,t1.[date]),
t1.amount
order by convert(datetime,t1.[date])
Thanks!
I think this is the query you are asking for:
-- Sum of Amount over the 5 working days that start at each transaction date.
-- With the question's sample data this gives 11000 for 2021-07-06; the earlier
-- 5-calendar-day BETWEEN window produced the 2000 shown in the listing that follows.
SELECT
    h1.AccountId,
    h1.Date,
    (
        SELECT SUM(h2.Amount)
        FROM HANMI_ABRIGO_TRANSACTIONS h2
        WHERE h2.AccountID = h1.AccountID
          -- (weekday + @@DATEFIRST) % 7 is 0 for Saturday and 1 for Sunday
          -- whatever the session's DATEFIRST setting is.
          AND (DATEPART(WEEKDAY, h2.Date) + @@DATEFIRST) % 7 NOT IN (0, 1)
          -- Any 7 consecutive calendar days contain exactly 5 weekdays, so a
          -- half-open 7-day window with weekends removed is 5 working days.
          AND h2.Date >= h1.Date
          AND h2.Date <  DATEADD(DAY, 7, h1.Date)
    ) AS SumAmount
FROM HANMI_ABRIGO_TRANSACTIONS h1
The results are:
AccountId
Date
SumAmount
123
2021-07-06
2000
123
2021-07-12
9500
123
2021-07-16
1000
123
2021-07-20
500
123
2021-07-28
500
SQL Fiddle: http://sqlfiddle.com/#!18/3d6bae/8

Dates in Ascending order based on multiple columns for calculating Opening and Closing balance

I am developing an application for a shop. In this business it is possible that on a given day no item is sold but there is an expense or a bill paid to a supplier, and vice versa. The formula for calculating the closing balance is:
Closing_Balance = Opening_Balance + Income - Expense - Bill
I have following tables
SupplierPayments
DateOfPayment Bill
2018-06-01 4000
2018-06-01 9000
2018-06-19 2000
2018-06-19 6000
2019-03-28 3000
2019-03-29 5000
Expensis
DateOfExpense Expense
2018-08-14 2,000
2019-02-26 8,000
2019-03-28 2000
2019-03-29 2000
Income
DateSold Income
2018-09-27 24,000
2018-10-17 8,000
2019-01-01 13,000
2019-03-28 10,000
I used following
SQL Server Query
-- Asker's attempt: per-date totals of income, supplier payments and expenses
-- are full-outer-joined on their dates, a running closing balance is taken over
-- the joined rows, and the opening balance is the previous row's closing balance.
with Income( DateSold, Income ) as (
select DateSold,isnull(sum(TotalBill),0)
from SalesInvoice group by DateSold
), SupplierPayments( DateOfPayment,Bill ) as(
select DateOfPayment,isnull(sum(BillPaidAmount),0)
from SupplyInvoicePaymentHistory group by DateOfPayment
), Expensis( DateOfExpense, Expense ) as(
select Date ,isnull(sum(Amount),0)
from GeneralExpense group by Date
), t as (
select i.DateSold
,e.DateOfExpense
,sp.DateOfPayment
,i.income
, e.Expense
,sp.Bill
-- The window is ordered by the three date columns one after another, not by a
-- single combined event date, so rows with a NULL DateSold are grouped together
-- instead of being placed at their actual date.
, sum(isnull(i.income,0)-(isnull(e.Expense,0)+isnull(sp.Bill,0))) over (order by i.DateSold,e.DateOfExpense,sp.DateOfPayment) as closing_balance
from income i
full outer join expensis e on e.DateOfExpense = i.DateSold
-- payments are matched against the expense date only
full outer join SupplierPayments sp on sp.DateOfPayment=e.DateOfExpense
)
select m.DateSold
,m.DateOfExpense
,m.DateOfPayment
,isnull(m.opening_balance,0) as Opening_Balance
,isnull(m.Income,0) as Income
,isnull(m.Expense,0) as Expensis
,isnull(m.Bill,0) as SupplierPayments
,isnull(m.closing_balance,0) as Closing_Balance
from (
select DateSold
,DateOfExpense
,DateOfPayment
-- opening balance = closing balance of the previous row (0 for the first row)
,lag(closing_balance,1,0) over (order by DateSold, DateOfExpense,DateOfPayment) as opening_balance,Income
, Expense
,closing_balance
,Bill
from t
) as m
Output
DateSold ExpenseDate PaymentDate Opening Income Expense Bill Closing
NULL NULL 2018-06-01 0 0 0 4,000 -4,000
NULL NULL 2018-06-19 -4000 0 0 2,000 -6,000
NULL 2018-08-14 NULL -6,000 0 2,000 0 -8,000
NULL 2019-02-26 NULL -8,000 0 8,000 0 -16,000
NULL 2019-03-29 2019-03-29 -16,000 0 2000 5000 -23,000
2018-09-27 NULL NULL -23,000 24,000 0 0 1,000
2018-10-17 NULL NULL 1,000 8,000 0 0 9,000
2019-01-01 NULL NULL 9,000 13,000 0 0 22,000
2019-03-28 2019-03-28 2019-03-28 22,000 10,000 2000 3000 27,000
The closing balance is wrong because of the ordering of the date columns. I want the following output, in which the rows are in ascending date order based on all three date columns:
Required Result
DateSold ExpenseDate PaymentDate Opening Income Expense Bill Closing
NULL NULL 2018-06-01 0 0 0 4,000 -4,000
NULL NULL 2018-06-19 -4000 0 0 2,000 -6,000
NULL 2018-08-14 NULL -6,000 0 2,000 0 -8,000
2018-09-27 NULL NULL -8,000 24,000 0 0 16,000
2018-10-17 NULL NULL 16,000 8,000 0 0 24,000
2019-01-01 NULL NULL 24,000 13,000 0 0 37,000
NULL 2019-02-26 NULL 37,000 0 8,000 0 29,000
2019-03-28 2019-03-28 2019-03-28 29,000 10,000 2000 3000 34,000
NULL 2019-03-29 2019-03-29 34,000 0 2000 5000 29,000
I think union all with group by might be a better approach:
-- One row per event date with that day's totals and running balances.
-- The three sources are stacked with UNION ALL (zero-filled for the columns a
-- source does not have) and then grouped by date, so several entries on the
-- same date, in one table or across tables, collapse into a single row.
select dte,
       sum(bill)    as bill,
       sum(expense) as expense,
       sum(income)  as income,
       -- In a grouped query the window function must operate on the group
       -- aggregate, hence sum(sum(...)). Opening = closing minus the day's net.
       sum(sum(income - expense - bill)) over (order by dte)
           - sum(income - expense - bill) as opening_balance,
       sum(sum(income - expense - bill)) over (order by dte) as closing_balance
from ((select DateOfPayment as dte, Bill as bill, 0 as expense, 0 as income
       from SupplierPayments
      ) union all
      (select DateOfExpense, 0, Expense, 0
       from Expensis
      ) union all
      (select DateSold, 0, 0, Income
       from Income
      )
     ) d
group by dte
order by dte;
The query is a little simpler than a full join query, because you don't have to deal with so many NULL values. More importantly, if one of the tables has two entries on the same date, this produces the correct answer.
Sample data:
-- Sample data as temp tables. Names starting with # are temp tables in T-SQL
-- and must be created with CREATE TABLE; DECLARE ... TABLE only accepts
-- @-prefixed table variables.
create table #SupplierPayments (DateOfPayment date, Bill int);
insert into #SupplierPayments (DateOfPayment, Bill) values
('2018-06-01', 4000),
('2018-06-19', 2000),
('2019-03-28', 3000),
('2019-03-29', 5000);

create table #Expensis (DateOfExpense date, Expense int);
insert into #Expensis (DateOfExpense, Expense) values
('2018-08-14', 2000),
('2019-02-26', 8000),
('2019-03-28', 2000),
('2019-03-29', 2000);

create table #Income (DateSold date, Income int);
insert into #Income (DateSold, Income) values
('2018-09-27', 24000),
('2018-10-17', 8000),
('2019-01-01', 13000),
('2019-03-28', 10000);
To get the Closing column it is enough to apply your formula (Income - Expense - Bill) to each row; the Opening column is not needed for that.
The Closing value is then the cumulative sum of that per-row result (see the query below).
A cumulative sum is easily obtained with the sum function and an over clause containing order by.
-- Opening and closing balance per event date, built from per-date totals of
-- the three sources joined with FULL JOIN.
select EventDate, DateOfExpense, DateOfPayment, DateSold, Income, Expense, Bill,
       -- everything before the current row; the first row has no predecessors,
       -- so the NULL from the empty frame is turned into 0
       coalesce(sum(Net) over (order by EventDate
                               rows between unbounded preceding and 1 preceding), 0) Opening,
       sum(Net) over (order by EventDate rows unbounded preceding) Closing
from (
    select coalesce(sp.DateOfPayment, e.DateOfExpense, i.DateSold) EventDate,
           sp.DateOfPayment, sp.Bill,
           e.DateOfExpense, e.Expense,
           i.DateSold, i.Income,
           -- a source with no entry on this date comes back as NULL from the
           -- full join; treat it as 0 so the row still counts towards the balance
           coalesce(i.Income, 0) - coalesce(e.Expense, 0) - coalesce(sp.Bill, 0) Net
    from (
        select DateOfPayment, sum(coalesce(Bill, 0)) Bill from #SupplierPayments group by DateOfPayment
    ) sp
    full join (
        select DateOfExpense, sum(coalesce(Expense, 0)) Expense from #Expensis group by DateOfExpense
    ) e on sp.DateOfPayment = e.DateOfExpense
    full join (
        select DateSold, sum(coalesce(Income, 0)) Income from #Income group by DateSold
    -- match income against whichever date the row already has, so a date that
    -- has an expense and income but no payment still ends up on one row
    ) i on coalesce(sp.DateOfPayment, e.DateOfExpense) = i.DateSold
) a
order by EventDate

Group By first day of month and join with a separate table

I have 2 tables in SQL
one with monthly sales targets:
Date Target
1/7/17 50000
1/8/17 65000
1/9/17 50000
1/10/17 48000
etc...
the other with sales orders:
TxnDate JobNum Value
3/7/17 100001 20000
3/7/17 100002 11000
8/7/17 100003 10000
10/8/17 100004 15000
15/9/17 100005 20000
etc...
what I want is a table with following:
Date Target Sales
1/7/17 50000 41000
1/8/17 65000 15000
1/9/17 50000 20000
please help me I'm a newbie to coding and this is doing my head in.. :)
Assuming your 1st table is targetSales and your 2nd table is Sales and your database is SQL Server:
-- Monthly target next to the total sales of the same calendar month.
-- LEFT JOIN keeps months without any sales; their total is reported as 0.
select
    t.date
  , t.target
  , coalesce(sum(s.value), 0) as Sales
from targetSales t
left join Sales s
    -- the sales table's date column is TxnDate
    on  month(t.date) = month(s.TxnDate)
    and year(t.date)  = year(s.TxnDate)
group by t.date
       , t.target
You can follow a similar approach if you use a different database, just find the equivalents of month() and year() functions for your RDBMS.
try this
-- Target joined to the total sales of exactly the same date (daily targets).
select tb1.date,
       tb1.target,
       tb2.sales          -- the derived table exposes the total as "sales"
from table1 as tb1
inner join (
    select date, sum(value) as sales
    from table2
    group by date
) as tb2
    on tb1.date = tb2.date
you can use this script for daily targets
Another way: it looks like the date in the target table is always the first day of the month, so in the sales table just round the TxnDate column value down to the first day of its month.
Query
-- Targets with the total of all sales whose TxnDate, moved back to the first
-- day of its month, equals the target date; months without sales report 0.
SELECT tgt.[date],
       MAX(tgt.[target])              AS [target],
       COALESCE(SUM(sale.[value]), 0) AS [value]
FROM [targets] AS tgt
LEFT JOIN [sales] AS sale
    ON tgt.[Date] = DATEADD(DAY, 1 - DAY(sale.[txnDate]), sale.[txnDate])
GROUP BY tgt.[Date];
demo
If you take any datetime value in SQL Server, calculate the number of months from zero to that date with datediff(month,0,TxnDate), and then add that number of months to zero with dateadd(month, ... , 0), you get the first day of the month for the original datetime value. This works in all versions of SQL Server. With this we can sum the values of the orders by the first day of the month, then join to targets using that date.
-- Demo schema and data: individual sales orders.
CREATE TABLE Orders
([TxnDate] datetime, [JobNum] int, [Value] int)
;
INSERT INTO Orders
([TxnDate], [JobNum], [Value])
VALUES
('2017-07-03 00:00:00', 100001, 20000),
('2017-07-03 00:00:00', 100002, 11000),
('2017-07-08 00:00:00', 100003, 10000),
('2017-08-10 00:00:00', 100004, 15000),
('2017-09-15 00:00:00', 100005, 20000)
;
-- Monthly sales targets, keyed by a date within the month.
CREATE TABLE Targets
([Date] datetime, [Target] int)
;
INSERT INTO Targets
([Date], [Target])
VALUES
('2017-07-01 00:00:00', 50000),
('2017-08-01 00:00:00', 65000),
('2017-09-01 00:00:00', 50000),
-- NOTE(review): unlike the other rows this is not a first-of-month date;
-- looks like a typo for 2017-10-01 -- confirm.
('2017-10-10 00:00:00', 48000)
;
GO
9 rows affected
-- Order value totals per month; each month is identified by its first day.
SELECT DATEADD(MONTH, DATEDIFF(MONTH, 0, TxnDate), 0) AS month_start,
       SUM(Value)                                     AS SumValue
FROM Orders
GROUP BY DATEADD(MONTH, DATEDIFF(MONTH, 0, TxnDate), 0)
GO
month_start | SumValue
:------------------ | -------:
01/07/2017 00:00:00 | 41000
01/08/2017 00:00:00 | 15000
01/09/2017 00:00:00 | 20000
-- Every target with the order total of the month starting on the target date;
-- targets without a matching month total show 0.
SELECT tgt.[Date],
       tgt.Target,
       COALESCE(monthly.SumValue, 0)
FROM targets AS tgt
LEFT JOIN (
    -- order totals keyed by the first day of the order's month
    SELECT DATEADD(MONTH, DATEDIFF(MONTH, 0, TxnDate), 0) AS month_start,
           SUM(Value)                                     AS SumValue
    FROM Orders
    GROUP BY DATEADD(MONTH, DATEDIFF(MONTH, 0, TxnDate), 0)
) AS monthly
    ON tgt.[Date] = monthly.month_start
GO
Date | Target | (No column name)
:------------------ | -----: | ---------------:
01/07/2017 00:00:00 | 50000 | 41000
01/08/2017 00:00:00 | 65000 | 15000
01/09/2017 00:00:00 | 50000 | 20000
10/10/2017 00:00:00 | 48000 | 0
dbfiddle here
This is not the best solution but this will give you a correct result.
-- Each target with the total sales of the same month and year, computed by a
-- correlated subquery that runs once per target row.
select t.Date as date,
       t.target,
       -- SUM over no rows is NULL; report 0 for months without sales
       coalesce((
           select sum(s.value)
           from sales_orders s
           where datepart(month, s.TxnDate) = datepart(month, t.Date)
             and datepart(year,  s.TxnDate) = datepart(year,  t.Date)
       ), 0) as sales
from targets t

Sql Server Select Statement, Cumulative column

I have the select statement below which is used in a gridview datasource. I need the cumulative column to be calculated on the ProjInstDate like I have below. But if you see the results, I get duplicates on the cumulative totals for the same date, which makes sense. But what I can't figure out is how to have the cumulative be calculated on the ProjInstDate but do it for the different JobIDs. I'm not sure what the select statement is supposed to be like at this point "(t1.ProjectedInstDate >= t2.ProjectedInstDate) ". I know that right here is the issue but I'm not sure how to properly do what I want.
-- Cumulative projected income via a self join of ContractPlan: each t1 row is
-- joined to every t2 row whose ProjectedInstDate is on or before its own, and
-- t2.ProjectedIncome is summed over those rows. Rows sharing a
-- ProjectedInstDate are joined to the same set of t2 rows and therefore get the
-- same cumulative total.
select t1.JobID
, t1.JobName
, t1.[JobDescription]
, t1.JobType
, Convert(varchar, t1.QuoteDate, 101) AS QuoteDate
, t1.ProjectedInstDate
, Convert(decimal(12, 0), t1.QuoteTotal) AS QuoteTotal
, Convert(decimal(12, 0), t1.Prob) AS Prob
, t1.Completed
, t1.ProjectManager
, Convert(decimal(12, 0), t1.ProjectedIncome) AS ProjectedIncome
, sum(CASE WHEN t1.Completed != 'y' and t1.Prob != 0
then Convert(decimal(12, 0),t2.ProjectedIncome)
else null end) As CumulativeProjectedIncome
-- 1923 is an unexplained constant; presumably income per working day -- TODO confirm
, Convert(decimal(12, 1), sum(CASE WHEN t1.Completed != 'y' and t1.Prob != 0
then (t2.ProjectedIncome/1923)
else null end)) as DAYSOFWORK
, (CASE WHEN t1.Completed != 'y'and t1.Prob != 0
then Convert(decimal(12, 0), (datediff(day, GetDate(), t1.ProjectedInstDate)))
else null end) AS DAYSFROMTODAY
, (CASE WHEN t1.Completed != 'y' and t1.Prob != 0
then Convert(decimal(12, 1), sum(t2.ProjectedIncome/1923)-datediff(day, GetDate(), t1.ProjectedInstDate))
else null end) as DAYSOFREVASOFTODAY
from ContractPlan t1
inner join
ContractPlan t2
-- NOTE(review): a tie-breaker on JobID in this condition would presumably give
-- same-date rows distinct cumulative totals -- confirm JobID is unique per row.
on (t1.ProjectedInstDate >= t2.ProjectedInstDate)
where (t2.Completed = 'n' or t2.Completed = 'N')
and (t1.Completed = 'N' or t1.Completed = 'n')
Group By
t1.JobID
, t1.JobName
, t1.[JobDescription]
, t1.JobType
, t1.QuoteDate
, t1.ProjectedInstDate
, t1.QuoteTotal
, t1.Prob
, t1.ProjectedIncome
, t1.Completed
, t1.ProjectManager
Order By
t1.ProjectedInstDate
ProjInstDate | QuoteTotal | Prob |Comp | PM| ProjectedIncome | CumulativeIncome
2014-04-16 00:00:00 300 100 N NULL 300 300
2014-04-17 00:00:00 6000 100 N NULL 6000 6300
2014-04-18 00:00:00 300 100 N NULL 300 6600
2014-05-01 00:00:00 9756 100 N NULL 9756 63637
2014-05-01 00:00:00 28796 75 N NULL 21597 63637
2014-05-01 00:00:00 11179 50 N NULL 5590 63637
2014-05-01 00:00:00 20094 100 N NULL 20094 63637
2014-05-04 00:00:00 2222 90 N ben 2000 67637
2014-05-04 00:00:00 2222 90 N ben 2000 67637
2014-05-05 00:00:00 23698 100 N daved 23698 98835
2014-05-05 00:00:00 6000 100 N sdf 6000 98835
2014-05-05 00:00:00 1500 100 N NULL 1500 98835
2014-05-07 00:00:00 4500 100 N NULL 4500 103335
2014-05-09 00:00:00 750 100 N NULL 750 104085
2014-05-13 00:00:00 540 100 N NULL 540 104625

Finding Duplicate Orders (by time proximity)

I have a table of orders that I know have duplicates
customer order_number order_date
---------- ------------ -------------------
1 1 2012-03-01 01:58:00
1 2 2012-03-01 02:01:00
1 3 2012-03-01 02:03:00
2 4 2012-03-01 02:15:00
3 5 2012-03-01 02:18:00
3 6 2012-03-01 04:30:00
4 7 2012-03-01 04:35:00
5 8 2012-03-01 04:38:00
6 9 2012-03-01 04:58:00
6 10 2012-03-01 04:59:00
I want to find all duplicates (orders by the same customer within 60 minutes of each other). Either a result set consisting of the 'duplicate' rows or a set of all customers with a count of how many duplicates each has would do.
Here is what I have tried
-- Asker's first attempt: count orders per customer and clock hour, keeping the
-- groups that contain more than one order.
SELECT
customer,
count(*)
FROM
orders
GROUP BY
customer,
-- groups by the hour-of-day number only, so orders a few minutes apart but in
-- different clock hours land in different groups
DATEPART(HOUR, order_date)
HAVING (count(*) > 1)
This doesn't work when duplicates are within 60 minutes of each other but are in different hours i.e 1:58 and 2:02
I've also tried this
-- Asker's second attempt: pair every order with every other order of the same
-- customer and keep the pairs that are less than 60 minutes apart. Each pair
-- appears in both directions because the join condition is symmetric.
SELECT
o1.customer,
o1.order_number,
o2.order_number,
DATEDIFF(MINUTE,o1.order_date, o2.order_date) AS [diff]
FROM
orders o1 LEFT OUTER JOIN
orders o2 ON o1.customer = o2.customer AND o1.order_number <> o2.order_number
WHERE
-- this filter on o2 discards the unmatched (NULL) rows, so the LEFT OUTER JOIN
-- behaves like an inner join
ABS(DATEDIFF(MINUTE,o1.order_date, o2.order_date)) < 60
Now this gives me all of the duplicates, but it also gives me multiple rows per duplicate order, i.e. (o1, o2) and (o2, o1), which wouldn't be so bad if there weren't some orders with multiple duplicates. In those cases I get (o1, o2), (o1, o3), (o2, o1), (o2, o3), (o3, o1), (o3, o2) etc. I get all of the permutations.
Anyone have some insight? I'm not necessarily looking for the best performing answer here, just one that works.
-- Flags each order as a duplicate when the same customer placed an earlier
-- order during the preceding hour; otherwise it is the principal order.
SELECT
    *,
    CASE WHEN EXISTS (SELECT *
                      FROM orders AS lookup
                      WHERE lookup.customer   =  orders.customer
                        AND lookup.order_date <  orders.order_date
                        -- both sides must be qualified: an unqualified
                        -- order_date binds to lookup and compares the row
                        -- with itself, which is always true
                        AND lookup.order_date >= DATEADD(hour, -1, orders.order_date)
                     )
         THEN 'Duplicate Order'   -- an earlier order exists within the hour
         ELSE 'Principle Order'
    END AS Order_Status
FROM
    orders
Using EXISTS and a correlated sub-query you can check if there were any preceding orders in the last hour.
Maybe something like this:
Test data:
-- Test data as a temp table. A #-prefixed name is a temp table and has to be
-- created with CREATE TABLE; DECLARE ... TABLE only accepts @-prefixed names.
CREATE TABLE #tbl (customer INT, order_number INT, order_date DATETIME);

INSERT INTO #tbl (customer, order_number, order_date)
VALUES
(1,1,'2012-03-01 01:58:00'),
(1,2,'2012-03-01 02:01:00'),
(1,3,'2012-03-01 02:03:00'),
(2,4,'2012-03-01 02:15:00'),
(3,5,'2012-03-01 02:18:00'),
(3,6,'2012-03-01 04:30:00'),
(4,7,'2012-03-01 04:35:00'),
(5,8,'2012-03-01 04:38:00'),
(6,9,'2012-03-01 04:58:00'),
(6,10,'2012-03-01 04:59:00');
Query
-- Per customer: number of orders placed less than 60 minutes after that
-- customer's earliest order (the earliest order itself is counted).
;WITH order_minutes AS
(
    SELECT
        src.customer,
        -- order time expressed as whole minutes since 1990-01-01
        datediff(minute,'1990-1-1',src.order_date) AS order_minute,
        MIN(datediff(minute,'1990-1-1',src.order_date))
            OVER (PARTITION BY src.customer)       AS first_minute
    FROM #tbl AS src
)
SELECT
    om.customer,
    SUM(CASE WHEN om.order_minute - om.first_minute < 60 THEN 1 ELSE 0 END)
FROM order_minutes AS om
GROUP BY om.customer
The following query identifies all possible permutations of orders within the proximity of 60 minutes of one another:
-- Test data as a temp table. A #-prefixed name is a temp table and has to be
-- created with CREATE TABLE; DECLARE ... TABLE only accepts @-prefixed names.
CREATE TABLE #orders (CustomerId INT, OrderId INT, OrderDate DATETIME);

INSERT INTO #orders (CustomerId, OrderId, OrderDate)
VALUES
(1, 1, '2012-03-01 01:58:00'),
(1, 2, '2012-03-01 02:01:00'),
(1, 3, '2012-03-01 02:03:00'),
(2, 4, '2012-03-01 02:15:00'),
(3, 5, '2012-03-01 02:18:00'),
(3, 6, '2012-03-01 04:30:00'),
(4, 7, '2012-03-01 04:35:00'),
(5, 8, '2012-03-01 04:38:00'),
(6, 9, '2012-03-01 04:58:00'),
(6, 10, '2012-03-01 04:59:00');
-- Lists, for every order, each other order of the same customer that lies
-- within 60 minutes of it, using a recursive CTE.
with ProximityOrderCascade(CustomerId, OrderId, ProximateOrderId, MinutesDifference, OrderDate, ProximateOrderDate)
as
(
-- anchor: one row per order, with no proximate order yet
select o.customerid, o.orderid, null, null, o.orderdate, o.orderdate
from #orders o
union all
-- recursive member: pair each anchor row (p) with the other orders (o) of the
-- same customer that are at most 60 minutes away
select o.customerid, o.orderid, p.orderid, datediff(minute, p.OrderDate, o.OrderDate), o.OrderDate, p.OrderDate
from ProximityOrderCascade p
inner join #orders o
on p.customerid = o.customerid
and abs(datediff(minute, p.OrderDate, o.OrderDate)) between 0 and 60
and o.orderid <> p.orderid
-- only anchor rows have a NULL ProximateOrderId, so the recursion stops after
-- one level
where proximateorderid is null
)
select * from ProximityOrderCascade
where
-- drop the anchor rows; keep only the actual pairs
not ProximateOrderId is null
From there, you can transform the results into a query of your choice. The results of this query identify only customers 1 and 6 as having 'duplicate' orders.
CustomerId OrderId ProximateOrderId MinutesDifference OrderDate ProximateOrderDate
----------- ----------- ---------------- ----------------- ----------------------- -----------------------
6 9 10 -1 2012-03-01 04:58:00.000 2012-03-01 04:59:00.000
6 10 9 1 2012-03-01 04:59:00.000 2012-03-01 04:58:00.000
1 1 3 -5 2012-03-01 01:58:00.000 2012-03-01 02:03:00.000
1 2 3 -2 2012-03-01 02:01:00.000 2012-03-01 02:03:00.000
1 1 2 -3 2012-03-01 01:58:00.000 2012-03-01 02:01:00.000
1 3 2 2 2012-03-01 02:03:00.000 2012-03-01 02:01:00.000
1 2 1 3 2012-03-01 02:01:00.000 2012-03-01 01:58:00.000
1 3 1 5 2012-03-01 02:03:00.000 2012-03-01 01:58:00.000
(8 row(s) affected)