Related
I have a table in SQL Server that contains customers' transactions From 2022-02-10 to 2022-03-10.
I want to find customers that have at least 5 transactions on At most three consecutive days
For example, output of below table should be CustomerId = 2 and customerid=3
Id
CustomerId
Transactiondate
1
1
2022-03-01
2
1
2022_03_01
3
1
2022_03_05
4
1
2022_03_07
5
1
2022_03_07
6
2
2022_03_05
7
2
2022_03_05
8
2
2022_03_06
9
2
2022_03_06
10
2
2022_03_07
1
3
2022-03-01
2
3
2022_03_01
3
3
2022_03_01
4
3
2022_03_03
5
3
2022_03_03
I tried this query but it doesn't have good performance for a large table:
select distinct p1.customerid
from trntbl p1
join trntbl p2 on p2.id <> p1.id
and p2.customerid = p1.customerid
and p2.TransactionDate >= p1.TransactionDate
and p2.TransactionDate < date_add(day, 3, p1.prchasedate)
group by p1.customerid, p1.id
having count(*) >= 4
If customers must have done transactions in three consecutive days (meaning that 5 transactions in a day then nothing in the next two days wouldn't count), then this can be done with two self joins:
with cte as
(select CustomerId, Transactiondate, count(*) ct
from table_name
group by CustomerId, Transactiondate)
select distinct t1.CustomerId
from cte t1 inner join cte t2
on t1.Transactiondate = dateadd(day, 1, t2.Transactiondate)
and t1.CustomerId = t2.CustomerId
inner join cte t3
on t2.Transactiondate = dateadd(day, 1, t3.Transactiondate)
and t3.CustomerId = t2.CustomerId
;
Fiddle
Although this is a gaps-and-islands problem, there are shortcuts you can take.
You can group it up by date, then get the row 2 previous, and filter by only rows where the 2 previous row is exactly two days apart.
SELECT DISTINCT
CustomerId
FROM (
SELECT
t.CustomerId,
v.Date,
Prev2 = LAG(v.Date, 2) OVER (PARTITION BY t.CustomerId ORDER BY v.Date)
FROM YourTable t
CROSS APPLY (VALUES( CAST(Transactiondate AS date) )) v(Date)
GROUP BY
t.CustomerId,
v.Date
) t
WHERE DATEDIFF(day, t.Prev2, t.Date) = 2
db<>fiddle
If the base table only has a maximum of one row per date then you can forgo the GROUP BY.
This is actually a gaps and islands problem, you can solve by using analytic window functions to subtract sequential row_number from consecutive days and then grouping, after first "plugging" any gaps with the help of a numbers table.
with numbers as (select top(20) Row_Number() over(order by (select null))-1 n from master.dbo.spt_values),
dRanges as (
select customerId,
Min(Transactiondate) CustStartDate,
Max(Transactiondate) CustEndDate
from t
group by CustomerId
), dates as (
select *
from dranges r
outer apply (
select DateAdd(day,n,r.CustStartDate) SeqDate
from numbers n
where DateAdd(day,n,r.CustStartDate) < = r.CustEndDate
)d
), q as (
select customerId, transactiondate, Count(*) qty
from t
group by CustomerId, Transactiondate
), g as (
select d.CustomerId, d.SeqDate, IsNull(q.qty,0)Qty,
DateAdd(day, - row_number() over (partition by d.customerid order by d.SeqDate), d.SeqDate) as dGrp
from dates d
left join q on q.Transactiondate = d.SeqDate and q.CustomerId = d.CustomerId
)
select customerId
from g
group by CustomerId, dGrp
having Count(*) <= 3 and Sum(qty) >= 5
DB<>Fiddle
You could make use of datediff function and verify if the sum of the date differences are between 3 and 5 (provided the max of the differences is just 1) since the dates might be unique (for example customerid 2 can have transaction dates as 5,6,7,8,9 of March 2022) and this should be taken into account too.
declare #tbl table(id int identity,customerid int,transactiondate date)
insert into #tbl(customerid,transactiondate)
values(1,'2022-03-01')
,(1,'2022-03-01')
,(1,'2022-03-05')
,(1,'2022-03-07')
,(1,'2022-03-07')
,(2,'2022-03-05')
,(2,'2022-03-05')
,(2,'2022-03-06')
,(2,'2022-03-06')
,(2,'2022-03-07')
select customerid from (
select *
,SUM(datediff)over(partition by customerid order by transactiondate)[sum]
,max(datediff)over(partition by customerid order by transactiondate)[max]
from(
select customerid , transactiondate,
DATEDIFF(DAY
,
case when LEAD(transactiondate,1)over(partition by customerid order by transactiondate)
is null then
LAG(transactiondate,1,transactiondate)
over(partition by customerid order by transactiondate)
else
transactiondate end
, case when LEAD(transactiondate,1)over(partition by customerid order by transactiondate)
is null then
transactiondate
else
LEAD(transactiondate,1,transactiondate)
over(partition by customerid order by transactiondate)end) as [datediff]
,ROW_NUMBER()over(partition by customerid order by transactiondate)rownum
from #tbl
)t
)t1
where t1.rownum = 5
and t1.max = 1
and t1.sum between 3 and 5
I have data like this. first row of Id 1 from particular time period and second row of id 1 is another time period. so now want to combined id and name which are same in the two time periods reaming are same.if there is no orders from that time period its should be display 0 or null.
Id Name Qty Price
----------------------
1 Rose 4 540
1 Rose 1 640
2 Lilly 5 550
2 Lilly 18 360
3 Grand 2 460
3 Grand 10 360
4 lotus 0 0
4 Lotus 9 580
now I want data like this..
Id Name Qty Price
4 540
1 rose
1 640
5 550
2 Lilly
18 360
2 460
3 Grand
10 360
0 0
4 Lotus
9 580
This is my procedure
create PROCEDURE [dbo].[Sp_Orders]
(
#Startdate varchar(30),
#Enddate varchar(30),
#Startdate1 varchar(30),
#Enddate1 varchar(30)
)
--[Sp_Orders] '03/01/2016','03/15/2016','02/01/2016','02/28/2016'
AS
BEGIN
---First Duration----
SELECT DISTINCT
op.ProductId as id, op.Price as Prc,
sc.SubCategoryName as ScName,
COUNT(op.ProductId) AS Qty,
ROUND(SUM(op.Price * op.Quantity), 0) AS Revenue,
FROM
orderdetails od
INNER JOIN
(SELECT DISTINCT
Orderid, Productid, ProductFeatures, Price, Quantity
FROM
OrderProducts) op ON od.Orderid = op.Orderid
INNER JOIN
products p ON p.productid = op.productid
INNER JOIN
subcategory sc ON sc.subcategoryid = p.subcategoryid
WHERE
CONVERT(datetime, CONVERT(varchar(50), od.DeliveryDate, 101)) BETWEEN #Startdate AND #Enddate
GROUP BY
op.ProductID, op.Price, sc.SubCategoryName
---Second Duration----
SELECT DISTINCT
op.ProductID AS id, op.Price AS Prc,
sc.SubCategoryName AS ScName,
COUNT(op.ProductId) AS Qty,
ROUND(SUM(op.Price * op.Quantity), 0) AS Revenue,
FROM
orderdetails od
INNER JOIN
(SELECT DISTINCT
Orderid, Productid, ProductFeatures, Price, Quantity
FROM
OrderProducts) op ON od.Orderid = op.Orderid
INNER JOIN
products p ON p.productid = op.productid
INNER JOIN
subcategory sc ON sc.subcategoryid = p.subcategoryid
WHERE
CONVERT(datetime, CONVERT(varchar(50),od.DeliveryDate,101)) BETWEEN #Startdate1 AND #Enddate1
GROUP BY
op.ProductID, op.Price, sc.SubCategoryName
END
From what I understood from your Question and Comments:
Schema for your case
SELECT * INTO #TAB FROM(
SELECT 1 ID, 'ROSE' NAME, 4 QTY, 540 PRICE
UNION ALL
SELECT 1 , 'ROSE' , 1 , 640
UNION ALL
SELECT 2 , 'LILLY' , 5 , 550
UNION ALL
SELECT 2 , 'LILLY' , 18 ,360
UNION ALL
SELECT 3 , 'GRAND' , 2 , 460
UNION ALL
SELECT 3 , 'GRAND' , 10 ,360
UNION ALL
SELECT 4 , NULL,NULL,NULL
UNION ALL
SELECT 4 , 'LOTUS' , 9 , 580
) AS A
And the Logic to display is as below
SELECT CASE WHEN SNO=1 THEN CAST(ID AS VARCHAR(250)) ELSE '' END ID,
CASE WHEN SNO=1 THEN ISNULL(NAME,'') ELSE '' END NAME,ISNULL(Qty,0)Qty
,ISNuLL(Price,0)Price FROM (
SELECT ROW_NUMBER() Over(partition by Name, Id ORDER BY (SELECT 1)) SNO
,ID, NAME , Qty, Price, ID AS ID2 FROM #TAB
)AS A
ORDER BY ID2, NAME DESC
Try this from your Procedure. And may need to do type cast based on your actual datatypes
CREATE PROCEDURE [DBO].[SP_ORDERS]
(
#STARTDATE VARCHAR(30),
#ENDDATE VARCHAR(30),
#STARTDATE1 VARCHAR(30),
#ENDDATE1 VARCHAR(30)
)
--[SP_ORDERS] '03/01/2016','03/15/2016','02/01/2016','02/28/2016'
AS
BEGIN
SELECT CASE WHEN SNO=1 THEN CAST(ID AS VARCHAR(250)) ELSE '' END ID,CASE WHEN SNO=1 THEN ISNULL(SCNAME,'') ELSE '' END NAME,ISNULL(QTY,0)QTY,ISNULL(REVENUE,0)PRICE FROM (
SELECT ROW_NUMBER() OVER(PARTITION BY SCNAME, ID ORDER BY (SELECT 1)) SNO, ID, SCNAME , QTY, REVENUE, ID AS ID2 FROM (
SELECT DISTINCT OP.PRODUCTID AS ID,OP.PRICE AS PRC,SC.SUBCATEGORYNAME AS SCNAME,COUNT(OP.PRODUCTID) AS QTY, ROUND(SUM(OP.PRICE * OP.QUANTITY), 0) AS REVENUE
FROM ORDERDETAILS OD INNER JOIN
(SELECT DISTINCT ORDERID,PRODUCTID,PRODUCTFEATURES,PRICE,QUANTITY FROM ORDERPRODUCTS ) OP ON OD.ORDERID=OP.ORDERID
INNER JOIN PRODUCTS P ON P.PRODUCTID=OP.PRODUCTID
INNER JOIN SUBCATEGORY SC ON SC.SUBCATEGORYID=P.SUBCATEGORYID
WHERE CONVERT(DATETIME,CONVERT(VARCHAR(50),OD.DELIVERYDATE,101)) BETWEEN #STARTDATE AND #ENDDATE
GROUP BY OP.PRODUCTID,OP.PRICE,SC.SUBCATEGORYNAME
---SECOND DURATION----
UNION ALL --ADDED NOW
SELECT DISTINCT OP.PRODUCTID AS ID,OP.PRICE AS PRC,SC.SUBCATEGORYNAME AS SCNAME,COUNT(OP.PRODUCTID) AS QTY, ROUND(SUM(OP.PRICE * OP.QUANTITY), 0) AS REVENUE
FROM ORDERDETAILS OD INNER JOIN
(SELECT DISTINCT ORDERID,PRODUCTID,PRODUCTFEATURES,PRICE,QUANTITY FROM ORDERPRODUCTS ) OP ON OD.ORDERID=OP.ORDERID
INNER JOIN PRODUCTS P ON P.PRODUCTID=OP.PRODUCTID
INNER JOIN SUBCATEGORY SC ON SC.SUBCATEGORYID=P.SUBCATEGORYID
WHERE CONVERT(DATETIME,CONVERT(VARCHAR(50),OD.DELIVERYDATE,101)) BETWEEN #STARTDATE1 AND #ENDDATE1
GROUP BY OP.PRODUCTID,OP.PRICE,SC.SUBCATEGORYNAME
)
AS A
)B
ORDER BY ID2, NAME
END
Based on your sample data i have given this Out put but if the data is inconsistent it may not give accurate results if you see the Expected Output it gives exact same
Declare #Table1 TABLE
(Id VARCHAR(10), Name varchar(5),Qty VARCHAR(10), Price varchar(10))
;
INSERT INTO #Table1
(Id, Name,Qty, Price)
VALUES
(1, 'Rose',4, 540),
(1, 'Rose',1, 640),
(2, 'Lilly',5, 550),
(2, 'Lilly',18, 360),
(3, 'Grand',2, 460),
(3, 'Grand',10, 360),
(4,'Lotus',0,0),
(4, 'Lotus',9, 580)
;
SCRIPT
;WITH CTE AS (
Select
CASE WHEN RN = 1 THEN ID ELSE NULL END ID,
CASE WHEN RN = 1 THEN Name ELSE NULL END NAME,
Qty,
Price
from (
select
Id,
Name,
Qty,
Price,
ROW_NUMBER()OVER(PARTITION BY ID,NAME ORDER BY NAME)RN
FROM
#Table1)T)
Select CASE WHEN RN = 2 THEN T.Id ELSE '' END ID,
CASE WHEN RN = 2 THEN T.Name ELSE '' END Name,
CASE WHEN RN IN (1,3) THEN ISNULL(T.Qty,0) ELSE '' END qty,
CASE WHEN RN IN (1,3) THEN ISNULL(T.Price,0) ELSE '' END qty from (
Select
T.ID,
T.NAME,
c.Qty,
C.Price,
ROW_NUMBER()OVER(PARTITION BY T.ID,T.NAME ORDER BY T.NAME)RN
from #Table1 T
INNER JOIN CTE C
ON T.Id = C.ID
AND T.Name = C.NAME
OR (T.Qty = C.Qty OR T.Price = C.Price ))T
WHERE T.RN <> 4
I have the following table:
custid custname channelid channel dateViewed
--------------------------------------------------------------
1 A 1 ABSS 2016-01-09
2 B 2 STHHG 2016-01-19
3 C 4 XGGTS 2016-01-09
6 D 4 XGGTS 2016-01-09
2 B 2 STHHG 2016-01-26
2 B 2 STHHG 2016-01-28
1 A 3 SSJ 2016-01-28
1 A 1 ABSS 2016-01-28
2 B 2 STHHG 2016-02-02
2 B 7 UUJKS 2016-02-10
2 B 8 AKKDC 2016-02-10
2 B 9 GGSK 2016-02-10
2 B 9 GGSK 2016-02-11
2 B 7 UUJKS 2016-02-27
And I want the results to be:
custid custname month count
------------------------------
1 A 1 1
2 B 1 1
2 B 2 4
3 C 1 1
6 D 1 1
According to the following rules:
All channel views subscription is billed every 15 days. If the
customer viewed the same channel within the 15 days, he will only be
billed once for that channel. For instance, custid 2, custname B his billing cycle is 19 Jan - 3 Feb (one billing cycle), 4 Feb - 20 Feb (one billing cycle) and so on. Therefore, he is billed only 1 time in Jan since he watch the same channel throughout the billing cycle; and he is billed 4 times in Feb for watching (channelid 7, 8, 9) and channelid 7 watched on 27 Feb (since this falls in another billing cycle, customer B is also charged here). Customer B is not charged on 2 Feb for watching channel 2 since he was already billed in 19 jan - 3 Feb billing cycle.
An invoice is generated every month for each customer, therefore, the
results should show the 'Month' and the 'Count' of the channels
viewed for each customer.
Can this be done in SQL server?
;WITH cte AS (
SELECT custid,
custname,
channelid,
channel,
dateViewed,
CAST(DATEADD(day,15,dateViewed) as date) as dateEnd,
ROW_NUMBER() OVER (PARTITION BY custid, channelid ORDER BY dateViewed) AS rn
FROM (VALUES
(1, 'A', 1, 'ABSS', '2016-01-09'),(2, 'B', 2, 'STHHG', '2016-01-19'),
(3, 'C', 4, 'XGGTS', '2016-01-09'),(6, 'D', 4, 'XGGTS', '2016-01-09'),
(2, 'B', 2, 'STHHG', '2016-01-26'),(2, 'B', 2, 'STHHG', '2016-01-28'),
(1, 'A', 3, 'SSJ', '2016-01-28'),(1, 'A', 1, 'ABSS', '2016-01-28'),
(2, 'B', 2, 'STHHG', '2016-02-02'),(2, 'B', 7, 'UUJKS', '2016-02-10'),
(2, 'B', 8, 'AKKDC', '2016-02-10'),(2, 'B', 9, 'GGSK', '2016-02-10'),
(2, 'B', 9, 'GGSK', '2016-02-11'),(2, 'B', 7, 'UUJKS', '2016-02-27')
) as t(custid, custname, channelid, channel, dateViewed)
), res AS (
SELECT custid, channelid, dateViewed, dateEnd, 1 as Lev
FROM cte
WHERE rn = 1
UNION ALL
SELECT c.custid, c.channelid, c.dateViewed, c.dateEnd, lev + 1
FROM res r
INNER JOIN cte c ON c.dateViewed > r.dateEnd and c.custid = r.custid and c.channelid = r.channelid
), final AS (
SELECT * ,
ROW_NUMBER() OVER (PARTITION BY custid, channelid, lev ORDER BY dateViewed) rn,
DENSE_RANK() OVER (ORDER BY custid, channelid, dateEnd) dr
FROM res
)
SELECT b.custid,
b.custname,
MONTH(f.dateViewed) as [month],
COUNT(distinct dr) as [count]
FROM cte b
LEFT JOIN final f
ON b.channelid = f.channelid and b.custid = f.custid and b.dateViewed between f.dateViewed and f.dateEnd
WHERE f.rn = 1
GROUP BY b.custid,
b.custname,
MONTH(f.dateViewed)
Output:
custid custname month count
----------- -------- ----------- -----------
1 A 1 3
2 B 1 1
2 B 2 4
3 C 1 1
6 D 1 1
(5 row(s) affected)
I don't know why you get 1 in count field for customer A. He got:
ABSS 2016-01-09 +1 to count (+15 days = 2016-01-24)
SSJ 2016-01-28 +1 to count
ABSS 2016-01-28 +1 to count (28-01 > 24.01)
So in January there must be count = 3.
Whenever I am trying to count things with complex criteria, I use a sum and case statement. Something like below:
SELECT custid, custname,
SUM(CASE WHEN somecriteria
THEN 1
ELSE 0
END) As CriteriaCount
FROM whateverTable
GROUP BY custid, custname
You can make that somecriteria variable as complicated a statement as you like, so long as it returns a boolean. If it passes, this row returns a 1. If it fails, the row reutrns a 0, then we sum up the values returned to get the count.
Generally this is how you can get any number (10 in this example) of fixed 15 day intervals starting at the given date (#dd in this example).
DECLARE #dd date = CAST('2016-01-19 17:30' AS DATE);
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1),
E2(N) AS (SELECT 1 FROM E1 a, E1 b),
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10,000 rows max
tally(N) AS (SELECT TOP (10) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4)
SELECT
startd = DATEADD(D,(N-1)*15, #dd),
endd = DATEADD(D, N*15-1, #dd)
FROM tally
Adapt it to the rules defining how start date must be calculated for the user (and probably chanel).
#Sturgus what if I want to define it in the code? Any other
alternatives besides defining it in the table? How to write a query
that can be run every month to generate the monthly invoice. –
saturday 15 mins ago
Well, one way or another, you will have to save each customer's billing start date (minimally). If you want to do this entirely in SQL without 'editing the database', something like the following should work. The drawback to this approach is that you would need to manually edit the "INSERT INTO" statement every month to suit your needs. If you were allowed to edit the already existing customers table or create a new one, then it would reduce this manual effort.
DECLARE #CustomerBillingPeriodsTVP AS Table(
custID int UNIQUE,
BillingCycleID int,
BillingStartDate Date,
BillingEndDate Date
);
INSERT INTO #CustomerBillingPeriodsTVP (custID, BillingCycleID, BillingStartDate, BillingEndDate) VALUES
(1, 1, '2016-01-03', '2016-01-18'), (2, 1, '2016-01-18', '2016-02-03'), (3, 1, '2016-01-15', '2016-01-30'), (6, 1, '2016-01-14', '2016-01-29');
SELECT A.custid, A.custname, B.BillingCycleID AS [month], COUNT(DISTINCT A.channelid) AS [count]
FROM dbo.tblCustomerChannelViews AS A INNER JOIN #CustomerBillingPeriodsTVP AS B ON A.custid = B.CustID
GROUP BY A.custid, A.custname, B.BillingCycleID;
GO
Where are you getting your customers' billing start dates as it is?
I'm not sure how this solution will scale - but with some good index candidates and decent data housekeeping, it'll work..
You're going to need some extra info for starters, and to normalize your data. You will need to know the first charging period start date for each customer. So store that in a customer table.
Here are the tables I used:
create table #channelViews
(
custId int, channelId int, viewDate datetime
)
create table #channel
(
channelId int, channelName varchar(max)
)
create table #customer
(
custId int, custname varchar(max), chargingStartDate datetime
)
I'll populate some data. I won't get the same results as your sample output, because I don't have the appropriate start dates for each customer. Customer 2 will be OK though.
insert into #channel (channelId, channelName)
select 1, 'ABSS'
union select 2, 'STHHG'
union select 4, 'XGGTS'
union select 3, 'SSJ'
union select 7, 'UUJKS'
union select 8, 'AKKDC'
union select 9, 'GGSK'
insert into #customer (custId, custname, chargingStartDate)
select 1, 'A', '4 Jan 2016'
union select 2, 'B', '19 Jan 2016'
union select 3, 'C', '5 Jan 2016'
union select 6, 'D', '5 Jan 2016'
insert into #channelViews (custId, channelId, viewDate)
select 1,1,'2016-01-09'
union select 2,2,'2016-01-19'
union select 3,4,'2016-01-09'
union select 6,4,'2016-01-09'
union select 2,2,'2016-01-26'
union select 2,2,'2016-01-28'
union select 1,3,'2016-01-28'
union select 1,1,'2016-01-28'
union select 2,2,'2016-02-02'
union select 2,7,'2016-02-10'
union select 2,8,'2016-02-10'
union select 2,9,'2016-02-10'
union select 2,9,'2016-02-11'
union select 2,7,'2016-02-27'
And here is the somewhat unweildy query, in a single statement.
The two underlying sub-queries are actually the same data, so there may be more appropriate / efficient ways to generate these.
We need to exclude from billing any channel charged in the same charging period C for the previous Month. This is the essence of the join. I used a right-join so that I could exclude all such matches from the results (using old.custId is null).
select c.custId, c.[custname], [month], count(*) [count] from
(
select new.custId, new.channelId, new.month, new.chargingPeriod
from
(
select distinct cv.custId, cv.channelId, month(viewdate) [month], (convert(int, cv.viewDate) - convert(int, c.chargingStartDate))/15 chargingPeriod
from #channelViews cv join #customer c on cv.custId = c.custId
) old
right join
(
select distinct cv.custId, cv.channelId, month(viewdate) [month], (convert(int, cv.viewDate) - convert(int, c.chargingStartDate))/15 chargingPeriod
from #channelViews cv join #customer c on cv.custId = c.custId
) new
on old.custId = new.custId
and old.channelId = new.channelId
and old.month = new.Month -1
and old.chargingPeriod = new.chargingPeriod
where old.custId is null
group by new.custId, new.month, new.chargingPeriod, new.channelId
) filteredResults
join #customer c on c.custId = filteredResults.custId
group by c.custId, [month], c.custname
order by c.custId, [month], c.custname
And finally my results:
custId custname month count
1 A 1 3
2 B 1 1
2 B 2 4
3 C 1 1
6 D 1 1
This query does the same thing:
select c.custId, c.custname, [month], count(*) from
(
select cv.custId, min(month(viewdate)) [month], cv.channelId
from #channelViews cv join #customer c on cv.custId = c.custId
group by cv.custId, cv.channelId, (convert(int, cv.viewDate) - convert(int, c.chargingStartDate))/15
) x
join #customer c
on c.custId = x.custId
group by c.custId, c.custname, x.[month]
order by custId, [month]
Please help me to generate the following query. Say I have customer table and order table.
Customer Table
CustID CustName
1 AA
2 BB
3 CC
4 DD
Order Table
OrderID OrderDate CustID
100 01-JAN-2000 1
101 05-FEB-2000 1
102 10-MAR-2000 1
103 01-NOV-2000 2
104 05-APR-2001 2
105 07-MAR-2002 2
106 01-JUL-2003 1
107 01-SEP-2004 4
108 01-APR-2005 4
109 01-MAY-2006 3
110 05-MAY-2007 1
111 07-JUN-2007 1
112 06-JUL-2007 1
I want to find out the customers who have made orders on three successive months. (Query using SQL server 2005 and 2008 is allowed).
The desired output is:
CustName Year OrderDate
AA 2000 01-JAN-2000
AA 2000 05-FEB-2000
AA 2000 10-MAR-2000
AA 2007 05-MAY-2007
AA 2007 07-JUN-2007
AA 2007 06-JUL-2007
Edit: Got rid or the MAX() OVER (PARTITION BY ...) as that seemed to kill performance.
;WITH cte AS (
SELECT CustID ,
OrderDate,
DATEPART(YEAR, OrderDate)*12 + DATEPART(MONTH, OrderDate) AS YM
FROM Orders
),
cte1 AS (
SELECT CustID ,
OrderDate,
YM,
YM - DENSE_RANK() OVER (PARTITION BY CustID ORDER BY YM) AS G
FROM cte
),
cte2 As
(
SELECT CustID ,
MIN(OrderDate) AS Mn,
MAX(OrderDate) AS Mx
FROM cte1
GROUP BY CustID, G
HAVING MAX(YM)-MIN(YM) >=2
)
SELECT c.CustName, o.OrderDate, YEAR(o.OrderDate) AS YEAR
FROM Customers AS c INNER JOIN
Orders AS o ON c.CustID = o.CustID
INNER JOIN cte2 c2 ON c2.CustID = o.CustID and o.OrderDate between Mn and Mx
order by c.CustName, o.OrderDate
Here is my version. I really was presenting this as a mere curiosity, to show another way of thinking about the problem. It turned out to be more useful than that because it performed better than even Martin Smith's cool "grouped islands" solution. Though, once he got rid of some overly expensive aggregate windowing functions and did real aggregates instead, his query started kicking butt.
Solution 1: Runs of 3 months or more, done by checking 1 month ahead and behind and using a semi-join against that.
WITH Months AS (
SELECT DISTINCT
O.CustID,
Grp = DateDiff(Month, '20000101', O.OrderDate)
FROM
CustOrder O
), Anchors AS (
SELECT
M.CustID,
Ind = M.Grp + X.Offset
FROM
Months M
CROSS JOIN (
SELECT -1 UNION ALL SELECT 0 UNION ALL SELECT 1
) X (Offset)
GROUP BY
M.CustID,
M.Grp + X.Offset
HAVING
Count(*) = 3
)
SELECT
C.CustName,
[Year] = Year(OrderDate),
O.OrderDate
FROM
Cust C
INNER JOIN CustOrder O ON C.CustID = O.CustID
WHERE
EXISTS (
SELECT 1
FROM
Anchors A
WHERE
O.CustID = A.CustID
AND O.OrderDate >= DateAdd(Month, A.Ind, '19991201')
AND O.OrderDate < DateAdd(Month, A.Ind, '20000301')
)
ORDER BY
C.CustName,
OrderDate;
Solution 2: Exact 3-month patterns. If it is a 4-month or greater run, the values are excluded. This is done by checking 2 months ahead and two months behind (essentially looking for the pattern N, Y, Y, Y, N).
WITH Months AS (
SELECT DISTINCT
O.CustID,
Grp = DateDiff(Month, '20000101', O.OrderDate)
FROM
CustOrder O
), Anchors AS (
SELECT
M.CustID,
Ind = M.Grp + X.Offset
FROM
Months M
CROSS JOIN (
SELECT -2 UNION ALL SELECT -1 UNION ALL SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 2
) X (Offset)
GROUP BY
M.CustID,
M.Grp + X.Offset
HAVING
Count(*) = 3
AND Min(X.Offset) = -1
AND Max(X.Offset) = 1
)
SELECT
C.CustName,
[Year] = Year(OrderDate),
O.OrderDate
FROM
Cust C
INNER JOIN CustOrder O ON C.CustID = O.CustID
INNER JOIN Anchors A
ON O.CustID = A.CustID
AND O.OrderDate >= DateAdd(Month, A.Ind, '19991201')
AND O.OrderDate < DateAdd(Month, A.Ind, '20000301')
ORDER BY
C.CustName,
OrderDate;
Here's my table-loading script if anyone else wants to play:
IF Object_ID('CustOrder', 'U') IS NOT NULL DROP TABLE CustOrder
IF Object_ID('Cust', 'U') IS NOT NULL DROP TABLE Cust
GO
SET NOCOUNT ON
CREATE TABLE Cust (
CustID int identity(1,1) NOT NULL PRIMARY KEY CLUSTERED,
CustName varchar(100) UNIQUE
)
CREATE TABLE CustOrder (
OrderID int identity(100, 1) NOT NULL PRIMARY KEY CLUSTERED,
CustID int NOT NULL FOREIGN KEY REFERENCES Cust (CustID),
OrderDate smalldatetime NOT NULL
)
DECLARE #i int
SET #i = 1000
WHILE #i > 0 BEGIN
WITH N AS (
SELECT
Nm =
Char(Abs(Checksum(NewID())) % 26 + 65)
+ Char(Abs(Checksum(NewID())) % 26 + 97)
+ Char(Abs(Checksum(NewID())) % 26 + 97)
+ Char(Abs(Checksum(NewID())) % 26 + 97)
+ Char(Abs(Checksum(NewID())) % 26 + 97)
+ Char(Abs(Checksum(NewID())) % 26 + 97)
)
INSERT Cust
SELECT N.Nm
FROM N
WHERE NOT EXISTS (
SELECT 1
FROM Cust C
WHERE
N.Nm = C.CustName
)
SET #i = #i - ##RowCount
END
WHILE #i < 50000 BEGIN
INSERT CustOrder
SELECT TOP (50000 - #i)
Abs(Checksum(NewID())) % 1000 + 1,
DateAdd(Day, Abs(Checksum(NewID())) % 10000, '19900101')
FROM master.dbo.spt_values
SET #i = #i + ##RowCount
END
Performance
Here are some performance testing results for the 3-month-or-more queries:
Query CPU Reads Duration
Martin 1 2297 299412 2348
Martin 2 625 285 809
Denis 3641 401 3855
Erik 1855 94727 2077
This is only one run of each, but the numbers are fairly representative. It turns out that your query wasn't so badly-performing, Denis, after all. Martin's query beats the others hands down, but at first was using some overly-expensive windowing functions strategies that he fixed.
Of course, as I noted, Denis's query isn't pulling the right rows when a customer has two orders on the same day, so his query is out of contention unless he fixed is.
Also, different indexes could possibly shake things up. I don't know.
Here you go:
select distinct
CustName
,year(OrderDate) [Year]
,OrderDate
from
(
select
o2.OrderDate [prev]
,o1.OrderDate [curr]
,o3.OrderDate [next]
,c.CustName
from [order] o1
join [order] o2 on o1.CustId = o2.CustId and datediff(mm, o2.OrderDate, o1.OrderDate) = 1
join [order] o3 on o1.CustId = o3.CustId and o2.OrderId <> o3.OrderId and datediff(mm, o3.OrderDate, o1.OrderDate) = -1
join Customer c on c.CustId = o1.CustId
) t
unpivot
(
OrderDate for [DateName] in ([prev], [curr], [next])
)
unpvt
order by CustName, OrderDate
Here is my take.
select 100 as OrderID,convert(datetime,'01-JAN-2000') OrderDate, 1 as CustID into #tmp union
select 101,convert(datetime,'05-FEB-2000'), 1 union
select 102,convert(datetime,'10-MAR-2000'), 1 union
select 103,convert(datetime,'01-NOV-2000'), 2 union
select 104,convert(datetime,'05-APR-2001'), 2 union
select 105,convert(datetime,'07-MAR-2002'), 2 union
select 106,convert(datetime,'01-JUL-2003'), 1 union
select 107,convert(datetime,'01-SEP-2004'), 4 union
select 108,convert(datetime,'01-APR-2005'), 4 union
select 109,convert(datetime,'01-MAY-2006'), 3 union
select 110,convert(datetime,'05-MAY-2007'), 1 union
select 111,convert(datetime,'07-JUN-2007'), 1 union
select 112,convert(datetime,'06-JUL-2007'), 1
;with cte as
(
select
*
,convert(int,convert(char(6),orderdate,112)) - dense_rank() over(partition by custid order by orderdate) as g
from #tmp
),
cte2 as
(
select
CustID
,g
from cte a
group by CustID, g
having count(g)>=3
)
select
a.CustID
,Yr=Year(OrderDate)
,OrderDate
from cte2 a join cte b
on a.CustID=b.CustID and a.g=b.g
Hopefully I can explain this correctly. I have a table of line orders (each line order consists of quantity of item and the price, there are other fields but I left those out.)
table 'orderitems':
orderid | quantity | price
1 | 1 | 1.5000
1 | 2 | 3.22
2 | 1 | 9.99
3 | 4 | 0.44
3 | 2 | 15.99
So to get order total I would run
SELECT SUM(Quantity * price) AS total
FROM OrderItems
GROUP BY OrderID
However, I would like to get a count of all total orders under $1 (just provide a count).
My end result I would like would be able to define ranges:
under $1, $1 - $3, 3-5, 5-10, 10-15, 15.. etc;
and my data to look like so (hopefully):
tunder1 | t1to3 | t3to5 | t5to10 | etc
10 | 500 | 123 | 5633 |
So that I can present a piechart breakdown of customer orders on our eCommerce site.
Now I can run individual SQL queries to get this, but I would like to know what the most efficient 'single sql query' would be. I am using MS SQL Server.
Currently I can run a single query like so to get under $1 total:
SELECT COUNT(total) AS tunder1
FROM (SELECT SUM(Quantity * price) AS total
FROM OrderItems
GROUP BY OrderID) AS a
WHERE (total < 1)
How can I optimize this? Thanks in advance!
select
count(case when total < 1 then 1 end) tunder1,
count(case when total >= 1 and total < 3 then 1 end) t1to3,
count(case when total >= 3 and total < 5 then 1 end) t3to5,
...
from
(
select sum(quantity * price) as total
from orderitems group by orderid
);
you need to use HAVING for filtering grouped values.
try this:
DECLARE #YourTable table (OrderID int, Quantity int, Price decimal)
INSERT INTO #YourTable VALUES (1,1,1.5000)
INSERT INTO #YourTable VALUES (1,2,3.22)
INSERT INTO #YourTable VALUES (2,1,9.99)
INSERT INTO #YourTable VALUES (3,4,0.44)
INSERT INTO #YourTable VALUES (3,2,15.99)
SELECT
SUM(CASE WHEN TotalCost<1 THEN 1 ELSE 0 END) AS tunder1
,SUM(CASE WHEN TotalCost>=1 AND TotalCost<3 THEN 1 ELSE 0 END) AS t1to3
,SUM(CASE WHEN TotalCost>=3 AND TotalCost<5 THEN 1 ELSE 0 END) AS t3to5
,SUM(CASE WHEN TotalCost>=5 THEN 1 ELSE 0 END) AS t5andup
FROM (SELECT
SUM(quantity * price) AS TotalCost
FROM #YourTable
GROUP BY OrderID
) dt
OUTPUT:
tunder1 t1to3 t3to5 t5andup
----------- ----------- ----------- -----------
0 0 0 3
(1 row(s) affected)
WITH orders (orderid, quantity, price) AS
(
SELECT 1, 1, 1.5
UNION ALL
SELECT 1, 2, 3.22
UNION ALL
SELECT 2, 1, 9.99
UNION ALL
SELECT 3, 4, 0.44
UNION ALL
SELECT 4, 2, 15.99
),
ranges (bound) AS
(
SELECT 1
UNION ALL
SELECT 3
UNION ALL
SELECT 5
UNION ALL
SELECT 10
UNION ALL
SELECT 15
),
rr AS
(
SELECT bound, ROW_NUMBER() OVER (ORDER BY bound) AS rn
FROM ranges
),
r AS
(
SELECT COALESCE(rf.rn, 0) AS rn, COALESCE(rf.bound, 0) AS f,
rt.bound AS t
FROM rr rf
FULL JOIN
rr rt
ON rt.rn = rf.rn + 1
)
SELECT rn, f, t, COUNT(*) AS cnt
FROM r
JOIN (
SELECT SUM(quantity * price) AS total
FROM orders
GROUP BY
orderid
) o
ON total >= f
AND total < COALESCE(t, 10000000)
GROUP BY
rn, t, f
Output:
rn f t cnt
1 1 3 1
3 5 10 2
5 15 NULL 1
, that is 1 order from $1 to $3, 2 orders from $5 to $10, 1 order more than $15.