SQL Query help needed - Multiple rows in 1st table should match to multiple table in 2nd table - sql

Problem Illustration
I am trying to find that magical query to generate summary information. I have mapped my problem into fictitious illustration. I have 'WaterLeakage%' table which records leakage occurred in hotel rooms over several year.
I have another table which records WaterConsumption in liters for each table.
Now i have to find actual water leakage in liters for given room number over given date range.
Basically i have to group several rows in 'WaterLeakage%' table to several rows in 'WaterConsumption' table. I am trying to figure out magical efficient query to find this. Unable to find it, please help.

DECLARE #START_DATE_PARAM DATE = '01/10/2017';
DECLARE #END_DATE_PARAM DATE = '01/31/2017';
DECLARE #ROOM_NUMBER INT = 101;
IF (EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '#WATER_CONSUMPTION'))
DROP TABLE #WATER_CONSUMPTION;
IF (EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '#WATER_LEAKAGE_PER'))
DROP TABLE #WATER_LEAKAGE_PER;
--Table for daily daily water consumption per room
CREATE TABLE #WATER_CONSUMPTION(
ROOM_NUMBER INT,
UDAY DATE,
WATER_CONSUMPTION_LITER INT
)
--Table for water leakage percent per room for date range
CREATE TABLE #WATER_LEAKAGE_PER
(
ROOM_NUMBER INT,
START_DATE DATE,
END_DATE DATE,
WATER_LEAKAGE_PERCENT INT
)
-- Raw Data
INSERT INTO #WATER_LEAKAGE_PER(ROOM_NUMBER,START_DATE,END_DATE,WATER_LEAKAGE_PERCENT)
VALUES(101,'2017/01/01','2017/01/02',5),
(102,'2017/01/01','2017/01/05',10),
(101,'2017/01/04','2017/02/06',10);
-- Raw Data
INSERT INTO #WATER_CONSUMPTION
VALUES(101,'2017/01/01',100),
(101,'2017/01/02',100),
(101,'2017/01/03',100),
(101,'2017/01/04',100),
(101,'2017/01/05',100),
(101,'2017/01/06',100),
(102,'2017/01/01',100),
(102,'2017/01/02',100),
(102,'2017/01/03',100),
(102,'2017/01/04',100),
(102,'2017/01/05',100);
DECLARE #TotalLeak REAL = 0;
SELECT * FROM #WATER_CONSUMPTION;
SELECT * FROM #WATER_LEAKAGE_PER;
SELECT * FROM #WATER_CONSUMPTION T1 JOIN (SELECT * FROM #WATER_LEAKAGE_PER WHERE ROOM_NUMBER=#ROOM_NUMBER) T2
ON (T1.ROOM_NUMBER=T2.ROOM_NUMBER AND T1.UDAY >= T2.START_DATE AND T1.UDAY <= T2.END_DATE);
DROP TABLE #WATER_CONSUMPTION;
DROP TABLE #WATER_LEAKAGE_PER;
I am very close to solution now. Basically i changed my thinking. I will join reverse now.

BEGIN
--Input Parameters for calculating water wastage between date range
DECLARE #START_DATE_PARAM DATE = '01/10/2017';
DECLARE #END_DATE_PARAM DATE = '01/31/2017';
--Table for daily daily water consumption per room
CREATE TABLE #WATER_CONSUMPTION(
ROOM_NUMBER INT,
UDAY DATE,
WATER_CONSUMPTION_LITER INT
)
--Table for water leakage percent per room for date range
CREATE TABLE #WATER_LEAKAGE_PER
(
ROOM_NUMBER INT,
START_DATE DATE,
END_DATE DATE,
WATER_LEAKAGE_PERCENT INT,
LEAKAGE_PER_DAY_IN_LITER INT
)
-- Leakage in liter per room for each day, This will have multiple entries for room and date if room number and date is available in multiple date ranges, ex. in #WATER_CONSUMPTION table for room number 101 we have multiple entries with overlapping dates
CREATE TABLE #DAY_WISE_LEAKAGE
(
ROOM_NUMBER INT,
LDATE DATE,
LEAKAGE_IN_LITER INT
)
-- Raw Data
INSERT INTO #WATER_LEAKAGE_PER(ROOM_NUMBER,START_DATE,END_DATE,WATER_LEAKAGE_PERCENT)
VALUES(101,'2017/01/15','2017/01/18',30),
(102,'2017/01/15','2017/01/18',10),
(101,'2017/01/15','2017/02/13',5);
-- Raw Data
INSERT INTO #WATER_CONSUMPTION
VALUES(101,'01/01/2017',1001),
(101,'01/02/2017',1001),
(101,'01/03/2017',1001),
(101,'01/04/2017',1001),
(101,'01/05/2017',1001),
(101,'01/06/2017',1001),
(101,'01/07/2017',1001),
(101,'01/08/2017',1001),
(101,'01/09/2017',1001),
(101,'01/10/2017',1001),
(101,'01/11/2017',1001),
(101,'01/12/2017',1001),
(101,'01/13/2017',1001),
(101,'01/14/2017',1001),
(101,'01/15/2017',1001),
(101,'01/16/2017',1001),
(101,'01/17/2017',1001),
(101,'01/18/2017',1001),
(101,'01/19/2017',1001),
(101,'01/20/2017',1001),
(101,'01/21/2017',1001),
(101,'01/22/2017',1001),
(101,'01/23/2017',1001),
(101,'01/24/2017',1001),
(101,'01/25/2017',1001),
(101,'01/26/2017',1001),
(101,'01/27/2017',1001),
(101,'01/28/2017',1001),
(101,'01/29/2017',1001),
(101,'01/30/2017',1001),
(101,'01/31/2017',1001);
DECLARE #ROOM_NUMBER INT
DECLARE #START_DATE DATE
DECLARE #END_DATE DATE
DECLARE #WATER_LEAKAGE_PERCENT INT
-- cursor for calculating water wastage pre date range per day available in #WATER_LEAKAGE_PER table
DECLARE WATER_LEAKAGE_PER_CURSOR CURSOR FOR
SELECT ROOM_NUMBER,START_DATE,END_DATE,WATER_LEAKAGE_PERCENT FROM #WATER_LEAKAGE_PER
OPEN WATER_LEAKAGE_PER_CURSOR
FETCH NEXT FROM WATER_LEAKAGE_PER_CURSOR
INTO #ROOM_NUMBER, #START_DATE ,#END_DATE, #WATER_LEAKAGE_PERCENT
WHILE ##FETCH_STATUS = 0
BEGIN
DECLARE #TOTAL_WATER_USED_FOR_DATE_RANGE INT=0;
DECLARE #NUMBER_OF_DAYS INT=0;
DECLARE #LEAKAGE_PER_DAY_IN_LITER INT=0;
-- Total Liters of water used for 1 date range
SELECT #TOTAL_WATER_USED_FOR_DATE_RANGE =SUM(WATER_CONSUMPTION_LITER),#NUMBER_OF_DAYS=COUNT(1) FROM #WATER_CONSUMPTION WHERE ROOM_NUMBER=#ROOM_NUMBER AND UDAY BETWEEN #START_DATE AND #END_DATE;
-- Liters of water leakage per day for selevted date range in cursor
SELECT #LEAKAGE_PER_DAY_IN_LITER=((#TOTAL_WATER_USED_FOR_DATE_RANGE*#WATER_LEAKAGE_PERCENT)/100)/#NUMBER_OF_DAYS;
UPDATE #WATER_LEAKAGE_PER SET LEAKAGE_PER_DAY_IN_LITER = #LEAKAGE_PER_DAY_IN_LITER WHERE ROOM_NUMBER=#ROOM_NUMBER AND START_DATE = #START_DATE AND END_DATE=#END_DATE AND WATER_LEAKAGE_PERCENT=#WATER_LEAKAGE_PERCENT;
-- generate dates and water leakage, this will be used for actual calculation of water leakage in date range.
;WITH n AS
(
SELECT TOP (DATEDIFF(DAY, #START_DATE, #END_DATE) + 1)
n = ROW_NUMBER() OVER (ORDER BY [object_id])
FROM sys.all_objects
)
INSERT INTO #DAY_WISE_LEAKAGE SELECT #ROOM_NUMBER, DATEADD(DAY, n-1, #START_DATE),#LEAKAGE_PER_DAY_IN_LITER
FROM n;
FETCH NEXT FROM WATER_LEAKAGE_PER_CURSOR
INTO #ROOM_NUMBER, #START_DATE ,#END_DATE, #WATER_LEAKAGE_PERCENT
END
CLOSE WATER_LEAKAGE_PER_CURSOR;
DEALLOCATE WATER_LEAKAGE_PER_CURSOR;
-- Average of Liters of water leakage per Room number.
SELECT ROOM_NUMBER,SUM(LEAKAGE_IN_LITER) FROM #DAY_WISE_LEAKAGE WHERE LDATE BETWEEN #START_DATE_PARAM AND #END_DATE_PARAM GROUP BY ROOM_NUMBER;
DROP TABLE #WATER_CONSUMPTION;
DROP TABLE #WATER_LEAKAGE_PER;
DROP TABLE #DAY_WISE_LEAKAGE
END

Related

Running total by date/ID based on latest change to value SQL

I have a unique case where I want to calculate the running total of quantities day over day. I have been searching a lot but couldn't find the right answer. Code-wise, there is nothing much I can share as it refers to a lot of sensitive data
Below is the table of dummy data:
As you can see, there are multiple duplicate IDs by date. I want to be able to calculate the running total of a date as follows:
For 2022/03/24, the running total would be 9+33 = 42, on 2022/03/26 the running total should be 9+31 = 40. Essentially, the running total for any given day should pick the last value by ID if it changed or the value that exists. In this case on 2022/03/26 for that date, for ID 2072, we pick 31 and not 33 because that's the latest value available.
Expected Output:
There maybe be many days spanning across and the running total needs to be day over day.
Possible related question: SQL Server running total based on change of state of a column
PS: For context, ID is just a unique identifier for an inventory of items. Each item's quantity changes day by day. In this example, ID 1's inventoyr last changed on 2022/03/24 where as ID 2072's changed multiple times. Running total for 2022/03/24 would be quantities of inventory items on that day. On 26th there are no changes for ID 1 but ID 2072 changed, the inventory pool should reflect the total as current inventory size of ID 2072+ current size of ID 1. On 26th, again ID 1 did not have any change, but ID 2072 changed. Therefore inventory size = current size of ID 2072 + current size of ID 1, in this case, 40. Essentially, it is just a current size of inventory with day over day change.
Any help would be really appreciated! Thanks.
I added a few more rows just in case if this is what you really wanted.
I used T-SQL.
declare #orig table(
id int,
quantity int,
rundate date
)
insert into #orig
values (1,9,'20220324'),(2072,33,'20220324'),(2072,31,'20220326'),(2072,31,'20220327'),
(2,10,'20220301'),(2,20,'20220325'),(2,30,'20220327')
declare #dates table (
runningdate date
)
insert into #dates
select distinct rundate from #orig
order by rundate
declare #result table (
dates date,
running_quality int
)
DECLARE #mydate date
DECLARE #sum int
-- CURSOR definition
DECLARE my_cursor CURSOR FOR
SELECT * FROM #dates
OPEN my_cursor
-- Perform the first fetch
FETCH NEXT FROM my_cursor into #mydate
-- Check ##FETCH_STATUS to see if there are any more rows to fetch
WHILE ##FETCH_STATUS = 0
BEGIN
;with cte as (
select * from #orig
where rundate <= #mydate
), cte2 as (
select id, max(rundate) as maxrundate
from cte
group by id
), cte3 as (
select a.*
from cte as a join cte2 as b
on a.id = b.id and a.rundate = b.maxrundate
)
select #sum = sum(quantity)
from cte3
insert into #result
select #mydate, #sum
-- This is executed as long as the previous fetch succeeds
FETCH NEXT FROM my_cursor into #mydate
END -- cursor
CLOSE my_cursor
DEALLOCATE my_cursor
select * from #result
Result:
dates running_quality
2022-03-01 10
2022-03-24 52
2022-03-25 62
2022-03-26 60
2022-03-27 70

Sum of two table values per time and find minimum of sum result in SQL-Server

I have two database.
in database one I have a table (507.000 record for one day data)
-- insert data from database_1
DROP TABLE IF EXISTS #AccountBalance;
CREATE TABLE #AccountBalance
( AccountNumber VARCHAR(20),AccountBalance MONEY,TranTime DATETIME);
Sample data is AccountBalance:
BankAccountNumber AccountBalance transactiontime
01003930510 42006.00 2021-03-20
45033323462 4682.00 2021-03-20
23035469562 3388.00 2021-03-20
23005168662 617.00 2021-03-20
01004829050 44640.00 2021-03-20
Sample data for TransactionCards
BankAccountNumber Balance TransactionTime
45033323462 245428.00 2021-03-21 00:06:47.000
23038201062 140983.00 2021-03-21 00:06:49.000
45019249962 60416.00 2021-03-21 00:07:46.000
45004876662 588154.00 2021-03-21 00:10:46.000
45004876662 627867.00 2021-03-22 00:17:44.000
in database two I have a table with 18 millions records
Aim : find one value and call that Minimum Value Of Sum(balance) per record
I trreid:
0. Insert data in one database with temporary table and linked-Server.
Write cursor like below (fetch BankAccountNumber from CardTransaction and if not exists same BankAccountNumber Insert into #Account balance ,If exists update balance )
calculate sum of all SUM(AccountBalance) per fetch(record or per time) and insert result into #Result (business is clear in cursor)
Select Min(AccountBalance ) From #Result
-- Create tables for calculate
DROP TABLE IF EXISTS #AccountBalance;
CREATE TABLE #AccountBalance
( BankAccountNumber VARCHAR(20),AccountBalance MONEY,TranTime DATETIME); -- I inserted 507.000 row record data in this table
DROP TABLE IF EXISTS #Result
CREATE TABLE #Result (SumOfBalance MONEY, BankAccountNumber VARCHAR(20), TranTime DATETIME)
-- variable for cursor procces
DECLARE #BankAccountNumber VARCHAR(20);
DECLARE #TransactionBalance MONEY;
DECLARE #TranTime DATETIME;
DECLARE #OldBankAccountNumber VARCHAR(20);
DECLARE #OldAccountBalance MONEY;
DECLARE #OldTranTime DATETIME = '2021-03-20';
-- start cursor
DECLARE CR CURSOR FOR
SELECT rt.BankAccountNumber,rt.Balance,rt.TransactionTime
FROM RawData.dbo.CardTransaction rt;
PRINT '-------Sum of all AccountBalance Report per time------';
OPEN CR;
FETCH NEXT FROM CR
INTO #BankAccountNumber,
#TransactionBalance,
#TranTime;
-- insert sum of account balanace into result table
INSERT INTO #Result (SumOfBalance,BankAccountNumber,TranTime)
SELECT SUM(AccountBalance),#BankAccountNumber,#TranTime FROM #AccountBalance
WHILE ##FETCH_STATUS = 0 AND dbo.DoContinue() = 1
BEGIN
SELECT BankAccountNumber = #OldBankAccountNumber , AccountBalance = #OldAccountBalance FROM dbo.AccountBalance WHERE BankAccountNumber = #BankAccountNumber
IF #OldBankAccountNumber=#BankAccountNumber -- if exists record in account balance
BEGIN
-- update account balance with new balance
UPDATE #AccountBalance
SET AccountBalance = #TransactionBalance
WHERE BankAccountNumber = #BankAccountNumber
-- insert new sum of account balanace into result table
INSERT INTO #Result (SumOfBalance,BankAccountNumber,TranTime)
SELECT SUM(AccountBalance),#BankAccountNumber,#TranTime FROM #AccountBalance
END;
ELSE
BEGIN
--
INSERT INTO #AccountBalance (BankAccountNumber,AccountBalance,TranTime)
VALUES (#BankAccountNumber, #TransactionBalance, #TranTime);
-- insert new sum of account balanace into result table
INSERT INTO #Result (SumOfBalance,BankAccountNumber,TranTime)
SELECT SUM(AccountBalance),#BankAccountNumber,#TranTime FROM #AccountBalance
END;
PRINT #BankAccountNumber
FETCH NEXT FROM CR
INTO #BankAccountNumber,#TransactionBalance,#TranTime;
END;
CLOSE CR;
DEALLOCATE CR;
Problem : Very slowly work and I can't wait one day for run cursor . and I cant see result, but I guess not reliable value(I checked 2000 record)
What I need : I need fast and reliable solution
Expected table like below :
SumOfAccountBalance transactiontime
98,721 2021-03-21 10:01:00
339,464 2021-04-22 01:01:00
480,447 2021-04-23 01:01:00
540,863 2021-04-23 02:01:00
1,129,017 2021-04-23 03:01:00
1,168,730 2021-04-23 15:01:00
Final Expected :
MinCriticalPointAccountBalance transactiontime
98,721 2021-03-21 10:01:00

How to add an additional column to the result set returned by a SP without modifying the SP?

I have a Stored Procedure (SP), named myStoredProcedure, returning me such output based on startDate and endDate user-defined parameters:
PrimaryName SecondaryName Volume
A B 20
C D 30
A D 50
...
So, Volume represents the sum of all the cases between the dates defined.
In another SP, named mySecondStoredProcedure, I am using the first SP to get the result there. However, my problem is that I need an additional attribute in my output, which is year, I want to see year based volumes. Therefore, the output I would like to see is something like that
assume startDate: 2014, endDate: 2015:
PrimaryName SecondaryName Volume Year
A B 12 2014
C D 14 2014
A D 20 2014
A B 8 2015
C D 16 2015
A D 30 2015
...
I am not allowed to modify myStoredProcedure. Therefore I build a while loop in the second SP to receive it. My code is like:
declare #temp_table table
(
PrimaryGroup varchar(10),
SecondaryGroup varchar(10),
Volume int
)
while #startDate < #endDate
begin
insert into #temp_table
exec myStoredProcedure #startDate #endDate
set #startDate = DATEADD(YEAR,1,#startDate)
end
select * from #temp_table
This is giving me the result without the year column. I need a year column like I showed in my example output above. I could not find a way to add it. There is no primary key in the result set returned by myStoredProcedure. Also, SQL Server 2008 does not let me add a year column in #temp_table, saying that fields are not matching. How can I add the year column properly? Any help would be appreciated!
EDIT: When I add year column in the definition of #temp_table, the error I receive: Column name or number of supplied values does not match table definition.
You're close with the syntax you currently have, you'll just need to add the year to the temp table and supply it after calling the stored procedure. In addition, you will also need to specify the columns being inserted (a practice well worth getting in the habit of) as your procedure doesn't return the same number of columns.
declare #temp_table table
(
PrimaryGroup varchar(10),
SecondaryGroup varchar(10),
Volume int,
Year int
)
while #startDate < #endDate
begin
insert into #temp_table (PrimaryGroup, SecondaryGroup, Volume)
exec myStoredProcedure #startDate #endDate
Update #temp_table
Set Year = #StartDate
Where Year Is Null
set #startDate = DATEADD(YEAR,1,#startDate)
end
select * from #temp_table
Add a Year column to your temp table, and apply the structured insert
declare #temp_table table
(
PrimaryGroup varchar(10),
SecondaryGroup varchar(10),
Volume int,
Year int
)
while #startDate < #endDate
begin
insert into #temp_table (PrimaryName,SecondaryName,Volume)
exec myStoredProcedure #startDate #endDate
Update #temp_table set Year = #startDate where Year is Null
set #startDate = DATEADD(YEAR,1,#startDate)
end
select * from #temp
Create a second table variable that will hold the result:
declare #result_table table
(
Year int,
PrimaryGroup varchar(10),
SecondaryGroup varchar(10),
Volume int
)
Then in the while loop after fetching the result into #temp_table:
insert into #result_table
select <year>, PrimaryGroup, SecondaryGroup, Volume from #temp_table;
truncate #temp_table;

How to make a cursor faster

I have wrote this cursor for commission report. What happens is commission comes in one table, the records are another table. I match two based on certain critera (there is not exact match available). The problem is there are duplicates where records exist. When I match commission with the records table, it can result picking up these duplicates. Thus the rep gets paid more. On the other hand, there are duplicates in commission table also but those are valid beause they simple mean an account got paid for 2 months.
I wrote this query but it takes 5+ minutes to run. I have 50,000 records in records table and 100,000 in commission table. Is there any way I an improve this cursor?
/* just preparation of cursor, this is not time consuming */
CREATE TABLE #result
(
repid INT,
AccountNo VARCHAR(100),
supplier VARCHAR(15),
CompanyName VARCHAR(200),
StartDate DATETIME,
EndDate DATETIME,
Product VARCHAR(25),
commodity VARCHAR(25),
ContractEnd DATETIME,
EstUsage INT,
EnrollStatus VARCHAR(10),
EnrollDate DATETIME,
ActualEndDate DATETIME,
MeterStart DATETIME,
MeterEnd DATETIME,
ActualUsage INT
)
DECLARE #AccountNo VARCHAR(100)
DECLARE #supplier VARCHAR(10)
DECLARE #commodity VARCHAR(15)
DECLARE #meterstart DATETIME
DECLARE #meterEnd DATETIME
DECLARE #volume FLOAT
DECLARE #RepID INT
DECLARE #Month INT
DECLARE #Year INT
SET #repID = 80
SET #Month = 1
SET #year = 2012
/* the actual cursor */
DECLARE commission_cursor CURSOR FOR
SELECT AccountNo,
supplier,
commodity,
meterStart,
MeterEnd,
Volume
FROM commission
WHERE Datepart(m, PaymentDate) = #Month
AND Datepart(YYYY, PaymentDate) = #Year
OPEN commission_cursor
FETCH next FROM commission_cursor INTO #AccountNo, #supplier, #commodity, #MeterStart, #MeterEnd, #Volume;
WHILE ##fetch_status = 0
BEGIN
IF EXISTS (SELECT id
FROM Records
WHERE AccountNo = #AccountNo
AND supplier = #supplier
AND Commodity = #commodity
AND RepID = #repID)
INSERT INTO #result
SELECT TOP 1 RepID,
AccountNo,
Supplier,
CompanyName,
[Supplier Start Date],
[Supplier End Date],
Product,
Commodity,
[customer end date],
[Expected Usage],
EnrollStatus,
ActualStartDate,
ActualEndDate,
#meterstart,
#MeterEnd,
#volume
FROM Records
WHERE AccountNo = #AccountNo
AND supplier = #supplier
AND Commodity = #commodity
AND RepID = #repID
AND #MeterStart >= Dateadd(dd, -7, ActualStartDate)
AND #meterEnd <= Isnull(Dateadd(dd, 30, ActualEndDate), '2015-12-31')
FETCH next FROM commission_cursor INTO #AccountNo, #supplier, #commodity, #MeterStart, #MeterEnd, #Volume;
END
SELECT *
FROM #result
/* clean up */
CLOSE commission_cursor
DEALLOCATE commission_cursor
DROP TABLE #result
I have read answer to How to make a T-SQL Cursor faster?, for that what I get is rewrite this query in table form. But I do have another query which uses join and is lightening fast. The problem is, it can not differentiate between the dups in my records table.
Is there anything I can do to make is faster. This is primary question. If not, do you have any alternative way to do it.
I specifically need help with
Will using Views or store procedure help
I there a way I can use cache in Cursor to make it faster
Any other option in syntax
The very first option is to set the least resource intensive options for your cursor:
declare commission_cursor cursor
local static read_only forward_only
for
Next is to investigate whether you need a cursor at all. In this case I think you can do the same with a single pass and no loops:
;WITH x AS
(
SELECT
rn = ROW_NUMBER() OVER (PARTITION BY r.AccountNo, r.Supplier, r.Commodity, r.RepID
ORDER BY r.ActualEndDate DESC),
r.RepID,
r.AccountNo,
r.Supplier,
r.CompanyName,
StartDate = r.[Supplier Start Date],
EndDate = r.[Supplier End Date],
r.Product,
r.Commodity,
ContractEnd = r.[customer end date],
EstUsage = r.[Expected Usage],
r.EnrollStatus,
EnrollDate = r.ActualStartDate,
r.ActualEndDate,
c.MeterStart,
c.MeterEnd,
ActualUsage = c.Volume
FROM dbo.commission AS c
INNER JOIN dbo.Records AS r
ON c.AccountNo = r.AccountNo
AND c.Supplier = r.Supplier
AND c.Commodity = r.Commodity
AND c.RepID = r.RepID
WHERE
c.PaymentDate >= DATEADD(MONTH, #Month-1, CONVERT(CHAR(4), #Year) + '0101')
AND c.PaymentDate < DATEADD(MONTH, 1, CONVERT(CHAR(4), #Year) + '0101')
AND r.RepID = #RepID
)
SELECT RepID, AccountNo, Supplier, CompanyName, StartDate, EndDate,
Product, Commodity, ContractEnd, EstUsage, EnrollStatus, EnrollDate,
ActualEndDate, MeterStart, MeterEnd, ActualUsage
FROM x
WHERE rn = 1 --ORDER BY something;
If this is still slow, then the cursor probably wasn't the problem - the next step will be investigating what indexes might be implemented to make this query more efficient.
Temp tables are your friend
The way I solved my problem, merging data from two tables, removed duplicates in complex fashion and everything extremely fast was to use temporary table. This is what I did
Create a #temp table, fetch the merged data from both the tables. Make sure you include ID fields in both tables even if you do not required it. This will help remove duplicates.
Now you can do all sort of calculation on this table. Remove duplicates from table B, just remove duplicate table B IDs. Remove duplicates from table A, just remove duplicate table A Ids. There is more complexity to the problem but at least this is probably the best way to solve your problem and make it considerably faster if cursors are too expensive and takes considerable time to calculate. In my case it was taking +5 min. The #temp table query about about 5 sec, which had a lot more calculations in it.
While applying Aaron solution, the cursor did not get any faster. The second query was faster but it did not give me the correct answer, so finally I used temp tables. This is my own answer.

selecting max date in range, excluding multiple other date ranges

my first time posting.
I have a tricky task of finding the latest date within a range, but excluding multiple other date ranges. I have code that does work, but it seems awfully taxing.
I am selecting the MAX(Date) within a range. However, I have a table, bfShow, where each show has its own date-range (stored as DateStart and DateEnd). So I need the MAX(Date) within the range which does NOT have a show on that date (there may be 0 to 99 shows overlapping my date-range).
Note: I have dbo.fnSeqDates which works great (found via Google) and returns all dates within a range - makes for very fast filling in 6/1/12, 6/2/12, 6/3/12...6/30/12, etc.
What I'm doing (below) is creating a table with all the dates (within range) in it, then find all the Shows within that range (#ShowIDs) and iterate through those shows, one at a time, deleting all those dates (from #DateRange). Ultimately, #DateRange is left with only "empty" dates. Thus, the MAX(Date) remaining in #DateRange is my last date in the month without a show.
Again, my code below does work, but there's got to be a better way. Thoughts?
Thank you,
Todd
CREATE procedure spLastEmptyDate
#DateStart date
, #DateEnd date
as
begin
-- VARS...
declare #ShowID int
declare #EmptyDate date
-- TEMP TABLE...
create table #DateRange(dDate date)
create table #ShowIDs(ShowID int)
-- LOAD ALL DATES IN RANGE (THIS MONTH-ISH)...
insert into #DateRange(dDate)
select SeqDate
from dbo.fnSeqDates(#DateStart, #DateEnd)
-- LOAD ALL SHOW IDs IN RANGE (THIS MONTH-IS)...
insert into #ShowIDs(ShowID)
select s.ShowID
from bfShow s
where s.DateStart = #DateStart
-- PRIME SHOW ID...
set #ShowID = 0
select #ShowID = min(ShowID)
from #ShowIDs
-- RUN THRU ALL, REMOVING DATES AS WE GO...
while (#ShowID > 0)
begin
-- REMOVE FROM TEMP...
delete DR
from #DateRange DR
, bfShow s
where DR.dDate between s.DateStart and s.DateEnd
and s.ShowID = #ShowID
-- DROP THAT ONE FROM TEMP...
delete from #ShowIDs
where ShowID = #ShowID
-- GET NEXT ID...
set #ShowID = 0
select #ShowID = min(ShowID)
from #ShowIDs
end
-- GET LAST EMPTY SPOT...
select #EmptyDate = max(dDate)
from #DateRange
-- CLEAN UP...
drop table #DateRange
drop table #ShowIDs
-- RETURN DATA...
select #EmptyDate as LastEmptyDateInRange
end
Let us know what version of SQL Server you're on because that will help determine your options, but you should be able to use the BETWEEN operator in a JOIN between the fnSeqDates function (it's a table-valued function, so you can join to it directly rather than inserting them into a temp table) and the bfShow tables:
SELECT TOP 1 tDate.SeqDate
FROM dbo.fnSeqDates('6/1/2012', '6/30/2012') tDate
LEFT JOIN bfShow tShow
ON tDate.SeqDate BETWEEN tShow.DateStart AND tShow.DateEnd
WHERE tShow.ShowID IS NULL -- no matches found
ORDER BY tDate.SeqDate DESC -- to pull the most recent date
Okay, I thought I'd re-phrase the question, and try to expose some edge cases. I'm not using your function at all. If this isn't right, can you give an example where it fails?
create table bfShow (
DateStart date,
DateEnd date
)
go
CREATE procedure spLastEmptyDate
#DateStart date
, #DateEnd date
as
--Return #DateEnd, or, if that is within a show, find the contiguous
--region of shows covering it, and select the day before that
;with ShowsCovering as (
select DateStart,DateEnd from bfShow where DateStart <= #DateEnd and DateEnd >= #DateEnd
union all
select s1.DateStart,s2.DateEnd
from
bfShow s1
inner join
ShowsCovering s2
on
s1.DateStart < s2.DateStart and
(
--This join would be helped by an indexed computed column on bfShow, either Start-1 or End+1
s1.DateEnd >= s2.DateStart or
s1.DateEnd = DATEADD(day,-1,s2.DateStart)
)
where
s2.DateStart > #DateStart
), Earliest as (
select MIN(DateStart) as MinDate from ShowsCovering
)
--1) If there are no rows, the answer is #DateEnd
--2) If there are rows, and the MIN(DateStart) = #DateStart, then no day exists
--3) Otherwise, the answer is MIN(DateStart)-1
, Answer as (
select #DateEnd as Result where exists(select * from Earliest where MinDate is null)
union all
select DATEADD(day,-1,MinDate) from Earliest where MinDate > #DateStart
)
select Result from Answer
go
insert into bfShow(DateStart,DateEnd)
values ('20120601','20120612'),
('20120619','20120630')
go
exec spLastEmptyDate '20120601','20120625'
--Result = 2012-06-18
go
exec spLastEmptyDate '20120525','20120625'
--Result = 2012-06-18
go
exec spLastEmptyDate '20120601','20120705'
--Result = 2012-07-05
go
insert into bfShow(DateStart,DateEnd)
values ('20120613','20120618')
go
exec spLastEmptyDate '20120601','20120625'
--Result - no rows
By the way, in your current solution, these lines:
drop table #DateRange
drop table #ShowIDs
Are unnecessary. Temp tables created within a stored procedure are automatically dropped when the stored procedure exits. So you can avoid the little dance at the end and make the last line just select max(dDate) as LastEmptyDateInRange from #DateRange, if you want to continue using your solution.