Incremental Group BY

Incremental Group BY - sql

How can I achieve incremental grouping in a query?
I need to group all the non-zero values into different named groups.
Please help me write a query based on the columns date and subscribers.

If you have SQL Server 2012 or newer, you can use a few tricks with window functions to get this kind of grouping without cursors, with something like this:
-- Labels every run of consecutive non-zero days as 'Group<n>' (n = 1, 2, ...)
-- and every zero day as 'No group'. Requires SQL Server 2012+ (LAG, ordered SUM).
select
    Date,
    Subscribers,
    case
        when Subscribers = 0 then 'No group'
        else 'Group' + convert(varchar(10), GRP)
    end as GRP
from (
    select
        Date,
        Subscribers,
        -- Running count of group starts = ordinal of the group this row is in.
        -- The explicit ROWS frame avoids the default RANGE frame, which treats
        -- rows with equal Date as peers and is the slower frame type.
        sum(GRP) over (order by Date asc rows unbounded preceding) as GRP
    from (
        select
            Date,
            Subscribers,
            -- 1 marks the first day of a group: a non-zero day whose previous
            -- day is zero or does not exist (LAG returns NULL on the first row).
            case
                when Subscribers > 0
                     and isnull(lag(Subscribers) over (order by Date asc), 0) = 0 then 1
                else 0
            end as GRP
        from SubscribersCountByDay S
    ) X
) Y
Example in SQL Fiddle

In general I advocate AGAINST cursors, but in this case it will not hurt, since it will iterate, sum up and apply the condition all in one pass.
Also note that I hinted it with FAST_FORWARD so as not to degrade performance.
I'm guessing you want what @HABO commented.
See the working example below: it just sums up until it finds a ZERO, then resets and starts again. Note that the `> 0` check on the running sum handles the case where the first row is ZERO.
-- Sample table: one row per calendar day with that day's subscriber count.
create table dbo.SubscribersCountByDay
(
[Date] date not null
,Subscribers int not null
)
GO
-- Seed rows: the zero days (2015-10-03 and 2015-10-06) separate three runs of
-- non-zero days.
insert into dbo.SubscribersCountByDay
([Date], Subscribers)
values
('2015-10-01', 1)
,('2015-10-02', 2)
,('2015-10-03', 0)
,('2015-10-04', 4)
,('2015-10-05', 5)
,('2015-10-06', 0)
,('2015-10-07', 7)
GO
-- Walks the days in date order, summing Subscribers until a zero day closes the
-- current group; each closed group is written to @Result as 'Group <n>' with
-- its total, and the result set is returned at the end.
declare
    @Date date
    ,@Subscribers int
    ,@Sum int = 0        -- running total of the group being accumulated
    ,@GroupId int = 1    -- ordinal used in the next group's name
declare @Result as Table
(
    GroupName varchar(12) not null   -- 'Group ' (6 chars) + up to 6 digits
    ,[Sum] int not null
)
declare ScanIt cursor fast_forward
for
(
    select [Date], Subscribers
    from dbo.SubscribersCountByDay
    -- UNION ALL: a plain UNION would collapse identical (Date, Subscribers)
    -- rows and under-count the sums.
    union all
    -- Sentinel zero row on the largest possible date, so it always sorts last
    -- and flushes the final group.
    select '9999-12-31', 0
) order by [Date]
open ScanIt
fetch next from ScanIt into @Date, @Subscribers
while @@FETCH_STATUS = 0
begin
    -- A zero day ends the current group; @Sum > 0 skips leading or repeated
    -- zero days that have no open group.
    if (@Subscribers = 0 and @Sum > 0)
    begin
        insert into @Result (GroupName, [Sum])
        values ('Group ' + cast(@GroupId as varchar(6)), @Sum)
        set @GroupId = @GroupId + 1
        set @Sum = 0
    end
    else begin
        set @Sum = @Sum + @Subscribers
    end
    fetch next from ScanIt into @Date, @Subscribers
end
close ScanIt
deallocate ScanIt
select GroupName, [Sum] from @Result
GO
For the OP: next time, please post the table as text; just posting an image is lazy.

In a version of SQL Server modern enough to support CTEs you can use the following cursorless query:
-- Sample data.
declare #SampleData as Table ( Id Int Identity, Subscribers Int );
insert into #SampleData ( Subscribers ) values
-- ( 0 ), -- Test edge case when we have a zero first row.
( 200 ), ( 100 ), ( 200 ),
( 0 ), ( 0 ), ( 0 ),
( 50 ), ( 50 ), ( 12 ),
( 0 ), ( 0 ),
( 43 ), ( 34 ), ( 34 );
select * from #SampleData;
-- Run the query.
with ZerosAndRows as (
-- Add IsZero to indicate zero/non-zero and a row number to each row.
select Id, Subscribers,
case when Subscribers = 0 then 0 else 1 end as IsZero,
Row_Number() over ( order by Id ) as RowNumber
from #SampleData ),
Groups as (
-- Add a group number to every row.
select Id, Subscribers, IsZero, RowNumber, 1 as GroupNumber
from ZerosAndRows
where RowNumber = 1
union all
select FAR.Id, FAR.Subscribers, FAR.IsZero, FAR.RowNumber,
-- Increment GroupNumber only when we move from a non-zero row to a zero row.
case when Groups.IsZero = 1 and FAR.IsZero = 0 then Groups.GroupNumber + 1 else Groups.GroupNumber end
from ZerosAndRows as FAR inner join Groups on Groups.RowNumber + 1 = FAR.RowNumber
)
-- Display the results.
select Id, Subscribers,
case when IsZero = 0 then 'no group' else 'Group' + Cast( GroupNumber as VarChar(10) ) end as Grouped
from Groups
order by Id;
To see the intermediate results, just replace the final select with `select * from ZerosAndRows` or `select * from Groups`.

Related

Multilevel CTE Expression slows the execution (Reposted with changes) [duplicate]

This question already exists:
Multilevel CTE Expression slows the execution [closed]
Closed 1 year ago.
The community reviewed whether to reopen this question 1 year ago and left it closed:
Duplicate This question has been answered, is not unique, and doesn’t differentiate itself from another question.
In SQL Server, I have a table-valued function which contains a multi-level CTE as shown in the code here:
-- Multi-statement table-valued function: fills the return table with
-- (methodID, Id, SavingMessage) rows produced by a chain of CTEs (CTE .. CTE5),
-- each layer adding computed columns on top of the previous one.
-- NOTE(review): the '#' prefix on parameters and variables looks like a mangled
-- '@' sigil - confirm against the original before running.
-- NOTE(review): as written this block does not compile; see the notes inline.
CREATE FUNCTION [dbo].[GetDataset_Test]
(#XMLBlock XML, #Id INT)
RETURNS
#tempTable TABLE
(
methodID INT,
[Id] INT,
SavingMessage varchar(50)
)
AS
BEGIN
DECLARE #ObjectID INT = 2;
DECLARE #ExchangeRate INT = 2;
DECLARE #Cond INT = 1;
DECLARE #Param1 varchar(50) = 'name1';
DECLARE #Param2 varchar(50) = 'name2';
-- CTE: base rows from two table-valued functions joined on product id, plus a
-- rounded [Amount] computed by a scalar UDF that is called per row.
WITH CTE AS
(
SELECT
Col1,
con,
DiscountLine,
tempNumber,
ItemCostExVAT,
Quantity,
SomeValue,
methodID,
VATAmount,
SubTypeID,
Line,
VATMultiplier,
ROUND(dbo.GetAmountCustomFunction(COALESCE(Id, AnotherId), COALESCE(Price, PriceValue)), 2) As [Amount]
FROM
-- NOTE(review): #ParameterBlock is not declared in this function (the
-- parameter is named #XMLBlock) - confirm which name is intended.
dbo.GetObject(#Id, #ObjectID, #ParameterBlock) AS BC
INNER JOIN
-- NOTE(review): #CurrencyID is not declared anywhere in this function.
dbo.fnPrices(#ID, #CurrencyID) BPD ON BPD.Id = BC.productid
),
-- CTE1: keeps only rows where SomeValue = 1.
-- NOTE(review): '*' already carries Quantity, so this CTE exposes two columns
-- named Quantity.
CTE1 AS
(
SELECT
*,
CASE WHEN con = 0 THEN Quantity ELSE 1 END AS Quantity
FROM
CTE
WHERE
1 = SomeValue
),
-- CTE2: adds per-tempNumber window aggregates (StockControlled, tempCost) and
-- AmountExVAT.
CTE2 AS
(
Select *,
MIN(CASE WHEN DiscountLine=1 THEN 1 ELSE 20 END) over (PARTITION by tempNumber) As StockControlled,
SUM(ItemCostExVAT * Quantity) OVER ( PARTITION BY tempNumber ) AS tempCost ,
-- NOTE(review): the CASE below is garbled ('THEN END ,' has no result
-- expression), and DiscValue / DicsValue are not in CTE's select list.
ROUND(CASE
WHEN methodID = 8 THEN DiscValue
WHEN methodID = 2 THEN END ,
DicsValue,VATAmount),2) AS AmountExVAT
From CTE1
),
-- CTE3: adds PriceExVAT.
-- NOTE(review): 'AS' is missing between the CTE name and '(' here and on
-- CTE4 and CTE5.
CTE3
(
SELECT
*,
ROUND(CASE
WHEN SubTypeID = 1 AND Line = 1 THEN -1 * AmountExVAT
ELSE 20
END, 2) PriceExVAT
FROM
CTE2
),
-- CTE4: adds CashBack, scaled by #ExchangeRate.
CTE4
(
SELECT
*,
ROUND(#ExchangeRate * CASE WHEN #Cond = 1 THEN AmountExVAT * VATMultiplier ELSE 20 END, 2) CashBack
FROM
CTE3
),
-- CTE5: adds SavingMessage from a scalar UDF whose arguments are the two
-- variables declared above.
CTE5
(
SELECT
*,
dbo.FormatMessage(#Param1, #Param2) AS SavingMessage
FROM
CTE4
)
-- Only three columns reach the return table.
-- NOTE(review): [Id] is not in CTE's select list, so it is not available from
-- CTE5 as written.
INSERT INTO #tempTable
SELECT
methodID, [Id], SavingMessage
FROM
CTE5
RETURN
END
In the above query, because of the multi-level CTE and the table-valued parameter, I think it is querying recursively and therefore taking more execution time.
I know that we cannot use a temporary table as a function parameter. Is there an alternative to this, or can I use a temporary table in a function in some way?
Or can I make some changes to the CTE to improve the execution time of my T-SQL function?

Selecting data from table where sum of values in a column equal to the value in another column

Sample data:
-- Sample rows: running qty by id is 1, 3, 4, 5; checkvalue is 3 on every row.
create table #temp
(
    id int,
    qty int,
    checkvalue int
);

insert into #temp (id, qty, checkvalue)
values
    (1, 1, 3),
    (2, 2, 3),
    (3, 1, 3),
    (4, 1, 3);
According to data above, I would like to show exact number of lines from top to bottom where sum(qty) = checkvalue. Note that checkvalue is same for all the records all the time. Regarding the sample data above, the desired output is:
Id Qty checkValue
1 1 3
2 2 3
Because 1+2=3 and no more data is needed to show. If checkvalue was 4, we would show the third record: Id:3 Qty:1 checkValue:4 as well.
This is the code I am using to handle this problem. The code works very well.
-- Returns the leading rows of #temp (ordered by id) whose quantities add up to
-- checkvalue. checkvalue is assumed to be the same on every row.
declare @checkValue int = (select top (1) checkvalue from #temp);
declare @counter int = 0, @sumValue int = 0;

-- Add one row's qty per iteration until the total reaches @checkValue.
-- If the rows run out first, the subquery yields NULL, @sumValue becomes NULL,
-- and the loop ends because NULL < @checkValue is not true.
while @sumValue < @checkValue
begin
    set @counter = @counter + 1;
    set @sumValue = @sumValue + (
        select numbered.qty
        from (
            select
                row_number() over (order by id asc) as rownumber,
                qty
            from #temp
        ) as numbered
        where numbered.rownumber = @counter
    );
end

-- TOP accepts a parenthesised variable, so no dynamic SQL is needed.
-- ORDER BY id makes TOP return the same rows that the loop summed.
select top (@counter) id, qty, checkvalue
from #temp
order by id;
However, I am not sure whether this is the best way to deal with it, and I wonder if there is a better approach. There are many professionals here, and I'd like to hear what they think about my approach and how it could be improved. Any advice would be appreciated!

Try this:
-- Triangular self-join: for every row "upto", collect all rows "head" with an
-- id at or below it. The windowed SUM per upto.id is then the running total up
-- to that row, and only the prefix whose total equals checkvalue is kept.
select
    prefix.id,
    prefix.qty,
    prefix.checkvalue
from (
    select
        head.id,
        head.qty,
        head.checkvalue,
        sum(head.qty) over (partition by upto.id) as [sum]
    from #temp as head
    inner join #temp as upto
        on head.id <= upto.id
) as prefix
where prefix.[sum] = prefix.checkvalue
Smart self-join is all you need :)

For SQL Server 2012, and onwards, you can easily achieve this using ROWS BETWEEN in your OVER clause and the use of a CTE:
-- Keep every row whose running total of qty (ordered by id) has not yet
-- exceeded checkvalue. ROWS UNBOUNDED PRECEDING is the short form of
-- ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW.
SELECT
    r.id,
    r.qty,
    r.checkvalue
FROM (
    SELECT
        t.id,
        t.qty,
        t.checkvalue,
        SUM(t.qty) OVER (ORDER BY t.id ROWS UNBOUNDED PRECEDING) AS RunningQty
    FROM #temp AS t
) AS r
WHERE r.RunningQty <= r.checkvalue;

One basic improvement is to try & reduce the no. of iterations. You're incrementing by 1, but if you repurpose the logic behind binary searching, you'd get something close to this:
-- Sketch of a loop that tries to find how many leading rows of #temp (by id)
-- sum to checkvalue, taking TOP (#RoughAverage) rows per attempt instead of
-- stepping one row at a time.
-- NOTE(review): the '#' prefix on the scalar variables looks like a mangled '@'
-- sigil - confirm before running.
-- NOTE(review): the loop exits only when #Sum equals #CheckValue exactly; there
-- is no exit for the case where no prefix adds up to that value.
DECLARE #RoughAverage int = 1 -- Some arbitrary value. The closer it is to the real average, the faster things should be.
DECLARE #CheckValue int = (SELECT TOP 1 checkvalue FROM #temp)
DECLARE #Sum int = 0
WHILE 1 = 1 -- Refer to BREAK below.
BEGIN
-- Assigns the windowed running sum to #Sum for each of the TOP rows.
-- NOTE(review): presumably the value of the last row by id is the one that
-- sticks - verify, since the assignment runs over several rows.
SELECT TOP (#RoughAverage) #Sum = SUM(qty) OVER(ORDER BY id)
FROM #temp
ORDER BY id
IF #Sum = #CheckValue
BREAK -- Indicating you reached your objective.
ELSE
-- The row count is replaced by the remaining shortfall, not increased by it,
-- and it goes negative once #Sum exceeds #CheckValue.
SET #RoughAverage = #CheckValue - #Sum -- Most likely incomplete like this.
END

For SQL Server 2008 you can use a recursive CTE. `TOP 1 WITH TIES` limits the result to the first combination; remove it to see all combinations.
-- Finds the first run of consecutive rows (by id) whose qty total equals
-- checkvalue. Remove TOP 1 WITH TIES to see every such run.
with cte as (
    -- Gap-free row numbers so that "next row" is rn + 1 even if ids have gaps.
    select
        id, qty, checkvalue,
        rn = row_number() over (order by id)
    from
        #temp
)
, rcte as (
    -- Anchor: every row starts its own candidate run; i remembers the id of
    -- the run's first row.
    select
        i = id, id, qty, sumV = qty, checkvalue, rn
    from
        cte
    union all
    -- Extend a run by the next row while its total is still short of checkvalue.
    -- a.i (not a.id) carries the run's first id forward; projecting a.id here
    -- would file the third and later rows of a run under a different run.
    select
        a.i, b.id, b.qty, a.sumV + b.qty, a.checkvalue, b.rn
    from
        rcte a
        join cte b on a.rn + 1 = b.rn
    where
        a.sumV < b.checkvalue
)
select
    top 1 with ties id, qty, checkvalue
from (
    -- needed = 1 on every row of a run that hits checkvalue exactly at some point.
    select
        i, id, qty, checkvalue,
        needed = max(case when sumV = checkvalue then 1 else 0 end) over (partition by i)
    from
        rcte
) t
where
    needed = 1
-- All rows of the run with the smallest starting id tie for first place.
order by dense_rank() over (order by i)
-- A run can be as long as the table: lift the default limit of 100 levels.
option (maxrecursion 0)

SQL Server - loop through table and update based on count

I have a SQL Server database. I need to loop through a table to get the count of each value in the column 'RevID'. Each value should only be in the table a certain number of times - for example, 125 times. If the count of a value is greater than or less than 125, I need to update the column so that the counts of all values in RevID (there are over 25 different values) are within the same range of about 125 (it is OK to be a few off).
For example, if the count of RevID = 'A2' is 45 and the count of RevID = 'B2' is 165, then I need to update RevID so that the 45 count increases and the 165 count decreases until both are within the 125 range.
This is what I have so far:
-- Asker's attempt at a loop that rewrites RevID based on per-value counts.
-- NOTE(review): this block is not valid T-SQL as written; see the notes below.
-- NOTE(review): the '#' prefix on variables looks like a mangled '@' sigil.
DECLARE #i INT = 1,
-- A scalar INT cannot hold the two-column, multi-row result of this SELECT.
#RevCnt INT = SELECT RevId, COUNT(RevId) FROM MyTable group by RevId
WHILE(#RevCnt >= 50)
BEGIN
UPDATE MyTable
-- Assigns the total row count of MyTable to RevID.
SET RevID= (SELECT COUNT(RevID) FROM MyTable)
-- Unbalanced ')' at the end of the next line.
WHERE RevID < 50)
-- An assignment needs SET; nothing inside the loop changes #RevCnt.
#i = #i + 1
END
I have also played around with a cursor and an INSTEAD OF trigger. Any ideas on how to achieve this? Thanks for any input.

Okay, I came back to this because I found it interesting, even though there are clearly some business rules and discussions that you, I and others are not seeing. Anyway, if you want to distribute the values evenly and arbitrarily, there are a few ways you could do it: by building recursive Common Table Expressions (CTEs), by building temp tables, and more. Here is the approach I decided to try. I did use one temp table, because SQL Server was producing a slight inconsistency about every 10th run when the main logic table was a CTE, and the temp table seems to have cleared that up. This will spread RevId evenly and arbitrarily, randomly assigning any remainder (# of Records / # of RevIds) to one of the RevIds. The script also doesn't rely on having a unique ID or anything similar; it works dynamically over row numbers that it creates. Here you go: just take out the test data etc. and you have what you more than likely want, though rebuilding the table/values would probably be easier.
--Build Some Test Data
-- Resulting row counts per RevId: A1 = 200, B2 = 170, C3 = 100, D4 = 400, E5 = 1.
DECLARE @Table AS TABLE (RevId VARCHAR(10))
DECLARE @C AS INT = 1
WHILE @C <= 400
BEGIN
    IF @C <= 200
    BEGIN
        INSERT INTO @Table (RevId) VALUES ('A1')
    END
    IF @C <= 170
    BEGIN
        INSERT INTO @Table (RevId) VALUES ('B2')
    END
    IF @C <= 100
    BEGIN
        INSERT INTO @Table (RevId) VALUES ('C3')
    END
    IF @C <= 400
    BEGIN
        INSERT INTO @Table (RevId) VALUES ('D4')
    END
    IF @C <= 1
    BEGIN
        INSERT INTO @Table (RevId) VALUES ('E5')
    END
    SET @C = @C + 1
END

--save starting counts of test data to temp table to compare with later
IF OBJECT_ID('tempdb..#StartingCounts') IS NOT NULL
BEGIN
    DROP TABLE #StartingCounts
END

SELECT
    RevId
    ,COUNT(*) as Occurences
INTO #StartingCounts
FROM
    @Table
GROUP BY
    RevId

/************************ This is the main method **********************************/
--clear temp table that is the main processing logic
IF OBJECT_ID('tempdb..#RowNumsToChange') IS NOT NULL
BEGIN
    DROP TABLE #RowNumsToChange
END

--figure out how many records there are and how many there should be for each RevId
;WITH cteTargetNumbers AS (
    SELECT
        RevId
        -- Target per RevId = (total rows / number of RevIds), plus 1 for as many
        -- randomly chosen RevIds as the division leaves a remainder.
        -- NumRecordsToUpdate = target - current count:
        --   positive = RevId must receive rows, negative = RevId must give rows away.
        ,SUM(COUNT(*)) OVER (PARTITION BY 1) / COUNT(*) OVER (PARTITION BY 1) +
            CASE
                WHEN ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY NEWID()) <=
                        SUM(COUNT(*)) OVER (PARTITION BY 1) % COUNT(*) OVER (PARTITION BY 1)
                    THEN 1
                ELSE 0
            END - COUNT(*) AS NumRecordsToUpdate
    FROM
        @Table
    GROUP BY
        RevId
)
, cteEndRowNumsToChange AS (
    -- Running total of the rows each receiving RevId needs. A receiver that
    -- needs n rows owns the slot range (ChangeStartRowNum, ChangeEndRowNum].
    -- The test is > 0 (not > 1) so that a RevId needing exactly one row still
    -- gets a slot, matching the NumRecordsToUpdate > 0 filter in the UPDATE.
    SELECT
        RevId
        ,NumRecordsToUpdate
        ,SUM(CASE WHEN NumRecordsToUpdate > 0 THEN NumRecordsToUpdate ELSE 0 END)
            OVER (PARTITION BY 1 ORDER BY RevId) AS ChangeEndRowNum
    FROM
        cteTargetNumbers
)
-- Materialised into a temp table because NEWID() makes the targets different
-- on every evaluation; the UPDATE below reads them twice.
SELECT
    RevId
    ,NumRecordsToUpdate
    ,ChangeEndRowNum
    ,LAG(ChangeEndRowNum,1,0) OVER (PARTITION BY 1 ORDER BY RevId) as ChangeStartRowNum
INTO #RowNumsToChange
FROM
    cteEndRowNumsToChange

;WITH cteOriginalTableRowNum AS (
    -- Arbitrary row number inside each RevId, so the script needs no unique key.
    SELECT
        RevId
        ,ROW_NUMBER() OVER (PARTITION BY RevId ORDER BY (SELECT 0)) as RowNumByRevId
    FROM
        @Table t
)
, cteRecordsAllowedToChange AS (
    -- The surplus rows of every over-represented RevId, numbered 1..N overall,
    -- so that each one lands in exactly one receiver's slot range.
    SELECT
        o.RevId
        ,o.RowNumByRevId
        ,ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY (SELECT 0)) as ChangeRowNum
    FROM
        cteOriginalTableRowNum o
        INNER JOIN #RowNumsToChange t
        ON o.RevId = t.RevId
        AND t.NumRecordsToUpdate < 0
        AND o.RowNumByRevId <= ABS(t.NumRecordsToUpdate)
)
-- Re-label each surplus row with the receiving RevId whose slot range contains it.
UPDATE o
SET RevId = u.RevId
FROM
    cteOriginalTableRowNum o
    INNER JOIN cteRecordsAllowedToChange c
    ON o.RevId = c.RevId
    AND o.RowNumByRevId = c.RowNumByRevId
    INNER JOIN #RowNumsToChange u
    ON c.ChangeRowNum > u.ChangeStartRowNum
    AND c.ChangeRowNum <= u.ChangeEndRowNum
    AND u.NumRecordsToUpdate > 0

IF OBJECT_ID('tempdb..#RowNumsToChange') IS NOT NULL
BEGIN
    DROP TABLE #RowNumsToChange
END
/***************************** End of Main Method *******************************/

-- Compare the results and clean up
;WITH ctePostUpdateResults AS (
    SELECT
        RevId
        ,COUNT(*) as AfterChangeOccurences
    FROM
        @Table
    GROUP BY
        RevId
)
SELECT
    s.RevId
    ,s.Occurences
    ,r.AfterChangeOccurences
FROM
    #StartingCounts s
    INNER JOIN ctePostUpdateResults r
    ON s.RevId = r.RevId
ORDER BY
    s.RevId

IF OBJECT_ID('tempdb..#StartingCounts') IS NOT NULL
BEGIN
    DROP TABLE #StartingCounts
END

Since you've given no rules for how you'd like the balance to operate we're left to speculate. Here's an approach that would find the most overrepresented value and then find an underrepresented value that can take on the entire overage.
I have no idea how optimal this is and it will probably run in an infinite loop without more logic.
-- Repeatedly moves the overage of the most over-represented RevID to an
-- under-represented RevID that can absorb all of it, until no such pair is left.
declare @balance int = 125;                 -- target number of rows per RevID
declare @cnt_over int;
declare @cnt_under int;
declare @revID_overrepresented varchar(32);
declare @revID_underrepresented varchar(32);
declare @rowcount int = 1;

while @rowcount > 0
begin
    -- SELECT-assignment leaves a variable untouched when no row qualifies, so
    -- clear the previous iteration's values first.
    select @revID_overrepresented = null, @cnt_over = null,
           @revID_underrepresented = null, @cnt_under = null;

    -- Most over-represented value.
    select top (1) @revID_overrepresented = RevID, @cnt_over = count(*)
    from T
    group by RevID
    having count(*) > @balance
    order by count(*) desc;

    if @revID_overrepresented is null
        break;  -- nothing is above the target any more

    -- Fullest value that can still take the whole overage without exceeding
    -- the target itself.
    select top (1) @revID_underrepresented = RevID, @cnt_under = count(*)
    from T
    group by RevID
    having count(*) + (@cnt_over - @balance) <= @balance
    order by count(*) desc;

    if @revID_underrepresented is null
        break;  -- no single value can absorb the overage; stop instead of looping forever

    -- TOP in an UPDATE needs a parenthesised expression.
    update top (@cnt_over - @balance) T
    set RevId = @revID_underrepresented
    where RevId = @revID_overrepresented;

    set @rowcount = @@rowcount;
end

The problem is I don't even know what you mean by balance...You say it needs to be evenly represented but it seems like you want it to be 125. 125 is not "even", it is just 125.
I can't tell what you are trying to do, but I'm guessing this is not really an SQL problem. But you can use SQL to help. Here is some helpful SQL for you. You can use this in your language of choice to solve the problem.
Find the rev values and their counts:
-- Number of rows per RevID value. The grouping column must be RevID; the
-- table name is not a valid GROUP BY expression.
SELECT
    RevID,
    COUNT(*) AS RevCount
FROM MyTable
GROUP BY RevID
Update #X rows (with RevID of value #RevID) to a new value #NewValue
-- Change at most @X rows that currently have RevID = @RevID to @NewValue.
-- TOP takes a parenthesised expression, and UPDATE has no FROM before the
-- target table. Which rows are picked is arbitrary.
UPDATE TOP (@X) MyTable
SET RevID = @NewValue
WHERE RevID = @RevID
Using these two queries you should be able to apply your business rules (which you never specified) in a loop or whatever to change the data.

How to get columns names that been updated

Suppose I have a table like this :
Table 1:
date account name type status open_account_date
31.12.14 1000 20 40 50 31.12.14
2.1.15 1000 10 10 50 31.12.14
3.1.15 1000 5 15 50 31.12.14
and I want to build a summary table like this for the first quarter :
account numOfChanges Changes
1000 4 (name, type)
The first row in table 1 indicates that the account was opened and somebody entered some details for it; the other rows indicate changes, and I want to know which fields have been changed. Are there any suggestions or ideas on how to do this?

-- Per account: how many field changes happened during one quarter and which
-- fields changed. Each snapshot in the quarter is compared with the one before
-- it, starting from the last snapshot taken before the quarter.
-- #Table stands for the history table from the question.
-- Accounts with no snapshot before the quarter have no anchor row and are not
-- reported.
DECLARE @StartOfQuarter DATE = '20150101'  -- yyyymmdd does not depend on language/DATEFORMAT settings

;WITH cteRcordStateBeforeQuarter AS (
    -- Snapshots before the quarter; RowNum = 1 is the latest one per account.
    SELECT
        [date]
        ,account
        ,name
        ,[type]
        ,[status]
        ,open_account_date
        ,RowNum = ROW_NUMBER() OVER (PARTITION BY account ORDER BY [date] DESC)
    FROM
        #Table
    WHERE
        [date] < @StartOfQuarter
)
, cteRecordStatesDuringQuarter AS (
    SELECT
        [date]
        ,account
        ,name
        ,[type]
        ,[status]
        ,open_account_date
        ,RowNum = ROW_NUMBER() OVER (PARTITION BY account ORDER BY [date] ASC) + 1
            --add 1 because the first row is going to be the last one prior to quarter
        ,LatestChangeRowNum = ROW_NUMBER() OVER (PARTITION BY account ORDER BY [date] DESC)
    FROM
        #Table
    WHERE
        -- Half-open range for exactly the quarter that starts at @StartOfQuarter.
        -- DATEPART(QQ, [date]) = 1 would match the first quarter of every year.
        [date] >= @StartOfQuarter
        AND [date] < DATEADD(QUARTER, 1, @StartOfQuarter)
)
, cteRecursive AS (
    -- Anchor: the state just before the quarter, with nothing counted yet.
    SELECT
        [date]
        ,account
        ,name
        ,[type]
        ,[status]
        ,open_account_date
        ,RowNum
        ,LatestChangeRowNum = 0
        ,NumOfChanges = 0
        ,[Changes] = CAST('' AS VARCHAR(100))
    FROM
        cteRcordStateBeforeQuarter
    WHERE
        RowNum = 1
    UNION ALL
    -- Step: compare snapshot q with the previous snapshot r of the same account.
    SELECT
        q.[date]
        ,q.account
        ,q.name
        ,q.[type]
        ,q.[status]
        ,q.open_account_date
        ,q.RowNum
        ,LatestChangeRowNum = CAST(q.LatestChangeRowNum AS INT)
        -- One point per field that differs; the ISNULL sentinels make
        -- NULL <-> value count as a change.
        ,NumOfChanges = r.NumOfChanges
            + CASE WHEN ISNULL(q.name,-99999) <> ISNULL(r.name,-99999) THEN 1 ELSE 0 END
            + CASE WHEN ISNULL(q.[type],-99999) <> ISNULL(r.[type],-99999) THEN 1 ELSE 0 END
            + CASE WHEN ISNULL(q.[status],-99999) <> ISNULL(r.[status],-99999) THEN 1 ELSE 0 END
            + CASE WHEN ISNULL(q.open_account_date,'19000101') <> ISNULL(r.open_account_date,'19000101') THEN 1 ELSE 0 END
        -- Append a field name the first time that field changes (NOT LIKE
        -- prevents listing it twice).
        ,[Changes] = CAST(ISNULL(r.[Changes],'')
            + CASE WHEN ISNULL(q.name,-99999) <> ISNULL(r.name,-99999) AND r.[Changes] NOT LIKE '%name%' THEN ',name' ELSE '' END
            + CASE WHEN ISNULL(q.[type],-99999) <> ISNULL(r.[type],-99999) AND r.[Changes] NOT LIKE '%type%' THEN ',type' ELSE '' END
            + CASE WHEN ISNULL(q.[status],-99999) <> ISNULL(r.[status],-99999) AND r.[Changes] NOT LIKE '%status%' THEN ',status' ELSE '' END
            + CASE WHEN ISNULL(q.open_account_date,'19000101') <> ISNULL(r.open_account_date,'19000101') AND r.[Changes] NOT LIKE '%open_account_date%' THEN ',open_account_date' ELSE '' END
            AS VARCHAR(100))
    FROM
        cteRecordStatesDuringQuarter q
        INNER JOIN cteRecursive r
        ON q.account = r.account
        AND q.RowNum = r.RowNum + 1
)
SELECT
    account
    ,NumOfChanges
    -- Drop the leading comma and put a space after the remaining ones.
    -- CASE instead of IIF keeps the query usable before SQL Server 2012.
    ,[Changes] = REPLACE(
        CASE WHEN [Changes] LIKE ',%' THEN STUFF([Changes], 1, 1, '') ELSE [Changes] END
        ,',',', ')
FROM
    cteRecursive
WHERE
    -- The row for the last snapshot in the quarter carries the final totals.
    LatestChangeRowNum = 1
-- One recursion level per snapshot: lift the default limit of 100 levels.
OPTION (MAXRECURSION 0)
If you were using SQL Server 2012+ it would be a little easier, because you could use the LAG or LEAD window functions and IIF instead of CASE. But a recursive CTE works pretty well too. I assume your recordset will have multiple accounts as well as multiple quarters, and therefore multiple record states prior to the quarter. Because of this you will need to tweak the date logic slightly, but this will give you the gist.
First, find the record state prior to the quarter. Then find all of the changes during the quarter. Add some row numbers to determine which is the first and which is the last change. Then use a recursive CTE to test for changes. In the end you just need to format the string the way you want.

SQL Server 2008 filling gaps with dimension

I have a data table as below
#data
---------------
Account AccountType
---------------
1 2
2 0
3 5
4 2
5 1
6 5
AccountType 2 is a header and 5 is a total. This means accounts of type 2 have to look ahead to the next 1 or 0 to determine whether their Dim value is 1 or 0. Totals of type 5 have to look back to the nearest 1 or 0 to determine their Dim value. Accounts of type 1 or 0 have their own type as Dim.
Accounts of type 2 appear as islands, so it is not enough to just check RowNumber + 1, and the same goes for accounts of type 5.
I have arrived at the following table using CTEs, but I can't find a quick way to go from here to my final result of Account, AccountType, Dim for all accounts.
T3
-------------------
StartRow EndRow AccountType Dim
-------------------
1 1 2 0
2 2 0 0
3 3 5 0
4 4 2 1
5 5 0 1
6 6 5 1
The code below is MS T-SQL; copy and paste it all and see it run. The final join on the CTE select statement is extremely slow: even for 500 rows it takes 30 seconds, and I have 100,000 rows I need to handle. I have written a cursor-based solution that does it in 10-20 seconds, which is workable, and a fast recursive CTE solution that does it in 5 seconds for 100,000 rows, but that one depends on the fragmentation of the #data table. I should add that this is simplified; the real problem has a lot more dimensions that need to be taken into account, but it will work the same way as this simple problem.
Anyway, is there a fast way to do this using joins or another set-based solution?
SET NOCOUNT ON
-- Recreate the #data work table from scratch on every run.
IF OBJECT_ID('tempdb..#data') IS NOT NULL
    DROP TABLE #data
CREATE TABLE #data
(
    Account INTEGER IDENTITY(1,1),
    AccountType INTEGER
)
BEGIN -- TEST DATA
    DECLARE @Counter INTEGER = 0
    DECLARE @MaxDataRows INTEGER = 50 -- Change here to check performance
    DECLARE @Type INTEGER
    WHILE (@Counter < @MaxDataRows)
    BEGIN
        -- Repeating pattern of 10 accounts: 3 headers (type 2), 5 detail rows,
        -- 2 totals (type 5). Detail rows are type 0 in the first half of the
        -- data and type 1 in the second half.
        SET @Type = CASE
                        WHEN @Counter % 10 < 3 THEN 2
                        WHEN @Counter % 10 >= 8 THEN 5
                        WHEN @Counter < @MaxDataRows / 2.0 THEN 0
                        ELSE 1
                    END
        INSERT INTO #data (AccountType) VALUES (@Type)
        SET @Counter = @Counter + 1
    END
END -- TEST DATA END
-- Resolves a Dim value for every account in #data:
--   1. collapse consecutive accounts of the same AccountType into islands,
--   2. give each island a Dim (T3),
--   3. join the islands back to the individual accounts.
;WITH groupIds_cte AS
(
    -- Within a run of consecutive accounts of one type, (row number - Account)
    -- is constant, so it serves as the island key.
    SELECT
        src.*,
        ROW_NUMBER() OVER (PARTITION BY src.AccountType ORDER BY src.Account) - src.Account AS GroupId
    FROM #data AS src
),
islandRanges_cte AS
(
    -- One row per island: its first and last account and its type.
    SELECT
        MIN(g.Account) AS StartRow,
        MAX(g.Account) AS EndRow,
        g.AccountType
    FROM groupIds_cte AS g
    GROUP BY g.GroupId, g.AccountType
),
T3 AS
(
    -- Header islands (type 2) take the type of the island that starts right after them.
    SELECT hdr.StartRow, hdr.EndRow, hdr.AccountType, nxt.AccountType AS Dim
    FROM islandRanges_cte AS hdr
    INNER JOIN islandRanges_cte AS nxt
        ON nxt.StartRow = hdr.EndRow + 1
    WHERE hdr.AccountType = 2

    UNION ALL

    -- Total islands (type 5) take the type of the island that ends right before them.
    SELECT tot.StartRow, tot.EndRow, tot.AccountType, prv.AccountType AS Dim
    FROM islandRanges_cte AS tot
    INNER JOIN islandRanges_cte AS prv
        ON prv.EndRow = tot.StartRow - 1
    WHERE tot.AccountType = 5

    UNION ALL

    -- Detail islands (type 0 or 1) are their own Dim.
    SELECT det.StartRow, det.EndRow, det.AccountType, det.AccountType AS Dim
    FROM islandRanges_cte AS det
    WHERE det.AccountType IN (0, 1)
),
T4 AS
(
    -- Only the two end points of each island, one row each; not used by the
    -- final query below.
    SELECT flat.Account, flat.Dim
    FROM (
        SELECT ends.FlattenRow AS Account, isl.StartRow, isl.EndRow, isl.Dim
        FROM T3 AS isl
        CROSS APPLY (VALUES (isl.StartRow), (isl.EndRow)) AS ends (FlattenRow)
    ) AS flat
)
--SELECT * FROM T3 ORDER BY StartRow
--SELECT * FROM T4 ORDER BY Account
-- Final correct result but very very slow
SELECT
    acc.Account,
    acc.AccountType,
    isl.Dim
FROM T3 AS isl
INNER JOIN #data AS acc
    ON acc.Account >= isl.StartRow
   AND acc.Account <= isl.EndRow
ORDER BY acc.Account
EDIT with some time testing
SET NOCOUNT ON
-- #times and #batch are created once and kept between runs so that timings
-- accumulate; each run adds one row to #batch, which gives it a new Batch number.
-- The existence check must name #times itself: checking #data skipped the
-- CREATE whenever #data was left over from an earlier run.
IF OBJECT_ID('tempdb..#times') IS NULL
    CREATE TABLE #times
    (
        RecId INTEGER IDENTITY(1,1),
        Batch INTEGER,
        Method NVARCHAR(255),
        MethodDescription NVARCHAR(255),
        RunTime INTEGER          -- milliseconds
    )
IF OBJECT_ID('tempdb..#batch') IS NULL
    CREATE TABLE #batch
    (
        Batch INTEGER IDENTITY(1,1),
        Bit BIT
    )
INSERT INTO #batch (Bit) VALUES (0)

-- #data and #islands are rebuilt on every run.
IF OBJECT_ID('tempdb..#data') IS NOT NULL
    DROP TABLE #data
CREATE TABLE #data
(
    Account INTEGER
)
CREATE NONCLUSTERED INDEX data_account_index ON #data (Account)

IF OBJECT_ID('tempdb..#islands') IS NOT NULL
    DROP TABLE #islands
CREATE TABLE #islands
(
    AccountFrom INTEGER,
    AccountTo INTEGER,
    Dim INTEGER
)
CREATE NONCLUSTERED INDEX islands_from_index ON #islands (AccountFrom, AccountTo, Dim)

BEGIN -- TEST DATA
    -- Accounts 1..100000; ORDER BY N makes TOP keep exactly the lowest numbers.
    INSERT INTO #data (Account)
    SELECT TOP (100000) ROW_NUMBER() OVER (ORDER BY t1.number) AS N
    FROM master..spt_values t1
    CROSS JOIN master..spt_values t2
    ORDER BY N

    -- Ten equally sized, contiguous islands; Dim is the island's ordinal (1-10).
    INSERT INTO #islands (AccountFrom, AccountTo, Dim)
    SELECT MIN(Account), MAX(Account), Grp
    FROM (SELECT Account, NTILE(10) OVER (ORDER BY Account) AS Grp FROM #data) T
    GROUP BY Grp
END -- TEST DATA END
--SELECT * FROM #data
--SELECT * FROM #islands
--PRINT CONVERT(varchar(20),DATEDIFF(MS,@RunDate,GETDATE()))+' ms Sub Query'
-- Runs the same account-to-island lookup four ways and records the elapsed
-- milliseconds of each in #times under the current batch number.
DECLARE @RunDate datetime

-- 1) Correlated scalar subquery in the select list.
SET @RunDate=GETDATE()
SELECT Account, (SELECT Dim From #islands WHERE Account BETWEEN AccountFrom AND AccountTo) AS Dim
FROM #data
INSERT INTO #times (Batch, Method, MethodDescription, RunTime)
VALUES ((SELECT MAX(Batch) FROM #batch) ,'subquery','',DATEDIFF(MS,@RunDate,GETDATE()))

-- 2) CROSS APPLY.
SET @RunDate=GETDATE()
SELECT D.Account, V.Dim
FROM #data D
CROSS APPLY
(
    SELECT Dim From #islands V
    WHERE D.Account BETWEEN V.AccountFrom AND V.AccountTo
) V
INSERT INTO #times (Batch, Method, MethodDescription, RunTime)
VALUES ((SELECT MAX(Batch) FROM #batch) ,'crossapply','',DATEDIFF(MS,@RunDate,GETDATE()))

-- 3) Plain range join.
SET @RunDate=GETDATE()
SELECT D.Account, I.Dim
FROM #data D
JOIN #islands I
ON D.Account BETWEEN I.AccountFrom AND I.AccountTo
INSERT INTO #times (Batch, Method, MethodDescription, RunTime)
VALUES ((SELECT MAX(Batch) FROM #batch) ,'join','',DATEDIFF(MS,@RunDate,GETDATE()))

-- 4) Recursive CTE that expands every island into its accounts without
--    touching #data. The anchor emits both end points of each island; the
--    recursive step fills in the accounts strictly between them.
SET @RunDate=GETDATE()
;WITH cte AS
(
    SELECT Account, AccountFrom, AccountTo, Dim, 1 AS Counting
    FROM #islands
    CROSS APPLY (VALUES(AccountFrom),(AccountTo)) V (Account)
    UNION ALL
    SELECT Account + 1 ,AccountFrom, AccountTo, Dim, Counting + 1
    FROM cte
    WHERE (Account + 1) > AccountFrom AND (Account + 1) < AccountTo
)
SELECT Account, Dim, Counting FROM cte OPTION(MAXRECURSION 32767)
INSERT INTO #times (Batch, Method, MethodDescription, RunTime)
VALUES ((SELECT MAX(Batch) FROM #batch) ,'recursivecte','',DATEDIFF(MS,@RunDate,GETDATE()))
You can select from the #times table to see the run times :)

I think you want a join, but using an inequality rather than an equality:
-- Range join: each tally row picks up dim2 from the island whose
-- ["from", "to"] span contains its id.
select
    tt.id,
    tt.dim1,
    it.dim2
from TallyTable as tt
inner join IslandsTable as it
    on tt.id >= it."from"
   and tt.id <= it."to"
This works for the data that you provide in the question.
Here is another idea that might work. Here is the query:
-- For every account, look ahead to the nearest later account that is neither a
-- header (2) nor a total (5) and report its type.
select d.*,
       (select top (1) d2.AccountType
        from #data d2
        where d2.Account > d.Account
          and d2.AccountType not in (2, 5)
        -- Without this ORDER BY, TOP returns an arbitrary later row rather
        -- than the nearest one.
        order by d2.Account
       ) nextAccountType
from #data d
order by d.account;
I just ran this on 50,000 rows and this version took 17 seconds on my system. Changing the table to:
-- Variant of the #data table with Account declared as the primary key.
CREATE TABLE #data
(
    Account     INTEGER IDENTITY(1,1) PRIMARY KEY,
    AccountType INTEGER
);
Has actually slowed it down to about 1:33 -- quite to my surprise. Perhaps one of these will help you.

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Incremental Group BY - sql

How I can achieve incremental grouping in query ? I need to group by all the non-zero values into different named groups. Please help me write a query based on columns date and subscribers.

Related

Multilevel CTE Expression slows the execution (Reposted with changes) [duplicate]

Selecting data from table where sum of values in a column equal to the value in another column

SQL Server - loop through table and update based on count

How to get columns names that been updated

SQL Server 2008 filling gaps with dimension

Categories

Resources