How to effectively split grouped records into batches - sql

For each group in table I need to split that group into specific amount of records (batches) and mark each record in batch with according batch id.
Right now, my implementation based on cursors is IMHO clumsy. It takes 1 minute to split set of 10 000 rows which is, needless to say, very slow. Any clues how to make that work faster?
Here is test script.
-- Needed to generate big data
DECLARE #Naturals TABLE (ID INT)
INSERT INTO #Naturals (ID)
VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10)
DECLARE #TestData TABLE
(
LINK INT,
F_House INT,
F_Batch UNIQUEIDENTIFIER
)
INSERT INTO #TestData (LINK, F_House)
SELECT ROW_NUMBER() OVER (order by T1.ID), ROW_NUMBER() OVER (order by T1.ID) % 5
FROM
#Naturals T1
CROSS JOIN #Naturals T2
CROSS JOIN #Naturals T3
CROSS JOIN #Naturals T4
--CROSS JOIN #Naturals T5 -- that would give us 100 000
-- Finished preparing Data (10 000 rows)
SELECT 'Processing:', COUNT(*) FROM #TestData
DECLARE #batchSize INT -- That would be amount of rows in each batch
SET #batchSize = 50
IF OBJECT_ID('tempdb..#G') IS NOT NULL -- Split set of data into groups. We need to create batches in each group.
DROP TABLE #G
SELECT
buf.F_House, COUNT(*) AS GroupCount
INTO #G
FROM #TestData buf
GROUP BY buf.F_House -- That logic could be tricky one. Right now simplifying
DECLARE #F_House INT -- That would be group key
DECLARE db_cursor CURSOR FOR
SELECT F_House
FROM #G
ORDER BY F_House
OPEN db_cursor FETCH NEXT FROM db_cursor INTO #F_House
WHILE ##FETCH_STATUS = 0
BEGIN
PRINT 'Processing house group: ' + CAST(#F_House AS VARCHAR(10))
-- For each group let's create batches
WHILE EXISTS (SELECT 1 FROM #TestData AS itmds
WHERE itmds.F_House = #F_House
AND itmds.F_Batch IS NULL
)
BEGIN
DECLARE #batchLink UNIQUEIDENTIFIER
SET #batchLink = NEWID()
UPDATE itmds
SET itmds.F_Batch = #batchLink
FROM #TestData AS itmds
WHERE itmds.F_House = #F_House
AND itmds.F_Batch IS NULL
AND itmds.LINK IN
(
SELECT TOP (#batchSize)
sub.LINK
FROM #TestData sub
WHERE sub.F_House = #F_House
AND sub.F_Batch IS NULL
)
END
FETCH NEXT FROM db_cursor INTO #F_House
END
CLOSE db_cursor
DEALLOCATE db_cursor
SELECT
buf.F_House, COUNT(distinct F_Batch) AS BatchCountInHouse
FROM #TestData buf
GROUP BY buf.F_House
ORDER BY buf.F_House
Expected output (considering batchsize = 50)
10 000 rows / 5 houses = 2000 rows/house
2000 rows/house / 50(batchSize) = 40 batches/house

This is set based avoiding a cursor. The assigned F_Batch is a BIGINT:
;with baseRowNum as
(
SELECT LINK, F_House,
-- row number per F_House
Row_Number() Over (PARTITION BY F_House ORDER BY LINK) AS rn
FROM #TestData
)
SELECT *,
-- combine F_House & group number into a unique result
F_House * 10000 +
-- start a new sub group for every F_House or after #batchSize rows
Sum(CASE WHEN rn % #batchSize = 1 THEN 1 ELSE 0 END)
Over (ORDER BY F_House, rn
ROWS Unbounded Preceding) AS F_Batch
FROM baseRowNum
If you really need a UNIQUEINDENTIFIER you can join back:
;with baseRowNums as
(
SELECT LINK, F_House,
-- row number per F_House
Row_Number() Over (PARTITION BY F_House ORDER BY LINK) AS rn
FROM #TestData
)
,batchNums as
(
SELECT *,
-- combine F_House & group number into a unique result
F_House * 10000 +
-- start a new sub group for every F_House or after #batchSize rows
Sum(CASE WHEN rn % #batchSize = 1 THEN 1 ELSE 0 END)
Over (ORDER BY F_House, rn
ROWS Unbounded Preceding) AS F_Batch
FROM baseRowNums
)
,GUIDs as
(
select F_Batch, MAX(newid()) as GUID
from batchNums
group by F_Batch
)
-- select * from
--from batchNums join GUIDs
-- on batchNums.F_Batch = GUIDs.F_Batch
select F_House, GUID, count(*)
from batchNums join GUIDs
on batchNums.F_Batch = GUIDs.F_Batch
group by F_House, GUID
order by F_House, count(*) desc
See Fiddle.

I would use an inner looping inside of a looping referencing a grouping level.
Then you can iterate through from the grouping down into the BatchGrouping. However as you pointed out speed is an issue with table variables and CTE's for that reason I in this case tested with a tempdb # table. This way I could index after the insert and optimize performance. I can run a million rows of aggregation logic in about 16 seconds. I consider that acceptable performance. But my Dev Box is an I7 6700, with 16 gigs of DDR4, and an SSD. Performance times may vary based on hardware obviously.
--Make up some fake data for example
DECLARE
#Start INT = 1
, #End INT = 100000
;
SET NOCOUNT ON;
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL
DROP TABLE tempdb..#Temp
CREATE Table #Temp (Id INT, Grp int, Val VARCHAR(8), BatchGroup int)
WHILE #Start <= #End
BEGIN
INSERT INTO #Temp (Id, Grp, Val)
VALUES (#Start, CAST(RAND() * 8 AS INT) + 1, LEFT(NEWID(), 8))
SELECT #Start += 1;
END
CREATE CLUSTERED INDEX IX_Temp_Grp ON #Temp(Grp, BatchGroup)
--Determine Batch Size You want for groupings
DECLARE #BatchSize INT = 1000;
--Let's randomly mess with groupings
DECLARE #X INT = 1
WHILE #X <= 4
BEGIN
; WITH x AS
(
SELECT TOP (#BatchSize * 4)
Id
, Grp
, Val
FROM #Temp
WHERE Grp = CAST(RAND() * 8 AS INT) + 1
)
UPDATE x
SET Grp = CAST(RAND() * 8 AS INT) + 1
SELECT #X += 1
END
DECLARE
#CurrentGroup INT = 1
, #CurrentBatch INT = 1
WHILE #CurrentGroup <= (SELECT MAX(Grp) FROM #Temp) -- Exists (SELECT 1 FROM #Temp WHERE BatchGroup IS NULL)
BEGIN
WHILE EXISTS (SELECT 1 FROM #Temp WHERE Grp = #CurrentGroup AND BatchGroup IS NULL)
BEGIN
; WITH x AS
(
SELECT TOP (#BatchSize) *
FROM #Temp
WHERE Grp = #CurrentGroup
AND BatchGroup IS NULL
)
update x
SET BatchGroup = #CurrentBatch
SELECT #CurrentBatch += 1;
END
SELECT #CurrentBatch = 1
SELECT #CurrentGroup += 1;
END
--Proof
Select
Grp
, COUNT(DISTINCT Id)
, COUNT(DISTINCT BatchGroup)
From #Temp
GROUP BY Grp

Actually, I've tried NTILE() with cursors and it's quite fast(I mean its faster then 1 minute for 10 000 rows).
10 000 rows for 0 seconds.
100 000 rows for 3 seconds.
1 000 000 rows for 34 seconds.
10 000 000 rows for 6 minutes
Linear grow in complexity which is nice.
-- Needed to generate big data
DECLARE #Naturals TABLE (ID INT)
INSERT INTO #Naturals (ID)
VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10)
DECLARE #TestData TABLE
(
LINK INT,
F_House INT,
F_Batch UNIQUEIDENTIFIER
)
INSERT INTO #TestData (LINK, F_House)
SELECT ROW_NUMBER() OVER (order by T1.ID), ROW_NUMBER() OVER (order by T1.ID) % 5
FROM
#Naturals T1
CROSS JOIN #Naturals T2
CROSS JOIN #Naturals T3
CROSS JOIN #Naturals T4
--CROSS JOIN #Naturals T5 -- that would give us 100 000
-- Finished preparing Data (10 000 rows)
SELECT 'Processing:', COUNT(*) FROM #TestData
DECLARE #batchSize INT -- That would be amount of rows in each batch
SET #batchSize = 50
IF OBJECT_ID('tempdb..#G') IS NOT NULL -- Split set of data into groups. We need to create batches in each group.
DROP TABLE #G
SELECT
buf.F_House, COUNT(*) AS GroupCount
INTO #G
FROM #TestData buf
GROUP BY buf.F_House -- That logic could be tricky one. Right now simplifying
DECLARE #F_House INT -- That would be group key
DECLARE db_cursor CURSOR FOR
SELECT F_House
FROM #G
ORDER BY F_House
OPEN db_cursor FETCH NEXT FROM db_cursor INTO #F_House
WHILE ##FETCH_STATUS = 0
BEGIN
PRINT 'Processing house group: ' + CAST(#F_House AS VARCHAR(10))
DECLARE #rowsInGroup INT
SELECT #rowsInGroup = COUNT(*) FROM #TestData
WHERE F_House = #F_House
IF OBJECT_ID('tempdb..#TileBatch') IS NOT NULL
DROP TABLE #TileBatch
SELECT
T.[NTile], NEWID() AS F_Batch
INTO #TileBatch
FROM
(
SELECT distinct
NTILE(#rowsInGroup / #batchSize) OVER (ORDER BY LINK) AS [NTile]
from
#TestData
WHERE F_House = #F_House
) T
UPDATE D
SET D.F_Batch = B.F_Batch
FROM
#TestData D
INNER JOIN
(
SELECT
*, NTILE(#rowsInGroup / #batchSize) OVER (ORDER BY LINK) AS [NTile]
from
#TestData
WHERE F_House = #F_House
) DT ON D.LINK = DT.LINK
INNER JOIN
#TileBatch B ON DT.[NTile] = B.[NTile]
WHERE D.F_House = #F_House
FETCH NEXT FROM db_cursor INTO #F_House
END
CLOSE db_cursor
DEALLOCATE db_cursor
SELECT
buf.F_House, COUNT(distinct F_Batch) AS BatchCountInHouse
FROM #TestData buf
GROUP BY buf.F_House
ORDER BY buf.F_House

Related

SQL Server - loop through table and update based on count

I have a SQL Server database. I need to loop through a table to get the count of each value in the column 'RevID'. Each value should only be in the table a certain number of times - for example 125 times. If the count of the value is greater than 125 or less than 125, I need to update the column to ensure all values in the RevID (are over 25 different values) is within the same range of 125 (ok to be a few numbers off)
For example, the count of RevID = "A2" is = 45 and the count of RevID = 'B2' is = 165 then I need to update RevID so the 45 count increases and the 165 decreases until they are within the 125 range.
This is what I have so far:
DECLARE #i INT = 1,
#RevCnt INT = SELECT RevId, COUNT(RevId) FROM MyTable group by RevId
WHILE(#RevCnt >= 50)
BEGIN
UPDATE MyTable
SET RevID= (SELECT COUNT(RevID) FROM MyTable)
WHERE RevID < 50)
#i = #i + 1
END
I have also played around with a cursor and instead of trigger. Any idea on how to achieve this? Thanks for any input.
Okay I cam back to this because I found it interesting even though clearly there are some business rules/discussion that you and I and others are not seeing. anyway, if you want to evenly and distribute arbitrarily there are a few ways you could do it by building recursive Common Table Expressions [CTE] or by building temp tables and more. Anyway here is a way that I decided to give it a try, I did utilize 1 temp table because sql was throwing in a little inconsistency with the main logic table as a cte about every 10th time but the temp table seems to have cleared that up. Anyway, this will evenly spread RevId arbitrarily and randomly assigning any remainder (# of Records / # of RevIds) to one of the RevIds. This script also doesn't rely on having a UniqueID or anything it works dynamically over row numbers it creates..... here you go just subtract out test data etc and you have what you more than likely want. Though rebuilding the table/values would probably be easier.
--Build Some Test Data
DECLARE #Table AS TABLE (RevId VARCHAR(10))
DECLARE #C AS INT = 1
WHILE #C <= 400
BEGIN
IF #C <= 200
BEGIN
INSERT INTO #Table (RevId) VALUES ('A1')
END
IF #c <= 170
BEGIN
INSERT INTO #Table (RevId) VALUES ('B2')
END
IF #c <= 100
BEGIN
INSERT INTO #Table (RevId) VALUES ('C3')
END
IF #c <= 400
BEGIN
INSERT INTO #Table (RevId) VALUES ('D4')
END
IF #c <= 1
BEGIN
INSERT INTO #Table (RevId) VALUES ('E5')
END
SET #C = #C+ 1
END
--save starting counts of test data to temp table to compare with later
IF OBJECT_ID('tempdb..#StartingCounts') IS NOT NULL
BEGIN
DROP TABLE #StartingCounts
END
SELECT
RevId
,COUNT(*) as Occurences
INTO #StartingCounts
FROM
#Table
GROUP BY
RevId
ORDER BY
RevId
/************************ This is the main method **********************************/
--clear temp table that is the main processing logic
IF OBJECT_ID('tempdb..#RowNumsToChange') IS NOT NULL
BEGIN
DROP TABLE #RowNumsToChange
END
--figure out how many records there are and how many there should be for each RevId
;WITH cteTargetNumbers AS (
SELECT
RevId
--,COUNT(*) as RevIdCount
--,SUM(COUNT(*)) OVER (PARTITION BY 1) / COUNT(*) OVER (PARTITION BY 1) +
--CASE
--WHEN ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY NEWID()) <=
--SUM(COUNT(*)) OVER (PARTITION BY 1) % COUNT(*) OVER (PARTITION BY 1)
--THEN 1
--ELSE 0
--END as TargetNumOfRecords
,SUM(COUNT(*)) OVER (PARTITION BY 1) / COUNT(*) OVER (PARTITION BY 1) +
CASE
WHEN ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY NEWID()) <=
SUM(COUNT(*)) OVER (PARTITION BY 1) % COUNT(*) OVER (PARTITION BY 1)
THEN 1
ELSE 0
END - COUNT(*) AS NumRecordsToUpdate
FROM
#Table
GROUP BY
RevId
)
, cteEndRowNumsToChange AS (
SELECT *
,SUM(CASE WHEN NumRecordsToUpdate > 1 THEN NumRecordsToUpdate ELSE 0 END)
OVER (PARTITION BY 1 ORDER BY RevId) AS ChangeEndRowNum
FROM
cteTargetNumbers
)
SELECT
*
,LAG(ChangeEndRowNum,1,0) OVER (PARTITION BY 1 ORDER BY RevId) as ChangeStartRowNum
INTO #RowNumsToChange
FROM
cteEndRowNumsToChange
;WITH cteOriginalTableRowNum AS (
SELECT
RevId
,ROW_NUMBER() OVER (PARTITION BY RevId ORDER BY (SELECT 0)) as RowNumByRevId
FROM
#Table t
)
, cteRecordsAllowedToChange AS (
SELECT
o.RevId
,o.RowNumByRevId
,ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY (SELECT 0)) as ChangeRowNum
FROM
cteOriginalTableRowNum o
INNER JOIN #RowNumsToChange t
ON o.RevId = t.RevId
AND t.NumRecordsToUpdate < 0
AND o.RowNumByRevId <= ABS(t.NumRecordsToUpdate)
)
UPDATE o
SET RevId = u.RevId
FROM
cteOriginalTableRowNum o
INNER JOIN cteRecordsAllowedToChange c
ON o.RevId = c.RevId
AND o.RowNumByRevId = c.RowNumByRevId
INNER JOIN #RowNumsToChange u
ON c.ChangeRowNum > u.ChangeStartRowNum
AND c.ChangeRowNum <= u.ChangeEndRowNum
AND u.NumRecordsToUpdate > 0
IF OBJECT_ID('tempdb..#RowNumsToChange') IS NOT NULL
BEGIN
DROP TABLE #RowNumsToChange
END
/***************************** End of Main Method *******************************/
-- Compare the results and clean up
;WITH ctePostUpdateResults AS (
SELECT
RevId
,COUNT(*) as AfterChangeOccurences
FROM
#Table
GROUP BY
RevId
)
SELECT *
FROM
#StartingCounts s
INNER JOIN ctePostUpdateResults r
ON s.RevId = r.RevId
ORDER BY
s.RevId
IF OBJECT_ID('tempdb..#StartingCounts') IS NOT NULL
BEGIN
DROP TABLE #StartingCounts
END
Since you've given no rules for how you'd like the balance to operate we're left to speculate. Here's an approach that would find the most overrepresented value and then find an underrepresented value that can take on the entire overage.
I have no idea how optimal this is and it will probably run in an infinite loop without more logic.
declare #balance int = 125;
declare #cnt_over int;
declare #cnt_under int;
declare #revID_overrepresented varchar(32);
declare #revID_underrepresented varchar(32);
declare #rowcount int = 1;
while #rowcount > 0
begin
select top 1 #revID_overrepresented = RevID, #cnt_over = count(*)
from T
group by RevID
having count(*) > #balance
order by count(*) desc
select top 1 #revID_underrepresented = RevID, #cnt_under = count(*)
from T
group by RevID
having count(*) < #balance - #cnt_over
order by count(*) desc
update top #cnt_over - #balance T
set RevId = #revID_underrepresented
where RevId = #revID_overrepresented;
set #rowcount = ##rowcount;
end
The problem is I don't even know what you mean by balance...You say it needs to be evenly represented but it seems like you want it to be 125. 125 is not "even", it is just 125.
I can't tell what you are trying to do, but I'm guessing this is not really an SQL problem. But you can use SQL to help. Here is some helpful SQL for you. You can use this in your language of choice to solve the problem.
Find the rev values and their counts:
SELECT RevID, COUNT(*)
FROM MyTable
GROUP BY MyTable
Update #X rows (with RevID of value #RevID) to a new value #NewValue
UPDATE TOP #X FROM MyTable
SET RevID = #NewValue
WHERE RevID = #RevID
Using these two queries you should be able to apply your business rules (which you never specified) in a loop or whatever to change the data.

SQL multiplying rows in select

I would like to select some rows multiple-times, depending on the column's value.
Source table
Article | Count
===============
A | 1
B | 4
C | 2
Wanted result
Article
===============
A
B
B
B
B
C
C
Any hints or samples, please?
You could use:
SELECT m.Article
FROM mytable m
CROSS APPLY (VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10)) AS s(n)
WHERE s.n <= m.[Count];
LiveDemo
Note: CROSS APLLY with any tally table. Here values up to 10.
Related: What is the best way to create and populate a numbers table?
You could also use a recursive CTE which works with numbers > 10 (here up to 1000):
With NumberSequence( Number ) as
(
Select 0 as Number
union all
Select Number + 1
from NumberSequence
where Number BETWEEN 0 AND 1000
)
SELECT Article
FROM ArticleCounts
CROSS APPLY NumberSequence
WHERE Number BETWEEN 1 AND [Count]
ORDER BY Article
Option (MaxRecursion 0)
Demo
A number-table will certainly be the best option.
http://sqlperformance.com/2013/01/t-sql-queries/generate-a-set-2
Please check following SQL script
Before executing the SELECT statement, note that I used a user function which is used to simulate a numbers table
You can find the sql codes of numbers table in SQL Server at referred tutorial
----create table myTempTbl (Article varchar(10), Count int)
--insert into myTempTbl select 'A',1
--insert into myTempTbl select 'B',4
--insert into myTempTbl select 'C',2
select t.*
from myTempTbl t
cross apply dbo.NumbersTable(1,100,1) n
where n.i <= t.Count
order by t.Article
one more CTE
with cte_t as (
select c as c, 1 as i
from mytable
group by c
union all
select t.c, ctet.i + 1
from mytable t
join cte_t ctet
on ctet.c = t.c
and ctet.i < t.i
)
select cte_t.c
from cte_t
order by cte_t.c
Can obtain the output using simple WHILE LOOP
DECLARE #table TABLE
(ID int ,Article varchar(5),[Count] int)
INSERT INTO #table
(ID,Article,Count)
VALUES
(1,'A',1),(2,'B',4),(3,'C',2)
DECLARE #temp TABLE
(Article varchar(5))
DECLARE #Cnt1 INT
DECLARE #Cnt2 INT
DECLARE #Check INT
DECLARE #max INT
SET #max =0
SET #Cnt1 = (SELECT Count(Article) FROM #table)
WHILE (#max < #Cnt1)
BEGIN
SET #max = #max +1
SET #Cnt2 = (SELECT [Count] FROM #table WHERE ID =#max)
SET #Check =(SELECT [Count] FROM #table WHERE ID =#max)
WHILE (#Cnt2 > 0)
BEGIN
INSERT INTO #temp
SELECT Article FROM #table WHERE [Count] =#Check
SET #Cnt2 = #Cnt2 -1
END
END
SELECT * FROM #temp

How to tune the following query?

This query gives me the desired result but i can't run this query every time.The 2 loops is costing me.So i need to implement something like view.But the logic has temp tables involved which isn't allowed in views as well.so, is there any other way to store this result or change the query so that it will cost me less.
DECLARE #Temp TABLE (
[SiteID] VARCHAR(100)
,[StructureID] INT
,[row] DECIMAL(4, 2)
,[col] DECIMAL(4, 2)
)
DECLARE #siteID VARCHAR(100)
,#structureID INT
,#struct_row INT
,#struct_col INT
,#rows_count INT
,#cols_count INT
,#row INT
,#col INT
DECLARE structure_cursor CURSOR
FOR
SELECT StructureID
,SiteID
,Cols / 8.5 AS Cols
,Rows / 11 AS Rows
FROM Structure
WHERE SellerID = 658 --AND StructureID = 55
OPEN structure_cursor
FETCH NEXT
FROM structure_cursor
INTO #structureID
,#siteID
,#struct_col
,#struct_row
SELECT #rows_count = 1
,#cols_count = 1
,#row = 1
,#col = 1
WHILE ##FETCH_STATUS = 0
BEGIN
WHILE #row <= #struct_row
BEGIN
WHILE #col <= #struct_col
BEGIN
--PRINT 'MEssage';
INSERT INTO #Temp (
SiteID
,StructureID
,row
,col
)
VALUES (
#siteID
,#structureID
,#rows_count
,#cols_count
)
SET #cols_count = #cols_count + 1;
SET #col = #col + 1;
END
SET #cols_count = 1;
SET #col = 1;
SET #rows_count = #rows_count + 1;
SET #row = #row + 1;
END
SET #row = 1;
SET #col = 1;
SET #rows_count = 1;
FETCH NEXT
FROM structure_cursor
INTO #structureID
,#siteID
,#struct_col
,#struct_row
END
CLOSE structure_cursor;
DEALLOCATE structure_cursor;
SELECT * FROM #Temp
Do this with a set-based operation. I think you just want insert . . . select:
INSERT INTO #Temp (SiteID, StructureID, row, col)
SELECT StructureID, SiteID, Cols / 8.5 AS Cols, Rows / 11 AS Rows
FROM Structure
WHERE SellerID = 658;
You should avoid cursors, unless you really need them for some reason (such as calling a stored procedure or using dynamic SQL on each row).
EDIT:
Reading the logic, it looks like you want to insert rows for based on the limits in each row. You still don't want to use a cursor. For that, you need a number generator and master..spt_values is a convenient one, if it has enough rows. So:
with n as (
select row_number() over (order by (select null)) as n
from master..spt_values
)
INSERT INTO #Temp (SiteID, StructureID, row, col)
SELECT StructureID, SiteID, ncol.n / 8.5 AS Cols, nrow.n / 11 AS Rows
FROM Structure s JOIN
n ncol
ON ncol.n <= s.struct_col CROSS JOIN
n nrow
ON nrow <= s.struct_row
WHERE SellerID = 658;
You can generate the number of rows and columns and then CROSS APPLY with those, like below. I've left out your SellerID condition.
;WITH Cols
AS
(
SELECT StructureID, SiteID, CAST(Cols / 8.5 AS INT) AS Col
FROM Structure
UNION ALL
SELECT s.StructureID, s.SiteID, Col - 1
FROM Structure s
INNER JOIN Cols c ON s.StructureID = c.StructureID AND s.SiteID = c.SiteID
WHERE Col > 1
)
, Rows
AS
(
SELECT StructureID, SiteID, CAST(Rows / 11 AS INT) AS Row
FROM Structure
UNION ALL
SELECT s.StructureID, s.SiteID, Row - 1
FROM Structure s
INNER JOIN Rows r ON s.StructureID = r.StructureID AND s.SiteID = r.SiteID
WHERE Row > 1
)
--INSERT INTO #Temp (SiteID, StructureID, row, col)
SELECT s.SiteID, s.StructureID, r.Row, c.Col
FROM Structure s
CROSS APPLY Cols c
CROSS APPLY Rows r
WHERE s.StructureID = c.StructureID AND s.SiteID = c.SiteID
AND s.StructureID = r.StructureID AND s.SiteID = r.SiteID
We can do this by using CROSS APPLY and CTE.
CREATE TABLE Structure(SiteID varchar(20), StructureID int,
Cols decimal(18,2), [Rows] decimal(18,2))
INSERT INTO Structure (SiteID, StructureID, Cols, [Rows])
VALUES
('MN353970', 51,17,22),
('MN272252', 52,17,11)
;WITH RowCTE([Rows]) AS
(
SELECT 1
UNION ALL
SELECT 2
),
ColCTE(Cols) AS
(
SELECT 1
UNION ALL
SELECT 2
)
SELECT SiteID, StructureID, R.Rows, C.Cols
FROM Structure s
CROSS APPLY
(
SELECT Cols FROM ColCTE
) C
CROSS APPLY
(
SELECT [Rows] FROM RowCTE
) R
Sql Fiddle Demo

Select non-existing rows

Let say I have a table:
ColumnA ColumnB
---------------------------------
1 10.75
4 1234.30
6 2000.99
How can I write a SELECT query that will result in the following:
ColumnA ColumnB
---------------------------------
1 10.75
2 0.00
3 0.00
4 1234.30
5 0.00
6 2000.99
You can use a CTE to create a list of numbers from 1 to the maximum value in your table:
; with numbers as
(
select max(ColumnA) as nr
from YourTable
union all
select nr - 1
from numbers
where nr > 1
)
select nr.nr as ColumnA
, yt.ColumnB
from numbers nr
left join
YourTable yt
on nr.nr = yt.ColumnA
order by
nr.nr
option (maxrecursion 0)
See it working at SQL Fiddle.
Please try:
declare #min int, #max int
select #min=MIN(ColumnA), #max=MAX(ColumnA) from tbl
select
distinct number ColumnA,
isnull(b.ColumnB, 0) ColumnB
from
master.dbo.spt_values a left join tbl b on a.number=b.ColumnA
where number between #min and #max
Create a TallyTable (or NumbersTable) - see this question: What is the best way to create and populate a numbers table?
With that table create an insert statement:
INSERT INTO YourTable (ColumnA, ColumnB)
SELECT Number FROM NumberTable
WHERE
NOT EXISTS (SELECT 1 FROM YourTable WHERE NumberTable.Number = YourTable.ColumnA)
-- Adjust this value or calculate it with a query to the maximum of the source table
AND NumberTable.Number < 230130
DECLARE #t TABLE (ID INT,Val DECIMAL(10,2))
INSERT INTO #t (ID,Val) VALUES (1,10.75)
INSERT INTO #t (ID,Val) VALUES (4,6.75)
INSERT INTO #t (ID,Val) VALUES (7,4.75)
declare #MinNo int
declare #MaxNo int
declare #IncrementStep int
set #MinNo = 1
set #MaxNo = 10
set #IncrementStep = 1
;with C as
(
select #MinNo as Num
union all
select Num + #IncrementStep
from C
where Num < #MaxNo
)
select Num,
CASE WHEN Val IS NOT NULL THEN Val ELSE 0.00 END AS NUMBER
from C
LEFT JOIN #t t
ON t.ID = c.Num
You could use a number-table or following trick to generate a sequence which you can LEFT OUTER JOIN with your table. I assume you want to determine the boundaries dynamically:
WITH Seq AS
(
SELECT TOP ((SELECT Max(ColumnA)FROM Table1) - (SELECT Min(ColumnA) FROM Table1) + 1)
Num = (SELECT Min(ColumnA) FROM Table1)+ Row_number() OVER (ORDER BY [object_id]) -1
FROM sys.all_objects)
SELECT ColumnA = Seq.Num,
ColumnB = COALESCE(t.ColumnB ,0.00)
FROM Seq
LEFT OUTER JOIN Table1 t
ON Seq.Num = t.ColumnA
Demo with your sample.
Worth reading: http://www.sqlperformance.com/2013/01/t-sql-queries/generate-a-set-1
I have my collect of table functions like these.
create function dbo.GetNumbers(#Start int, #End int)
returns #Items table
(
Item int
)
as
begin
while (#Start <= #End)
begin
insert into #Items
values (#Start)
set #Start = #Start + 1
end
return
end
Then I can use it to left join to my data table and every value will be there.
declare #min int, #max int
set #min = 10
set #max = 20
select gn.Item
from dbo.GetNumbers(#min, #max) gn
I have similar table functions for date ranges, times, timezones, etc.

SQL query to find Missing sequence numbers

I have a column named sequence. The data in this column looks like 1, 2, 3, 4, 5, 7, 9, 10, 15.
I need to find the missing sequence numbers from the table. What SQL query will find the missing sequence numbers from my table? I am expecting results like
Missing numbers
---------------
6
8
11
12
13
14
I am using only one table. I tried the query below, but am not getting the results I want.
select de.sequence + 1 as sequence from dataentry as de
left outer join dataentry as de1 on de.sequence + 1 = de1.sequence
where de1.sequence is null order by sequence asc;
How about something like:
select (select isnull(max(val)+1,1) from mydata where val < md.val) as [from],
md.val - 1 as [to]
from mydata md
where md.val != 1 and not exists (
select 1 from mydata md2 where md2.val = md.val - 1)
giving summarised results:
from to
----------- -----------
6 6
8 8
11 14
I know this is a very old post but I wanted to add this solution that I found HERE so that I can find it easier:
WITH Missing (missnum, maxid)
AS
(
SELECT 1 AS missnum, (select max(id) from #TT)
UNION ALL
SELECT missnum + 1, maxid FROM Missing
WHERE missnum < maxid
)
SELECT missnum
FROM Missing
LEFT OUTER JOIN #TT tt on tt.id = Missing.missnum
WHERE tt.id is NULL
OPTION (MAXRECURSION 0);
Try with this:
declare #min int
declare #max int
select #min = min(seq_field), #max = max(seq_field) from [Table]
create table #tmp (Field_No int)
while #min <= #max
begin
if not exists (select * from [Table] where seq_field = #min)
insert into #tmp (Field_No) values (#min)
set #min = #min + 1
end
select * from #tmp
drop table #tmp
The best solutions are those that use a temporary table with the sequence. Assuming you build such a table, LEFT JOIN with NULL check should do the job:
SELECT #sequence.value
FROM #sequence
LEFT JOIN MyTable ON #sequence.value = MyTable.value
WHERE MyTable.value IS NULL
But if you have to repeat this operation often (and more then for 1 sequence in the database), I would create a "static-data" table and have a script to populate it to the MAX(value) of all the tables you need.
SELECT CASE WHEN MAX(column_name) = COUNT(*)
THEN CAST(NULL AS INTEGER)
-- THEN MAX(column_name) + 1 as other option
WHEN MIN(column_name) > 1
THEN 1
WHEN MAX(column_name) <> COUNT(*)
THEN (SELECT MIN(column_name)+1
FROM table_name
WHERE (column_name+ 1)
NOT IN (SELECT column_name FROM table_name))
ELSE NULL END
FROM table_name;
Here is a script to create a stored procedure that returns missing sequential numbers for a given date range.
CREATE PROCEDURE dbo.ddc_RolledBackOrders
-- Add the parameters for the stored procedure here
#StartDate DATETIME ,
#EndDate DATETIME
AS
BEGIN
SET NOCOUNT ON;
DECLARE #Min BIGINT
DECLARE #Max BIGINT
DECLARE #i BIGINT
IF OBJECT_ID('tempdb..#TempTable') IS NOT NULL
BEGIN
DROP TABLE #TempTable
END
CREATE TABLE #TempTable
(
TempOrderNumber BIGINT
)
SELECT #Min = ( SELECT MIN(ordernumber)
FROM dbo.Orders WITH ( NOLOCK )
WHERE OrderDate BETWEEN #StartDate AND #EndDate)
SELECT #Max = ( SELECT MAX(ordernumber)
FROM dbo.Orders WITH ( NOLOCK )
WHERE OrderDate BETWEEN #StartDate AND #EndDate)
SELECT #i = #Min
WHILE #i <= #Max
BEGIN
INSERT INTO #TempTable
SELECT #i
SELECT #i = #i + 1
END
SELECT TempOrderNumber
FROM #TempTable
LEFT JOIN dbo.orders o WITH ( NOLOCK ) ON tempordernumber = o.OrderNumber
WHERE o.OrderNumber IS NULL
END
GO
Aren't all given solutions way too complex?
wouldn't this be much simpler:
SELECT *
FROM (SELECT row_number() over(order by number) as N from master..spt_values) t
where N not in (select 1 as sequence union
select 2 union
select 3 union
select 4 union
select 5 union
select 7 union
select 10 union
select 15
)
This is my interpretation of this issue, placing the contents in a Table variable that I can easily access in the remainder of my script.
DECLARE #IDS TABLE (row int, ID int)
INSERT INTO #IDS
select ROW_NUMBER() OVER (ORDER BY x.[Referred_ID]), x.[Referred_ID] FROM
(SELECT b.[Referred_ID] + 1 [Referred_ID]
FROM [catalog].[dbo].[Referrals] b) as x
LEFT JOIN [catalog].[dbo].[Referrals] a ON x.[Referred_ID] = a.[Referred_ID]
WHERE a.[Referred_ID] IS NULL
select * from #IDS
Just for fun, I decided to post my solution.
I had an identity column in my table and I wanted to find missing invoice numbers.
I reviewed all the examples I could find but they were not elegant enough.
CREATE VIEW EENSkippedInvoicveNo
AS
SELECT CASE WHEN MSCNT = 1 THEN CAST(MSFIRST AS VARCHAR (8)) ELSE
CAST(MSFIRST AS VARCHAR (8)) + ' - ' + CAST(MSlAST AS VARCHAR (8)) END AS MISSING,
MSCNT, INV_DT FROM (
select invNo+1 as Msfirst, inv_no -1 as Mslast, inv_no - invno -1 as msCnt, dbo.fmtdt(Inv_dt) AS INV_dT
from (select inv_no as invNo, a4glidentity + 1 as a4glid
from oehdrhst_sql where inv_dt > 20140401) as s
inner Join oehdrhst_sql as h
on a4glid = a4glidentity
where inv_no - invno <> 1
) AS SS
DECLARE #MaxID INT = (SELECT MAX(timerecordid) FROM dbo.TimeRecord)
SELECT SeqID AS MissingSeqID
FROM (SELECT ROW_NUMBER() OVER (ORDER BY column_id) SeqID from sys.columns) LkUp
LEFT JOIN dbo.TimeRecord t ON t.timeRecordId = LkUp.SeqID
WHERE t.timeRecordId is null and SeqID < #MaxID
I found this answer here:
http://sql-developers.blogspot.com/2012/10/how-to-find-missing-identitysequence.html
I was looking for a solution and found many answers. This is the one I used and it worked very well. I hope this helps anyone looking for a similar answer.
-- This will return better Results
-- ----------------------------------
;With CTERange
As (
select (select isnull(max(ArchiveID)+1,1) from tblArchives where ArchiveID < md.ArchiveID) as [from],
md.ArchiveID - 1 as [to]
from tblArchives md
where md.ArchiveID != 1 and not exists (
select 1 from tblArchives md2 where md2.ArchiveID = md.ArchiveID - 1)
) SELECT [from], [to], ([to]-[from])+1 [total missing]
From CTERange
ORDER BY ([to]-[from])+1 DESC;
from to total missing
------- ------- --------------
6 6 1
8 8 1
11 14 4
DECLARE #TempSujith TABLE
(MissingId int)
Declare #Id Int
DECLARE #mycur CURSOR
SET #mycur = CURSOR FOR Select Id From tbl_Table
OPEN #mycur
FETCH NEXT FROM #mycur INTO #Id
Declare #index int
Set #index = 1
WHILE ##FETCH_STATUS = 0
BEGIN
if (#index < #Id)
begin
while #index < #Id
begin
insert into #TempSujith values (#index)
set #index = #index + 1
end
end
set #index = #index + 1
FETCH NEXT FROM #mycur INTO #Id
END
Select Id from tbl_Table
select MissingId from #TempSujith
Create a useful Tally table:
-- can go up to 4 million or 2^22
select top 100000 identity(int, 1, 1) Id
into Tally
from master..spt_values
cross join master..spt_values
Index it, or make that single column as PK.
Then use EXCEPT to get your missing number.
select Id from Tally where Id <= (select max(Id) from TestTable)
except
select Id from TestTable
You could also solve using something like a CTE to generate the full sequence:
create table #tmp(sequence int)
insert into #tmp(sequence) values (1)
insert into #tmp(sequence) values (2)
insert into #tmp(sequence) values (3)
insert into #tmp(sequence) values (5)
insert into #tmp(sequence) values (6)
insert into #tmp(sequence) values (8)
insert into #tmp(sequence) values (10)
insert into #tmp(sequence) values (11)
insert into #tmp(sequence) values (14)
DECLARE #max INT
SELECT #max = max(sequence) from #tmp;
with full_sequence
(
Sequence
)
as
(
SELECT 1 Sequence
UNION ALL
SELECT Sequence + 1
FROM full_sequence
WHERE Sequence < #max
)
SELECT
full_sequence.sequence
FROM
full_sequence
LEFT JOIN
#tmp
ON
full_sequence.sequence = #tmp.sequence
WHERE
#tmp.sequence IS NULL
Hmmmm - the formatting is not working on here for some reason? Can anyone see the problem?
i had made a proc so you can send the table name and the key and the result is a list of missing numbers from the given table
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
create PROCEDURE [dbo].[action_FindMissing_Autoincremnt]
(
#tblname as nvarchar(50),
#tblKey as nvarchar(50)
)
AS
BEGIN
SET NOCOUNT ON;
declare #qry nvarchar(4000)
set #qry = 'declare #min int '
set #qry = #qry + 'declare #max int '
set #qry = #qry +'select #min = min(' + #tblKey + ')'
set #qry = #qry + ', #max = max('+ #tblKey +') '
set #qry = #qry + ' from '+ #tblname
set #qry = #qry + ' create table #tmp (Field_No int)
while #min <= #max
begin
if not exists (select * from '+ #tblname +' where '+ #tblKey +' = #min)
insert into #tmp (Field_No) values (#min)
set #min = #min + 1
end
select * from #tmp order by Field_No
drop table #tmp '
exec sp_executesql #qry
END
GO
SELECT TOP 1 (Id + 1)
FROM CustomerNumberGenerator
WHERE (Id + 1) NOT IN ( SELECT Id FROM CustomerNumberGenerator )
Working on a customer number generator for my company. Not the most efficient but definitely most readable
The table has one Id column.
The table allows for Ids to be inserted at manually by a user off sequence.
The solution solves the case where the user decided to pick a high number