Fastest way to insert 100000 records into SQL Server - sql

I am using the following script to insert 100,000 records into a table. Basically, the integers from 500,001 to 600,000 are inserted. I am casting each integer to a string before inserting because that is how I want it stored in the table (an integer in the form of a string). I am using a MERGE to check whether the record already exists.
-- Inserts the identifiers 500001..600000 (as strings) one row per iteration,
-- skipping any value already present in dbo.Identifiers.
-- Local variables carry the T-SQL `@` sigil; `#` names a temp table and is not
-- valid in DECLARE/SET.
DECLARE @first AS INT
SET @first = 500001
DECLARE @step AS INT
SET @step = 1
DECLARE @last AS INT
SET @last = 600000
-- One transaction around the whole loop: all rows commit together.
BEGIN TRANSACTION
WHILE(@first <= @last)
BEGIN
    -- Single-row MERGE used as "insert if not already there".
    MERGE dbo.Identifiers As target
    USING (SELECT CAST(@first as varchar(10)) AS Identifier) AS source
    ON (source.Identifier = target.Identifier)
    WHEN NOT MATCHED THEN
        INSERT (Identifier)
        VALUES (source.Identifier);
    SET @first += @step
END
COMMIT TRANSACTION
It's taking more than 2 minutes to load. I am doing something terribly wrong but am unable to work out what.
Note: The table has unique non-clustered index on Identifier Column.

I am wondering how much your procedural looping and the MERGE (instead of a simple INSERT) contributes to bad performance. I would opt for a strictly set-based solution like this:
-- Set-based load: insert every number in the range that is not stored yet.
-- dbo.GetNums returns BIGINT n, while the question stores Identifier as a
-- string, so n is cast explicitly. Comparing the raw BIGINT with the string
-- column would implicitly convert the column instead (numeric types outrank
-- character types), which defeats the unique index on Identifier.
-- NOT EXISTS is used rather than NOT IN so that a NULL Identifier cannot make
-- the predicate UNKNOWN for every candidate row.
INSERT INTO dbo.Identifiers (Identifier)
SELECT CAST(nums.n AS varchar(10))
FROM dbo.GetNums(500001, 600000) AS nums
WHERE NOT EXISTS (
    SELECT 1
    FROM dbo.Identifiers AS existing
    WHERE existing.Identifier = CAST(nums.n AS varchar(10))
);
Now, this relies on a user-defined table-valued function dbo.GetNums that returns a table containing all numbers between 500,001 and 600,000 in a column called n. How do you write that function? You need to generate a range of numbers on the fly inside it.
The following implementation is taken from the book "Microsoft SQL Server 2012 High-Performance T-SQL Using Window Functions" by Itzik Ben-Gan.
-- dbo.GetNums: inline table-valued function returning the integers
-- @low .. @high (inclusive), one per row, in a BIGINT column named n.
--   @low  : first value of the range
--   @high : last value of the range
-- No table is read: rows come from repeatedly cross-joining a two-row
-- constant set, then numbering them with ROW_NUMBER.
CREATE FUNCTION dbo.GetNums(@low AS BIGINT, @high AS BIGINT) RETURNS TABLE
AS
RETURN
  WITH
    L0   AS (SELECT c FROM (VALUES(1),(1)) AS D(c)),              -- 2 rows
    L1   AS (SELECT 1 AS c FROM L0 AS A CROSS JOIN L0 AS B),      -- 4 rows
    L2   AS (SELECT 1 AS c FROM L1 AS A CROSS JOIN L1 AS B),      -- 16 rows
    L3   AS (SELECT 1 AS c FROM L2 AS A CROSS JOIN L2 AS B),      -- 256 rows
    L4   AS (SELECT 1 AS c FROM L3 AS A CROSS JOIN L3 AS B),      -- 65,536 rows
    L5   AS (SELECT 1 AS c FROM L4 AS A CROSS JOIN L4 AS B),      -- 4,294,967,296 rows
    -- ORDER BY (SELECT NULL): the numbering order is irrelevant, only the count matters.
    Nums AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS rownum FROM L5)
  SELECT @low + rownum - 1 AS n
  FROM Nums
  ORDER BY rownum
  -- Keep only as many rows as the range holds.
  OFFSET 0 ROWS FETCH FIRST @high - @low + 1 ROWS ONLY;
(Since this comes from a book on SQL Server 2012, it might not work on SQL Server 2008 out-of-the-box, but it should be possible to adapt.)

Try this one. It uses a tally table. Reference: http://www.sqlservercentral.com/articles/T-SQL/62867/
-- Demo: load the numbers @first..@last into a temp table with one set-based
-- INSERT driven by an in-memory tally (cross-joined constant rows).
create table #temp_table(
    N int
)
declare @first as int
set @first = 500001
declare @step as int
set @step = 1
declare @last as int
set @last = 600000;  -- terminator required: the next statement begins with WITH
with
e1 as(select 1 as N union all select 1), --2 rows
e2 as(select 1 as N from e1 as a, e1 as b), --4 rows
e3 as(select 1 as N from e2 as a, e2 as b), --16 rows
e4 as(select 1 as N from e3 as a, e3 as b), --256 rows
e5 as(select 1 as N from e4 as a, e4 as b), --65,536 rows
e6 as(select 1 as N from e5 as a, e1 as b), -- 131,072 rows
tally as (select 500000 + (row_number() over(order by N) * @step) as N from e6) -- change 500000 with desired start
insert into #temp_table (N)
select cast(t.N as varchar(10))
from tally t
where
    t.N >= @first
    and t.N <= @last
    -- skip values that are already present in the target
    and not exists(
        select 1 from #temp_table as existing where existing.N = t.N
    )
drop table #temp_table

Vinoth, Something given below could also help you.
-- Ten-row helper: the identity column supplies the ids 1..10 that act as digits.
Declare @tab table (id int identity(1,1), num int)
Insert into @tab (num) Values (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
Declare @start as int
set @start = 500000
-- Five-way cross join = 10^5 rows, producing @start + 1 .. @start + 100000.
-- The value is cast to a string explicitly because the question stores
-- Identifier as a string. No ORDER BY: on INSERT ... SELECT it does not
-- guarantee the order in which rows are inserted, so it only added a sort.
Insert into dbo.Identifiers (Identifier)
Select CAST(@start + ((E.id-1)*10000) + ((D.id-1)*1000) + ((C.id-1)*100) + ((B.id-1)*10) + A.id AS varchar(10))
from @tab A
cross join @tab B
cross join @tab C
cross join @tab D
cross join @tab E
In my DB, dbo.Identifiers is a table without any index. It took only 230 ms for the insertion.

Create index on the Identifier column and then try the above insert

Related

Is this a good algorithm for listing prime numbers?

-- Lists the primes in 2..@c by excluding every number that is a product of
-- two numbers from the same range.
DECLARE @c int = 1000;
DECLARE @numbers TABLE (n int NOT NULL PRIMARY KEY);
DECLARE @products TABLE (p int NOT NULL PRIMARY KEY);
DECLARE @primes TABLE (p int NOT NULL PRIMARY KEY);
-- The 'composite exclusion' approach
-- 1. list all n = 2, 3, 4, ... c
WITH numbers AS
(
    SELECT 2 AS n
    UNION ALL
    SELECT n + 1 FROM numbers
    WHERE n <= @c - 1
)
-- MAXRECURSION 0 lifts the default recursion cap of 100 levels.
INSERT INTO @numbers SELECT n FROM numbers OPTION(MAXRECURSION 0);
-- 2. find all products n x n <= c
--    ON 1 = 1 pairs every number with every number (a cross join).
--    NOTE(review): m.n * n.n is evaluated in int for every pair, so a @c above
--    46340 would overflow on the largest pairs - confirm before raising @c.
WITH products AS
(
    SELECT DISTINCT m.n * n.n AS p
    FROM @numbers m LEFT OUTER JOIN
         @numbers n ON 1 = 1
    WHERE m.n * n.n <= @c
)
INSERT INTO @products SELECT p FROM products;
-- 3. numbers with no matching products are not composite, i.e, they're prime numbers.
INSERT INTO @primes
SELECT n.n FROM @numbers n LEFT JOIN @products p ON n.n = p.p WHERE p.p IS NULL;
It's kind of a one pass Sieve of Eratosthenes approach.
I've seen loops, stored procedures and the like, as well as pseudo-code and other language implementations, but it seems to me that this simple, set-based approach stemming from the definition of prime numbers should suffice.
Please bear in mind I'm not concerned with performance or memory consumption or optimizations at this time, and I have not tested it with larger numbers. I just want to publish the algorithm and have people confirm (or challenge) that excluding composite numbers from the list is enough.
Recursive CTEs (rCTE) are very rarely the best performing solution. Below is an approach that uses a tally table, it's a slightly tweaked version of the approach that Hugo Kornelis posted here: https://sqlserverfast.com/blog/hugo/2006/09/the-prime-number-challenge-great-waste-of-time/
Let's compare the tally table solution to the rCTE solution:
-- Benchmark: tally-table prime listing versus the recursive-CTE listing, each
-- in its own batch with STATISTICS TIME switched on.
SET STATISTICS TIME ON;
PRINT 'tally table approach'+char(13)+char(10)+replicate('-',50);
DECLARE @primes TABLE (p int NOT NULL PRIMARY KEY);
DECLARE @limit bigint = 10000;
-- E holds 10 constant rows; iTally numbers the first @limit rows of E x E x E x E x E (10^5 rows).
WITH E(x) AS (SELECT * FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t(x)),
iTally(N) AS (SELECT TOP(@limit) ROW_NUMBER() OVER (ORDER BY (SELECT 1)) FROM E a, E b, E c, E d, E f)
INSERT @primes
SELECT n1.N
FROM iTally AS n1
WHERE n1.N > 1
AND n1.N < @limit
-- prime = no divisor exists in 2 .. N-1
AND NOT EXISTS
(SELECT *
FROM iTally AS n2
WHERE n2.N < @limit
AND n2.N BETWEEN 2 AND n1.N-1
AND n1.N % n2.N = 0)
--ORDER BY N
GO
PRINT 'rCTE approach'+char(13)+char(10)+replicate('-',50);
DECLARE @c int = 10000;
DECLARE @numbers TABLE (n int NOT NULL PRIMARY KEY);
DECLARE @products TABLE (p int NOT NULL PRIMARY KEY);
DECLARE @primes TABLE (p int NOT NULL PRIMARY KEY);
-- 1. list all n = 2, 3, 4, ... c
WITH numbers AS
(
    SELECT 2 AS n
    UNION ALL
    SELECT n + 1 FROM numbers
    WHERE n <= @c - 1
)
INSERT INTO @numbers SELECT n FROM numbers OPTION(MAXRECURSION 0);
-- 2. find all products n x n <= c
WITH products AS
(
    SELECT DISTINCT m.n * n.n AS p
    FROM @numbers m LEFT OUTER JOIN
         @numbers n ON 1 = 1
    WHERE m.n * n.n <= @c
)
INSERT INTO @products SELECT p FROM products;
-- 3. numbers with no matching products are not composite, i.e, they're prime numbers.
INSERT INTO @primes
SELECT n.n FROM @numbers n LEFT JOIN @products p ON n.n = p.p WHERE p.p IS NULL;
SET STATISTICS TIME OFF;
and the results:
tally table approach
--------------------------------------------------
SQL Server Execution Times:
CPU time = 3042 ms, elapsed time = 3241 ms.
SQL Server parse and compile time:
CPU time = 0 ms, elapsed time = 10 ms.
rCTE approach
--------------------------------------------------
SQL Server Execution Times:
CPU time = 14976 ms, elapsed time = 15757 ms.
As you can see, the tally table approach against 10,000 was 5 times faster and also doesn't produce any reads (the rCTE produces a ton!)
If you are really working with prime numbers the absolute fastest approach would be to store them in a table so you don't need to calculate them each time you need prime numbers.

Get the missing numbers for each section

I want to get the missing protocol Numbers from this list for each section
I have my list
ProtocolNumber Section
--------------------------------
14A1000014 | A1
14A1000015 | A1
14A1000018 | A1
14A1000019 | A1
14A2000014 | A2
14A2000015 | A2
14A2000019 | A2
I try this
-- Asker's attempt: return each row of #tmp for which no row with the "next"
-- protocol number exists in the same section.
-- NOTE(review): the sample values ('14A1000014') are strings, so
-- ProtocolNumber + 1 looks like it would try to convert the string to a
-- number - confirm against the real column type.
SELECT lb1.ProtocolNumber, lb1.Section FROM #tmp lb1
WHERE not exists ( SELECT * FROM #tmp lb2
WHERE lb2.ProtocolNumber = lb1.ProtocolNumber + 1 and lb2.Section = lb1.Section)
The output should be like this
ProtocolNumber Section
--------------------------------
14A1000016 | A1
14A1000017 | A1
14A2000016 | A2
14A2000017 | A2
14A2000018 | A2
With the assumption that you are trying to generate a list of missing protocol numbers between the minimum and maximum range currently existing for that section, I'd suggest the following:
/*Sample Data*/
CREATE TABLE #tmp (ProtocolNumber VARCHAR(20), Section VARCHAR(2))
INSERT INTO #tmp (ProtocolNumber, Section)
VALUES
    ('14A1000014', 'A1'),
    ('14A1000015', 'A1'),
    ('14A1000018', 'A1'),
    ('14A1000019', 'A1'),
    ('14A2000014', 'A2'),
    ('14A2000015', 'A2'),
    ('14A2000019', 'A2')
/*CTEs to generate numbers list: 1 through 1,000,000*/
;WITH
E1(N) AS (SELECT 1 FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) s(N)), --1E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a CROSS JOIN E1 b), --1E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a CROSS JOIN E2 b), --1E+4 or 10,000 rows max
E6(N) AS (SELECT 1 FROM E4 a CROSS JOIN E2 b), --1E+6 or 1,000,000 rows max
cteTally(N) AS ( SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E6 ),
/*CTE to identify ranges of current numbers for each Section.
  The numeric part is the 6 characters starting at position 5
  (after the 2-character prefix and the 2-character section).*/
Ranges AS
(
    SELECT
        Section,
        MIN(CAST(SUBSTRING(ProtocolNumber, 5, 6) AS INT)) AS MinNumber,
        MAX(CAST(SUBSTRING(ProtocolNumber, 5, 6) AS INT)) AS MaxNumber
    FROM #tmp
    GROUP BY Section
),
/*CTE to generate full list of available protocols for each Section.
  Ranges has one row per Section and the tally values are unique, so every
  generated ProtocolNumber is already distinct.
  The '14' prefix is the one used by the sample data.*/
ProtocolList AS
(
    SELECT
        Ranges.Section,
        '14' + Ranges.Section + RIGHT('00000' + CAST(cteTally.N AS VARCHAR(6)), 6) AS ProtocolNumber
    FROM Ranges
    INNER JOIN cteTally
        ON  cteTally.N >= Ranges.MinNumber
        AND cteTally.N <= Ranges.MaxNumber
)
/*Final SELECT - protocols in the master list that do not exist for those sections in the temp table*/
SELECT l.ProtocolNumber, l.Section
FROM ProtocolList l
WHERE NOT EXISTS (
    SELECT 1
    FROM #tmp t
    WHERE t.ProtocolNumber = l.ProtocolNumber
)
ORDER BY
    l.Section,
    l.ProtocolNumber
DROP TABLE #tmp
Does it have to be a single SELECT statement?
How about writing a stored procedure or table-valued function that uses a cursor to iterate through your table and generate the missing records, which the function then returns?
The implementation would also need to check the maximum protocol number for each given section so that you only create records within range.
LIST OF MISSING PROTOCOL
-- Lists, section by section, the protocol numbers missing between the lowest
-- and highest number present in that section. One result set per section.
-- Sections are read from the table itself (in ascending order) rather than
-- being hard-coded, so any number of sections is handled.
-- The numeric part is taken as the last 6 characters and re-padded to 6
-- digits - assumes the '14' + 2-char section + 6-digit layout of the sample
-- data; verify against the real table.
DECLARE @w varchar(10) = (SELECT MIN(Section) FROM protocol) --CURRENT SECTION
DECLARE @q table(numx int) --HOLD MIN TO MAX ProtocolNumber
DECLARE @i table(num int)  --HOLD EXISTING ProtocolNumber
DECLARE @x int             --MAX NUMBER IN SECTION
DECLARE @y int             --RUNNING NUMBER, STARTS AT MIN
WHILE @w IS NOT NULL
BEGIN
    INSERT INTO @i (num)
    SELECT CONVERT(int, RIGHT(ProtocolNumber, 6)) FROM protocol WHERE Section = @w
    SELECT @x = MAX(num), @y = MIN(num) FROM @i
    WHILE @y <= @x
    BEGIN
        INSERT INTO @q (numx)
        VALUES (@y)
        SET @y = @y + 1
    END
    SELECT ('14' + @w + RIGHT('000000' + CONVERT(varchar(10), q.numx), 6)) AS ProtocolNumber, @w AS Section
    FROM @q q
    WHERE NOT EXISTS (SELECT 1 FROM @i i WHERE i.num = q.numx)
    -- table variables keep their rows between iterations: clear them
    DELETE FROM @q
    DELETE FROM @i
    -- advance to the next section; NULL when none is left, which ends the loop
    SET @w = (SELECT MIN(Section) FROM protocol WHERE Section > @w)
END

How to generate a range of numbers between two numbers?

I have two numbers as input from the user, like for example 1000 and 1050.
How do I generate the numbers between these two numbers, using a SQL query, in separate rows? I want this:
1000
1001
1002
1003
.
.
1050
Select non-persisted values with the VALUES keyword. Then use JOINs to generate lots and lots of combinations (can be extended to create hundreds of thousands of rows and beyond).
Short and fast version (not that easy to read):
-- Ten constant digits cross-joined four times give 10,000 rows, which
-- ROW_NUMBER then numbers 1..10000.
WITH digits AS (
    SELECT d.n
    FROM (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) AS d(n)
)
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM digits AS ones
CROSS JOIN digits AS tens
CROSS JOIN digits AS hundreds
CROSS JOIN digits AS thousands
ORDER BY 1
Demo
More verbose version:
-- Builds 0..9999 positionally: each derived table contributes one decimal digit.
SELECT ones.n + 10*tens.n + 100*hundreds.n + 1000*thousands.n
FROM (VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) AS ones(n)
CROSS JOIN (VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) AS tens(n)
CROSS JOIN (VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) AS hundreds(n)
CROSS JOIN (VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) AS thousands(n)
ORDER BY 1
Demo
Both versions can easily be extended with a WHERE clause, limiting the output of numbers to a user-specified range. If you want to reuse it, you can define a table-valued function for it.
an alternative solution is recursive CTE:
-- Recursive CTE: starts at @startnum and adds 1 per level until @endnum.
DECLARE @startnum INT=1000
DECLARE @endnum INT=1050
;
WITH gen AS (
    SELECT @startnum AS num
    UNION ALL
    SELECT num+1 FROM gen WHERE num+1<=@endnum
)
SELECT * FROM gen
-- one recursion level per generated value, so this caps the range at 10,001 values
option (maxrecursion 10000)
-- Reads the range from the system table master..spt_values.
-- @start / @end are expected to be declared by the caller.
-- DISTINCT collapses numbers that occur more than once in the table.
SELECT DISTINCT n = number
FROM master..[spt_values]
WHERE number BETWEEN @start AND @end
Demo
Note that this table has a maximum of 2048 because then the numbers have gaps.
Here's a slightly better approach using a system view(since from SQL-Server 2005):
-- Numbers the rows of sys.all_objects and keeps those inside the range.
-- @start / @end are expected to be declared by the caller; the highest value
-- available is the row count of sys.all_objects.
;WITH Nums AS
(
    SELECT n = ROW_NUMBER() OVER (ORDER BY [object_id])
    FROM sys.all_objects
)
SELECT n FROM Nums
WHERE n BETWEEN @start AND @end
ORDER BY n;
Demo
or use a custom a number-table. Credits to Aaron Bertrand, i suggest to read the whole article: Generate a set or sequence without loops
The best option I have used is as follows:
-- Generates @min..@max by numbering the first (@max - @min + 1) rows of
-- spt_values cross-joined with itself. Variable names use one casing
-- throughout so the script also runs under a case-sensitive collation.
DECLARE @min bigint, @max bigint
SELECT @min=919859000000, @max=919859999999
SELECT TOP (@max-@min+1) @min-1+row_number() over(order by t1.number) as N
FROM master..spt_values t1
CROSS JOIN master..spt_values t2
I have generated millions of records using this and it works perfect.
I recently wrote this inline table valued function to solve this very problem. It's not limited in range other than memory and storage. It accesses no tables so there's no need for disk reads or writes generally. It adds joins values exponentially on each iteration so it's very fast even for very large ranges. It creates ten million records in five seconds on my server. It also works with negative values.
-- fn_ConsecutiveNumbers: inline table-valued function returning the integers
-- @start .. @end (inclusive) in a single column X.
--   @start : first value of the range
--   @end   : last value of the range
-- Each derived table carries one hexadecimal digit of the offset (0..15 times
-- 16^k). The digits are OR-ed into an offset and @start is added; `|` and `+`
-- share one precedence level in T-SQL and evaluate left to right, so the
-- addition happens last.
-- Every join condition discards digit values larger than the range size, so
-- small ranges only combine a few rows.
CREATE FUNCTION [dbo].[fn_ConsecutiveNumbers]
(
    @start int,
    @end int
) RETURNS TABLE
RETURN
select
    x268435456.X
    | x16777216.X
    | x1048576.X
    | x65536.X
    | x4096.X
    | x256.X
    | x16.X
    | x1.X
    + @start
    X
from
(VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12),(13),(14),(15)) as x1(X)
join
(VALUES (0),(16),(32),(48),(64),(80),(96),(112),(128),(144),(160),(176),(192),(208),(224),(240)) as x16(X)
on x1.X <= @end-@start and x16.X <= @end-@start
join
(VALUES (0),(256),(512),(768),(1024),(1280),(1536),(1792),(2048),(2304),(2560),(2816),(3072),(3328),(3584),(3840)) as x256(X)
on x256.X <= @end-@start
join
(VALUES (0),(4096),(8192),(12288),(16384),(20480),(24576),(28672),(32768),(36864),(40960),(45056),(49152),(53248),(57344),(61440)) as x4096(X)
on x4096.X <= @end-@start
join
(VALUES (0),(65536),(131072),(196608),(262144),(327680),(393216),(458752),(524288),(589824),(655360),(720896),(786432),(851968),(917504),(983040)) as x65536(X)
on x65536.X <= @end-@start
join
(VALUES (0),(1048576),(2097152),(3145728),(4194304),(5242880),(6291456),(7340032),(8388608),(9437184),(10485760),(11534336),(12582912),(13631488),(14680064),(15728640)) as x1048576(X)
on x1048576.X <= @end-@start
join
(VALUES (0),(16777216),(33554432),(50331648),(67108864),(83886080),(100663296),(117440512),(134217728),(150994944),(167772160),(184549376),(201326592),(218103808),(234881024),(251658240)) as x16777216(X)
on x16777216.X <= @end-@start
join
(VALUES (0),(268435456),(536870912),(805306368),(1073741824),(1342177280),(1610612736),(1879048192)) as x268435456(X)
on x268435456.X <= @end-@start
-- Final trim: the per-digit filters above still let through combinations
-- whose sum lies beyond @end.
WHERE @end >=
    x268435456.X
    | isnull(x16777216.X, 0)
    | isnull(x1048576.X, 0)
    | isnull(x65536.X, 0)
    | isnull(x4096.X, 0)
    | isnull(x256.X, 0)
    | isnull(x16.X, 0)
    | isnull(x1.X, 0)
    + @start
GO
SELECT X FROM fn_ConsecutiveNumbers(5, 500);
It's handy for date and time ranges as well:
SELECT DATEADD(day,X, 0) DayX
FROM fn_ConsecutiveNumbers(datediff(day,0,'5/8/2015'), datediff(day,0,'5/31/2015'))
SELECT DATEADD(hour,X, 0) HourX
FROM fn_ConsecutiveNumbers(datediff(hour,0,'5/8/2015'), datediff(hour,0,'5/8/2015 12:00 PM'));
You could use a cross apply join on it to split records based on values in the table. So for example to create a record for every minute on a time range in a table you could do something like:
-- One output row per minute between StartTime and EndTime for every row of
-- TimeRanges. The bounds are minute offsets from date 0 so that they use the
-- same unit as the DATEADD(minute, ...) that turns X back into a datetime.
select TimeRanges.StartTime,
       TimeRanges.EndTime,
       DATEADD(minute, X, 0) MinuteX
FROM TimeRanges
cross apply fn_ConsecutiveNumbers(datediff(minute, 0, TimeRanges.StartTime),
                                  datediff(minute, 0, TimeRanges.EndTime)) ConsecutiveNumbers
It work for me !
select top 50 ROW_NUMBER() over(order by a.name) + 1000 as Rcount
from sys.all_objects a
I do it with recursive ctes, but i'm not sure if it is the best way
-- Recursive CTE counting from @initial up to @final, one row per value.
declare @initial as int = 1000;
declare @final as int = 1050;
with cte_n as (
    select @initial as contador
    union all
    select contador + 1 from cte_n
    where contador < @final
)
-- maxrecursion 0 removes the default cap of 100 recursion levels
select * from cte_n option (maxrecursion 0)
saludos.
-- Recursive CTE returning @start..@end in a column named SEQUENCE.
declare @start int = 1000
declare @end int = 1050
;with numcte
AS
(
    SELECT @start [SEQUENCE]
    UNION all
    SELECT [SEQUENCE] + 1 FROM numcte WHERE [SEQUENCE] < @end
)
SELECT * FROM numcte
-- Each value costs one recursion level and the default cap is 100 levels, so
-- without this hint any range longer than 101 values aborts with an error.
OPTION (MAXRECURSION 0)
If you don't have a problem installing a CLR assembly in your server a good option is writing a table valued function in .NET. That way you can use a simple syntax, making it easy to join with other queries and as a bonus won't waste memory because the result is streamed.
Create a project containing the following class:
using System;
using System.Collections;
using System.Data;
using System.Data.Sql;
using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;
namespace YourNamespace
{
    /// <summary>
    /// CLR table-valued function that streams the integers from
    /// <c>start</c> to <c>end</c> (inclusive), one row per value.
    /// </summary>
    public sealed class SequenceGenerator
    {
        /// <summary>
        /// Lazily yields every integer in [start, end]; yields nothing when
        /// start is greater than end. Reading <c>Value</c> of a SQL NULL
        /// argument throws, as it did before.
        /// </summary>
        [SqlFunction(FillRowMethodName = "FillRow")]
        public static IEnumerable Generate(SqlInt32 start, SqlInt32 end)
        {
            int first = start.Value;
            int last = end.Value;
            // Count in 64 bits: with an int counter, "i <= last" can never
            // become false when last == int.MaxValue (i++ wraps around).
            for (long current = first; current <= last; current++)
                yield return (int)current;
        }

        /// <summary>Copies one yielded value into the output column.</summary>
        public static void FillRow(Object obj, out int i)
        {
            i = (int)obj;
        }

        // Static-only class: not meant to be instantiated.
        private SequenceGenerator() { }
    }
}
Put the assembly somewhere on the server and run:
-- Registers the compiled assembly and exposes its Generate method as the
-- table-valued function Seq(@start, @end), returning a single int column i.
USE db;
CREATE ASSEMBLY SqlUtil FROM 'c:\path\to\assembly.dll'
WITH permission_set=Safe;
CREATE FUNCTION [Seq](@start int, @end int)
RETURNS TABLE(i int)
AS EXTERNAL NAME [SqlUtil].[YourNamespace.SequenceGenerator].[Generate];
Now you can run:
select * from dbo.seq(1, 1000000)
slartidan's answer can be improved, performance wise, by eliminating all references to the cartesian product and using ROW_NUMBER() instead (execution plan compared):
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS n FROM
(VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) x1(x),
(VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) x2(x),
(VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) x3(x),
(VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) x4(x),
(VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) x5(x)
ORDER BY n
Wrap it inside a CTE and add a where clause to select desired numbers:
-- Numbers 100,000 cross-joined rows (ten digits, five times) and returns the
-- values between @n1 and @n2 inclusive.
DECLARE @n1 AS INT = 100;
DECLARE @n2 AS INT = 40099;
WITH numbers AS (
    SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS n FROM
    (VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) x1(x),
    (VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) x2(x),
    (VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) x3(x),
    (VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) x4(x),
    (VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) x5(x)
)
SELECT numbers.n
FROM numbers
WHERE n BETWEEN @n1 and @n2
ORDER BY n
Nothing new but I rewrote Brian Pressler solution to be easier on the eye, it might be useful to someone (even if it's just future me):
alter function [dbo].[fn_GenerateNumbers]
(
#start int,
#end int
) returns table
return
with
b0 as (select n from (values (0),(0x00000001),(0x00000002),(0x00000003),(0x00000004),(0x00000005),(0x00000006),(0x00000007),(0x00000008),(0x00000009),(0x0000000A),(0x0000000B),(0x0000000C),(0x0000000D),(0x0000000E),(0x0000000F)) as b0(n)),
b1 as (select n from (values (0),(0x00000010),(0x00000020),(0x00000030),(0x00000040),(0x00000050),(0x00000060),(0x00000070),(0x00000080),(0x00000090),(0x000000A0),(0x000000B0),(0x000000C0),(0x000000D0),(0x000000E0),(0x000000F0)) as b1(n)),
b2 as (select n from (values (0),(0x00000100),(0x00000200),(0x00000300),(0x00000400),(0x00000500),(0x00000600),(0x00000700),(0x00000800),(0x00000900),(0x00000A00),(0x00000B00),(0x00000C00),(0x00000D00),(0x00000E00),(0x00000F00)) as b2(n)),
b3 as (select n from (values (0),(0x00001000),(0x00002000),(0x00003000),(0x00004000),(0x00005000),(0x00006000),(0x00007000),(0x00008000),(0x00009000),(0x0000A000),(0x0000B000),(0x0000C000),(0x0000D000),(0x0000E000),(0x0000F000)) as b3(n)),
b4 as (select n from (values (0),(0x00010000),(0x00020000),(0x00030000),(0x00040000),(0x00050000),(0x00060000),(0x00070000),(0x00080000),(0x00090000),(0x000A0000),(0x000B0000),(0x000C0000),(0x000D0000),(0x000E0000),(0x000F0000)) as b4(n)),
b5 as (select n from (values (0),(0x00100000),(0x00200000),(0x00300000),(0x00400000),(0x00500000),(0x00600000),(0x00700000),(0x00800000),(0x00900000),(0x00A00000),(0x00B00000),(0x00C00000),(0x00D00000),(0x00E00000),(0x00F00000)) as b5(n)),
b6 as (select n from (values (0),(0x01000000),(0x02000000),(0x03000000),(0x04000000),(0x05000000),(0x06000000),(0x07000000),(0x08000000),(0x09000000),(0x0A000000),(0x0B000000),(0x0C000000),(0x0D000000),(0x0E000000),(0x0F000000)) as b6(n)),
b7 as (select n from (values (0),(0x10000000),(0x20000000),(0x30000000),(0x40000000),(0x50000000),(0x60000000),(0x70000000)) as b7(n))
select s.n
from (
select
b7.n
| b6.n
| b5.n
| b4.n
| b3.n
| b2.n
| b1.n
| b0.n
+ #start
n
from b0
join b1 on b0.n <= #end-#start and b1.n <= #end-#start
join b2 on b2.n <= #end-#start
join b3 on b3.n <= #end-#start
join b4 on b4.n <= #end-#start
join b5 on b5.n <= #end-#start
join b6 on b6.n <= #end-#start
join b7 on b7.n <= #end-#start
) s
where #end >= s.n
GO
2 years later, but I found I had the same issue. Here is how I solved it. (edited to include parameters)
-- Returns @Start..@End by numbering the first (@End - @Start + 1) rows of
-- sys.all_objects; the range cannot be longer than that view's row count.
DECLARE @Start INT, @End INT
SET @Start = 1000
SET @End = 1050
SELECT TOP (@End - @Start+1) ROW_NUMBER() OVER (ORDER BY S.[object_id])+(@Start - 1) [Numbers]
FROM sys.all_objects S WITH (NOLOCK)
I know I'm 4 years too late, but I stumbled upon yet another alternative answer to this problem. The issue for speed isn't just pre-filtering, but also preventing sorting. It's possible to force the join-order to execute in a manner that the Cartesian product actually counts up as a result of the join. Using slartidan's answer as a jump-off point:
WITH x AS (SELECT n FROM (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) v(n))
SELECT ones.n + 10*tens.n + 100*hundreds.n + 1000*thousands.n
FROM x ones, x tens, x hundreds, x thousands
ORDER BY 1
If we know the range we want, we can specify it via #Upper and #Lower. By combining the join hint REMOTE along with TOP, we can calculate only the subset of values we want with nothing wasted.
-- Returns @Lower..@Upper (both expected to be declared by the caller) without
-- a WHERE or ORDER BY: the joins run from the most to the least significant
-- digit, and TOP stops once the range is covered.
WITH x AS (SELECT n FROM (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) v(n))
SELECT TOP (1+@Upper-@Lower) @Lower + ones.n + 10*tens.n + 100*hundreds.n + 1000*thousands.n
FROM x thousands
INNER REMOTE JOIN x hundreds on 1=1
INNER REMOTE JOIN x tens on 1=1
INNER REMOTE JOIN x ones on 1=1
The join hint REMOTE forces the optimizer to compare on the right side of the join first. By specifying each join as REMOTE from most to least significant value, the join itself will count upwards by one correctly. No need to filter with a WHERE, or sort with an ORDER BY.
If you want to increase the range, you can continue to add additional joins with progressively higher orders of magnitude, so long as they're ordered from most to least significant in the FROM clause.
Note that this is a query specific to SQL Server 2008 or higher.
If your SQL Server version is 2022 or later (or otherwise supports the GENERATE_SERIES function), you can use GENERATE_SERIES and declare START and STOP parameters.
GENERATE_SERIES returns a single-column table containing a sequence of values in which each differs from the preceding by STEP
-- Built-in series generator: values from @start to @stop in steps of @step.
declare @start int = 1000
declare @stop int = 1050
declare @step int = 2
SELECT [Value]
FROM GENERATE_SERIES(@start, @stop, @step)
Here are couple quite optimal and compatible solutions:
USE master;
declare #min as int; set #min = 1000;
declare #max as int; set #max = 1050; --null returns all
-- Up to 256 - 2 048 rows depending on SQL Server version
select isnull(#min,0)+number.number as number
FROM dbo.spt_values AS number
WHERE number."type" = 'P' --integers
and ( #max is null --return all
or isnull(#min,0)+number.number <= #max --return up to max
)
order by number
;
-- Up to 65 536 - 4 194 303 rows depending on SQL Server version
select isnull(#min,0)+value1.number+(value2.number*numberCount.numbers) as number
FROM dbo.spt_values AS value1
cross join dbo.spt_values AS value2
cross join ( --get the number of numbers (depends on version)
select sum(1) as numbers
from dbo.spt_values
where spt_values."type" = 'P' --integers
) as numberCount
WHERE value1."type" = 'P' --integers
and value2."type" = 'P' --integers
and ( #max is null --return all
or isnull(#min,0)+value1.number+(value2.number*numberCount.numbers)
<= #max --return up to max
)
order by number
;
recursive CTE in exponential size (even for default of 100 recursion, this can build up to 2^100 numbers):
-- Recursive CTE that branches like a binary tree: every num spawns num*2 and
-- num*2+1, so 1..@size is covered in roughly log2(@size) recursion levels.
DECLARE @startnum INT=1000
DECLARE @endnum INT=1050
DECLARE @size INT=@endnum-@startnum+1
;
WITH numrange (num) AS (
    SELECT 1 AS num
    UNION ALL
    SELECT num*2 FROM numrange WHERE num*2<=@size
    UNION ALL
    SELECT num*2+1 FROM numrange WHERE num*2+1<=@size
)
-- shift 1..@size onto @startnum..@endnum
SELECT num+@startnum-1 FROM numrange order by num
Update for SQL 2017 and later:
If the sequence you desire is < 8k then this will work:
-- Generates @start_num..@end_num (inclusive) by splitting a string of spaces.
-- k separators split into k + 1 pieces, so (@end_num - @start_num) spaces give
-- exactly one row per value in the range; ROW_NUMBER starts at 1, hence the
-- "- 1" so that the first row is @start_num itself.
Declare @start_num int = 1000
,       @end_num int = 1050
Select [number] = @start_num - 1 + ROW_NUMBER() over (order by (Select null))
from string_split(replicate(' ', @end_num - @start_num), ' ')
This will also do
DECLARE #startNum INT = 1000;
DECLARE #endNum INT = 1050;
INSERT INTO dbo.Numbers
( Num
)
SELECT CASE WHEN MAX(Num) IS NULL THEN #startNum
ELSE MAX(Num) + 1
END AS Num
FROM dbo.Numbers
GO 51
The best speed when run query
DECLARE #num INT = 1000
WHILE(#num<1050)
begin
INSERT INTO [dbo].[Codes]
( Code
)
VALUES (#num)
SET #num = #num + 1
end
I had to insert picture filepath into database using similar method. The query below worked fine:
DECLARE #num INT = 8270058
WHILE(#num<8270284)
begin
INSERT INTO [dbo].[Galleries]
(ImagePath)
VALUES
('~/Content/Galeria/P'+CONVERT(varchar(10), #num)+'.JPG')
SET #num = #num + 1
end
The code for you would be:
DECLARE #num INT = 1000
WHILE(#num<1051)
begin
SELECT #num
SET #num = #num + 1
end
Here's what I came up with:
create or alter function dbo.fn_range(#start int, #end int) returns table
return
with u2(n) as (
select n
from (VALUES (0),(1),(2),(3)) v(n)
),
u8(n) as (
select
x0.n | x1.n * 4 | x2.n * 16 | x3.n * 64 as n
from u2 x0, u2 x1, u2 x2, u2 x3
)
select
#start + s.n as n
from (
select
x0.n | isnull(x1.n, 0) * 256 | isnull(x2.n, 0) * 65536 as n
from u8 x0
left join u8 x1 on #end-#start > 256
left join u8 x2 on #end-#start > 65536
) s
where s.n < #end - #start
Generates up to 2^24 values. Join conditions keep it fast for small values.
This is what I do, it's pretty fast and flexible and not a lot of code.
DECLARE #count  int =   65536;
DECLARE #start  int =   11;
DECLARE #xml    xml =   REPLICATE(CAST('<x/>' AS nvarchar(max)), #count);
; WITH GenerateNumbers(Num) AS
(
    SELECT  ROW_NUMBER() OVER (ORDER BY #count) + #start - 1
    FROM    #xml.nodes('/x') X(T)
)
SELECT  Num
FROM    GenerateNumbers;
Note that (ORDER BY #count) is a dummy. It doesn't do anything but ROW_NUMBER() requires an ORDER BY.
Edit:
I realized that the original question was to get a range from x to y. My script can be modified like this to get a range:
DECLARE #start  int =   5;
DECLARE #end   int =   21;
DECLARE #xml    xml =   REPLICATE(CAST('<x/>' AS nvarchar(max)), #end - #start + 1);
; WITH GenerateNumbers(Num) AS
(
    SELECT  ROW_NUMBER() OVER (ORDER BY #end) + #start - 1
    FROM    #xml.nodes('/x') X(T)
)
SELECT  Num
FROM    GenerateNumbers;
-- Generate Numeric Range
-- Source: http://www.sqlservercentral.com/scripts/Miscellaneous/30397/
-- Fills a temp table with the digits 0..9, then cross-joins it five times to
-- build numbers positionally and returns those between @MinNum and @MaxNum.
CREATE TABLE #NumRange(
    n int
)
DECLARE @MinNum int
DECLARE @MaxNum int
DECLARE @I int
SET NOCOUNT ON
-- seed the digit table
SET @I = 0
WHILE @I <= 9 BEGIN
    INSERT INTO #NumRange VALUES(@I)
    SET @I = @I + 1
END
SET @MinNum = 1
-- Five digit positions produce 0..99,999 at most, so a larger upper bound can
-- never be reached; add further CROSS JOINs (x 100000, ...) to go higher.
SET @MaxNum = 99999
SELECT num = a.n +
    (b.n * 10) +
    (c.n * 100) +
    (d.n * 1000) +
    (e.n * 10000)
FROM #NumRange a
CROSS JOIN #NumRange b
CROSS JOIN #NumRange c
CROSS JOIN #NumRange d
CROSS JOIN #NumRange e
WHERE a.n +
    (b.n * 10) +
    (c.n * 100) +
    (d.n * 1000) +
    (e.n * 10000) BETWEEN @MinNum AND @MaxNum
ORDER BY a.n +
    (b.n * 10) +
    (c.n * 100) +
    (d.n * 1000) +
    (e.n * 10000)
DROP TABLE #NumRange
This only works for sequences as long as some application table has rows. Assume I want sequence from 1..100, and have application table dbo.foo with column (of numeric or string type) foo.bar:
select
top 100
row_number() over (order by dbo.foo.bar) as seq
from dbo.foo
Despite its presence in an order by clause, dbo.foo.bar does not have to have distinct or even non-null values.
Of course, SQL Server 2012 has sequence objects, so there's a natural solution in that product.
This completed for me in 36 seconds on our DEV server. Like Brian's answer, focusing on filtering to the range is important from within the query; a BETWEEN still tries to generate all the initial records prior to the lower bound even though it doesn't need them.
-- Returns @s..@e: TOP limits the numbered rows to the size of the range and
-- the offset (@s - 1) is added afterwards, so no values below @s are generated.
declare @s bigint = 10000000
    ,   @e bigint = 20000000
;WITH
Z AS (SELECT 0 z FROM (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12),(13),(14),(15)) T(n)), -- 16 rows
Y AS (SELECT 0 z FROM Z a, Z b, Z c, Z d, Z e, Z f, Z g, Z h, Z i, Z j, Z k, Z l, Z m, Z n, Z o, Z p), -- 16^16 rows
N AS (SELECT ROW_NUMBER() OVER (PARTITION BY 0 ORDER BY z) n FROM Y)
SELECT TOP (1+@e-@s) @s + n - 1 FROM N
Note that ROW_NUMBER is a bigint, so we can't go over 2^^64 (==16^^16) generated records with any method that uses it. This query therefore respects the same upper limit on generated values.
This uses procedural code and a table-valued function. Slow, but easy and predictable.
-- dbo.Sequence: multi-statement table-valued function returning the integers
-- @start .. @end (inclusive) in a column named ID, filled one row per loop
-- iteration. Returns no rows when @start is greater than @end.
CREATE FUNCTION [dbo].[Sequence] (@start int, @end int)
RETURNS
@Result TABLE(ID int)
AS
begin
    declare @i int;
    set @i = @start;
    while @i <= @end
    begin
        insert into @Result values (@i);
        set @i = @i+1;
    end
    return;
end
Usage:
SELECT * FROM dbo.Sequence (3,7);
ID
3
4
5
6
7
It's a table, so you can use it in joins with other data. I most frequently use this function as the left side of a join against a GROUP BY hour, day etc to ensure a contiguous sequence of time values.
SELECT DateAdd(hh,ID,'2018-06-20 00:00:00') as HoursInTheDay FROM dbo.Sequence (0,23) ;
HoursInTheDay
2018-06-20 00:00:00.000
2018-06-20 01:00:00.000
2018-06-20 02:00:00.000
2018-06-20 03:00:00.000
2018-06-20 04:00:00.000
(...)
Performance is uninspiring (16 seconds for a million rows) but good enough for many purposes.
SELECT count(1) FROM [dbo].[Sequence] (
1000001
,2000000)
GO
Oracle 12c; Quick but limited:
select rownum+1000 from all_objects fetch first 50 rows only;
Note: limited to row count of all_objects view;
The solution I've developed and used for quite some time now (riding some on the shared works of others) is slightly similar to at least one posted. It doesn't reference any tables and returns an unsorted range of up to 1048576 values (2^20) and can include negatives if desired. You can of course sort the result if necessary. It runs pretty quickly, especially on smaller ranges.
Select value from dbo.intRange(-500, 1500) order by value -- returns 2001 values
-- dbo.intRange: inline table-valued function returning the integers
-- @Starting .. @Ending (inclusive, unsorted) in a column named value.
--   @Starting : first value of the range
--   @Ending   : last value of the range
-- Twenty two-row sets each contribute one bit (0 or 2^k) of the offset, for
-- 2^20 = 1,048,576 combinations; a wider range returns no rows at all.
create function dbo.intRange
(
    @Starting as int,
    @Ending as int
)
returns table
as
return (
    select value
    from (
        select @Starting +
            ( bit00.v | bit01.v | bit02.v | bit03.v
            | bit04.v | bit05.v | bit06.v | bit07.v
            | bit08.v | bit09.v | bit10.v | bit11.v
            | bit12.v | bit13.v | bit14.v | bit15.v
            | bit16.v | bit17.v | bit18.v | bit19.v
            ) as value
        from (select 0 as v union ALL select 0x00001 as v) as bit00
        cross join (select 0 as v union ALL select 0x00002 as v) as bit01
        cross join (select 0 as v union ALL select 0x00004 as v) as bit02
        cross join (select 0 as v union ALL select 0x00008 as v) as bit03
        cross join (select 0 as v union ALL select 0x00010 as v) as bit04
        cross join (select 0 as v union ALL select 0x00020 as v) as bit05
        cross join (select 0 as v union ALL select 0x00040 as v) as bit06
        cross join (select 0 as v union ALL select 0x00080 as v) as bit07
        cross join (select 0 as v union ALL select 0x00100 as v) as bit08
        cross join (select 0 as v union ALL select 0x00200 as v) as bit09
        cross join (select 0 as v union ALL select 0x00400 as v) as bit10
        cross join (select 0 as v union ALL select 0x00800 as v) as bit11
        cross join (select 0 as v union ALL select 0x01000 as v) as bit12
        cross join (select 0 as v union ALL select 0x02000 as v) as bit13
        cross join (select 0 as v union ALL select 0x04000 as v) as bit14
        cross join (select 0 as v union ALL select 0x08000 as v) as bit15
        cross join (select 0 as v union ALL select 0x10000 as v) as bit16
        cross join (select 0 as v union ALL select 0x20000 as v) as bit17
        cross join (select 0 as v union ALL select 0x40000 as v) as bit18
        cross join (select 0 as v union ALL select 0x80000 as v) as bit19
    ) intList
    -- refuse ranges that do not fit into 20 bits
    where @Ending - @Starting < 0x100000
    and intList.value between @Starting and @Ending
)
-- Builds the integers 0..9999 by combining one decimal digit per place value.
;WITH u AS (
    -- the ten decimal digits
    SELECT Unit
    FROM (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) AS v(Unit)
),
d AS (
    SELECT th.Unit * 1000 + hu.Unit * 100 + te.Unit * 10 + un.Unit AS V
    FROM u AS th
    CROSS JOIN u AS hu
    CROSS JOIN u AS te
    CROSS JOIN u AS un
    -- upper bound on the generated values; four digits never exceed 9999,
    -- so with this constant no row is filtered out
    WHERE th.Unit * 1000 + hu.Unit * 100 + te.Unit * 10 + un.Unit <= 10000
)
SELECT V
FROM d
ORDER BY V
I made the below function after reading this thread. Simple and fast:
go
-- Inline table-valued function: returns #len consecutive integers starting at
-- #begin, in a column named v.
-- The 16-row set cross joined with itself 8 times supplies up to 16^8 source
-- rows; TOP keeps only as many as requested.
create function numbers(#begin int, #len int)
returns table as return
with sixteen as (
    select 1 as v
    from (values (0),(0),(0),(0),(0),(0),(0),(0),
                 (0),(0),(0),(0),(0),(0),(0),(0)) as seed(x)
)
select top (#len)
       #begin - 1 + row_number() over (order by (select null)) as v
from sixteen as s0
cross join sixteen as s1
cross join sixteen as s2
cross join sixteen as s3
cross join sixteen as s4
cross join sixteen as s5
cross join sixteen as s6
cross join sixteen as s7
go
select v
from numbers(987654321, 500000)

finding the missing values in a Sequence

Table1 is as follows :
Col1
1
2
3
4
6
7
8
9
10
13
14
As shown above, Col1 holds a sequence of values, but for some reason the user did not insert 5, 11 and so on. How can I find the missing values in a sequence? Here the sequence is 1 to 14 and the missing values are 5, 11 and 12. Please help me.
As was said in other answers, the best choice is to do a join with a real sequence table. You can create one using a recursive CTE:
-- Lists every number from 1 up to the largest Col1 that is absent from YourTable.
DECLARE #MaxNumber INT
SELECT #MaxNumber = MAX(Col1) FROM YourTable;
WITH Seq AS
(
    -- anchor: the sequence starts at 1
    SELECT 1 AS Col1
    UNION ALL
    -- recursive step: keep adding 1 until the maximum is reached
    SELECT Col1 + 1
    FROM Seq
    WHERE Col1 + 1 <= #MaxNumber
)
SELECT S.Col1
FROM Seq AS S
WHERE NOT EXISTS (SELECT 1 FROM YourTable AS T WHERE T.Col1 = S.Col1)
-- 0 lifts the recursion limit so long sequences are not cut off
OPTION (MAXRECURSION 0)
This will work for numbers 0 - 2000. For larger numbers you just need to cross join the original result set with itself.
-- Numbers 0..2000 that do not appear in your_table.col1.
with temp as (
    -- spt_values holds the same number under several types, hence DISTINCT;
    -- the object name is spelled in lower case so it also resolves on a
    -- case-sensitive server
    select distinct number
    from master..spt_values
    where number between 0 and 2000
)
-- return only the missing number: every your_table column would be NULL here
select t.number
from temp as t
where not exists (select 1 from your_table as y where y.col1 = t.number)
alternatively using cross join
This will work for billions of numbers, but is obviously slower:
-- Numbers from 1 up to the largest col1 that do not appear in your_table.
-- L0 has 2 rows and each following level cross joins the previous level with
-- itself (4, 16, 256, 65536, 4294967296 rows); Nums numbers those rows.
WITH
L0 AS (SELECT 1 AS c UNION ALL SELECT 1),
L1 AS (SELECT 1 AS c FROM L0 AS A CROSS JOIN L0 AS B),
L2 AS (SELECT 1 AS c FROM L1 AS A CROSS JOIN L1 AS B),
L3 AS (SELECT 1 AS c FROM L2 AS A CROSS JOIN L2 AS B),
L4 AS (SELECT 1 AS c FROM L3 AS A CROSS JOIN L3 AS B),
L5 AS (SELECT 1 AS c FROM L4 AS A CROSS JOIN L4 AS B),
Nums AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS n FROM L5)
-- read from Nums: L5 only exposes the constant column c, not n
SELECT t.n
FROM Nums AS t
-- stop at the largest existing value; otherwise every number up to 2^32 is reported
WHERE t.n <= (SELECT MAX(col1) FROM your_table)
  AND NOT EXISTS (SELECT 1 FROM your_table AS y WHERE y.col1 = t.n)
This seems to pretty much be a duplication of
SQL query to find Missing sequence numbers
There's a suggestion there that this will work:
-- Returns id + 1 for every row whose successor id is absent, i.e. the first
-- missing value of each gap (the row with the largest id qualifies as well).
SELECT l.id + 1 AS start
FROM Table1 AS l
WHERE NOT EXISTS (SELECT 1 FROM Table1 AS r WHERE r.id = l.id + 1)
Otherwise you can left join on your table with a sequence table. From the above question, you can look at http://www.projectdmx.com/tsql/tblnumbers.aspx to get some ideas on how to generate a suitable sequence table, and the join will be something like
-- Sequence values that have no matching row in Table1
SELECT s.value
FROM #sequence AS s
WHERE NOT EXISTS (SELECT 1 FROM Table1 AS t WHERE t.value = s.value)
Side note on all the recursive CTE suggestions: the run time of a recursive CTE grows linearly with the number of rows. Using a tally table or a cross join is a much better choice.
This would work:
-- Demo: flags every number from 1 to the largest stored value as
-- 'Missing' or 'Not Missing'.
-- data table
CREATE TABLE #data (
    value INT
)
INSERT #data (value)
VALUES (1), (2), (3), (4), (6), (7), (8), (9), (10), (13), (14)
-- normally i have a tally table already for stuff like this but I'll
-- create one temporary here.
CREATE TABLE #tmp_tally (
    n INT
)
DECLARE #n INT
DECLARE #max_value INT
SET #n = 1
-- fill the tally up to and including the largest value present, so the last
-- number (14 in this example) is checked too
SELECT #max_value = MAX(value) FROM #data
WHILE #n <= #max_value
BEGIN
    INSERT #tmp_tally (n) VALUES (#n)
    SET #n = #n + 1
END
SELECT
    T.n,
    CASE WHEN D.value IS NULL THEN 'Missing' ELSE 'Not Missing' END AS status
FROM
    #tmp_tally AS T
    LEFT JOIN #data AS D ON
        T.n = D.value
ORDER BY
    T.n
DROP TABLE #data
DROP TABLE #tmp_tally
Try this:
-- Collects every value between the smallest and largest field_ID that has no
-- row in [Table], then returns the collected values.
declare #min int
declare #max int
select #min = min(field_ID), #max = max(field_ID) from [Table]
create table #tmp (Field_No int)
while #min <= #max
begin
    if not exists (select * from [Table] where field_ID = #min)
        -- insert into the column the temp table actually declares
        insert into #tmp (Field_No) values (#min)
    set #min = #min + 1
end
select Field_No from #tmp
drop table #tmp
With the above script you will get the missing ID values listed in the #tmp table.
Hope this will help you!!
I would do a subquery on the same table to see whether a row exists for the current number - 1; if there is none, you know that a number was skipped. You can do the same check with +1 as well.
-- Labels each number 'skipped' when its predecessor (numb - 1) is absent from
-- the same table; the smallest number is therefore labelled 'skipped' too.
select
    nt.numb,
    case
        when not exists (select 1 from numbertable as prev where prev.numb = nt.numb - 1)
            then 'skipped'
        else 'not skipped'
    end as status
from
    numbertable as nt

Create dummy data with WHILE

I am trying to insert some dummy data into my table using a WHILE loop, but it runs really, really slowly.
I was thinking that maybe I am not writing the code properly; could you please have a look and confirm?
-- Insert dummy data
-- Inserts one row per counter value 5001..9999 (4999 single-row inserts),
-- each with a random ContentId between 5000 and 10000.
DECLARE #i int = 5001,
        #content_id int;
WHILE #i BETWEEN 5001 AND 9999
BEGIN
    -- RAND() is in [0, 1), so the rounded value falls in 5000..10000
    SET #content_id = ROUND(5000 * RAND() + 5000, 0);
    INSERT INTO dbo.CmsImagesContents (ContentId, Title, AltTag, Caption)
    VALUES (#content_id, 'Test Title', 'Test AltTag', 'Test Caption');
    SET #i = #i + 1;
END
Rather than doing 4999 separate insert statements in a loop, you'll get much better performance if you do a single insert of all 4999 rows. So, if you have a table #T1 containing 4999 rows, you would simply run the following:
-- Inserts one row per row of #T1 in a single statement, each with a random
-- ContentId in 5000..9999.
INSERT INTO dbo.CmsImagesContents (ContentId, Title, AltTag, Caption)
SELECT
    -- take the modulo before ABS: ABS raises an overflow error for the
    -- smallest INT, which the random bytes of NEWID() can produce
    ABS(CAST(CAST(NEWID() AS VARBINARY) AS INT) % 5000) + 5000 AS ContentID,
    'Test Title' AS Title,
    'Test AltTag' AS AltTag,
    'Test Caption' AS Caption
FROM #T1
If you need to create such a table of 4999 rows in the first place then the following SQL would work for you:
-- Creates #T1 and fills it with the 4999 integers 5001..9999.
CREATE TABLE #T1
(
    N INT NOT NULL PRIMARY KEY
);
-- L0 has 2 rows; each later level cross joins the previous one with itself
-- (4, 16, 256, 65536 rows), and Nums numbers the rows of L4.
WITH L0 AS (SELECT pair.N FROM (VALUES (1), (1)) AS pair(N)),
     L1 AS (SELECT A.N FROM L0 AS A CROSS JOIN L0 AS B),
     L2 AS (SELECT A.N FROM L1 AS A CROSS JOIN L1 AS B),
     L3 AS (SELECT A.N FROM L2 AS A CROSS JOIN L2 AS B),
     L4 AS (SELECT A.N FROM L3 AS A CROSS JOIN L3 AS B),
     Nums AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS n FROM L4)
INSERT INTO #T1 (N)
SELECT n
FROM Nums
WHERE n BETWEEN 5001 AND 9999;