Create dummy data with WHILE

Create dummy data with WHILE - sql

I am trying to insert some dummy data into my table using a WHILE loop, but it runs really, really slowly.
I was thinking maybe I am not writing the code properly; could you please have a look and confirm?
-- Insert dummy data: one INSERT statement is executed per loop iteration
-- (i runs from 5001 to 9999, i.e. 4999 single-row inserts).
DECLARE
    @i INT,        -- loop counter
    @Content INT;  -- random ContentId for the current row

SET @i = 5001;

WHILE @i > 5000 AND @i < 10000
BEGIN
    -- RAND() returns a float in [0, 1), so the rounded value lies between 5000 and 10000.
    SET @Content = ROUND(((10000 - 5000) * RAND() + 5000), 0);

    INSERT INTO dbo.CmsImagesContents
        (ContentId, Title, AltTag, Caption)
    VALUES
        (@Content, 'Test Title', 'Test AltTag', 'Test Caption');

    SET @i = @i + 1;
END

Rather than doing 4999 separate insert statements in a loop, you'll get much better performance if you do a single insert of all 4999 rows. So, if you have a table #T1 containing 4999 rows, you would simply call the following:
-- Set-based load: a single INSERT creates one row for every row of #T1.
INSERT INTO dbo.CmsImagesContents
    (ContentId, Title, AltTag, Caption)
SELECT
    -- NEWID() is evaluated per row. The modulo is taken BEFORE ABS() so the
    -- intermediate value is always within -4999..4999; ABS() applied directly to
    -- the raw INT would overflow when the cast yields -2147483648.
    ABS(CAST(CAST(NEWID() AS VARBINARY) AS INT) % 5000) + 5000 AS ContentID,
    'Test Title'   AS Title,
    'Test AltTag'  AS AltTag,
    'Test Caption' AS Caption
FROM #T1;
If you need to create such a table of 4999 rows in the first place then the following SQL would work for you:
-- Helper table holding the integers 5001..9999 (4999 rows).
CREATE TABLE #T1
(
    N INT NOT NULL PRIMARY KEY
);

-- Each CTE cross-joins the previous one with itself, squaring the row count:
-- 2 -> 4 -> 16 -> 256 -> 65,536 rows, which are then numbered 1..65,536.
WITH
    Rows2     AS (SELECT 1 AS N UNION ALL SELECT 1),
    Rows4     AS (SELECT x.N FROM Rows2   AS x CROSS JOIN Rows2   AS y),
    Rows16    AS (SELECT x.N FROM Rows4   AS x CROSS JOIN Rows4   AS y),
    Rows256   AS (SELECT x.N FROM Rows16  AS x CROSS JOIN Rows16  AS y),
    Rows65536 AS (SELECT x.N FROM Rows256 AS x CROSS JOIN Rows256 AS y),
    Numbered  AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N FROM Rows65536)
INSERT INTO #T1 (N)
SELECT N
FROM Numbered
WHERE N BETWEEN 5001 AND 9999;

Related

Workaround for temporary Table in view SQL Server

I have the following problem: I want to create a view that contains among others a table, whose column 'verylongtext' should be split in strings that mustn't exceed 50 characters.
this result set should be joined in the view.
the temporary table would be created the following way:
-- Scratch storage for the split pieces: one row per (source row id, piece of text).
create table #results(id int, string varchar(400))
-- NOTE(review): this second declaration reuses the name #results, and the `#` prefix here
-- and on #id / #strings below looks like a mangled `@` variable sigil -- confirm against
-- the original post before running.
declare #results table(id int, string varchar(400))
declare #id int
declare #strings varchar(400)
set #id = 0
-- Walk roottable in ascending row_id order, handling exactly one row per iteration.
while exists (select * from roottable where row_id > #id)
begin
select top 1 #id = row_id, #strings = verylongtext from roottable
where row_id > #id
order by row_id asc
-- DelineateEachNth is called with (text, 50, '$') and its result is split on '$';
-- every resulting piece is stored together with the id of its source row.
insert into #results
select #id, data from dbo.Split( [dbo].[DelineateEachNth](#strings, 50, '$'), '$')
end
The problem is of course, that no temporary tables are allowed in views. CTEs don't seem to work with the resultset of the function. Is there any possible other way? I am absolutely clueless. Thanks in advance!!

You can call your split function directly on the base table using APPLY, meaning no temp table or loops are required:
-- Split every row's text directly against the base table: CROSS APPLY invokes the
-- split once per row of RootTable, so no temp table or loop is required.
SELECT
    r.row_id AS id,  -- the table's key column is row_id; exposed as id, like #results.id
    s.data
FROM RootTable AS r
CROSS APPLY dbo.Split(dbo.DelineateEachNth(r.VeryLongText, 50, '$'), '$') AS s;
You may find the scalar function dbo.DelineateEachNth is a performance killer (as all scalar UDFs are), as such an alternative way to split the string is to use a tally table:
-- Splits @String into consecutive chunks of at most @Length characters.
-- Parameters:
--   @String  text to split
--   @Length  maximum chunk length; NULL, zero or negative values yield an empty result
--            instead of a divide-by-zero / negative-TOP error
-- Returns one row per chunk: ItemNumber (1-based position) and Data (the chunk text).
CREATE FUNCTION dbo.FixedLengthSplit
(
    @String NVARCHAR(MAX),
    @Length INT
)
RETURNS TABLE
WITH SCHEMABINDING AS
RETURN
(   -- N1..N4 grow to 10^8 rows through repeated cross joins; TOP below limits the
    -- rows actually produced to the number of chunks needed.
    WITH N1 AS (SELECT N FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) n (N)),
    N2(N) AS (SELECT 1 FROM N1 a CROSS JOIN N1 b),
    N3(N) AS (SELECT 1 FROM N2 a CROSS JOIN N2 b),
    N4(N) AS (SELECT 1 FROM N3 a CROSS JOIN N3 b),
    Numbers (N) AS
    (   -- Zero-based chunk index: 0 .. CEILING(LEN / @Length) - 1.
        -- NULLIF keeps the division safe even if it is evaluated for @Length = 0.
        SELECT TOP (CASE WHEN @Length > 0
                         THEN CONVERT(INT, CEILING(1.0 * ISNULL(LEN(@String), 1) / NULLIF(@Length, 0)))
                         ELSE 0
                    END)
                ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1
        FROM N4
    )
    SELECT  ItemNumber = N + 1,
            Data = SUBSTRING(@String, 1 + (@Length * N), @Length)
    FROM Numbers
);
Then your view is just:
-- View body: one output row per 50-character chunk of each row's text.
-- The source column is VeryLongText (as in the question), and the columns are listed
-- explicitly because SELECT * is fragile inside a view definition.
SELECT
    r.row_id,
    s.ItemNumber,
    s.Data
FROM rootTable AS r
CROSS APPLY dbo.FixedLengthSplit(r.VeryLongText, 50) AS s;

There is a way to generate a list of GUID's using NEWID function?

I need to create a list of GUID's in SQL Server 2008 R2 and I am using the NEWID() function.
This is what I am trying but I just get only one ID:
SELECT TOP 100 NEWID()
I am new to SQL Server and I don't know if there is a way to do that, or a way to create a loop to do it.
I don't need to persist those GUID's I just want to show them on screen.

You can use an arbitrary table as "sequence-generator":
-- spt_values only serves as a row source here; NEWID() is evaluated once per returned row.
SELECT TOP (100)
    NEWID() AS Guid
FROM [master]..spt_values;
Demo
Note that this table contains only 2346 rows.
Worth reading: Generate a set or sequence without loops

You could do this:
-- Emit @nbrOf GUIDs, using a recursive CTE as the row source.
DECLARE @nbrOf INT;
SET @nbrOf = 100;

WITH Nbrs (n) AS
(
    SELECT 1              -- anchor row
    UNION ALL
    SELECT 1 + n          -- one further row per recursion step, up to @nbrOf
    FROM Nbrs
    WHERE n < @nbrOf
)
SELECT
    NEWID()
FROM
    Nbrs
OPTION (MAXRECURSION 0);  -- 0 removes the recursion limit (the default limit is 100)
Or with cross joins:
-- Emit @nbrOf GUIDs, using stacked cross joins as the row source (no recursion).
DECLARE @nbrOf INT;
SET @nbrOf = 100;

WITH
    E00(N) AS (SELECT 1 UNION ALL SELECT 1),                 -- 2 rows
    E02(N) AS (SELECT 1 FROM E00 AS a CROSS JOIN E00 AS b),  -- 4 rows
    E04(N) AS (SELECT 1 FROM E02 AS a CROSS JOIN E02 AS b),  -- 16 rows
    E08(N) AS (SELECT 1 FROM E04 AS a CROSS JOIN E04 AS b),  -- 256 rows
    E16(N) AS (SELECT 1 FROM E08 AS a CROSS JOIN E08 AS b),  -- 65,536 rows
    E32(N) AS (SELECT 1 FROM E16 AS a CROSS JOIN E16 AS b),  -- 4,294,967,296 rows
    Nbrs(N) AS (SELECT ROW_NUMBER() OVER (ORDER BY N) FROM E32)
SELECT
    NEWID()
FROM
    Nbrs
WHERE
    Nbrs.N <= @nbrOf;

Just use a loop. Try the following:
-- Collect 100 GUIDs row by row, return them, then clean up the temp table.
create table #GUIDS (tempID uniqueidentifier);

declare @i int = 0;  -- number of GUIDs inserted so far
while (@i < 100)
begin
    insert into #GUIDS (tempID)
    select newid();

    set @i = @i + 1;
end

select tempID from #GUIDS;
drop table #GUIDS;
NOTE: This is not a good solution to use for a large number of iterations, as it loops through the result in a row-by-row fashion.

create table #GUIDS (tempID uniqueidentifier)
GO
-- "GO 100" makes the client tool (SSMS / sqlcmd) run the preceding batch 100 times.
-- The CREATE TABLE is therefore kept in its own batch above; otherwise it would be
-- repeated as well and fail once #GUIDS already exists.
INSERT INTO #GUIDS (tempID) VALUES (NEWID())
GO 100
select * from #GUIDS
drop table #GUIDS
This can be used to create 100 records without writing a loop.

Get the missing numbers for each section

I want to get the missing protocol Numbers from this list for each section
I have my list
ProtocolNumber Section
--------------------------------
14A1000014 | A1
14A1000015 | A1
14A1000018 | A1
14A1000019 | A1
14A2000014 | A2
14A2000015 | A2
14A2000019 | A2
I try this
-- Attempt: keep each row for which no row with the "next" ProtocolNumber exists in the same Section.
-- NOTE(review): this selects existing rows (lb1) that sit just before a gap, not the
-- missing numbers themselves.
SELECT lb1.ProtocolNumber, lb1.Section FROM #tmp lb1
WHERE not exists ( SELECT * FROM #tmp lb2
WHERE lb2.ProtocolNumber = lb1.ProtocolNumber + 1 and lb2.Section = lb1.Section)
The output should be like this
ProtocolNumber Section
--------------------------------
14A1000016 | A1
14A1000017 | A1
14A2000016 | A2
14A2000017 | A2
14A2000018 | A2

With the assumption that you are trying to generate a list of missing protocol numbers between the minimum and maximum range currently existing for that section, I'd suggest the following:
/* Sample data */
CREATE TABLE #tmp (ProtocolNumber VARCHAR(20), Section VARCHAR(2));

INSERT INTO #tmp (ProtocolNumber, Section)
VALUES
    ('14A1000014', 'A1'),
    ('14A1000015', 'A1'),
    ('14A1000018', 'A1'),
    ('14A1000019', 'A1'),
    ('14A2000014', 'A2'),
    ('14A2000015', 'A2'),
    ('14A2000019', 'A2');

/* Tally: the integers 1 through 1,000,000, built from cross-joined 10-row sets */
WITH
    Ten(N)     AS (SELECT 1 FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS v(N)),
    Hundred(N) AS (SELECT 1 FROM Ten     AS a CROSS JOIN Ten     AS b),  -- 100 rows
    TenK(N)    AS (SELECT 1 FROM Hundred AS a CROSS JOIN Hundred AS b),  -- 10,000 rows
    Million(N) AS (SELECT 1 FROM TenK    AS a CROSS JOIN Hundred AS b),  -- 1,000,000 rows
    Tally(N)   AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM Million),
    /* Lowest and highest existing sequence number (characters 5-10) per Section */
    SectionRange AS
    (
        SELECT
            Section,
            MIN(CAST(SUBSTRING(ProtocolNumber, 5, 6) AS INT)) AS MinNumber,
            MAX(CAST(SUBSTRING(ProtocolNumber, 5, 6) AS INT)) AS MaxNumber
        FROM #tmp
        GROUP BY Section
    ),
    /* Every protocol number between a Section's minimum and maximum, zero-padded to 6 digits */
    Candidate AS
    (
        SELECT
            sr.Section,
            '14' + sr.Section + RIGHT('00000' + CAST(n.N AS VARCHAR(6)), 6) AS ProtocolNumber
        FROM SectionRange AS sr
        INNER JOIN Tally AS n
            ON n.N BETWEEN sr.MinNumber AND sr.MaxNumber
    )
/* Final SELECT: candidates that do not already exist in the temp table are the missing ones */
SELECT
    c.ProtocolNumber,
    c.Section
FROM Candidate AS c
WHERE NOT EXISTS
(
    SELECT *
    FROM #tmp AS t
    WHERE t.ProtocolNumber = c.ProtocolNumber
)
ORDER BY
    c.Section,
    c.ProtocolNumber;

DROP TABLE #tmp;

Does it have to be a one-line SELECT statement?
How about writing a stored procedure or table-valued function that uses a cursor to iterate through your table and create the missing records, which the function then returns?
The implementation would also need to check the max protocol number for each given section, so that you only create records within range.

LIST OF MISSING PROTOCOL
-- Lists the missing protocol numbers, one result set per section.
-- Sections are discovered from the data (ascending order) rather than hard-coded, and the
-- sequence number is read from characters 5-10 so it is not limited to two digits.
DECLARE @w VARCHAR(10) = (SELECT MIN(Section) FROM protocol); -- section currently processed
DECLARE @q TABLE (numx INT); -- every number from the section's min to its max
DECLARE @i TABLE (num INT);  -- numbers that already exist for the section
DECLARE @x INT;              -- highest existing number of the section
DECLARE @y INT;              -- running number, starts at the lowest existing one

WHILE @w IS NOT NULL
BEGIN
    INSERT INTO @i (num)
    SELECT CONVERT(INT, SUBSTRING(ProtocolNumber, 5, 6))
    FROM protocol
    WHERE Section = @w;

    SELECT @y = MIN(num), @x = MAX(num) FROM @i;

    -- Fill @q with the full range min..max for this section.
    WHILE @y <= @x
    BEGIN
        INSERT INTO @q (numx)
        VALUES (@y);
        SET @y = @y + 1;
    END

    -- Numbers of the range that do not exist yet, formatted like the stored values.
    SELECT '14' + @w + RIGHT('000000' + CONVERT(VARCHAR(10), q.numx), 6) AS ProtocolNumber,
           @w AS Section
    FROM @q AS q
    WHERE NOT EXISTS (SELECT 1 FROM @i AS i WHERE i.num = q.numx);

    -- Reset the work tables and move on to the next section (NULL ends the loop).
    DELETE FROM @q;
    DELETE FROM @i;
    SET @w = (SELECT MIN(Section) FROM protocol WHERE Section > @w);
END

Fastest way to insert 100000 records into SQL Server

I am using the following script to insert 100,000 records into a table. Basically, the integers from 500,001 to 600,000 are inserted. I am casting each integer to a string before inserting, because that is how I want it stored in the table (an integer in the form of a string). I am using a MERGE to check whether the record already exists.
-- Inserts the integers @first..@last (as strings) one at a time; each iteration runs a
-- separate MERGE that inserts the value only when it is not already present.
DECLARE @first AS INT;
SET @first = 500001;
DECLARE @step AS INT;
SET @step = 1;
DECLARE @last AS INT;
SET @last = 600000;

BEGIN TRANSACTION;
WHILE (@first <= @last)
BEGIN
    MERGE dbo.Identifiers AS target
    USING (SELECT CAST(@first AS varchar(10)) AS Identifier) AS source
        ON (source.Identifier = target.Identifier)
    WHEN NOT MATCHED THEN
        INSERT (Identifier)
        VALUES (source.Identifier);

    SET @first += @step;
END
COMMIT TRANSACTION;
It's taking more than 2 minutes to load. I am doing something terribly wrong but am unable to trace where.
Note: The table has unique non-clustered index on Identifier Column.

I am wondering how much your procedural looping and the MERGE (instead of a simple INSERT) contributes to bad performance. I would opt for a strictly set-based solution like this:
-- Single set-based insert of every number in the range that is not stored yet.
-- The number is cast to a string so it is compared with (and inserted into) the string
-- column as-is, and NOT EXISTS is used because NOT IN returns no rows at all if the
-- subquery yields a NULL.
INSERT INTO dbo.Identifiers (Identifier)
SELECT CAST(g.n AS VARCHAR(10))
FROM dbo.GetNums(500001, 600000) AS g
WHERE NOT EXISTS
(
    SELECT 1
    FROM dbo.Identifiers AS i
    WHERE i.Identifier = CAST(g.n AS VARCHAR(10))
);
Now, this relies on a user-defined table-valued function dbo.GetNums that returns a table containing all numbers between 500,001 and 600,000 in a column called n. How do you write that function? You need to generate a range of numbers on the fly inside it.
The following implementation is taken from the book "Microsoft SQL Server 2012 High-Performance T-SQL Using Window Functions" by Itzik Ben-Gan.
-- Returns the integers @low..@high as a single column n, one row per value.
-- Parameters:
--   @low   first value returned
--   @high  last value returned; the FETCH count is @high - @low + 1, so @high is
--          expected to be >= @low
CREATE FUNCTION dbo.GetNums(@low AS BIGINT, @high AS BIGINT) RETURNS TABLE
AS
RETURN
    -- L0 has 2 rows; every further level cross-joins the previous level with itself,
    -- squaring the row count (2, 4, 16, 256, 65,536, 4,294,967,296).
    WITH L0 AS (SELECT c FROM (VALUES(1),(1)) AS D(c)),
    L1 AS (SELECT 1 AS c FROM L0 AS A CROSS JOIN L0 AS B),
    L2 AS (SELECT 1 AS c FROM L1 AS A CROSS JOIN L1 AS B),
    L3 AS (SELECT 1 AS c FROM L2 AS A CROSS JOIN L2 AS B),
    L4 AS (SELECT 1 AS c FROM L3 AS A CROSS JOIN L3 AS B),
    L5 AS (SELECT 1 AS c FROM L4 AS A CROSS JOIN L4 AS B),
    Nums AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS rownum FROM L5)
    -- Shift the 1-based row number so the first row equals @low, and keep only as many
    -- rows as the requested range contains.
    SELECT @low + rownum - 1 AS n
    FROM Nums
    ORDER BY rownum
    OFFSET 0 ROWS FETCH FIRST @high - @low + 1 ROWS ONLY;
(Since this comes from a book on SQL Server 2012, it might not work on SQL Server 2008 out-of-the-box, but it should be possible to adapt.)

Try this one. It uses a tally table. Reference: http://www.sqlservercentral.com/articles/T-SQL/62867/
-- Demo: load the range @first..@last into a temp table with a cross-join tally.
create table #temp_table(
    N int
);

declare @first as int;
set @first = 500001;
declare @step as int;
set @step = 1;
declare @last as int;
set @last = 600000;  -- terminated with ';' because the statement before WITH must be terminated

with
e1 as (select 1 as N union all select 1),               -- 2 rows
e2 as (select 1 as N from e1 as a cross join e1 as b),  -- 4 rows
e3 as (select 1 as N from e2 as a cross join e2 as b),  -- 16 rows
e4 as (select 1 as N from e3 as a cross join e3 as b),  -- 256 rows
e5 as (select 1 as N from e4 as a cross join e4 as b),  -- 65,536 rows
e6 as (select 1 as N from e5 as a cross join e1 as b),  -- 131,072 rows
tally as (select 500000 + (row_number() over(order by N) * @step) as N from e6) -- change 500000 with desired start
insert into #temp_table (N)
-- #temp_table.N is an int, so the value is inserted as-is; cast it to varchar(10)
-- only when the target column is a string.
select t.N
from tally as t
where
    t.N >= @first
    and t.N <= @last
    and not exists(
        select 1 from #temp_table as x where x.N = t.N
    );

drop table #temp_table;

Vinoth, Something given below could also help you.
-- Ten-row helper; the identity column supplies the ids 1..10.
Declare @tab table (id int identity(1,1), num int);
Insert into @tab (num) Values (1),(1),(1),(1),(1),(1),(1),(1),(1),(1);

Declare @start as int;
set @start = 500000;

-- Five cross-joined copies give 10^5 combinations. Treating (id - 1) of B..E as decimal
-- digits and adding A.id (1..10) yields @start + 1 .. @start + 100000.
Insert into dbo.Identifiers (Identifier)
Select n.Identifier
from (
    Select @start + ((E.id-1)*10000) + ((D.id-1)*1000) + ((C.id-1)*100) + ((B.id-1)*10) + A.id as Identifier
    from @tab as A
    cross join @tab as B
    cross join @tab as C
    cross join @tab as D
    cross join @tab as E
) as n
Order by n.Identifier;
In my DB, dbo.Identifiers is a table without any index. It took only 230 ms for the insertion.

Create index on the Identifier column and then try the above insert

finding the missing values in a Sequence

Table1 is as follows :
Col1
1
2
3
4
6
7
8
9
10
13
14
As shown above, Col1 holds a sequence of values, but for some reason the user did not insert 5, 11 and so on. How can I find the missing values in the sequence? Here the sequence is 1 to 14 and the missing values are 5, 11 and 12. Please help me.

As was said in other answers, the best choice is to do a join with a real sequence table. You can create one using a recursive CTE:
-- Upper bound of the sequence to check.
DECLARE @MaxNumber INT;
SELECT @MaxNumber = MAX(Col1) FROM YourTable;

-- Recursive CTE producing 1..@MaxNumber, one row per recursion step.
WITH CTE AS
(
    SELECT 1 AS Col1
    UNION ALL
    SELECT Col1 + 1
    FROM CTE
    WHERE Col1 + 1 <= @MaxNumber
)
-- Numbers of the generated sequence that have no matching row in YourTable.
SELECT A.Col1
FROM CTE AS A
LEFT JOIN YourTable AS B
    ON A.Col1 = B.Col1
WHERE B.Col1 IS NULL
OPTION (MAXRECURSION 0);  -- 0 removes the recursion limit (the default limit is 100)

This will work for numbers 0 - 2000; for larger numbers you just need to cross join the original result set.
-- The derived table supplies the distinct numbers 0..2000 found in spt_values;
-- rows with no match in your_table are the missing values.
select *
from (
    select distinct number
    from master..spt_Values
    where number >= 0
      and number <= 2000
) as t
left join your_table as y
    on y.col1 = t.number
where y.col1 is null
Alternatively, use cross joins.
This will work for billions of numbers, although it is obviously slower:
WITH
L0 AS(SELECT 1 AS c UNION ALL SELECT 1),
L1 AS(SELECT 1 AS c FROM L0 AS A CROSS JOIN L0 AS B),
L2 AS(SELECT 1 AS c FROM L1 AS A CROSS JOIN L1 AS B),
L3 AS(SELECT 1 AS c FROM L2 AS A CROSS JOIN L2 AS B),
L4 AS(SELECT 1 AS c FROM L3 AS A CROSS JOIN L3 AS B),
L5 AS(SELECT 1 AS c FROM L4 AS A CROSS JOIN L4 AS B),
Nums AS(SELECT ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS n FROM L5)
-- Join against Nums (the numbered sequence): L5 itself only has the constant column c.
-- The sequence is capped at the highest stored value, so numbers beyond the data
-- are not reported as missing.
select t.n
from Nums as t
left join your_table as y
    on y.col1 = t.n
where t.n <= (select max(col1) from your_table)
  and y.col1 is null

This seems to pretty much be a duplication of
SQL query to find Missing sequence numbers
There's a suggestion that this will work:
-- Self-join: for each row l, look for a row r whose id is exactly one higher and keep
-- the rows that have none. `start` is therefore the first absent value after l.id;
-- the row with the highest id also qualifies, so max(id) + 1 is returned as well.
SELECT l.id + 1 as start
FROM Table1 as l
LEFT JOIN Table1 as r on l.id + 1 = r.id
WHERE r.id IS NULL
Otherwise you can left join on your table with a sequence table. From the above question, you can look at http://www.projectdmx.com/tsql/tblnumbers.aspx to get some ideas on how to generate a suitable sequence table, and the join will be something like
-- Anti-join: keep the sequence values that have no matching value in Table1.
SELECT #sequence.value
FROM #sequence
LEFT JOIN Table1 ON #sequence.value = Table1.value
WHERE Table1.value IS NULL

Side-note to all recursive CTE suggestions. The recursive CTE increases time linear to the number of rows. Using a tally table or cross-join is much better to use...
This would work:
-- data table
CREATE TABLE #data (
    value INT
);

INSERT INTO #data (value)
VALUES (1), (2), (3), (4), (6), (7), (8), (9), (10), (13), (14);

-- normally i have a tally table already for stuff like this but I'll
-- create one temporary here.
CREATE TABLE #tmp_tally (
    n INT
);

DECLARE @n INT;
SET @n = 1;
WHILE @n <= 14  -- inclusive, so the tally also covers 14, the highest value in #data
BEGIN
    INSERT INTO #tmp_tally (n) VALUES (@n);
    SET @n = @n + 1;
END

-- One row per tally number, flagged by whether it is present in #data.
SELECT
    T.n,
    CASE WHEN d.value IS NULL THEN 'Missing' ELSE 'Not Missing' END AS Status
FROM
    #tmp_tally AS T
    LEFT JOIN #data AS d ON
        T.n = d.value
WHERE
    T.n <= (SELECT MAX(value) FROM #data); -- max of what you want to check against which is 14 in your example

DROP TABLE #data;
DROP TABLE #tmp_tally;

Try this:
-- Walks every value from the lowest to the highest field_ID and records the ones
-- that have no row in [Table].
declare @min int;
declare @max int;
select @min = min(field_ID), @max = max(field_ID) from [Table];

create table #tmp (Field_No int);

while @min <= @max
begin
    -- The insert names Field_No, the column the temp table is actually created with.
    if not exists (select * from [Table] where field_ID = @min)
        insert into #tmp (Field_No) values (@min);

    set @min = @min + 1;
end

select Field_No from #tmp;
drop table #tmp;
With the above script you will get the missing values of the ID column in the #tmp table.
Hope this will help you!!

I would do a subquery in the same table, to see if another number exist for the current number-1, and if there is not one, you know that a number was skipped. You can do the +1 of this as well.
-- Flags each number by whether its predecessor (numb - 1) exists in the same table.
-- The CASE is written as a searched CASE (WHEN ... THEN ... END); EXISTS replaces the
-- COUNT(*) = 0 test and the subquery reads numbertable, the same table as the outer query.
select
    nt.numb,
    case
        when not exists (select * from numbertable as prev where prev.numb = nt.numb - 1)
            then 'skipped'
        else 'not skipped'
    end as status
from
    numbertable as nt

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Create dummy data with WHILE - sql

Related

Workaround for temporary Table in view SQL Server

There is a way to generate a list of GUID's using NEWID function?

Get the missing numbers for each section

Fastest way to insert 100000 records into SQL Server

finding the missing values in a Sequence

Categories

Resources