Sequential numbers randomly selected and added to table - sql

The SO Question has lead me to the following question.
If a table has 16 rows I'd like to add a field to the table with the numbers 1,2,3,4,5,...,16 arranged randomly i.e in the 'RndVal' field for row 1 this could be 2, then for row 2 it could be 5 i.e each of the 16 integers needs to appear once without repetition.
Why doesn't the following work? Ideally I'd like to see this working then to see alternative solutions.
This creates the table ok:
IF OBJECT_ID('tempdb..#A') IS NOT NULL BEGIN DROP TABLE #A END
IF OBJECT_ID('tempdb..#B') IS NOT NULL BEGIN DROP TABLE #B END
IF OBJECT_ID('tempdb..#C') IS NOT NULL BEGIN DROP TABLE #C END
IF OBJECT_ID('tempdb..#myTable') IS NOT NULL BEGIN DROP TABLE #myTable END
CREATE TABLE #B (B_ID INT)
CREATE TABLE #C (C_ID INT)
INSERT INTO #B(B_ID) VALUES
(10),
(20),
(30),
(40)
INSERT INTO #C(C_ID)VALUES
(1),
(2),
(3),
(4)
CREATE TABLE #A
(
B_ID INT
, C_ID INT
, RndVal INT
)
INSERT INTO #A(B_ID, C_ID, RndVal)
SELECT
#B.B_ID
, #C.C_ID
, 0
FROM #B CROSS JOIN #C;
Then I'm attempting to add the random column using the following. The logic is to add random numbers between 1 and 16 > then to effectively overwrite any that are duplicated with other numbers > in a loop ...
SELECT
ROW_NUMBER() OVER(ORDER BY B_ID) AS Row
, B_ID
, C_ID
, RndVal
INTO #myTable
FROM #A
DECLARE #rowsRequired INT = (SELECT COUNT(*) CNT FROM #myTable)
DECLARE #i INT = (SELECT #rowsRequired - SUM(CASE WHEN RndVal > 0 THEN 1 ELSE 0 END) FROM #myTable)--0
DECLARE #end INT = 1
WHILE #end > 0
BEGIN
SELECT #i = #rowsRequired - SUM(CASE WHEN RndVal > 0 THEN 1 ELSE 0 END) FROM #myTable
WHILE #i>0
BEGIN
UPDATE x
SET x.RndVal = FLOOR(RAND()*#rowsRequired)
FROM #myTable x
WHERE x.RndVal = 0
SET #i = #i-1
END
--this is to remove possible duplicates
UPDATE c
SET c.RndVal = 0
FROM
#myTable c
INNER JOIN
(
SELECT RndVal
FROM #myTable
GROUP BY RndVal
HAVING COUNT(RndVal)>1
) t
ON
c.RndVal = t.RndVal
SET #end = ##ROWCOUNT
END
TRUNCATE TABLE #A
INSERT INTO #A
SELECT
B_ID
, C_ID
, RndVal
FROM #myTable
If the original table has 6 rows then the result should end up something like this
B_ID|C_ID|RndVal
----------------
| | 5
| | 4
| | 1
| | 6
| | 3
| | 2

I don't understand your code, frankly
This will update each row with a random number, non-repeated number between 1 and the number of rows in the table
UPDATE T
SET SomeCol = T2.X
FROM
MyTable T
JOIN
(
SELECT
KeyCol, ROW_NUMBER() OVER (ORDER BY NEWID()) AS X
FROM
MyTable
) T2 ON T.KeyCol = T2.KeyCol
This is more concise but can't test to see if it works as expected
UPDATE T
SET SomeCol = X
FROM
(
SELECT
SomeCol, ROW_NUMBER() OVER (ORDER BY NEWID()) AS X
FROM
MyTable
) T

When you add TOP (1) (because you need to update first RndVal=0 record) and +1 (because otherwise your zero mark means nothing) to your update, things will start to move. But extremely slowly (around 40 seconds on my rather outdated laptop). This is because, as #myTable gets filled with generated random numbers, it becomes less and less probable to get missing numbers - you usually get duplicate, and have to start again.
UPDATE top (1) x
SET x.RndVal = FLOOR(RAND()*#rowsRequired) + 1
FROM #myTable x
WHERE x.RndVal = 0
Of course, #gbn has perfectly valid solution.

This is basically the same as the previous answer, but specific to your code:
;WITH CTE As
(
SELECT B_ID, C_ID, RndVal,
ROW_NUMBER() OVER(ORDER BY NewID()) As NewOrder
FROM #A
)
UPDATE CTE
SET RndVal = NewOrder
SELECT * FROM #A ORDER BY RndVal

Related

How to create loop based on value of row?

I have problem when I use my query bellow to have a looping inside the cursor.
data in table1 will be like this:
id | data
----|---------
A | 4
B | 2
C | 5
the result in table2 should be like this:
id | data
----|---------
A | 1
A | 1
A | 1
A | 1
B | 1
B | 1
C | 1
C | 1
C | 1
C | 1
C | 1
I have SQL query with cursor like this:
DECLARE #table2 table ( id VARCHAR(500), data INTEGER)
DECLARE Cur CURSOR FOR
SELECT id, data FROM table1
OPEN Cur
WHILE ( ##FETCH_STATUS = 0 )
BEGIN
DECLARE #LoopNum INTEGER
DECLARE #tempID VARCHAR(255)
DECLARE #tempDATA INTEGER
FETCH NEXT FROM Cur INTO #tempID, #tempDATA
set #LoopNum = 0
WHILE #LoopNum < #tempDATA
BEGIN
INSERT INTO table2 (id, data)
VALUES( #tempID, 1)
SET #LoopNum = #LoopNum + 1
END
END
CLOSE Cur
DEALLOCATE Cur
SELECT * FROM table2
but the query didn't work. is there something wrong with my query?
Thank you.
Use this query to the expected result.
CREATE TABLE #test
(id CHAR(1),data INT)
INSERT #test VALUES ('A',4)
INSERT #test VALUES('B',2)
INSERT #test VALUES('C',5);
SELECT s.id, 1 AS data
FROM #test s
INNER JOIN
master.dbo.spt_values t ON t.type='P'
AND t.number BETWEEN 1 AND s.data
Note: Refer this Why (and how) to split column using master..spt_values?
You actually don't need a loop
IF OBJECT_ID('TEMPDB..#TEMP') IS NOT NULL
DROP TABLE #TEMP
SELECT 'A' AS ID, 4 AS DATA
INTO #TEMP UNION
SELECT 'B', 2 UNION
SELECT 'C', 5
;WITH CTE AS
(
SELECT 1 AS NUMBER
UNION ALL
SELECT NUMBER + 1
FROM CTE
WHERE NUMBER < 100
)
SELECT T.ID, 1
FROM CTE C
INNER JOIN #TEMP T
ON C.NUMBER <= T.DATA
ORDER BY T.ID
Carefull that if you want ot generate a large set of numbers in the CTE it may become slower.
Use a Recursive CTE which will help you to loop through the records.
CREATE TABLE #test
(id CHAR(1),data INT)
INSERT #test
VALUES ('A',4),('B',2),('C',5);
WITH cte
AS (SELECT 1 AS da,id,data
FROM #test a
UNION ALL
SELECT da + 1,id,data
FROM cte a
WHERE da < (SELECT data
FROM #test b
WHERE a.id = b.id))
SELECT id,
1 AS data
FROM cte
ORDER BY id
i used two loops
1. for each row
2. for number for duplicate insert
SET NOCOUNT on;
DECLARE #t table(row int IDENTITY(1,1),id varchar(10),data int)
INSERT INTO #t
SELECT * from xyz
DECLARE #x table(id varchar(10),data int) --table to hold the new data
DECLARE #i int=(SELECT count (*) from xyz) --number of rows main table
DECLARE #y int --number of duplicate
DECLARE #p int=1 --number of rows
WHILE #i!=0 --loop until last row of main table
BEGIN
SET #y=(SELECT data FROM #t WHERE row=#p) --set #y for number of 'row duplicate'
WHILE #y!=0
BEGIN
INSERT INTO #x
SELECT id,1
FROM #t
WHERE row=#p
SET #y=#y-1
END
SET #p=#p+1
SET #i=#i-1
END
SELECT * FROM #x

Select non-existing rows

Let say I have a table:
ColumnA ColumnB
---------------------------------
1 10.75
4 1234.30
6 2000.99
How can I write a SELECT query that will result in the following:
ColumnA ColumnB
---------------------------------
1 10.75
2 0.00
3 0.00
4 1234.30
5 0.00
6 2000.99
You can use a CTE to create a list of numbers from 1 to the maximum value in your table:
; with numbers as
(
select max(ColumnA) as nr
from YourTable
union all
select nr - 1
from numbers
where nr > 1
)
select nr.nr as ColumnA
, yt.ColumnB
from numbers nr
left join
YourTable yt
on nr.nr = yt.ColumnA
order by
nr.nr
option (maxrecursion 0)
See it working at SQL Fiddle.
Please try:
declare #min int, #max int
select #min=MIN(ColumnA), #max=MAX(ColumnA) from tbl
select
distinct number ColumnA,
isnull(b.ColumnB, 0) ColumnB
from
master.dbo.spt_values a left join tbl b on a.number=b.ColumnA
where number between #min and #max
Create a TallyTable (or NumbersTable) - see this question: What is the best way to create and populate a numbers table?
With that table create an insert statement:
INSERT INTO YourTable (ColumnA, ColumnB)
SELECT Number FROM NumberTable
WHERE
NOT EXISTS (SELECT 1 FROM YourTable WHERE NumberTable.Number = YourTable.ColumnA)
-- Adjust this value or calculate it with a query to the maximum of the source table
AND NumberTable.Number < 230130
DECLARE #t TABLE (ID INT,Val DECIMAL(10,2))
INSERT INTO #t (ID,Val) VALUES (1,10.75)
INSERT INTO #t (ID,Val) VALUES (4,6.75)
INSERT INTO #t (ID,Val) VALUES (7,4.75)
declare #MinNo int
declare #MaxNo int
declare #IncrementStep int
set #MinNo = 1
set #MaxNo = 10
set #IncrementStep = 1
;with C as
(
select #MinNo as Num
union all
select Num + #IncrementStep
from C
where Num < #MaxNo
)
select Num,
CASE WHEN Val IS NOT NULL THEN Val ELSE 0.00 END AS NUMBER
from C
LEFT JOIN #t t
ON t.ID = c.Num
You could use a number-table or following trick to generate a sequence which you can LEFT OUTER JOIN with your table. I assume you want to determine the boundaries dynamically:
WITH Seq AS
(
SELECT TOP ((SELECT Max(ColumnA)FROM Table1) - (SELECT Min(ColumnA) FROM Table1) + 1)
Num = (SELECT Min(ColumnA) FROM Table1)+ Row_number() OVER (ORDER BY [object_id]) -1
FROM sys.all_objects)
SELECT ColumnA = Seq.Num,
ColumnB = COALESCE(t.ColumnB ,0.00)
FROM Seq
LEFT OUTER JOIN Table1 t
ON Seq.Num = t.ColumnA
Demo with your sample.
Worth reading: http://www.sqlperformance.com/2013/01/t-sql-queries/generate-a-set-1
I have my collect of table functions like these.
create function dbo.GetNumbers(#Start int, #End int)
returns #Items table
(
Item int
)
as
begin
while (#Start <= #End)
begin
insert into #Items
values (#Start)
set #Start = #Start + 1
end
return
end
Then I can use it to left join to my data table and every value will be there.
declare #min int, #max int
set #min = 10
set #max = 20
select gn.Item
from dbo.GetNumbers(#min, #max) gn
I have similar table functions for date ranges, times, timezones, etc.

SQL Server 2008 filling gaps with dimension

I have a data table as below
#data
---------------
Account AccountType
---------------
1 2
2 0
3 5
4 2
5 1
6 5
AccountType 2 is headers and 5 is totals. Meaning accounts of type 2 have to look after the next 1 or 0 to determin if its Dim value is 1 or 0. Totals of type 5 have to look up at nearest 1 or 0 to determin its Dim value. Accounts of type 1 or 0 have there type as Dim.
Accounts of type 2 appear as islands so its not enough to just check RowNumber + 1 and same goes for accounsts of type 5.
I have arrived at the following table using CTE's. But can't find a quick way to go from here to my final result of Account, AccountType, Dim for all accounts
T3
-------------------
StartRow EndRow AccountType Dim
-------------------
1 1 2 0
2 2 0 0
3 3 5 0
4 4 2 1
5 5 0 1
6 6 5 1
Below code is MS TSQL copy paste it all and see it run. The final join on the CTE select statement is extremly slow for even 500 rows it takes 30 sec. I have 100.000 rows i need to handle. I done a cursor based solution which do it in 10-20 sec thats workable and a fast recursive CTE solution that do it in 5 sec for 100.000 rows, but it dependent on the fragmentation of the #data table. I should add this is simplified the real problem have alot more dimension that need to be taking into account. But it will work the same for this simple problem.
Anyway is there a fast way to do this using joins or another set based solution.
SET NOCOUNT ON
IF OBJECT_ID('tempdb..#data') IS NOT NULL
DROP TABLE #data
CREATE TABLE #data
(
Account INTEGER IDENTITY(1,1),
AccountType INTEGER,
)
BEGIN -- TEST DATA
DECLARE #Counter INTEGER = 0
DECLARE #MaxDataRows INTEGER = 50 -- Change here to check performance
DECLARE #Type INTEGER
WHILE(#Counter < #MaxDataRows)
BEGIN
SET #Type = CASE
WHEN #Counter % 10 < 3 THEN 2
WHEN #Counter % 10 >= 8 THEN 5
WHEN #Counter % 10 >= 3 THEN (CASE WHEN #Counter < #MaxDataRows / 2.0 THEN 0 ELSE 1 END )
ELSE 0
END
INSERT INTO #data VALUES(#Type)
SET #Counter = #Counter + 1
END
END -- TEST DATA END
;WITH groupIds_cte AS
(
SELECT *,
ROW_NUMBER() OVER (PARTITION BY AccountType ORDER BY Account) - Account AS GroupId
FROM #data
),
islandRanges_cte AS
(
SELECT
MIN(Account) AS StartRow,
MAX(Account) AS EndRow,
AccountType
FROM groupIds_cte
GROUP BY GroupId,AccountType
),
T3 AS
(
SELECT I.*, J.AccountType AS Dim
FROM islandRanges_cte I
INNER JOIN islandRanges_cte J
ON (I.EndRow + 1 = J.StartRow AND I.AccountType = 2)
UNION ALL
SELECT I.*, J.AccountType AS Dim
FROM islandRanges_cte I
INNER JOIN islandRanges_cte J
ON (I.StartRow - 1 = J.EndRow AND I.AccountType = 5)
UNION ALL
SELECT *, AccountType AS Dim
FROM islandRanges_cte
WHERE AccountType = 0 OR AccountType = 1
),
T4 AS
(
SELECT Account, Dim
FROM (
SELECT FlattenRow AS Account, StartRow, EndRow, Dim
FROM T3 I
CROSS APPLY (VALUES(StartRow),(EndRow)) newValues (FlattenRow)
) T
)
--SELECT * FROM T3 ORDER BY StartRow
--SELECT * FROM T4 ORDER BY Account
-- Final correct result but very very slow
SELECT D.Account, D.AccountType, I.Dim FROM T3 I
INNER JOIN #data D
ON D.Account BETWEEN I.StartRow AND I.EndRow
ORDER BY Account
EDIT with some time testing
SET NOCOUNT ON
IF OBJECT_ID('tempdb..#data') IS NULL
CREATE TABLE #times
(
RecId INTEGER IDENTITY(1,1),
Batch INTEGER,
Method NVARCHAR(255),
MethodDescription NVARCHAR(255),
RunTime INTEGER
)
IF OBJECT_ID('tempdb..#batch') IS NULL
CREATE TABLE #batch
(
Batch INTEGER IDENTITY(1,1),
Bit BIT
)
INSERT INTO #batch VALUES(0)
IF OBJECT_ID('tempdb..#data') IS NOT NULL
DROP TABLE #data
CREATE TABLE #data
(
Account INTEGER
)
CREATE NONCLUSTERED INDEX data_account_index ON #data (Account)
IF OBJECT_ID('tempdb..#islands') IS NOT NULL
DROP TABLE #islands
CREATE TABLE #islands
(
AccountFrom INTEGER ,
AccountTo INTEGER,
Dim INTEGER,
)
CREATE NONCLUSTERED INDEX islands_from_index ON #islands (AccountFrom, AccountTo, Dim)
BEGIN -- TEST DATA
INSERT INTO #data
SELECT TOP 100000 ROW_NUMBER() OVER(ORDER BY t1.number) AS N
FROM master..spt_values t1
CROSS JOIN master..spt_values t2
INSERT INTO #islands
SELECT MIN(Account) AS Start, MAX(Account), Grp
FROM (SELECT *, NTILE(10) OVER (ORDER BY Account) AS Grp FROM #data) T
GROUP BY Grp ORDER BY Start
END -- TEST DATA END
--SELECT * FROM #data
--SELECT * FROM #islands
--PRINT CONVERT(varchar(20),DATEDIFF(MS,#RunDate,GETDATE()))+' ms Sub Query'
DECLARE #RunDate datetime
SET #RunDate=GETDATE()
SELECT Account, (SELECT Dim From #islands WHERE Account BETWEEN AccountFrom AND AccountTo) AS Dim
FROM #data
INSERT INTO #times VALUES ((SELECT MAX(Batch) FROM #batch) ,'subquery','',DATEDIFF(MS,#RunDate,GETDATE()))
SET #RunDate=GETDATE()
SELECT D.Account, V.Dim
FROM #data D
CROSS APPLY
(
SELECT Dim From #islands V
WHERE D.Account BETWEEN V.AccountFrom AND V.AccountTo
) V
INSERT INTO #times VALUES ((SELECT MAX(Batch) FROM #batch) ,'crossapply','',DATEDIFF(MS,#RunDate,GETDATE()))
SET #RunDate=GETDATE()
SELECT D.Account, I.Dim
FROM #data D
JOIN #islands I
ON D.Account BETWEEN I.AccountFrom AND I.AccountTo
INSERT INTO #times VALUES ((SELECT MAX(Batch) FROM #batch) ,'join','',DATEDIFF(MS,#RunDate,GETDATE()))
SET #RunDate=GETDATE()
;WITH cte AS
(
SELECT Account, AccountFrom, AccountTo, Dim, 1 AS Counting
FROM #islands
CROSS APPLY (VALUES(AccountFrom),(AccountTo)) V (Account)
UNION ALL
SELECT Account + 1 ,AccountFrom, AccountTo, Dim, Counting + 1
FROM cte
WHERE (Account + 1) > AccountFrom AND (Account + 1) < AccountTo
)
SELECT Account, Dim, Counting FROM cte OPTION(MAXRECURSION 32767)
INSERT INTO #times VALUES ((SELECT MAX(Batch) FROM #batch) ,'recursivecte','',DATEDIFF(MS,#RunDate,GETDATE()))
You can select from the #times table to see the run times :)
I think you want a join, but using an inequality rather than an equality:
select tt.id, tt.dim1, it.dim2
from TallyTable tt join
IslandsTable it
on tt.id between it."from" and it."to"
This works for the data that you provide in the question.
Here is another idea that might work. Here is the query:
select d.*,
(select top 1 AccountType from #data d2 where d2.Account > d.Account and d2.AccountType not in (2, 5)
) nextAccountType
from #data d
order by d.account;
I just ran this on 50,000 rows and this version took 17 seconds on my system. Changing the table to:
CREATE TABLE #data (
Account INTEGER IDENTITY(1,1) primary key,
AccountType INTEGER,
);
Has actually slowed it down to about 1:33 -- quite to my surprise. Perhaps one of these will help you.

Fastest method to insert numbers into sql server table?

SELECT TOP 1000000 row_number() over(ORDER by sv.number) AS num
INTO numbertest
from master..spt_values sv CROSS JOIN master..spt_values sv2
SELECT TOP 1000000 IDENTITY(int,1,1) AS Number
INTO NumberTest
FROM master..spt_values sv1
CROSS JOIN master..spt_values s2
I've come across two methods to insert 1 to 1000000 numbers in a table which works perfectly but doesn't insert 1 to 1000000 sequentially? how can i insert sequentially with fast insertion rate?
I have table Numbers in my database that i fill with the following query.
CREATE TABLE [dbo].[NUMBERS] (
[number] INT IDENTITY (1, 1) NOT NULL
);
set Identity_insert dbo.Numbers oN
declare
#row_count int,
#target_rows int
set #target_rows = 1048576
set #row_count = null
while ( 1 = 1 ) begin
if ( #row_count is null ) begin
insert into Numbers ( [number] ) values ( 1 )
end
else begin
insert into Numbers ( [number] )
select [number] = [number] + #row_count
from Numbers
end
set #row_count = isnull( #row_count, 0 ) + ##rowcount
if ( #row_count >= #target_rows ) begin
break
end
end
what I understood that you need to add/insert rows with serial number 1 to 10,00,000 (ten lac rows)
your two command seems select command instead of insert command
do really required to add serial number field in one of your filed
or
you can run query to any existing table to get result with serial number like
for eg, table name : employee
filed name : name
Note : no fileds existing like SerialNumber
you can run a command to get output with serial number
e.g. select ROW_NUMBER() over (order by employee.name) as SerialNumber, Employee.name from employee
your result will be like
SerialNumber name
1 abc
2 xyz

SQL: how to get random number of rows from one table for each row in another

I have two tables where the data is not related
For each row in table A i want e.g. 3 random rows in table B
This is fairly easy using a cursor, but it is awfully slow
So how can i express this in single statement to avoid RBAR ?
To get a random number between 0 and (N-1), you can use.
abs(checksum(newid())) % N
Which means to get positive values 1-N, you use
1 + abs(checksum(newid())) % N
Note: RAND() doesn't work - it is evaluated once per query batch and you get stuck with the same value for all rows of tableA.
The query:
SELECT *
FROM tableA A
JOIN (select *, rn=row_number() over (order by newid())
from tableB) B ON B.rn <= 1 + abs(checksum(newid())) % 9
(assuming you wanted up to 9 random rows of B per A)
assuming tableB has integer surrogate key, try
Declare #maxRecs integer = 11 -- Maximum number of b records per a record
Select a.*, b.*
From tableA a Join tableB b
On b.PKColumn % (floor(Rand() * #maxRecs)) = 0
If you have a fixed number that you know in advance (such as 3), then:
select a.*, b.*
from a cross join
(select top 3 * from b) b
If you want a random number of rows from "b" for each row in "a", the problem is a bit harder in SQL Server.
Heres an example of how this could be done, code is self contained, copy and press F5 ;)
-- create two tables we can join
DECLARE #datatable TABLE(ID INT)
DECLARE #randomtable TABLE(ID INT)
-- add some dummy data
DECLARE #i INT = 1
WHILE(#i < 3) BEGIN
INSERT INTO #datatable (ID) VALUES (#i)
SET #i = #i + 1
END
SET #i = 1
WHILE(#i < 100) BEGIN
INSERT INTO #randomtable (ID) VALUES (#i)
SET #i = #i + 1
END
--The key here being the ORDER BY newid() which makes sure that
--the TOP 3 is different every time
SELECT
d.ID AS DataID
,rtable.ID RandomRow
FROM #datatable d
LEFT JOIN (SELECT TOP 3 * FROM #randomtable ORDER BY newid()) as rtable ON 1 = 1
Heres an example of the output