Generate unique random numbers using SQL - sql

I have some SQL code which generates random numbers using the following technique:
-- Draw six distinct random integers from the inclusive range [@Lower, @Upper].
-- Every value in the range is enumerated exactly once, the rows are shuffled by
-- ordering on NEWID(), and the first six shuffled rows are pivoted into variables,
-- so the six results cannot repeat. If the range holds fewer than six values the
-- remaining variables stay NULL.
DECLARE @Random1 INT, @Random2 INT, @Random3 INT, @Random4 INT, @Random5 INT, @Random6 INT, @Upper INT, @Lower INT;

SET @Lower = 1;   -- the lowest random number
SET @Upper = 49;  -- the highest random number

WITH nums AS (
    -- one row per integer from @Lower to @Upper
    SELECT @Lower AS n
    UNION ALL
    SELECT nums.n + 1
    FROM nums
    WHERE nums.n < @Upper
),
randnums AS (
    -- give each value a position in a random permutation
    SELECT nums.n,
           ROW_NUMBER() OVER (ORDER BY NEWID()) AS seqnum
    FROM nums
)
SELECT @Random1 = MAX(CASE WHEN rn.seqnum = 1 THEN rn.n END),
       @Random2 = MAX(CASE WHEN rn.seqnum = 2 THEN rn.n END),
       @Random3 = MAX(CASE WHEN rn.seqnum = 3 THEN rn.n END),
       @Random4 = MAX(CASE WHEN rn.seqnum = 4 THEN rn.n END),
       @Random5 = MAX(CASE WHEN rn.seqnum = 5 THEN rn.n END),
       @Random6 = MAX(CASE WHEN rn.seqnum = 6 THEN rn.n END)
FROM randnums AS rn
-- The recursion stops at @Upper, so the default 100-level cap is lifted to
-- allow ranges wider than 101 values.
OPTION (MAXRECURSION 0);

SELECT @Random1, @Random2, @Random3, @Random4, @Random5, @Random6;
My question is: how random is this number generation, and is there another way to do this that is more "random"?
I am using:
Microsoft SQL Server 2008 (SP3) - 10.0.5512.0 (X64) Aug 22 2012 19:25:47 Copyright (c) 1988-2008 Microsoft Corporation Developer Edition (64-bit) on Windows NT 6.1 <X64> (Build 7601: Service Pack 1)
The problem with most solutions is you'll end up with values like this: 14,29,8,14,27,27 I cannot have duplicate numbers!

I guess you could do something like this, which is much simpler:
-- Return one random integer in the inclusive range [@Lower, @Upper].
-- Nothing here prevents the same value from coming back on a later run.
DECLARE @Upper INT;
DECLARE @Lower INT;

SET @Lower = 1;  /* -- The lowest random number */
SET @Upper = 49; /* -- The highest random number */

-- RAND() is scaled to the size of the range, truncated to INT, then shifted by @Lower.
SELECT @Lower + CONVERT(INT, (@Upper - @Lower + 1) * RAND());
For getting a random number without repetition, this will do the job
-- Return up to five distinct random values between 1 and 49.
-- One candidate value is produced per row of sysobjects, the candidates are
-- collapsed to distinct values, and five of those are picked in random order.
WITH distinct_draws AS
(
    SELECT draws.randomNumber,
           COUNT(1) AS countOfRandomNumber
    FROM
    (
        -- NEWID() is evaluated per row, so each sysobjects row yields its own value
        SELECT ABS(CAST(NEWID() AS binary(6)) % 49) + 1 AS randomNumber
        FROM sysobjects
    ) AS draws
    GROUP BY draws.randomNumber
)
SELECT TOP 5 distinct_draws.randomNumber
FROM distinct_draws
ORDER BY NEWID()
To set the highest limit, you can replace 49 with your highest limit number.

For Laravel:
/**
 * Pick a random five-digit id (10000-99999) that no BookItem currently uses.
 *
 * Keeps drawing candidates until BookItem::whereBookItemId() reports that
 * none exists for the candidate.
 *
 * @return int
 */
public function generatUniqueId()
{
    do {
        $candidate = rand(10000, 99999);
    } while (BookItem::whereBookItemId($candidate)->exists());

    return $candidate;
}

Create a list of random numbers. For this example I did 100, could be more, could be less (but no less than your limit)
Use row_number() function to detect duplicates
Once you delete the duplicates, select top 6 number in your list
-- Draw 100 random (number, orderid) pairs, keep one row per distinct number,
-- then return the six survivors with the lowest random orderid.
-- (56 - 1 - 1) * RAND() + 1 rounded to 0 decimals yields values from 1 to 55.
with RandomNumbers as
(
    -- anchor row plus 99 recursive rows; every row gets its own random
    -- number and its own random sort key
    select id = 1,
           number  = round(((56 - 1 - 1) * RAND(CHECKSUM(NEWID())) + 1), 0),
           orderid = round(((56 - 1 - 1) * RAND(CHECKSUM(NEWID())) + 1), 0)
    union all
    select id + 1,
           round(((56 - 1 - 1) * RAND(CHECKSUM(NEWID())) + 1), 0),
           round(((56 - 1 - 1) * RAND(CHECKSUM(NEWID())) + 1), 0)
    from RandomNumbers
    where id < 100
),
NoDuplicates as
(
    -- renumber the de-duplicated rows across the whole set (not per number),
    -- so that "id <= 6" really selects six rows
    select number,
           id = row_number() over (order by orderid, number)
    from
    (
        -- repeat = 1 marks the first occurrence of each number
        select number,
               orderid,
               repeat = row_number() over (partition by number order by orderid)
        from RandomNumbers
    ) a
    where repeat = 1
)
select * from NoDuplicates where id <= 6

You can use the RAND() function.
-- Template: substitute the upper bound for the <max of random numbers> placeholder.
-- Each call is an independent draw; nothing here prevents repeated values.
select CEILING(RAND() * <max of random numbers>)

Related

Generating a list of random numbers, summing to a fixed amount using SQL

This question is about generating N random numbers whose sum is M(constant) using SQL.
For example we have a number M=9.754. We need 10 random numbers whose sum is 9.754.
It should be done in SQL Server environment.
Can anybody help me?
While @Squirrel's answer is interesting, the numbers produced here are more random.
here is the code:
-- Prints up to ten random draws (rounded to 3 decimals) whose accepted running
-- total never exceeds @final, then prints the remainder needed to reach @final.
-- A draw that would overshoot is printed but not added to the total.
DECLARE @s INT = 1,           -- attempt counter for the inner loop
        @k FLOAT = 0,         -- most recent random draw
        @final FLOAT = 9.917, -- target total
        @sum FLOAT = 0,       -- accepted running total
        @min FLOAT = 1,       -- lower bound for the next draw
        @max FLOAT = 9.917;   -- upper bound for the next draw (what is still missing)
BEGIN
    WHILE (@sum <> @final)
    BEGIN
        WHILE (@s <= 10)
        BEGIN
            SET @k =
            (
                SELECT ROUND(RAND(CHECKSUM(NEWID())) * (@max - @min) + @min, 3)
            );
            PRINT (CONCAT('random: ', @k));
            -- accept the draw only when it does not push the total past the target
            IF (@sum + @k <= @final)
                SET @sum += @k;
            SET @max = @final - @sum;
            PRINT (CONCAT('computed sum: ', @sum));
            -- once less than 1 remains, allow draws down to 0
            IF (@max > 1) SET @min = 1 ELSE SET @min = 0;
            IF (@sum = @final)
                BREAK;
            SET @s = @s + 1;
        END;
        PRINT (CONCAT('final', @final))
        PRINT (CONCAT('sum', @sum))
        IF (@sum <> @final) --force stop if after 10 try the sum not match with final
        BEGIN
            -- the remainder becomes the last number, so the total matches exactly
            PRINT(CONCAT('final random number:', @final - @sum))
            SET @sum = @final;
        END;
        SET @s = 0;
        IF (@sum = @final)
        BEGIN
            PRINT('****************************DONE****************************')
            BREAK;
        END
    END;
    PRINT ('end');
END;
Interesting requirement.
Query below uses tally table / number table to generate 10 random numbers after that find the ratio. Final query check for case where sum of the numbers is not equal to #m and make the adjustment on the biggest number.
-- Produce @n random numbers, rounded to 3 decimals, whose sum is exactly @m.
declare @m decimal(10,3) = 9.754,
        @n int = 10;

with
-- using recursive CTE to generate a number table with @n rows
numbers as
(
    select n = 1
    union all
    select n = n + 1
    from numbers
    where n < @n
),
-- generate random positive numbers using newid()
-- Note : 100 is chosen arbitrary
-- The modulo is taken before abs() so abs() never sees the minimum INT value,
-- and + 1 keeps every weight non-zero so the sum below cannot be zero.
num as
(
    select n = abs(checksum(newid()) % 100) + 1
    from numbers
),
-- calculate the ratio: each weight's share of @m, rounded to decimal(10,3)
ratio as
(
    select r,
           rn = row_number() over (order by r desc),
           sum_r = sum(r) over()
    from
    (
        select r = convert(decimal(10,3), n * 1.0 / sum(n) over() * @m)
        from num
    ) r
)
-- sum(r) may not equal to @m due to rounding
-- find the difference and adjust it to the biggest r
select r, rn, sum_r,
       adj_r = r + case when rn = 1 then @m - sum_r else 0 end,
       sum_adj_r = sum(r + case when rn = 1 then @m - sum_r else 0 end) over()
from ratio
-- the recursion stops at @n; lift the default 100-level cap so @n > 100 works
option (maxrecursion 0);
dbfiddle demo

Subtract one step from the current parameter in SQL

I have the following t-sql:
-- Simulate Prod day by day from @startno to @finishno for a single @daysbet value,
-- store the rows in #t1, then report the average and 24 x the total of Prod.
declare @startno int = 1, @finishno int = 365, @AfterO float = 97
declare @daysbet int = 80, @decVal float = 0.10 / 100;

;with ctetest as
(
    -- first day: counter oBet starts at 1 and Prod starts at @AfterO
    select @startno as oDay, 1 as oBet, @AfterO as Prod
    union all
    select oDay + 1,
           -- oBet counts up and resets to 0 on the day it would reach @daysbet
           case when ((oBet + 1) = @daysbet) then 0 else (oBet + 1) end,
           -- Prod is 0 on the reset day, returns to @AfterO the day after,
           -- and otherwise shrinks by the fraction @decVal each day
           case when (Prod = 0) then @AfterO else
               case when ((oBet + 1) = @daysbet) then 0 else (Prod - (@decVal * Prod)) end
           end
    from ctetest
    where oDay + 1 <= @finishno
)
select * into #t1
from ctetest option (maxrecursion 0)

Select @daysbet oVal, AVG(Prod) aProd, SUM(Prod) * 24 sProd FROM #t1
So the result should be:
---------------------------------
oVal aProd sProd
---------------------------------
80 92.484 810168.297
My question is, how can I subtract some value from oVal in a single execution? For instance, if the oVal value is 80, then the 2nd record is 79, the 3rd record is 78, and the other columns follow accordingly. The result should be:
---------------------------------
oVal aProd sProd
---------------------------------
80 92.484 810168.297
79 92.xxx 810444.069
78 92.xxx 810675.718
ff.---->
until oVal reach 1
Put simply: if #daysbet is 80, the average of Prod is 92.484. If #daysbet is 79, the average of Prod is 92-point-something, and so on, all in a single execution.
Does anyone have an idea for this situation? Please advise.
Thank you.
Maybe this will help you
-- Run the day-by-day Prod simulation once for every reset period n = 1..@daysbet
-- in a single statement, then aggregate per n.
declare @startno int = 1, @finishno int = 365, @AfterO float = 97
declare @daysbet int = 80, @decVal float = 0.10 / 100;

-- make the script re-runnable in the same session
if object_id(N'tempdb..#t1', N'U') is not null drop table #t1;

;with ctetest as
(
    -- anchor: one starting row (first day, oBet = 1, prod = @AfterO) per value of n
    select a2.[n], a1.*
    from (select [oDay] = @startno, [oBet] = 1, [prod] = @AfterO) as a1
    cross join
    (
        -- n = 1..@daysbet, numbered from a cross join of master..spt_values with itself
        select top(@daysbet) [n] = row_number() over(order by spt1.[number])
        from master..spt_values as spt1
        cross join master..spt_values as spt2
    ) as a2
    union all
    select [n]
        ,[oDay] + 1
        -- oBet counts up and resets to 0 on the day it would reach this row's n
        ,case when ((oBet + 1) = [n]) then 0 else (oBet + 1) end
        -- Prod is 0 on the reset day, returns to @AfterO the day after,
        -- and otherwise shrinks by the fraction @decVal each day
        ,case when (Prod = 0) then @AfterO else case when ((oBet + 1) = [n]) then 0 else (Prod - (@decVal * Prod)) end
        end
    from ctetest
    where oDay + 1 <= @finishno
)
select * into #t1
from ctetest option (maxrecursion 0)

-- one result row per n, largest n first
select [oVal] = t1.[n]
    ,[aProd] = avg(t1.[prod])
    ,[sProd] = sum(t1.[prod]) * 24
from #t1 as t1
group by t1.[n]
order by t1.[n] desc

Incremental Group BY

How I can achieve incremental grouping in query ?
I need to group by all the non-zero values into different named groups.
Please help me write a query based on columns date and subscribers.
If you have SQL Server 2012 or newer, you can use few tricks with windows functions to get this kind of grouping without cursors, with something like this:
-- Label each run of consecutive non-zero Subscribers rows (ordered by Date)
-- as Group1, Group2, ...; rows with Subscribers = 0 are labelled 'No group'.
with group_starts as
(
    -- GRP = 1 on a non-zero row whose previous row is zero or missing, else 0
    select
        S.Date,
        S.Subscribers,
        case
            when S.Subscribers > 0
                 and isnull(lag(S.Subscribers) over (order by S.Date asc), 0) = 0
            then 1
            else 0
        end as GRP
    from SubscribersCountByDay S
),
group_numbers as
(
    -- the running total of the start flags gives the group number
    select
        Date,
        Subscribers,
        sum (GRP) over (order by Date asc) as GRP
    from group_starts
)
select
    Date,
    Subscribers,
    case when Subscribers = 0 then 'No group'
         else 'Group' + convert(varchar, GRP) end as GRP
from group_numbers
Example in SQL Fiddle
In general I advocate AGAINST cursors, but in this case it will not hurt, since it will iterate, sum up, and apply the condition all in one pass.
Also note I hinted it with FAST_FORWARD to not degrade performance.
I'm guessing you want what @HABO commented.
See the working example below, it just sums up until find a ZERO, reset and starts again. Note the and #Sum > 0 handles the case where the first row is ZERO.
-- Sample table and data.
create table dbo.SubscribersCountByDay
(
     [Date] date not null
    ,Subscribers int not null
)
GO
insert into dbo.SubscribersCountByDay
    ([Date], Subscribers)
values
     ('2015-10-01', 1)
    ,('2015-10-02', 2)
    ,('2015-10-03', 0)
    ,('2015-10-04', 4)
    ,('2015-10-05', 5)
    ,('2015-10-06', 0)
    ,('2015-10-07', 7)
GO
-- Walk the rows in date order, summing Subscribers until a zero row is met;
-- each completed run is written to @Result as 'Group N' with its sum.
declare
     @Date date
    ,@Subscribers int
    ,@Sum int = 0
    ,@GroupId int = 1

declare @Result as Table
(
     GroupName varchar(10) not null
    ,[Sum] int not null
)

declare ScanIt cursor fast_forward
for
(
    select [Date], Subscribers
    from dbo.SubscribersCountByDay
    -- UNION ALL, not UNION: UNION would also collapse identical
    -- (Date, Subscribers) rows of the table itself and under-count the sums.
    union all
    -- trailing zero row so the last run is flushed; the maximum date keeps it last
    select cast('99991231' as date), 0
) order by [Date]

open ScanIt
fetch next from ScanIt into @Date, @Subscribers
while @@FETCH_STATUS = 0
begin
    -- "@Sum > 0" skips zero rows that do not close a run (e.g. a leading zero)
    if (@Subscribers = 0 and @Sum > 0)
    begin
        insert into @Result (GroupName, [Sum]) values ('Group ' + cast(@GroupId as varchar(6)), @Sum)
        set @GroupId = @GroupId + 1
        set @Sum = 0
    end
    else begin
        set @Sum = @Sum + @Subscribers
    end
    fetch next from ScanIt into @Date, @Subscribers
end
close ScanIt
deallocate ScanIt

select * from @Result
GO
For the OP: Please next time write the table, just posting an image is lazy
In a version of SQL Server modern enough to support CTEs you can use the following cursorless query:
-- Sample data.
-- Sample data.
declare @SampleData as Table ( Id Int Identity, Subscribers Int );
insert into @SampleData ( Subscribers ) values
    -- ( 0 ), -- Test edge case when we have a zero first row.
    ( 200 ), ( 100 ), ( 200 ),
    ( 0 ), ( 0 ), ( 0 ),
    ( 50 ), ( 50 ), ( 12 ),
    ( 0 ), ( 0 ),
    ( 43 ), ( 34 ), ( 34 );
select * from @SampleData;

-- Run the query.
with ZerosAndRows as (
    -- Add IsNonZero (1 for a non-zero Subscribers value, 0 for zero) and a row number to each row.
    select Id, Subscribers,
        case when Subscribers = 0 then 0 else 1 end as IsNonZero,
        Row_Number() over ( order by Id ) as RowNumber
    from @SampleData ),
Groups as (
    -- Add a group number to every row, walking the rows in RowNumber order.
    select Id, Subscribers, IsNonZero, RowNumber, 1 as GroupNumber
    from ZerosAndRows
    where RowNumber = 1
    union all
    select ZAR.Id, ZAR.Subscribers, ZAR.IsNonZero, ZAR.RowNumber,
        -- Increment GroupNumber only when we move from a non-zero row to a zero row.
        case when Groups.IsNonZero = 1 and ZAR.IsNonZero = 0 then Groups.GroupNumber + 1 else Groups.GroupNumber end
    from ZerosAndRows as ZAR inner join Groups on Groups.RowNumber + 1 = ZAR.RowNumber
)
-- Display the results: zero rows are 'no group', the rest carry their group number.
select Id, Subscribers,
    case when IsNonZero = 0 then 'no group' else 'Group' + Cast( GroupNumber as VarChar(10) ) end as Grouped
from Groups
order by Id
-- One recursion level is used per row; lift the default cap of 100 for larger tables.
option ( maxrecursion 0 );
To see the intermediate results just replace the final select with select * from ZerosAndRows or select * from Groups.

Recursive UNION ALL to parse string taking very long

I am trying to speed up this recursive UNION ALL as shown below, but I cannot think how to do it. Maybe a while loop but I am not sure. The movement data is stored as one long string of encoded movement data and the script recursively calls the select statement to parse/extract this data and then it is all casted.
I would really like to understand more about speeding up recursive union all's or finding another way. I don't believe indexing is a problem so this is not really a possible solution.
"RouteData" is the long string that is parsed by fixed length intervals.
Here is a sample of the encoded data:
ScenarioPID : 3
LegID :1
RoutePart : 0x0000000000000000000100000000000000000000000000
RouteData : 0x40323AAAAAAAAAAB00013FA6FFD663CCA3310000001F00403 ... (goes on)
cnt : 37
sequence : 1
StartTime : 8828
The final output data looks like this for one track.
ScenarioPID LegID sequence TrackID Offset TimeOffset Length StartTime
3 1 1 1 0 0 6300 8828
3 1 2 1 0.0449 31 6300 8828
3 1 3 1 0.8942 325 6300 8828
3 1 4 1 0.9736 356 6300 8828
3 1 5 1 1 369 6300 8828
USE nss_demo;
DECLARE #scenario1 INT;
DECLARE #DAY_START INT;
DECLARE #DAY_END INT;
DECLARE #TRAIN_TYPE VARCHAR(50);
DECLARE #TRACK_TYPE VARCHAR(50);
SET #scenario1 = 3;
SET #DAY_START = 0;
SET #DAY_END = 7;
SET #TRAIN_TYPE = 'Empty Train';
SET #TRACK_TYPE = 'East Track';
DECLARE #KM_START INT;
DECLARE #KM_END INT;
SET #KM_START = 0;
SET #KM_END = 200;
WITH movement
AS (SELECT m.scenariopid,
m.legid,
Substring(routedata, 1, 23) AS RoutePart,
Substring(routedata, 24, Len(routedata) - 23) AS RouteData,
Len(routedata) / 23 - 1 AS cnt,
1 AS sequence,
m.starttime
FROM output.movement m
WHERE scenariopid = #scenario1
AND m.starttime BETWEEN ( #DAY_START * 86400 ) AND
( #DAY_END * 86400 )
UNION ALL
SELECT scenariopid,
legid,
Substring(m1.routedata, 1, 23) AS RoutePart
,
Substring(m1.routedata, 24,
Len(m1.routedata) - 23) AS RouteData,
Len(m1.routedata) / 23 - 1 AS cnt,
sequence + 1 AS sequence,
m1.starttime
FROM movement m1
WHERE m1.cnt > 0),
casttable
AS (SELECT tt.scenariopid,
tt.legid,
tt.sequence,
tt.trackid,
tt.offset,
tt.timeoffset,
tr.[length],
tt.starttime
FROM (SELECT scenariopid,
legid,
sequence,
Cast(trackidbin AS SMALLINT) AS TrackID,
Sign(Cast(offsetbin AS BIGINT)) *
( 1.0 +
( Cast(offsetbin AS BIGINT) & 0x000FFFFFFFFFFFFF ) *
Power(Cast(2 AS FLOAT), -52) )
*
Power(Cast(2 AS FLOAT), ( Cast(offsetbin AS BIGINT) &
0x7ff0000000000000
) /
0x0010000000000000
- 1023) AS Offset,
Cast(timebin AS INT) AS TimeOffset,
starttime AS StartTime
FROM (SELECT legid,
scenariopid,
sequence,
Substring(routepart, 9, 2) AS TrackIDBin,
Substring(routepart, 11, 8) AS OffsetBin,
Substring(routepart, 19, 4) AS TimeBin,
starttime
FROM movement) t) tt
INNER JOIN input.track tr
ON tr.trackid = tt.trackid
AND tr.scenariopid = tt.scenariopid)
SELECT *
FROM casttable
ORDER BY legid,
sequence
OPTION (maxrecursion 20000)
Use a Numbers Table (zero-based assumed below) to create CTE movement like this:
-- movement: one row per 23-unit segment of routedata, produced by joining to a
-- zero-based Numbers table instead of recursing.
WITH movement
     AS (SELECT m.scenariopid,
                m.legid,
                Substring(routedata, n.N * 23 + 1, 23) AS RoutePart,
                n.N                                    AS cnt,
                -- segment position starting at 1; relies on Numbers.N being zero-based
                n.N + 1                                AS sequence,
                m.starttime
         FROM   output.movement m
                INNER JOIN Numbers n
                        ON n.N < Len(routedata) / 23
         WHERE  scenariopid = @scenario1
                AND m.starttime BETWEEN ( @DAY_START * 86400 ) AND
                                        ( @DAY_END * 86400 )
        ),
-- etc.
If you don't have a static Numbers Table, my answer here demonstrates how to create one dynamically in a CTE.

SQL Data Sampling

We have had a request to provide some data to an external company.
They require only a sample of the data. Simple, right? Wrong.
Here is their sampling criteria:
Total Number of records divided by 720 (required sample size) - this gives sampling interval (if result is a fraction, round down to next whole number).
Halve the sampling interval to get the starting point.
Return each record by adding on the sampling interval.
EXAMPLE:
10,000 Records - Sampling interval = 13 (10,000/720)
Starting Point = 6 (13/2 Rounded)
Return records 6, 19 (6+13), 32 (19+13), 45 (32+13) etc.....
Please can someone tell me how (if) something like this is possible in SQL.
If you have use of ROW_NUMBER(), then you can do this relatively easily.
-- Number the rows of yourTable in (a, b, c, d) order and keep those whose
-- number plus 360 is a multiple of 720.
WITH data AS
(
    SELECT
        ROW_NUMBER() OVER (ORDER BY a, b, c, d) AS record_id,
        *
    FROM
        yourTable
)
SELECT
    *
FROM
    data
WHERE
    (record_id + 360) % 720 = 0
ROW_NUMBER() gives all your data a sequential identifier (this is important as the id field must both be unique and NOT have ANY gaps). It also defines the order you want the data in (ORDER BY a, b, c, d).
With that id, if you use Modulo (Often the % operator), you can test if the record is the 720th record, 1440th record, etc (because 720 % 720 = 0).
Then, if you offset your id value by 360, you can change the starting point of your result set.
EDIT
After re-reading the question, I see you don't want every 720th record, but uniformly selected 720 records.
As such, replace 720 with (SELECT COUNT(*) / 720 FROM yourTable)
And replace 360 with (SELECT (COUNT(*) / 720) / 2 FROM yourTable)
EDIT
Ignoring the rounding conditions will allow a result of exactly 720 records. This requires using non-integer values, and the result of the modulo being less than 1.
-- Replacement WHERE clause for the query above. The interval is the fractional
-- value COUNT(*) / 720.0 and the offset is COUNT(*) / 1440.0; a row is kept
-- when (record_id + offset) modulo interval is below 1.
WHERE
(record_id + (SELECT COUNT(*) FROM yourTable) / 1440.0)
%
((SELECT COUNT(*) FROM yourTable) / 720.0)
<
1.0
-- Return @sample_size evenly spaced rows from your_table.
declare @sample_size int, @starting_point int, @sampling_interval int
select @sample_size = 200

-- interval = total rows / sample size (integer division), never less than 1 so
-- the modulo below cannot divide by zero when the table has fewer rows than
-- the sample size (in that case every row qualifies)
select @sampling_interval = case when count(*) / @sample_size < 1 then 1
                                 else count(*) / @sample_size end
from your_table

-- start half an interval in
select @starting_point = @sampling_interval / 2

select top (@sample_size) col1, col2, col3, col4
from (
    select *, row_number() over (order by col1, col2) as row
    from your_table
) t
where (row % @sampling_interval) - @starting_point = 0
-- make TOP keep the earliest matches instead of an arbitrary subset
order by row
It's going to work in SQL Server 2005+.
TOP (#variable) is used to limit the rows (because of integer rounding the WHERE condition alone might not be enough and may return more rows than needed), and ROW_NUMBER() is used to number and order the rows.
Working example: https://data.stackexchange.com/stackoverflow/query/62315/sql-data-sampling below code:
-- Demo data: the loop runs for @i = 0 through 1000, i.e. 1001 identical rows
-- distinguished only by their identity id.
declare @tab table (id int identity(1,1), col1 varchar(3), col2 varchar(3))
declare @i int
set @i = 0
while @i <= 1000
begin
    insert into @tab (col1, col2)
    select 'aaa', 'bbb'
    set @i = @i+1
end

declare @sample_size int
select @sample_size = 123

-- the sampling interval (integer division of row count by sample size)
select ((select count(*) from @tab) / @sample_size) as sample_interval

-- keep the rows whose number modulo the interval equals half the interval,
-- capped at @sample_size rows
select top (@sample_size) *
from (
    select *, row_number() over (order by col1, col2, id desc) as row
    from @tab
) t
where (row % ((select count(*) from @tab) / @sample_size)) - ((select count(*) from @tab) / @sample_size / 2) = 0
-- make TOP keep the earliest matches instead of an arbitrary subset
order by row
SQL server has in-built function for it.
-- Return the name columns for a sample of Person.Person requested as 10 percent of the table.
SELECT
    p.FirstName,
    p.LastName
FROM Person.Person AS p TABLESAMPLE (10 PERCENT);
You can use rank to get a row-number. The following code will create 10000 records in a table, then select the 6th, 19th, 32nd, etc, for a total of 769 rows.
-- Demo table filled with 10000 rows of random text.
CREATE TABLE Tbl (
    Data varchar (255)
)
GO
DECLARE @i int
SET @i = 0
WHILE (@i < 10000)
BEGIN
    INSERT INTO Tbl (Data) VALUES (CONVERT(varchar(255), NEWID()))
    SET @i = @i + 1
END
GO
-- Sampling interval = total rows / 720 (integer division rounds down);
-- starting point = half the interval; return rows start, start + interval, ...
DECLARE @interval int
DECLARE @start int
DECLARE @total int

SELECT @total = COUNT(*),
       -- never less than 1, so the modulo below cannot divide by zero when
       -- the table holds fewer than 720 rows (every row is returned then)
       @interval = CASE WHEN COUNT(*) / 720 < 1 THEN 1 ELSE COUNT(*) / 720 END
FROM Tbl

SET @start = @interval / 2

PRINT 'Start record: ' + CAST(@start as varchar(10))
PRINT 'Interval: ' + CAST(@interval as varchar(10))

SELECT [rank], Data
FROM (
    -- ROW_NUMBER gives gap-free numbering even if two Data values tie
    SELECT ROW_NUMBER()
           OVER (ORDER BY t.Data) AS [rank], t.Data AS Data
    FROM Tbl t) q
-- rows whose distance from the starting point is a whole number of intervals;
-- unlike ((rank + 1) + start) % interval, this is also right for even intervals
WHERE [rank] >= @start
  AND ([rank] - @start) % @interval = 0
ORDER BY [rank]