What is the easiest way to print all prime numbers until n, in SQL?

What is the easiest way to print all prime numbers until n, in SQL? - sql

What kind of algorithm should I use, for printing all prime numbers until let's say 1000?
SQL Server is preferred.
Thanks

You could use this to list all primes smaller than 1000 in SQL:
-- Lists every prime between 2 and 1000.
-- The recursive CTE enumerates the integers 2..1000; a number is kept when no
-- other number in that list divides it evenly.
;WITH candidates (Value) AS
(
    -- Anchor row: the smallest candidate.
    SELECT 2
    UNION ALL
    -- Recursive step: emit the next integer until 1000 has been produced.
    SELECT prev.Value + 1
    FROM candidates AS prev
    WHERE prev.Value < 1000
)
SELECT n.Value
FROM candidates AS n
WHERE NOT EXISTS
(
    -- Any different candidate that divides n without remainder.
    SELECT 1
    FROM candidates AS d
    WHERE d.Value <> n.Value
      AND n.Value % d.Value = 0
)
-- 999 recursion steps are needed; 0 lifts the default limit of 100.
OPTION (MAXRECURSION 0)
Demo link: Rextester

This code I've written for SQL Server is very fast, it will find all the primes less than 10 million in about 12 seconds:
-- Lists every prime in [@Min, @Max].
-- Approach: generate the numbers of the form 6k-1 / 6k+1 (all primes > 3 have
-- that form), discard multiples of the primes 5..59, then trial-divide each
-- survivor by the other survivors up to its square root.
DECLARE @Min int = 2, @Max int = 100000
--
IF OBJECT_ID('tempdb..#N','U') IS NOT NULL DROP TABLE #N
--
-- #N is both the candidate list and the trial-divisor list.
CREATE TABLE #N(N int NOT NULL, SqrtN int NOT NULL);
--
WITH L0 AS (SELECT 'Anything' N FROM (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) AS T(N)), -- 16 values
L1 AS (SELECT A.N
       FROM L0 A CROSS JOIN L0 B CROSS JOIN L0 C CROSS JOIN L0 D
       CROSS JOIN L0 E CROSS JOIN L0 F CROSS JOIN L0 G CROSS JOIN L0 H), -- 16^8 = 4,294,967,296 rows, more than enough for max value of int (2^31-1)
-- One row per multiple of 6. When @Max % 6 = 5 the value @Max itself is 6k-1
-- for k = @Max/6 + 1, so one extra row is required (e.g. @Max = 71 -> 71).
L2 AS (SELECT TOP(@Max/6 + CASE WHEN @Max % 6 = 5 THEN 1 ELSE 0 END)
              CONVERT(int,6*ROW_NUMBER() OVER (ORDER BY (SELECT NULL))) RowNum
       FROM L1)
INSERT INTO #N(N, SqrtN)
SELECT T.N, SQRT(T.N)
FROM L2
CROSS APPLY(VALUES(L2.RowNum-1),(L2.RowNum+1)) T(N)
-- Deliberately NOT filtered by @Min: values below @Min are still needed as
-- trial divisors, otherwise composites such as 61*67 pass when @Min > 61.
WHERE T.N <= @Max
AND 0 NOT IN (T.N%5,T.N%7,T.N%11,T.N%13,T.N%17,T.N%19,T.N%23,T.N%29,T.N%31,T.N%37,T.N%41,T.N%43,T.N%47,T.N%53,T.N%59) -- Not interested in anything divisible by these low primes
--
ALTER TABLE #N ADD PRIMARY KEY CLUSTERED(N) WITH FILLFACTOR = 100
--
-- NOTE(review): nothing visible in this script creates #Primes; the drop is kept
-- so a leftover table from an earlier run is still removed.
IF OBJECT_ID('tempdb..#Primes','U') IS NOT NULL DROP TABLE #Primes
--
SELECT Z.N Prime
FROM (-- The low primes were excluded from #N above, so add them back explicitly.
      SELECT T.N FROM (VALUES(2),(3),(5),(7),(11),(13),(17),(19),(23),(29),(31),(37),(41),(43),(47),(53),(59)) T(N)
      WHERE T.N BETWEEN @Min AND @Max
      UNION ALL
      SELECT X.N
      FROM #N AS X
      WHERE X.N >= @Min
      -- A composite survivor has a prime factor >= 61 that is <= its square root,
      -- and that factor is itself a row of #N.
      AND NOT EXISTS(SELECT *
                     FROM #N AS C
                     WHERE C.N <= X.SqrtN
                     AND 0 = X.N%C.N)) Z
ORDER BY Prime

The answer is simple:
Assuming you already have following table populated with data:
CREATE TABLE dbo.PrimeNumber(Num INT NOT NULL PRIMARY KEY);
you'll need only a simple SELECT:
SELECT * FROM dbo.PrimeNumber

This is my answer
I did it in SQL Server
-- Prints all primes from 2 to @limit as one '&'-separated line.
DECLARE @limit INT = 1000          -- inclusive upper bound
DECLARE @number INT = 2            -- candidate currently being tested
DECLARE @isprime BIT
DECLARE @counter INT
DECLARE @result VARCHAR(MAX) = ''
WHILE (@number <= @limit)
BEGIN
    -- Assume prime until a divisor is found; reset for every candidate.
    SET @isprime = 1
    SET @counter = 2
    -- Trial division only needs to go up to the integer square root.
    WHILE (@counter <= CAST(SQRT(@number) AS INT))
    BEGIN
        IF (@number % @counter = 0)
        BEGIN
            SET @isprime = 0
            BREAK
        END
        SET @counter += 1
    END
    IF @isprime = 1
        SET @result += CAST(@number AS VARCHAR(10)) + '&'
    SET @number += 1
END
-- Strip the trailing '&'; skip printing when no prime was found (@limit < 2).
IF LEN(@result) > 0
    PRINT(LEFT(@result, LEN(@result) - 1))
This is the output
2&3&5&7&11&13&17&19&23&29&31&37&41&43&47&53&59&61&67&71&73&79&83&89&97&101&103&107&109&113&127&131&137&139&149&151&157&163&167&173&179&181&191&193&197&199&211&223&227&229&233&239&241&251&257&263&269&271&277&281&283&293&307&311&313&317&331&337&347&349&353&359&367&373&379&383&389&397&401&409&419&421&431&433&439&443&449&457&461&463&467&479&487&491&499&503&509&521&523&541&547&557&563&569&571&577&587&593&599&601&607&613&617&619&631&641&643&647&653&659&661&673&677&683&691&701&709&719&727&733&739&743&751&757&761&769&773&787&797&809&811&821&823&827&829&839&853&857&859&863&877&881&883&887&907&911&919&929&937&941&947&953&967&971&977&983&991&997

-- Primes up to 100 by counting proper divisors: for every number, pair it with
-- each smaller number and keep the pairs that divide evenly. A prime has
-- exactly one such smaller divisor (1); 1 itself has none and drops out.
WITH prime AS
(
    SELECT 1 AS start
    UNION ALL
    SELECT start + 1
    FROM prime
    WHERE start < 100
)
SELECT hits.e AS prime_value
FROM (
    SELECT n.start % d.start AS w,
           n.start AS e
    FROM prime AS n
    INNER JOIN prime AS d
        ON d.start < n.start
) AS hits
WHERE hits.w = 0
GROUP BY hits.e
HAVING COUNT(hits.w) <= 1
BY Nagaraj M-BE

-- Prints every prime from 2 to @range, one per line.
-- @y is the candidate, @x the trial divisor.
DECLARE @range INT = 1000, @x INT = 2, @y INT = 2
WHILE (@y <= @range)
BEGIN
    -- Advance @x to the first divisor of @y, giving up once @x exceeds
    -- sqrt(@y). "@x <= @y / @x" is the overflow-safe form of "@x * @x <= @y".
    WHILE (@x <= @y / @x AND @y % @x <> 0)
        SET @x = @x + 1
    -- No divisor found up to the square root: @y is prime.
    IF (@x > @y / @x)
        PRINT @y
    SET @x = 2
    SET @y = @y + 1
END

Related

How to select only armstrong numbers from the list?

I want to select the Armstrong numbers from the list below. I have searched for a solution to this question but was unable to find one for SQL Server:
Numbers
121
113
423
153
541
371
I am sure most of you know what an Armstrong number is and how to calculate it, but I will describe it for simplicity: the sum of the cubes of its digits is equal to the number itself, i.e.
1*1*1 + 5*5*5 + 3*3*3 = 153
3*3*3 + 7*7*7 + 1*1*1 = 371
Please help me on this as I am also trying but seeking for quick solution. It will be very helpful to me. Thanks in advance.

Obviously static processing during each query is not correct approach but we can create function like this and
-- Returns 1 when @n equals the sum of the cubes of its decimal digits
-- (e.g. 153 = 1*1*1 + 5*5*5 + 3*3*3), otherwise 0.
-- Negative and NULL inputs return 0; 0 returns 1 because its digit sum is 0.
create function dbo.IsArmstrongNumber(@n int)
returns int as
begin
    declare @sum int = 0      -- running sum of cubed digits
    declare @num int = @n     -- digits still to be consumed
    while @num > 0
    begin
        -- Cube the lowest digit, then drop it.
        set @sum += (@num % 10) * (@num % 10) * (@num % 10)
        set @num = @num / 10
    end
    return case when @sum = @n then 1 else 0 end
end
Pre-processing and selecting in IN clause is better
select * from #Numbers where dbo.IsArmstrongNumber(n) = 1

-- Sample data: three numbers in a temp table.
select 153 as x into #temp;
insert into #temp (x) values (371), (541);
-- Explode each number into one row per digit (u), walking left to right with
-- position counter n, then keep the numbers whose cubed digits sum to x.
with digits as (
    select x,
           substring(cast(x as nvarchar(40)), 1, 1) as u,
           1 as n
    from #temp
    union all
    select x,
           substring(cast(x as nvarchar(40)), n + 1, 1),
           n + 1
    from digits
    where n < len(cast(x as nvarchar(40)))
)
select x
from digits
group by x
having sum(power(cast(u as int), 3)) = x;
drop table #temp;
Here is the mark 2 - you can change @order to explore powers of 4, 5, etc.
-- Finds every number in 1..@limit that equals the sum of its digits, each
-- raised to the power @order.
declare @order int = 3;        -- exponent applied to every digit
declare @limit int = 50000;    -- inclusive upper bound of the search
with nos as (
    -- Integers 1..@limit.
    select 1 no
    union all
    select no + 1 from nos where no < @limit
),
cte as (
    -- One row per (number, digit): u is the digit at position N.
    select no as x, substring(cast(no as nvarchar(40)), 1, 1) as u, 1 as N from nos
    union all
    select x, substring(cast(x as nvarchar(40)), n + 1, 1) as u, n + 1
    from cte
    where len(cast(x as nvarchar(40))) > n
)
-- bigint keeps POWER from overflowing int for larger exponents (9^10 > 2^31).
select x from cte group by x having SUM(POWER(cast(u as bigint), @order)) = x
option (maxrecursion 0);

This is a quick mod to my sum of digits UDF
-- Demo: filter a list of numbers down to the Armstrong numbers with the
-- scalar function [dbo].[udf-Stat-Is-Armstrong].
Declare @Table table (Numbers int)
Insert into @Table (Numbers) values
(121),
(113),
(423),
(153),
(541),
(371)
Select Numbers from @Table where [dbo].[udf-Stat-Is-Armstrong](Numbers)=1
Returns
Numbers
153
371
The UDF
-- Returns 1 when @Val equals the sum of its decimal digits, each raised to the
-- number of digits in @Val (153 -> 1^3 + 5^3 + 3^3), otherwise 0.
CREATE Function [dbo].[udf-Stat-Is-Armstrong](@Val bigint)
Returns Bit
As
Begin
    Declare @RetVal as bit
    -- Number of characters in the decimal representation = exponent to use.
    Declare @LenInp as int = len(cast(@Val as varchar(25)))
    -- Peel off one digit per recursion step: n is the remaining number,
    -- d the digit just removed.
    ;with i AS (
        Select @Val / 10 n, @Val % 10 d
        Union ALL
        Select n / 10, n % 10
        From i
        Where n > 0
    )
    Select @RetVal = CASE WHEN SUM(power(d,@LenInp)) = @Val THEN 1 ELSE 0 END FROM i;
    Return @RetVal
End

You can use the following to find Armstrong numbers using Sql functions:
-- Scans 0..10000 and keeps the numbers equal to the sum of the cubes of their
-- first three digits.
-- NOTE(review): only character positions 1-3 are inspected, so digits beyond
-- the third are ignored; a missing position yields '' which converts to 0.
WITH Numbers AS (
    SELECT 0 AS number
    UNION ALL
    SELECT number + 1
    FROM Numbers
    WHERE number < 10000
),
Digits AS (
    -- Text form of each number, computed once.
    SELECT number, CAST(number AS VARCHAR(10)) AS txt
    FROM Numbers
)
SELECT number AS ArmstrongNumber
FROM Digits
WHERE number = POWER(COALESCE(SUBSTRING(txt, 1, 1), 0), 3)
             + POWER(COALESCE(SUBSTRING(txt, 2, 1), 0), 3)
             + POWER(COALESCE(SUBSTRING(txt, 3, 1), 0), 3)
OPTION(MAXRECURSION 0)

Complicated SQL while loop

I am trying to create a while loop in SQL and it seems kind of complex. Here's what I need it to achieve:
Iterate through a single VARCHAR string (ex. '123')
If the nth character is in an even position in the string (ex. 2nd, 4th .... letter in the string), it must be added(SUM) to a base variable (Let's assume @z)
If the nth character is in an odd position in the string (ex. 1st, 3rd .... letter in the string), it must be multiplied by 2. If this newly generated value (Let's assume @y) is less than 10, it must be added(SUM) to the base variable (Still the same assumed @z). If @y is greater than 10, we need to subtract 9 from @y before adding(SUM) it to @z
After iterating through the entire string, this should return a numeric value generated by the above process.
Here is what I've done so far, but I'm stuck now (Needless to say, this code does not work yet, but I think I'm heading in the right direction):
DECLARE #x varchar(20) = '12345'
DECLARE #p int = len(#x)
WHILE #p > 0
SELECT #x =
stuff(#x, #p, 1,
case when CONVERT(INT,substring(#x, #p, 1)) % 2 = 0 then CONVERT(INT, #x) + CONVERT(INT,substring(#x, #p, 1))
end), #p -= 1
RETURN #x;
PS. The input will always be 100% numeric values, but it is formatted as VARCHAR when I recieve it.
UPDATE
The expected result for the sample string is 15

You can do this without using a loop. Here is a solution using Tally Table:
-- Weighted digit sum of @x without a loop: digits in even positions are added
-- as-is, digits in odd positions are doubled (minus 9 when the double is >= 10).
DECLARE @x VARCHAR(20) = '12345'
DECLARE @z INT = 0 -- base value
;WITH E1(N) AS( -- 10 ^ 1 = 10 rows
    SELECT 1 FROM(VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1))t(N)
),
E2(N) AS(SELECT 1 FROM E1 a CROSS JOIN E1 b), -- 10 ^ 2 = 100 rows
E4(N) AS(SELECT 1 FROM E2 a CROSS JOIN E2 b), -- 10 ^ 4 = 10,000 rows
-- Positions 1..LEN(@x); TOP already caps the row count at the string length.
CteTally(N) AS(
    SELECT TOP(LEN(@x)) ROW_NUMBER() OVER(ORDER BY(SELECT NULL))
    FROM E4
),
-- One row per character: N is the 1-based position, num the digit value.
CteChars(N, num) AS(
    SELECT
        t.N, CAST(SUBSTRING(@x, t.N, 1) AS INT)
    FROM CteTally t
)
SELECT
    -- COALESCE keeps the result at @z (not NULL) when @x is empty.
    COALESCE(SUM(
        CASE
            WHEN N % 2 = 0 THEN num
            WHEN num * 2 < 10 THEN num * 2
            ELSE (num * 2) - 9
        END
    ), 0) + @z
FROM CteChars
The CTEs up to CteTally generate a list of numbers from 1 to LEN(@x). CteChars breaks @x character by character into separate rows. Then the final SELECT does a SUM based on the conditions.
OUTPUT : 15

Check below if it helps you
-- Walks @x left to right and accumulates the weighted digit sum in @result:
-- odd positions are doubled (minus 9 when the double is >= 10), even positions
-- are added unchanged.
DECLARE @x varchar(20) = '12345'
DECLARE @p int = 1             -- 1-based position of the current character
DECLARE @result bigint = 0;
DECLARE @tempval int = 0;      -- contribution of the current digit
WHILE @p <= len(@x)
BEGIN
    SET @tempval = CONVERT(INT, substring(@x, @p, 1));
    IF (@p % 2 = 1)
    BEGIN
        SET @tempval = @tempval * 2;
        IF (@tempval >= 10) SET @tempval = @tempval - 9;
    END
    SET @result = @result + @tempval;
    SET @p = @p + 1;
END;
-- The trailing "RETURN @x" was removed: RETURN with a value is only valid
-- inside a stored procedure or function, not in an ad-hoc batch.
PRINT @result; -- This is the result

-- Weighted digit sum of an integer, consuming digits from the right.
-- @p tracks the 1-based position (counted from the left) of the digit that is
-- currently the lowest digit of @x.
-- Note: storing the input as INT drops leading zeros, which shifts positions.
DECLARE @x INT = 12345
DECLARE @p INT = LEN(@x)
DECLARE @z INT = 0     -- accumulated result
DECLARE @d INT         -- contribution of the current digit
WHILE @p > 0
BEGIN
    SET @d = @x % 10
    IF (@p % 2 = 1)
    BEGIN
        -- Odd position: double the digit, subtracting 9 when the double is >= 10.
        SET @d = 2 * @d
        IF (@d >= 10) SET @d = @d - 9
    END
    SET @z = @z + @d
    -- Move on to the next digit to the left.
    SET @x = @x / 10
    SET @p = @p - 1
END
SELECT @z   -- 15 for 12345

The while loop does not seem necessary here.
This can be achieved with a CTE that will split the string and a case statement:
-- Weighted digit sum of @x using a recursive CTE that emits one row per
-- character (id = 1-based position, v = digit value).
DECLARE @x varchar(20) = '12345';
with split(id, v) as (
    -- Seed row at position 0 with value 0; it contributes nothing to the sum.
    select 0, cast(0 as tinyint)
    union all
    select id+1, cast(SUBSTRING(@x, id+1, 1) as tinyint)
    From split
    Where id+1 <= len(@x)
)
Select Result = SUM(
    Case When id % 2 = 0 then v       -- even position: add as-is
         When v < 5 then v*2          -- odd position, double stays below 10
         Else (v*2)-9                 -- odd position, double is 10 or more
    End
)
From split
output = 15

SQL: Validate a quantity given a comma delimited set of ranges

I need to 'validate' a quantity. Given any number x, return true or false if the number is contained in a comma delimited set of ranges and numbers. Example: valid numbers "1,5-10,25-50,100,500", some valid numbers would be 1,5,6,7,8,9,10, but not 11,12,51, etc.

If you convert the string into a temporary table of min and max values, you can easily select the valid values.
Example in T-SQL (MS SQL Server):
-- Parses a comma-delimited list of numbers and "lo-hi" ranges into a table
-- variable of (range_min, range_max) pairs, then returns the test values that
-- fall inside any of them.
declare @valid varchar(50) = '1,5-10,25-50,100,500'
-- @range is as wide as @valid so a single long item is never truncated.
declare @i int, @range varchar(50)
declare @t table (range_min int not null, range_max int not null)
while len(@valid) > 0 begin
    -- Cut the next comma-separated item off the front of @valid.
    set @i = charindex(',', @valid)
    if @i = 0 begin
        set @range = @valid
        set @valid = ''
    end else begin
        set @range = left(@valid, @i - 1)
        set @valid = right(@valid, len(@valid) - @i)
    end
    -- A single number n is stored as the range n..n.
    set @i = charindex('-', @range)
    if @i = 0 begin
        insert into @t (range_min, range_max) values (cast(@range as int), cast(@range as int))
    end else begin
        insert into @t (range_min, range_max) values (cast(left(@range, @i - 1) as int), cast(right(@range, len(@range) - @i) as int))
    end
end
select
    x.n
from
    (values(1),(5),(6),(7),(8),(9),(10),(11),(12),(51)) as x (n)
-- EXISTS instead of a join: a value is returned once even if ranges overlap.
where exists (select 1 from @t as r where x.n between r.range_min and r.range_max)

Check this (SQL Server 2008)
-- Inline table-valued function returning @anzahl consecutive integers starting
-- at @StartAt, in a single column Nmbr.
-- NULL arguments fall back to 1,000,000 rows and a start value of 0.
CREATE FUNCTION [dbo].[RunningNumbers](@anzahl INT=1000000, @StartAt INT=0)
RETURNS TABLE
AS
RETURN
WITH E1(N) AS(SELECT 1 FROM(VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1))t(N)), -- 10 ^ 1 = 10 rows
E2(N) AS(SELECT 1 FROM E1 a CROSS JOIN E1 b), -- 10 ^ 2 = 100 rows
E4(N) AS(SELECT 1 FROM E2 a CROSS JOIN E2 b), -- 10 ^ 4 = 10,000 rows
E8(N) AS(SELECT 1 FROM E4 a CROSS JOIN E4 b), -- 10 ^ 8 = 100,000,000 rows
CteTally AS
(
    -- ROW_NUMBER() is 1-based, hence the -1 so the sequence starts at @StartAt.
    SELECT TOP(ISNULL(@anzahl,1000000)) ROW_NUMBER() OVER(ORDER BY(SELECT NULL)) -1 + ISNULL(@StartAt,0) As Nmbr
    FROM E8
)
SELECT Nmbr FROM CteTally;
GO
-- Returns 1 when @ValidTarget is contained in @rangeList, a comma-delimited
-- list of single numbers and "lo-hi" ranges (e.g. '1,5-10,25-50'); otherwise 0.
-- The list is split by turning it into XML: first on ',', then each item on '-'.
-- NOTE(review): items are spliced into XML unescaped and '-' is always treated
-- as the range separator, so negative bounds or markup characters in
-- @rangeList are not supported.
CREATE FUNCTION dbo.CheckRange
(
    @ValidTarget INT
   ,@rangeList VARCHAR(MAX)
)
RETURNS BIT
AS
BEGIN
    -- '1,5-10' -> <root><r>1</r><r>5-10</r></root>
    DECLARE @rangeParts XML=CAST('<root><r>'+REPLACE(@rangeList,',','</r><r>') + '</r></root>' AS XML);
    DECLARE @Count INT;
    SELECT @Count= COUNT(*)
    FROM
    (
        SELECT Valid.X
        FROM @rangeParts.nodes('/root/r') AS rp(p)
        CROSS APPLY
        (
            -- Normalise every item to two <r> nodes (lower, upper bound);
            -- a single number n becomes n..n.
            SELECT CASE WHEN CHARINDEX('-',p.value('.','varchar(max)'))>0
                        THEN CAST('<root><r>'+REPLACE(p.value('.','varchar(max)'),'-','</r><r>') + '</r></root>' AS XML)
                        ELSE '<root><r>' + p.value('.','varchar(max)') + '</r><r>' + p.value('.','varchar(max)') + '</r></root>' END AS pr
        ) AS partsResolved
        CROSS APPLY
        (
            SELECT CASE WHEN @ValidTarget BETWEEN partsResolved.pr.value('(/root/r)[1]','int') AND partsResolved.pr.value('(/root/r)[2]','int') THEN 1 ELSE 0 END AS X
        ) AS Valid
    ) AS tbl
    WHERE tbl.X=1;
    -- Any number of matching ranges counts as a hit.
    RETURN CASE WHEN @Count > 0 THEN 1 ELSE 0 END;
END
GO
-- Demo: test the 70 integers starting at -3 against the range list.
SELECT
    rn.Nmbr,
    dbo.CheckRange(rn.Nmbr, '1,5-10,25-50,100,500')
FROM dbo.RunningNumbers(70, -3) AS rn
-- Clean-up: remove both helper functions again.
DROP FUNCTION dbo.RunningNumbers;
GO
DROP FUNCTION dbo.CheckRange;

Multiple Linear Regression function in SQL Server

I have developed Simple Linear regression function in SQL Server from here (https://ask.sqlservercentral.com/questions/96778/can-this-linear-regression-algorithm-for-sql-serve.html) to calculate Alpha,Beta and some extra values like Upper 95% and Lower 95%.
The Simple Linear regression takes the argument as X and y.
Now I am in need of perform Multiple Linear regression SQL Server, which takes arguments y and X1,X2,X3,.....Xn
Hence the Output will be as follows:
Coefficients Standard Error t Stat P-value Lower 95% Upper 95%
+-------------------------------------------------------------------------------------------+
Intercept -23.94650812 19.85250194 -1.20622117 0.351059563 -109.3649298
X Variable 1 0.201064291 0.119759437 1.678901439 0.235179 -0.314218977
X Variable 2 -0.014046021 0.037366638 -0.375897368 0.743119791 -0.174821687
X Variable 3 0.502074905 0.295848189 1.697069389 0.231776287 -0.770857111
X Variable 4 0.068238344 0.219256527 0.311226057 0.785072958 -0.875146351
Anyone can please suggest me a good way to achieve this.

I would look at using CLR integration to take advantage of an existing .NET library supporting Linear Regression, for example Math.NET Numerics. Using a CLR stored procedure you would be able to read the data out of a table, transform it to the .NET libraries matrix type, run the regression, then either write the results back to a table or return a row set directly.
But just for fun here is Linear Least Squares solved via Orthogonal Decomposition using Householder reflections in SQL. (Warning will run slowly on any significant amount of data.)
-- Create a table type to represent a 2D matrix in sparse (row, column, value) form:
-- i = row index, j = column index, Aij = value of the element at (i, j)
CREATE TYPE dbo.Matrix AS TABLE (i int, j int, Aij float, PRIMARY KEY (i, j))
GO
-- Function to perform QR factorisation, i.e. A -> QR, using Householder reflections.
-- Input : @matrix, the matrix A as (i, j, Aij) rows.
-- Output: one row per element of both factors; the column "matrix" is 'Q' or 'R'.
CREATE FUNCTION dbo.QRDecomposition (
    @matrix dbo.Matrix READONLY
)
RETURNS @result TABLE (matrix char(1), i int, j int, Aij float)
AS
BEGIN
    -- @m x @n are the dimensions of the input; @i/@j walk down the diagonal.
    DECLARE @m int, @n int, @i int, @j int, @a float
    SELECT @m = MAX(i), @n = MAX(j)
    FROM @matrix
    SET @i = 1
    SET @j = 1
    DECLARE @R dbo.Matrix    -- starts as A, is transformed into R
    DECLARE @Qj dbo.Matrix   -- reflection matrix of the current step
    DECLARE @Q dbo.Matrix    -- starts as identity, accumulates Q
    -- Generate a @m by @m Identity Matrix to transform to Q, add more numbers for m > 1000
    ;WITH e1(n) AS
    (
        SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
        SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
        SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
    ),
    e2(n) AS (SELECT 1 FROM e1 CROSS JOIN e1 AS b),
    e3(n) AS (SELECT 1 FROM e1 CROSS JOIN e2),
    numbers(n) AS (SELECT ROW_NUMBER() OVER (ORDER BY n) FROM e3)
    INSERT INTO @Q (i, j, Aij)
    SELECT i.n, j.n, CASE WHEN i.n = j.n THEN 1 ELSE 0 END
    FROM numbers i
    CROSS JOIN numbers j
    WHERE i.n <= @m AND j.n <= @m
    -- Copy input matrix to be transformed to R
    INSERT @R (i, j, Aij)
    SELECT i, j, Aij
    FROM @matrix
    -- Loop performing Householder reflections
    WHILE @j < @n OR (@j = @n AND @m > @n) BEGIN
        -- Norm of the part of column @j on and below the diagonal
        SELECT @a = SQRT(SUM(Aij * Aij))
        FROM @R
        WHERE j = @j
        AND i >= @i
        -- Give the norm the opposite sign of a reference element of the column.
        -- NOTE(review): the row looked up is @j + (@j - 1), not the diagonal row
        -- @j; kept exactly as in the original - confirm whether this is intended.
        SELECT @a = -SIGN(Aij) * @a
        FROM @R
        WHERE j = @j AND i = @j + (@j - 1)
        -- u = column vector with @a added to its diagonal entry; the reflection is I - 2uu'/u'u
        ;WITH u (i, j, Aij) AS (
            SELECT i, 1, u.ui
            FROM (
                SELECT i, CASE WHEN i = j THEN Aij + @a ELSE Aij END AS ui
                FROM @R
                WHERE j = @j
                AND i >= @i
            ) u
        )
        INSERT @Qj (i, j, Aij)
        SELECT i, j, CASE WHEN i = j THEN 1 - 2 * Aij ELSE - 2 * Aij END as Aij
        FROM (
            SELECT u.i, ut.i AS j, u.Aij * ut.Aij / (SELECT SUM(Aij * Aij) FROM u) AS Aij
            FROM u u
            CROSS JOIN u ut
        ) vvt
        -- Apply inverse Householder reflection to Q
        UPDATE Qj
        SET Aij = [Qj+1].Aij
        FROM @Q Qj
        INNER JOIN (
            SELECT Q.i, QjT.j, SUM(QjT.Aij * Q.Aij) AS Aij
            FROM @Q Q
            INNER JOIN (
                SELECT i AS j, j AS i, Aij
                FROM @Qj
            ) QjT ON QjT.i = Q.j
            GROUP BY Q.i, QjT.j
        ) [Qj+1] ON [Qj+1].i = Qj.i AND [Qj+1].j = Qj.j
        -- Apply Householder reflections to R
        UPDATE Rj
        SET Aij = [Rj+1].Aij
        FROM @R Rj
        INNER JOIN (
            SELECT Qj.i, R.j, SUM(Qj.Aij * R.Aij) AS Aij
            FROM @Qj Qj
            INNER JOIN @R R ON R.i = Qj.j
            GROUP BY Qj.i, R.j
        ) [Rj+1] ON [Rj+1].i = Rj.i AND [Rj+1].j = Rj.j
        -- Prepare Qj for next Householder reflection: rows/columns up to @j become
        -- identity, the remaining lower-right block is cleared and refilled next pass
        UPDATE @Qj
        SET Aij = CASE WHEN i = j THEN 1 ELSE 0 END
        WHERE i <= @j OR j <= @j
        DELETE FROM @Qj WHERE i > @j AND j > @j
        SET @j = @j + 1
        SET @i = @i + 1
    END
    -- Output Q
    INSERT @result (matrix, i, j, Aij)
    SELECT 'Q', i, j, Aij
    FROM @Q
    -- Output R
    INSERT @result (matrix, i, j, Aij)
    SELECT 'R', i, j, Aij
    FROM @R
    RETURN
END
GO
-- Function to perform linear regression.
-- Input : @X, the design matrix, and @y, the response as a single-column matrix.
-- Output: the coefficient estimates as rows (i, 1, value), where i is the
--         column index of @X the coefficient belongs to.
-- Method: factor X = QR with dbo.QRDecomposition, then solve Rb = Q'y.
CREATE FUNCTION dbo.MatrixLeastSquareRegression (
    @X dbo.Matrix READONLY
    , @y dbo.Matrix READONLY
)
RETURNS @b TABLE (i int, j int, Aij float)
AS
BEGIN
    -- Both factors in one table, tagged 'Q' / 'R' in the column "matrix".
    DECLARE @QR TABLE (matrix char(1), i int, j int, Aij float)
    INSERT @QR(matrix, i, j, Aij)
    SELECT matrix, i, j, Aij
    FROM dbo.QRDecomposition(@X)
    DECLARE @Qty dbo.Matrix
    -- @Qty = Q'y
    INSERT INTO @Qty(i, j, Aij)
    SELECT a.j, b.j, SUM(a.Aij * b.Aij)
    FROM @QR a
    INNER JOIN @y b ON b.i = a.i
    WHERE a.matrix = 'Q'
    GROUP BY a.j, b.j
    -- @m = number of columns of R = number of coefficients; @i walks m..1.
    DECLARE @m int, @i int
    SELECT @m = MAX(j)
    FROM @QR R
    WHERE R.matrix = 'R'
    SET @i = @m
    -- Solve Rb = Q'y via back substitution
    WHILE @i > 0 BEGIN
        -- b(i) = ( (Q'y)(i) - sum over already-solved k of R(i,k) * b(k) ) / R(i,i)
        INSERT @b (i, j, Aij)
        SELECT R.i, 1, ( y.Aij - ISNULL(sumKnown.Aij, 0) ) / R.Aij
        FROM @QR R
        INNER JOIN @Qty y ON y.i = R.i
        LEFT JOIN (
            -- Only coefficients already present in @b take part; for the
            -- last row there are none and the SUM is NULL (treated as 0).
            SELECT SUM(R.Aij * ISNULL(b.Aij, 0)) AS Aij
            FROM @QR R
            INNER JOIN @b b ON b.i = R.j
            WHERE R.matrix = 'R'
            AND R.i = @i
        ) sumKnown ON 1 = 1
        WHERE R.matrix = 'R'
        AND R.i = @i
        AND R.j = @i
        SET @i = @i - 1
    END
    RETURN
END
GO
Here is a test script/example of usage:
-- Test script: generates random data from known coefficients, runs the
-- regression and compares the estimates with the true values.
DECLARE @TestData TABLE (i int IDENTITY(1, 1), X1 float, X2 float, X3 float, X4 float, y float)
DECLARE @c float
DECLARE @b1 float
DECLARE @b2 float
DECLARE @b3 float
DECLARE @b4 float
-- c and b1..b4 are the target coefficients
SET @c = RAND()
SET @b1 = 2 * RAND()
SET @b2 = 3 * RAND()
SET @b3 = 4 * RAND()
SET @b4 = 5 * RAND()
-- Generate some test data, calculate y from c + Xb plus some noise: y = c + Xb + e
-- Note: Using RAND() for e is not normally distributed noise as linear regression assumes, this will mess with the estimate of c
DECLARE @k int = 1
WHILE @k < 50 BEGIN
    INSERT @TestData(X1, X2, X3, X4, y)
    SELECT x1, x2, x3, x4, @c + x1 * @b1 + x2 * @b2 + x3 * @b3 + x4 * @b4 + 0.2 * RAND()
    FROM (
        SELECT RAND() AS x1, RAND() AS x2, RAND() AS x3, RAND() AS x4
    ) X
    SET @k = @k + 1
END
-- Put our data into dbo.Matrix types
DECLARE @X dbo.Matrix
INSERT @X (i, j, Aij)
-- Extra column for constant
-- (UNION ALL: the branches differ in j, so there is nothing to de-duplicate)
SELECT i, 1, 1
FROM @TestData
UNION ALL
SELECT i, 2, X1
FROM @TestData
UNION ALL
SELECT i, 3, X2
FROM @TestData
UNION ALL
SELECT i, 4, X3
FROM @TestData
UNION ALL
SELECT i, 5, X4
FROM @TestData
DECLARE @y dbo.Matrix
INSERT @y (i, j, Aij)
SELECT i, 1, y
FROM @TestData
-- Estimates for coefficient values
DECLARE @bhat dbo.Matrix
INSERT @bhat (i, j, Aij)
SELECT i, j, Aij
FROM dbo.MatrixLeastSquareRegression(@X, @y)
-- True coefficient (b) next to its estimate (best)
SELECT CASE i
        WHEN 1 THEN @c
        WHEN 2 THEN @b1
        WHEN 3 THEN @b2
        WHEN 4 THEN @b3
        WHEN 5 THEN @b4
    END AS b
    , Aij AS best
FROM @bhat
-- Observed y next to the fitted value X * bhat
SELECT y.Aij AS y, Xb.Aij AS yest
FROM (
    SELECT x.i, SUM(x.Aij * bh.Aij) AS Aij
    FROM @X x
    INNER JOIN @bhat bh ON bh.i = x.j
    GROUP BY x.i
) Xb
INNER JOIN @y y ON y.i = Xb.i
-- Mean squared residual
SELECT SUM(SQUARE(y.Aij - Xb.Aij)) / COUNT(*) AS [Variance]
FROM (
    SELECT x.i, SUM(x.Aij * bh.Aij) AS Aij
    FROM @X x
    INNER JOIN @bhat bh ON bh.i = x.j
    GROUP BY x.i
) Xb
INNER JOIN @y y ON y.i = Xb.i

While I applaud the effort of writing pure SQL functions that can perform all sorts of advanced statistical calculations, SQL is simply not the best language to solve these kinds of problems in.
CLR is definitely an option (as suggested by David Manning), and compared to pure SQL, it will most likely perform much better with this particular problem.
A different route is to use a statistical language. I would recommend R. It has built-in packages for reading and writing data to SQL Server, and a multitude of functions for performing all kinds of regression. Best of all: It's free! Here is an excellent introductory article to getting started with R and performing statistical analysis on data from SQL Server 2012.

Why not use the Linear Regression data mining algorithm from Analysis Services (though it is naturally a decision tree adapted to linear regression)? You just need to design a correct mining model for it.
HINT: no OLAP cube is needed, you can design it from relational tables/views
Analysis Services functionaly is included in Standard Edition of SQL Server and higher
After processing the model you can query it with SQL-like language and retrieve regression function, variance and other useful things.

SQL Server solution to simple recursive function

I am looking for a SQL Server solution for a simple recursive formula. In the example, X is my column of numbers and Y is the column I am trying to create with a SQL Query.
I have a list of numbers, denoted X, and wish to produce a special kind of running sum that is not allowed to go less than 0, denoted Y.
Base Case
Y1 = MAX(X1,0)
Recursive Rule
Yi = MAX(Xi+Yi-1,0)
EXAMPLE:
id X(Input) Y(Output)
1 15 15
2 -87 0
3 26 26
4 -87 0
5 4 4
6 -19 0
7 34 34
8 -4 30
9 40 70
10 -14 56

Assuming you have an id column that specifies the ordering, I am pretty sure you have to do this with a recursive CTE. The problem is that the "set negative numbers to zero" complicates the situation.
Let me assume that the id identifies the ordering.
-- Running sum of X in id order that is never allowed to drop below 0:
--   Y(1) = max(X(1), 0),  Y(i) = max(X(i) + Y(i-1), 0)
-- Assumes the source table is named T with columns id and X, and that X is an
-- integer column - replace the table name with your own.
with ordered as (
    -- seqnum is a gap-free 1..n numbering in id order, so the recursion
    -- does not depend on ids being consecutive or starting at 1.
    select src.id, src.X, row_number() over (order by src.id) as seqnum
    from T as src
),
cte as (
    select seqnum, id, X,
           (case when X < 0 then 0 else X end) as Y
    from ordered
    where seqnum = 1
    union all
    select tnext.seqnum, tnext.id, tnext.X,
           (case when tnext.X + cte.Y < 0 then 0 else tnext.X + cte.Y end) as Y
    from cte join
         ordered tnext
         -- step from the previous row of the recursion to the next one
         on tnext.seqnum = cte.seqnum + 1
)
select id, X, Y
from cte
order by id
-- one recursion level per row; 0 lifts the default limit of 100
option (maxrecursion 0);

Using a cursor and a table variable to catch the calculated values might be good for performance.
-- Running sum of X in id order that is floored at 0, computed with a cursor.
-- Reads from table T (columns id, X) and collects the results in the table
-- variable @T.
declare @T table
(
  id int,
  X int,
  Y int
);
declare @id int;
declare @X int;
declare @Y int;
-- @Y carries the running value from one row to the next.
set @Y = 0;
declare C cursor local static forward_only read_only for
  select T.id, T.X
  from T
  order by T.id;
open C;
fetch next from C into @id, @X;
while @@fetch_status = 0
begin
  -- Add the current X, but never let the running value go below 0.
  set @Y = case when @X + @Y < 0 then 0 else @X + @Y end;
  insert into @T(id, X, Y) values (@id, @X, @Y);
  fetch next from C into @id, @X;
end
close C;
deallocate C;
select T.id, T.X, T.Y
from @T as T
order by T.id;
SQL Fiddle
Have a look at Best approaches for running totals by Aaron Bertrand

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

What is the easiest way to print all prime numbers until n, in SQL? - sql

What kind of algorithm should I use, for printing all prime numbers until let's say 1000? SQL Server is preferred. Thanks

The answer is simple: Assuming you already have following table populated with data: CREATE TABLE dbo.PrimeNumber(Num INT NOT NULL PRIMARY KEY); you'll need only a simple SELECT: SELECT * FROM dbo.PrimeNumber

-- Prints every prime from 2 to @range, one per line.
-- @y is the candidate, @x the trial divisor.
DECLARE @range INT = 1000, @x INT = 2, @y INT = 2
WHILE (@y <= @range)
BEGIN
    -- Advance @x to the first divisor of @y, giving up once @x exceeds
    -- sqrt(@y). "@x <= @y / @x" is the overflow-safe form of "@x * @x <= @y".
    WHILE (@x <= @y / @x AND @y % @x <> 0)
        SET @x = @x + 1
    -- No divisor found up to the square root: @y is prime.
    IF (@x > @y / @x)
        PRINT @y
    SET @x = 2
    SET @y = @y + 1
END

Related

How to select only armstrong numbers from the list?

Complicated SQL while loop

SQL: Validate a quantity given a comma delimited set of ranges

Multiple Linear Regression function in SQL Server

SQL Server solution to simple recursive function

Categories

Resources