efficient sql prime numbers algorithm - sql

I realize SQL is not the best language for this, but this is a homework assignment to write a function that will take an argument N and will find the prime numbers (N=10,000,000) between 1 and 10 million. I am using Postgresql. Here is my attempt:
--First create table numbers, which is meant to hold all numbers from 1 to 10000000
--(a single bigint column, one row per number)
create table numbers(number bigint);
--Use this function to fill it in:
--populate(top) inserts every integer from 1 to top (inclusive) into numbers.
--A single set-based INSERT ... SELECT replaces the row-by-row loop;
--a top below 1 (or NULL) inserts nothing, exactly as the loop did.
create or replace function populate(top bigint) RETURNS void as $$
begin
    insert into numbers(number)
    select gs.i
    from generate_series(1::bigint, top) as gs(i);
END; $$ LANGUAGE plpgsql;
--Function primes: leaves all primes up to N in table t1 (column a); returns nothing.
--Relies on table numbers containing the integers 1..N.
create or replace function primes(N bigint) RETURNS void AS $$
DECLARE
    first bigint := 3;  -- smallest remaining candidate above the previous range
    last bigint := 2;   -- exclusive upper end of the range sieved so far
BEGIN
    --discard the result of an earlier call; without this a second call fails
    --because t1 already exists
    drop table if exists t1;
    --create table t1 and insert all odd integers from 3 to N (and 2)
    create table t1(a bigint);
    INSERT into t1(a)
    select number
    from numbers
    where (number % 2 <> 0 or number = 2)
      AND number <= N
      AND number <> 1;
    --Use Sieve of Eratosthenes to find primes
    while (last < sqrt(n)) LOOP
        --next surviving candidate; NULL (which ends the loop) when none is left
        first := (select min(a) from t1 where a > last);
        last := first * first;
        --delete from list of primes all multiples of the primes in the range of first-last
        -- (first run-through is primes in range of 3-9, second run-through would be primes in range of 11-121, etc.)
        --multiples start at t.a * t.a: smaller multiples were removed in earlier passes
        delete from t1
        where a in (
            select n1.number * t.a
            from t1 as t
            inner join numbers as n1
                on n1.number >= t.a
               and n1.number <= n / t.a
            where t.a >= first
              and t.a < last
        );
    END LOOP;
END; $$ LANGUAGE plpgsql;

A good review of the topic is here: https://sqlserverfast.com/blog/hugo/2006/09/the-prime-number-challenge-great-waste-of-time/
But for a homework problem, you should do your own work.

I don't think anyone actually checks or compares most of these postings - I've posted a couple of poor runners just to find out, but nobody called them. If you are inclined to compare though, you'll find this readable and fast:
-- Recreate the work table: every candidate number together with its square.
IF OBJECT_ID('tempdb.dbo.#Numbers') IS NOT NULL
    DROP TABLE #Numbers;
CREATE TABLE #Numbers (Prime INT NOT NULL, Squared BIGINT PRIMARY KEY CLUSTERED);
DECLARE @MaxPrime INT = 1000000;
-- GroupingDriver yields 7, 37, 67, ...: one row per block of 30 integers.
;WITH
GroupingDriver AS
(
    SELECT CAST('7' AS BIGINT) AS Interval
    UNION ALL
    SELECT Interval + 30
    FROM GroupingDriver
    WHERE Interval + 30 < @MaxPrime
)
INSERT INTO #Numbers (Prime, Squared)
SELECT 2, 4
UNION ALL
SELECT 3, 9
UNION ALL
SELECT 5, 25
UNION ALL
-- Within each block keep only the eight offsets that are not divisible by 2, 3 or 5.
SELECT Prime.Number, Prime.Number * Prime.Number
FROM GroupingDriver
CROSS APPLY ( VALUES (GroupingDriver.Interval),
                     (GroupingDriver.Interval + 4),
                     (GroupingDriver.Interval + 6),
                     (GroupingDriver.Interval + 10),
                     (GroupingDriver.Interval + 12),
                     (GroupingDriver.Interval + 16),
                     (GroupingDriver.Interval + 22),
                     (GroupingDriver.Interval + 24) ) AS Prime(Number)
WHERE Prime.Number < @MaxPrime
-- the driver recurses far more than the default limit of 100 levels
OPTION (MAXRECURSION 0);
Now remove those divisible by other primes. We just used squared as a cutoff point for comparison.
-- Keep a candidate only when no stored number whose square is at most
-- the candidate divides it evenly.
SELECT candidate.Prime
FROM #Numbers AS candidate
WHERE NOT EXISTS (
    SELECT *
    FROM #Numbers AS divisor
    WHERE candidate.Prime >= divisor.Squared
      AND candidate.Prime % divisor.Prime = 0
);
GO

Related

Generate a list with string prefix in SQL with fixed length

I just want to generate a list like this
XY0001
XY0002
XY0003
The prefix is same for all rows. Need fixed length (6 in this example)
Looking for an easy way to produce such list to put it into temp table.
MS SQL
for a very small number this would do:
-- Table variable for the generated names (table variables take the @ prefix).
DECLARE @TempList TABLE (Name VARCHAR(100));
-- Prefix 'XY' plus a four-digit, zero-padded number: fixed length 6.
INSERT INTO @TempList (Name) VALUES ('XY0001')
INSERT INTO @TempList (Name) VALUES ('XY0002')
INSERT INTO @TempList (Name) VALUES ('XY0003')
INSERT INTO @TempList (Name) VALUES ('XY0004')
SELECT Name FROM @TempList
You can use an ad-hoc tally table
If 2012+
-- Table variable for the generated names (table variables take the @ prefix).
DECLARE @TempList TABLE (Name VARCHAR(100));
-- Ad-hoc tally: number the rows of a cross join of master..spt_values with
-- itself, keep the first 9999, and format each number as four digits.
INSERT INTO @TempList (Name)
SELECT 'XY' + FORMAT(A.N, '0000')
FROM (
    SELECT TOP 9999 ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N
    FROM master..spt_values AS N1
    CROSS JOIN master..spt_values AS N2
) AS A;
-- Zero-padded names sort in the same order as their numbers.
SELECT Name
FROM @TempList
ORDER BY Name;
Returns
Name
XY0001
XY0002
XY0003
XY0004
...
XY9997
XY9998
XY9999
If not
-- Table variable for the generated names (table variables take the @ prefix).
DECLARE @TempList TABLE (Name VARCHAR(100));
-- Same ad-hoc tally without FORMAT: left-pad with zeros, then keep the last
-- four characters.
INSERT INTO @TempList (Name)
SELECT 'XY' + RIGHT('00000' + CAST(A.N AS VARCHAR(25)), 4)
FROM (
    SELECT TOP 9999 ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N
    FROM master..spt_values AS N1
    CROSS JOIN master..spt_values AS N2
) AS A;
-- Zero-padded names sort in the same order as their numbers.
SELECT Name
FROM @TempList
ORDER BY Name;
I like to use recursive CTE's for this.
declare @max_number int = 1000;
-- Recursive CTE producing the integers 1..@max_number
-- (SQL Server requires UNION ALL between the anchor and the recursive member).
with num as (
    select 1 as n
    union all
    select n + 1
    from num
    where n < @max_number
)
-- A plain cast to char(4) pads with trailing spaces, so left-pad with zeros
-- explicitly; values above 9999 would lose their leading digits.
select 'XY' + right('0000' + cast(n as varchar(10)), 4)
from num
-- lift the default limit of 100 recursion levels
option (maxrecursion 0);
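The recursive CTE gives you the numbers. Note that a plain cast to char(4) pads with trailing spaces rather than leading zeros, so left-pad explicitly (for example right('0000' + cast(n as varchar(10)), 4)) to ensure you get 0001 instead of 1.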
This approach will support a variable number of outputs. Though as you alluded to in your question, this is overkill if you only want a few.
(You'll need to test this out for boundary cases. I haven't tested this exact code sample.)
There is likely a limit to how far this scales because it uses recursion.

Compare two columns and make the insert

I want to compare two columns which come from two different tables.
One of the columns, I need to make SUM for all rows with identity let's say 3 and store to a variable.
After that, compare with one row from other table for same identity 3 and to INSERT something ELSE to BREAK if first_column <= second_column.
Can someone suggest some query for this? For Postgresql...
-- Adds a timesheet record for employee p_uid on project p_project_id, but only
-- while the time already booked on the project does not exceed the allocation
-- read from tbl_project. Returns 'INSERT OK!' or 'NOT OK'.
-- Raises an exception when usp_check_permission rejects p_uid.
CREATE OR REPLACE FUNCTION "SA_PRJ".usp_add_timesheet_test(p_uid integer, p_project_id integer, p_allocated_time numeric, p_achieved_time numeric, p_task_desc character varying, p_obs character varying, p_date timestamp without time zone)
RETURNS character varying AS
$BODY$
DECLARE
    sum_alloc_time numeric;   -- total time already booked on the project
    alloc_hours integer;      -- allocation read from tbl_project
    v_project_row_id integer; -- renamed from fld_id so it cannot clash with the column of that name
    alloc_id integer;
BEGIN
    if not "SA_ADM".usp_check_permission(p_uid, 'SA_PRJ', 'usp_add_timesheet_record') then
        raise exception 'User ID % dont have permission!', p_uid;
    end if;

    select a.fld_id
    into alloc_id
    from "SD_PRJ".tbl_project_allocation a
    where a.fld_emp_id = p_uid
      and a.fld_project_id = p_project_id;

    -- SUM over zero rows is NULL; treat "nothing booked yet" as 0
    SELECT COALESCE(SUM(fld_allocated_time), 0)
    INTO sum_alloc_time
    FROM "SD_PRJ".tbl_project_timesheet
    WHERE fld_project_id = p_project_id;

    -- NOTE(review): this join is on fld_id and may match several rows (only the
    -- first is kept) or none (alloc_hours stays NULL, giving 'NOT OK') — confirm
    -- whether it should read tbl_project by fld_project_id alone.
    SELECT p.fld_allocated_days, p.fld_id
    INTO alloc_hours, v_project_row_id
    FROM "SD_PRJ".tbl_project p
    JOIN "SD_PRJ".tbl_project_timesheet t USING (fld_id)
    WHERE t.fld_project_id = p_project_id;

    -- PL/pgSQL variables carry no sigil
    IF sum_alloc_time <= alloc_hours THEN
        INSERT INTO "SD_PRJ".tbl_project_timesheet
            (fld_emp_id, fld_project_id, fld_is_allocated, fld_allocated_time
           , fld_achieved_time, fld_task_desc, fld_obs, fld_date)
        VALUES (p_uid, p_project_id, coalesce(alloc_id,0), p_allocated_time
           , p_achieved_time, p_task_desc, p_obs, p_date);
        RAISE NOTICE 'INSERT OK!';
        -- the function is declared to return a value; falling off the end is a runtime error
        RETURN 'INSERT OK!';
    ELSE
        RAISE NOTICE 'NOT OK';
        RETURN 'NOT OK';
    END IF;
END
$BODY$ LANGUAGE plpgsql;
1.tbl_project (fld_id, fld_allocated_days,fld_project_id)
2.tbl_project_timesheet(fld_id,fld_allocated_time,fld_project_id), all INTEGER
I have this, but it doesn't work as I wish. Thanks.
I think one problem is here:
-- Excerpt of the lookup in question: reads fld_allocated_days and fld_id into
-- the variables alloc_hours and fld_id.
-- NOTE(review): the join may match more than one row per project — confirm
-- against the data.
SELECT p.fld_allocated_days, p.fld_id
INTO alloc_hours, fld_id
FROM "SD_PRJ".tbl_project p
JOIN "SD_PRJ".tbl_project_timesheet t USING (fld_id)
WHERE t.fld_project_id = p_project_id;
That will cough (I think) whenever the select query returns more than one row i.e. whenever tbl_project_timesheet has more than one record for a fld_id,project_id combination.
Anyway. Here's a partial, simplified answer, but hopefully you get the idea...
I wouldn't use local variables. Do the insert in one step:
-- Insert the timesheet row only while the time already booked on the project
-- does not exceed the project's allocation; otherwise nothing is inserted.
INSERT INTO timesheet(emp_id,project_id) -- other columns
SELECT
    p_uid, p.fld_project_id -- other columns
FROM
    projects p
INNER JOIN
    -- SUM over zero rows is NULL; treat "nothing booked yet" as 0
    (SELECT COALESCE(SUM(t.fld_allocated_time), 0) AS sumtime
     FROM timesheet t
     WHERE t.fld_project_id = p_project_id) AS sumtime_subquery
    ON sumtime_subquery.sumtime <= p.fld_allocated_days -- booked time within the allocation
WHERE p.fld_project_id = p_project_id;
Now, you need to know if anything was actually inserted. I think you can use the RETURNING option of the INSERT statement, e.g. from here (caveat - I have never used RETURNING, nor set a local variable from a with statement):
-- Same conditional insert, wrapped in a CTE so the number of inserted rows
-- (0 or more) can be stored in l_updatedCount.
WITH inserted_rows AS (
    INSERT INTO timesheet(emp_id,project_id) -- other columns
    SELECT
        p_uid, p.fld_project_id -- other columns
    FROM
        projects p
    INNER JOIN
        -- SUM over zero rows is NULL; treat "nothing booked yet" as 0
        (SELECT COALESCE(SUM(t.fld_allocated_time), 0) AS sumtime
         FROM timesheet t
         WHERE t.fld_project_id = p_project_id) AS sumtime_subquery
        ON sumtime_subquery.sumtime <= p.fld_allocated_days -- booked time within the allocation
    WHERE p.fld_project_id = p_project_id
    RETURNING 1
)
SELECT COUNT(*) INTO l_updatedCount FROM inserted_rows; -- you have to declare l_updatedCount
-- Now an if statement to handle l_updatedCount

Converting a script from MSSQL to PL/pgSQL

I just started working with the EVE static dump, which is just a lot of tables with data about the game, such as a list of what solar systems connect, which is what I'm dealing with.
I want to make a webpage that lets you filter out systems, and the first step is getting a list of systems nearby, with the distance to them.
I found a script that does it for MSSQL
--By Joanna Davaham http://forum.eveuniversity.org/viewtopic.php?t=44601&p=396107#p424943
--set values
DECLARE @jumpsAway INT = 10
DECLARE @MiddleSystemName VARCHAR(50) = 'Aldrat'
DECLARE @Level INT = 1
--#map collects one row per jump found, tagged with its distance (Level) from the start system
IF OBJECT_ID('tempdb..#map') IS NOT NULL
    DROP TABLE #map
CREATE TABLE #map
(fromSolarSystemID INT, toSolarSystemID INT, Level INT)
--seed row: the start system itself at Level 0, with -1 as its origin
INSERT INTO #map
SELECT -1, mSS.solarSystemID, 0 FROM mapSolarSystems mSS
WHERE mSS.solarSystemName = @MiddleSystemName
WHILE @Level <= @jumpsAway
BEGIN
    --add the jumps leaving systems reached on the previous level,
    --skipping systems whose outgoing jumps were already recorded
    INSERT INTO #map
    SELECT mSSJ.fromSolarSystemID, mSSJ.toSolarSystemID, @Level FROM mapSolarSystemJumps mSSJ
    WHERE mSSJ.fromSolarSystemID IN (SELECT toSolarSystemID FROM #map WHERE Level = @Level-1)
    AND mSSJ.fromSolarSystemID NOT IN (SELECT fromSolarSystemID FROM #map)
    SET @Level = @Level + 1
END
SELECT m.*, mSS.solarSystemName, mSS.security FROM #map m
JOIN mapSolarSystems mSS ON m.toSolarSystemID=mSS.solarSystemID
--WHERE mSS.security<0.45 --uncomment to check all nearby lowsec system
I know that I could probably just use the MSSQL version of the dump, but I also want to be learning more about how to use PostgreSQL better.
I understand what it's doing and everything, but I just don't understand PL/pgSQL well enough to make it work.
My attempt is
-- Attempted PL/pgSQL port of the MSSQL script: near(system name, max jumps).
-- NOTE(review): kept exactly as posted; it does not run. Visible problems:
--   * EXECUTE ... INTO expects a variable, but map is a temp table
--   * the statement "jumps := jumps + 1" has no terminating semicolon
--   * the final query reads "FROM JOIN" with no table and alias m before the JOIN
CREATE FUNCTION near(VARCHAR, INTEGER) RETURNS TABLE(fromID INT,toID INT,jumps INT,name VARCHAR,security VARCHAR) AS $$
DECLARE --Declaration from here http://www.postgresql.org/docs/9.1/static/plpgsql-declarations.html
MiddleSystemName ALIAS FOR $1;
jumpsAway ALIAS FOR $2;
jumps INTEGER :=1;
BEGIN
--http://stackoverflow.com/questions/11979154/select-into-to-create-a-table-in-pl-pgsql
-- Seed the temp table with the start system (origin -1, level 0)
CREATE TEMP TABLE map AS
SELECT -1, mSS.solarSystemID, 0
FROM mapSolarSystems mSS
WHERE mSS.solarSystemName= MiddleSystemName;
LOOP
--http://www.postgresql.org/docs/9.1/static/plpgsql-statements.html#PLPGSQL-STATEMENTS-EXECUTING-DYN
--If you don't do it with execute, you can only do one row, I guess?
EXECUTE 'SELECT
|| mSSJ.fromSolarSystemID,
|| mSSJ.toSolarSystemID,
|| $1
|| FROM
|| mapSolarSystemJumps mSSJ
|| WHERE
|| mSSJ.fromSolarSystemID EXISTS (SELECT toSolarSystemID FROM map WHERE jumps = $1 - 1)
|| AND mSSJ.fromSolarSystemID NOT EXISTS (SELECT fromSolarSystemID FROM map)'
INTO map
USING jumps;
jumps := jumps + 1
EXIT WHEN jumps > jumpsAway;
END LOOP;
RETURN QUERY SELECT m.*,mSS.solarSystemName, mSS.security FROM JOIN mapSolarSystems mSS ON m.toSolarSystemID = mSS.solarSystemID;
END;
$$ LANGUAGE plpgsql;
And the error that produces is:
ERROR: "map" is not a known variable
LINE 27: INTO map
^
Thanks for all the help.
PL/pgSQL
This should be a valid translation to plpgsql:
-- f_near: lists the jumps reachable within _jumpsaway jumps of the system named
-- _middlesystemname. Each result row is one recorded jump (fromid -> toid) with
-- its level, plus the name and security of the destination system.
CREATE OR REPLACE FUNCTION f_near(_middlesystemname text, _jumpsaway int)
  RETURNS TABLE(fromid int, toid int, jumps int, name text, security text) AS
$func$
BEGIN
   -- a temp table left over from an earlier call in this session would make
   -- CREATE TEMP TABLE fail; pg_temp limits the drop to the session's temp schema
   DROP TABLE IF EXISTS pg_temp.map;

   -- seed row: the start system itself at level 0, with -1 as its origin
   CREATE TEMP TABLE map AS
   SELECT -1 AS "fromSolarSystemID"
         ,m."solarSystemID" AS "toSolarSystemID"
         ,0 AS level
   FROM   "mapSolarSystems" m
   WHERE  m."solarSystemName" = _middlesystemname;
   -- potentially add indexes on the temp table and ANALYZE if it gets big

   -- the FOR loop declares its own integer loop variable
   FOR _jumps IN 1 .. _jumpsaway LOOP
      -- EXISTS / NOT EXISTS mirror the IN / NOT IN of the MSSQL script: a system
      -- reached over several routes on the previous level contributes each of
      -- its jumps once, where a plain join would repeat them
      INSERT INTO map ("fromSolarSystemID", "toSolarSystemID", level)
      SELECT sj."fromSolarSystemID", sj."toSolarSystemID", _jumps
      FROM   "mapSolarSystemJumps" sj
      WHERE  EXISTS (
                SELECT 1
                FROM   map m
                WHERE  m."toSolarSystemID" = sj."fromSolarSystemID"
                AND    m.level = _jumps - 1)
      AND    NOT EXISTS (
                SELECT 1
                FROM   map mx
                WHERE  mx."fromSolarSystemID" = sj."fromSolarSystemID");
   END LOOP;

   -- cast to text so the rows match the declared result columns whatever the
   -- stored column types are
   RETURN QUERY
   SELECT m.*, s."solarSystemName"::text, s."security"::text
   FROM   map m
   JOIN   "mapSolarSystems" s ON m."toSolarSystemID" = s."solarSystemID";
END
$func$ LANGUAGE plpgsql;
RECURSIVE CTE - doesn't seem to work
This short SQL query with a recursive CTE should have done it:
-- Recursive-CTE variant of the jump search.
-- PostgreSQL rejects it: the recursive reference to map inside the LEFT JOIN
-- is not allowed within an outer join.
-- NOTE(review): from_id is not defined in this snippet — presumably a
-- placeholder for the start system's name.
WITH RECURSIVE map AS (
-- anchor: the start system at level 0, with -1 as its origin
SELECT -1 AS fromsolarsystemid, m.solarsystemid, 0 AS level
FROM mapsolarsystems m
WHERE m.solarsystemname = from_id
UNION ALL
-- recursive step: jumps leaving systems reached so far, one level further out
SELECT sj.fromsolarsystemid, sj.tosolarsystemid, level + 1
FROM mapsolarsystemjumps sj
JOIN map m USING (level)
LEFT JOIN map mx USING (fromsolarsystemid)
WHERE sj.fromsolarsystemid = m.tosolarsystemid
AND mx.fromsolarsystemid IS NULL
AND m.level < 10 -- jumpsAway
)
SELECT m.*, s.solarsystemname, s.security
FROM map m
JOIN mapsolarsystems s ON m.tosolarsystemid = s.solarsystemid
-- WHERE s.security < 0.45 -- uncomment to check all nearby lowsec system
However:
ERROR: recursive reference to query "map" must not appear within an outer join
LINE 9: LEFT JOIN map mx USING (fromsolarsystemid)

Selecting rows that don't exist physically in the database

I've totally rewritten my question because people were taking the simplified example in the previous version too literally.
The aim:
-- Illustrative goal: insert as many GUID rows into X as requested;
-- 23452345 stands for any user-supplied count.
INSERT INTO X
SELECT TOP 23452345 NEWID()
This query should insert 23452345 GUIDs to the "X" table. actually 23452345 means just any possible number that is entered by user and stored in database.
So the problem is that inserting rows to a database by using
INSERT INTO ... SELECT ...
statement requires you to already have the required amount of rows inserted to database.
Naturally you can emulate the existence of rows by using temporary data and cross joining it but this (in my stupid opinion) creates more results than needed and in some extreme situations might fail due to many unpredicted reasons. I need to be sure that if user entered extremely huge number like 2^32 or even bigger the system will work and behave normally without any possible side effects like extreme memory/time consumption etc...
In all fairness I derived the idea from this site.
-- Counter CTE: starts at 1 and recurses while the value is below 100;
-- the outer query emits one NEWID() per counter row.
;WITH counter (x) AS
(
    SELECT 1
    UNION ALL
    SELECT prev.x + 1
    FROM counter AS prev
    WHERE 100 > prev.x
)
SELECT NEWID()
FROM counter
EDIT:
The general method we're seeing is to select from a table that has the desired number of rows. It's hackish, but you can create a table, insert the desired number of records, and select from it.
-- Temp table that receives one row per requested GUID
create table #num
(
    num int
)
-- local variables take the @ prefix (# marks temp tables)
declare @i int
set @i = 1
while (@i <= 77777)
begin
    insert into #num (num) values (@i)
    set @i = @i + 1
end
select NEWID() from #num
drop table #num
Of course creating a Number table is the best approach and will come in handy. You should definitely have one at your disposal. If you need something as a one-off just join to a known table. I usually use a system table such as spt_values:
-- Table variable receiving the generated GUIDs, plus a start time for the measurement
declare @result table (id uniqueidentifier)
declare @sDate datetime
set @sDate = getdate();
-- num: 777777 row numbers taken from a cross join of master..spt_values with itself
;with num (n)
as ( select top(777777) row_number() over(order by t1.number) as N
     from master..spt_values t1
     cross join master..spt_values t2
)
insert into @result(id)
select newid()
from num;
-- elapsed time in milliseconds
select datediff(ms, @sDate, getdate()) [elapsed]
I'd create an integers table and use it. This type of table comes in handy many situations.
-- Permanent table of consecutive integers, filled through its identity column
CREATE TABLE dbo.Integers
(
    i INT IDENTITY(1,1) PRIMARY KEY CLUSTERED
)
-- Strict comparison: stop once the last generated identity reaches the target,
-- so exactly 100000 rows are inserted (<= would add one row too many)
WHILE COALESCE(SCOPE_IDENTITY(), 0) < 100000 /* or some other large value */
BEGIN
    INSERT dbo.Integers DEFAULT VALUES
END
Then all you need to do is:
-- One NEWID() per integer row up to and including 77777
SELECT NEWID()
FROM Integers AS ints
WHERE 77777 >= ints.i
Try this:
-- Row multiplier: every level cross-joins the previous level with itself,
-- squaring the row count.
with
 L0 as (select 1 as C union all select 1)                 --2 rows
,L1 as (select 1 as C from L0 as A cross join L0 as B)    --4 rows
,L2 as (select 1 as C from L1 as A cross join L1 as B)    --16 rows
,L3 as (select 1 as C from L2 as A cross join L2 as B)    --256 rows
select top (100) newid()
from L3
-- Borrow the rows of a system catalog view as the row source
SELECT TOP (100) NEWID()
FROM sys.all_columns AS ac
Or any other datasource that has a large number of records. You can build your own table for 'counting' functionality as such, you can use it in lieu of while loops.
Tally tables: http://www.sqlservercentral.com/articles/T-SQL/62867

How can I extend this SQL query to find the k nearest neighbors?

I have a database full of two-dimensional data - points on a map. Each record has a field of the geometry type. What I need to be able to do is pass a point to a stored procedure which returns the k nearest points (k would also be passed to the sproc, but that's easy). I've found a query at http://blogs.msdn.com/isaac/archive/2008/10/23/nearest-neighbors.aspx which gets the single nearest neighbour, but I can't figure how to extend it to find the k nearest neighbours.
This is the current query - T is the table, g is the geometry field, @x is the point to search around, Numbers is a table with integers 1 to n:
-- @start is the radius of the first search region; region n has radius @start * 2^n.
-- @x (the search point) is supplied by the caller.
DECLARE @start FLOAT = 1000;
WITH NearestPoints AS
(
    -- all points of the smallest region (lowest n) that contains any point
    SELECT TOP(1) WITH TIES *, T.g.STDistance(@x) AS dist
    FROM Numbers JOIN T WITH(INDEX(spatial_index))
    -- float base: POWER with an integer base returns int and overflows for large n
    ON T.g.STDistance(@x) < @start*POWER(CAST(2 AS FLOAT),Numbers.n)
    ORDER BY n
)
SELECT TOP(1) * FROM NearestPoints
ORDER BY n, dist
The inner query selects the nearest non-empty region and the outer query then selects the top result from that region; the outer query can easily be changed to (e.g.) SELECT TOP(20), but if the nearest region only contains one result, you're stuck with that.
I figure I probably need to recursively search for the first region containing k records, but without using a table variable (which would cause maintenance problems as you have to create the table structure and it's liable to change - there're lots of fields), I can't see how.
What happens if you remove TOP (1) WITH TIES from the inner query, and set the outer query to return the top k rows?
I'd also be interested to know whether this amendment helps at all. It ought to be more efficient than using TOP:
-- @start: radius of the first region, @p: growth factor per region,
-- @k: number of neighbours wanted. @x (the search point) is supplied by the caller.
DECLARE @start FLOAT = 1000
       ,@k INT = 20
       ,@p FLOAT = 2;
WITH NearestPoints AS
(
    SELECT *
          ,T.g.STDistance(@x) AS dist
          ,ROW_NUMBER() OVER (ORDER BY T.g.STDistance(@x)) AS rn
    FROM Numbers
    JOIN T WITH(INDEX(spatial_index))
    -- match each point to the one ring [@start*@p^(n-1), @start*@p^n) it falls in;
    -- the first ring (n = 1) has no lower bound
    ON T.g.STDistance(@x) < @start*POWER(@p,Numbers.n)
    AND (Numbers.n - 1 = 0
         OR T.g.STDistance(@x) >= @start*POWER(@p,Numbers.n - 1)
        )
)
SELECT *
FROM NearestPoints
WHERE rn <= @k
ORDER BY rn;
NB - untested - I don't have access to SQL 2008 here.
Quoted from Inside Microsoft® SQL Server® 2008: T-SQL Programming. Section 14.8.4.
The following query will return the 10
points of interest nearest to @input:
-- @input: the point to search around; @start: radius of the first search ring
DECLARE @input GEOGRAPHY = 'POINT (-147 61)';
DECLARE @start FLOAT = 1000;
WITH NearestNeighbor AS(
    SELECT TOP 10 WITH TIES
        *, b.GEOG.STDistance(@input) AS dist
    FROM Nums n JOIN GeoNames b WITH(INDEX(geog_hhhh_16_sidx)) -- index hint
    ON b.GEOG.STDistance(@input) < @start*POWER(CAST(2 AS FLOAT),n.n)
    AND b.GEOG.STDistance(@input) >=
        CASE WHEN n = 1 THEN 0 ELSE @start*POWER(CAST(2 AS FLOAT),n.n-1) END
    WHERE n <= 20
    ORDER BY n
)
SELECT TOP 10 geonameid, name, feature_code, admin1_code, dist
FROM NearestNeighbor
ORDER BY n, dist;
Note: Only part of this query’s WHERE
clause is supported by the spatial
index. However, the query optimizer
correctly evaluates the supported part
(the "<" comparison) using the index.
This restricts the number of rows for
which the ">=" part must be tested,
and the query performs well. Changing
the value of @start can sometimes
speed up the query if it is slower
than desired.
Listing 2-1. Creating and Populating Auxiliary Table of Numbers
SET NOCOUNT ON;
USE InsideTSQL2008;
IF OBJECT_ID('dbo.Nums', 'U') IS NOT NULL DROP TABLE dbo.Nums;
CREATE TABLE dbo.Nums(n INT NOT NULL PRIMARY KEY);
-- @max: highest number to generate; @rc: number of rows currently in dbo.Nums
DECLARE @max AS INT, @rc AS INT;
SET @max = 1000000;
SET @rc = 1;
INSERT INTO dbo.Nums VALUES(1);
-- double the table on every pass by appending each existing value shifted by @rc
WHILE @rc * 2 <= @max
BEGIN
    INSERT INTO dbo.Nums SELECT n + @rc FROM dbo.Nums;
    SET @rc = @rc * 2;
END
-- final partial pass: add only the values that stay within @max
INSERT INTO dbo.Nums
    SELECT n + @rc FROM dbo.Nums WHERE n + @rc <= @max;