Intersection of multiple arrays in PostgreSQL - sql

I have a view defined as:
CREATE VIEW View1 AS
SELECT Field1, Field2, array_agg(Field3) AS AggField
FROM Table1
GROUP BY Field1, Field2;
What I would like to do is get the intersection of the arrays in AggField with something like:
SELECT intersection(AggField) FROM View1 WHERE Field2 = 'SomeValue';
Is this at all possible, or is there a better way to achieve what I want?

The closest thing to an array intersection that I can think of is this:
select array_agg(e)
from (
select unnest(a1)
intersect
select unnest(a2)
) as dt(e)
This assumes that a1 and a2 are single dimension arrays with the same type of elements. You could wrap that up in a function something like this:
create function array_intersect(a1 int[], a2 int[]) returns int[] as $$
declare
ret int[];
begin
-- The reason for the kludgy NULL handling comes later.
if a1 is null then
return a2;
elseif a2 is null then
return a1;
end if;
select array_agg(e) into ret
from (
select unnest(a1)
intersect
select unnest(a2)
) as dt(e);
return ret;
end;
$$ language plpgsql;
Then you could do things like this:
=> select array_intersect(ARRAY[2,4,6,8,10], ARRAY[1,2,3,4,5,6,7,8,9,10]);
array_intersect
-----------------
{6,2,4,10,8}
(1 row)
Note that this doesn't guarantee any particular order in the returned array but you can fix that if you care about it. Then you could create your own aggregate function:
-- Pre-9.1
create aggregate array_intersect_agg(
sfunc = array_intersect,
basetype = int[],
stype = int[],
initcond = NULL
);
-- 9.1+ (AFAIK, I don't have 9.1 handy at the moment
-- see the comments below.
create aggregate array_intersect_agg(int[]) (
sfunc = array_intersect,
stype = int[]
);
And now we see why array_intersect does funny and somewhat kludgey things with NULLs. We need an initial value for the aggregation that behaves like the universal set and we can use NULL for that (yes, this smells a bit off but I can't think of anything better off the top of my head).
Once all this is in place, you can do things like this:
> select * from stuff;
a
---------
{1,2,3}
{1,2,3}
{3,4,5}
(3 rows)
> select array_intersect_agg(a) from stuff;
array_intersect_agg
---------------------
{3}
(1 row)
Not exactly simple or efficient but maybe a reasonable starting point and better than nothing at all.
Useful references:
array_agg
create aggregate
create function
PL/pgSQL
unnest

The accepted answer did not work for me. This is how I fixed it.
create or replace function array_intersect(a1 int[], a2 int[]) returns int[] as $$
declare
ret int[];
begin
-- RAISE NOTICE 'a1 = %', a1;
-- RAISE NOTICE 'a2 = %', a2;
if a1 is null then
-- RAISE NOTICE 'a1 is null';
return a2;
-- elseif a2 is null then
-- RAISE NOTICE 'a2 is null';
-- return a1;
end if;
if array_length(a1,1) = 0 then
return '{}'::integer[];
end if;
select array_agg(e) into ret
from (
select unnest(a1)
intersect
select unnest(a2)
) as dt(e);
if ret is null then
return '{}'::integer[];
end if;
return ret;
end;
$$ language plpgsql;

It is bit late to answer this question but maybe somebody will need it so I decided to share something I wrote cause did not found any ready solution for intersection of any number of arrays. So here it is. This function receives array of arrays, if it is only single array, function returns first array, if there are 2 arrays function intersects 2 arrays and returns result, if it is more that 2 arrays, function takes intersection of 2 first arrays, stores it in some variable and loops through all other arrays, intersect each next array with stored result and stores result in variable. if result is null it exists with null. In the and the variable that stores array with interacted data returned from the function.
CREATE OR REPLACE FUNCTION array_intersected(iarray bigint[][])
RETURNS bigint[] AS
$BODY$
declare out_arr bigint[]; set1 bigint[]; set2 bigint[];
BEGIN
--RAISE NOTICE '%', array_length(iarray, 1);
if array_length(iarray, 1) = 1 then
SELECT ARRAY(SELECT unnest(iarray[1:1])) into out_arr;
elseif array_length( iarray, 1) = 2 then
set1 := iarray[1:1];
set2 := iarray[2:2];
SELECT ARRAY(SELECT unnest(set1) INTERSECT SELECT unnest(set2))into out_arr;
elseif array_length(iarray, 1) > 2 then
set1 := iarray[1:1];
set2 := iarray[2:2];
--exit if no common numbers exists int 2 first arrays
SELECT ARRAY(SELECT unnest(set1) INTERSECT SELECT unnest(set2))into out_arr;
if out_arr = NULL then
EXIT;
END IF;
FOR i IN 3 .. array_upper(iarray, 1)
LOOP
set1 := iarray[i:i];
SELECT ARRAY(SELECT unnest(set1) INTERSECT SELECT unnest(out_arr))into out_arr;
if out_arr = NULL then
EXIT;
END IF;
END LOOP;
end if;
return out_arr;
END;
$BODY$
LANGUAGE plpgsql VOLATILE;
Here is the code to validate it works.
select array_intersected(array[[1, 2]]::bigint[][]);
select array_intersected(array[[1, 2],[2, 3]]::bigint[][]);
select array_intersected(array[[1, 2],[2, 3], [2, 4]]::bigint[][]);
select array_intersected(array[[1, 2, 3, 4],[null, null, 4, 3], [3, 1, 4, null]]::bigint[][]);

Related

Scope of column names, aliases, and OUT parameters in PL/pgSQL function

I have a hard time understanding why I can refer to the output columns in returns table(col type).
There is a subtle bug in the below code, the order by var refers to res in returns, not to data1 which we aliased to res. res in where is always null and we get 0 rows.
Why can I refer to the column name in output?
In what cases do I want this?
CREATE OR REPLACE FUNCTION public.test(var INTEGER)
RETURNS table(res int )
LANGUAGE plpgsql
AS $function$
begin
return query
select data1 res
from table_with_data
where res < var;
end
$function$
Why can I refer to the column name in output
From the manual, the section about function parameters:
column_name The name of an output column in the RETURNS TABLE syntax. This is effectively another way of declaring a named OUT parameter, except that RETURNS TABLE also implies RETURNS SETOF.
What this means is that in your case res is effectively a writeable variable, which type you plan to return a set of. As any other variable without a default value assigned, it starts off as null.
In what case do I want this
You can return multiple records from a function of this type with a single return query, but another way is by a series of multiple return query or return next - in the second case, filling out the fields in a record of your output table each time. You could have expected a return statement to end the function, but in this scenario only a single return; without anything added would have that effect.
create table public.test_res (data integer);
CREATE OR REPLACE FUNCTION public.test(var INTEGER)
RETURNS table(res int )
LANGUAGE plpgsql
AS $function$
begin
insert into public.test_res select res;--to inspect its initial value later
select 1 into res;
return next;
return next;--note that res isn't reset after returning next
return query select 2;--doesn't affect the current value of res
return next;--returning something else earlier didn't affect res either
return;--it will finish here
select 3 into res;
return next;
end
$function$;
select * from test(0);
-- res
-------
-- 1
-- 1
-- 2
-- 1
--(4 rows)
table public.test_res; --this was the initial value of res within the function
-- data
--------
-- null
--(1 row)
Which is the most useful with LOOPs
CREATE OR REPLACE FUNCTION public.test(var INTEGER)
RETURNS table(comment text,res int) LANGUAGE plpgsql AS $function$
declare rec record;
array_slice int[];
begin
return query select 'return query returned these multiple records in one go', a from generate_series(1,3,1) a(a);
res:=0;
comment:='loop exit when res>4';
loop exit when res>4;
select res+1 into res;
return next;
end loop;
comment:='while res between 5 and 8 loop';
while res between 5 and 8 loop
select res+2 into res;
return next;
end loop;
comment:='for element in reverse 3 .. -3 by 2 loop';
for element in reverse 3 .. -3 by 2 loop
select element into res;
return next;
end loop;
comment:='for <record> in <expression> loop';
for rec in select pid from pg_stat_activity where state<>'idle' loop
select rec.pid into res;
return next;
end loop;
comment:='foreach array_slice slice 1 in array arr loop';
foreach array_slice SLICE 1 in array ARRAY[[1,2,3],[11,12,13],[21,22,23]] loop
select array_slice[1] into res;
return next;
end loop;
end
$function$;
Example results
select * from public.test(0);
-- comment | res
----------------------------------------------------------+--------
-- return query returned these multiple records in one go | 1
-- return query returned these multiple records in one go | 2
-- return query returned these multiple records in one go | 3
-- loop exit when res>4 | 1
-- loop exit when res>4 | 2
-- loop exit when res>4 | 3
-- loop exit when res>4 | 4
-- loop exit when res>4 | 5
-- while res between 5 and 8 loop | 7
-- while res between 5 and 8 loop | 9
-- for element in reverse 3 .. -3 by 2 loop | 3
-- for element in reverse 3 .. -3 by 2 loop | 1
-- for element in reverse 3 .. -3 by 2 loop | -1
-- for element in reverse 3 .. -3 by 2 loop | -3
-- for <record> in <expression> loop | 118786
-- foreach array_slice slice 1 in array arr loop | 1
-- foreach array_slice slice 1 in array arr loop | 11
-- foreach array_slice slice 1 in array arr loop | 21
--(18 rows)
True, OUT parameters (including field names in a RETURNS TABLE (...) clause) are visible in all SQL DML statements in a PL/pgSQL function body, just like other variables. Find details in the manual chapters Variable Substitution and Returning from a Function for PL/pgSQL.
However, a more fundamental misunderstanding comes first here. The syntax of your nested SELECT is invalid to begin with. The PL/pgSQL variable happens to mask this problem (with a different problem). In SQL, you cannot refer to output column names (column aliases in the SELECT clause) in the WHERE clause. This is invalid:
select data1 res
from table_with_data
where res < var;
The manual:
An output column's name can be used to refer to the column's value in
ORDER BY and GROUP BY clauses, but not in the WHERE or HAVING clauses;
there you must write out the expression instead.
This is different for ORDER BY, which you mention in the text, but don't include in the query. See:
GROUP BY + CASE statement
Fixing immediate issue
Could be repaired like this:
CREATE OR REPLACE FUNCTION public.test1(var int)
RETURNS TABLE(res int)
LANGUAGE plpgsql AS
$func$
BEGIN
RETURN QUERY
SELECT data1 AS res -- column alias is just noise (or documentation)
FROM table_with_data
WHERE data1 < var; -- original column name!
END
$func$
fiddle
See:
Real number comparison for trigram similarity
The column alias is just noise in this case. The name of the column returned from the function is res in any case - as defined in the RETURNS TABLE clause.
Aside: It's recommended not to omit the AS keyword for column aliases (unlike table aliases). See:
Query to ORDER BY the number of rows returned from another SELECT
If there was actual ambiguity between column and variable name - say, you declared an OUT parameter or variable named data1 - you'd get an error message like this:
ERROR: column reference "data1" is ambiguous
LINE 2: select data1
^
DETAIL: It could refer to either a PL/pgSQL variable or a table column.
Brute force fix
Could be fixed with a special command at the start of the function body:
CREATE OR REPLACE FUNCTION public.test3(var int)
RETURNS TABLE(data1 int)
LANGUAGE plpgsql AS
$func$
#variable_conflict use_column -- ! to resolve conflicts
BEGIN
RETURN QUERY
SELECT data1
FROM table_with_data
WHERE data1 < var; -- !
END
$func$
See:
Naming conflict between function parameter and result of JOIN with USING clause
Proper fix
Table-qualify column names, and avoid conflicting variable names to begin with.
CREATE OR REPLACE FUNCTION public.test4(_var int)
RETURNS TABLE(res int)
LANGUAGE plpgsql STABLE AS
$func$
BEGIN
RETURN QUERY
SELECT t.data1 -- table-qualify column name
FROM table_with_data t
WHERE t.data1 < _var; -- !
END
$func$
Example:
Calling a PostgreSQL function from Java

How to implement a function that check a condition

I can't use boolean in a sql query.
Therefore I can't create a function that return true or false and use it to test a condition.
I must create a function that return something (1 for instance) and test it. Like that:
WITH
FUNCTION f (input INTEGER)
RETURN INTEGER
IS
BEGIN
RETURN CASE WHEN input = 1 THEN 1 ELSE 0 END;
END;
A AS (SELECT 1 a FROM DUAL)
SELECT *
FROM a
WHERE f(a.a) = 1
instead of that:
WITH
FUNCTION f (input INTEGER)
RETURN boolean
IS
BEGIN
RETURN input = 1 ;
END;
A AS (SELECT 1 a FROM DUAL)
SELECT *
FROM a
WHERE f(a.a)
Unless there is another way?
code
I've tried to use a macro but to no avail
WITH
FUNCTION ft
RETURN VARCHAR2 SQL_MACRO
IS
BEGIN
RETURN q'{
SELECT 1
FROM dual
}';
END;
FUNCTION fc
RETURN VARCHAR2 SQL_MACRO
IS
BEGIN
RETURN q'{
1=1
}';
END;
SELECT *
FROM ft()
WHERE fc()
ORA-00920: invalid relational operator
code
The BOOLEAN data type is a PL/SQL only data type and is not supported in Oracle SQL statements.
Use two constants:
0 for false, 1 (or non-zero) for true (as per the C language).
0 for no errors, non-zero for errors (as per Unix program exit codes).
'Y' for yes, 'N' for no.
'success' and 'failure'
etc.
Whatever you return is your personal preference. Document the convention you are going to use and then use it consistently so that everyone on the same project uses the same convention.
Unless there is another way?
No, just pick a convention for truthy/falsy values and stick to that.
WITH
FUNCTION f_check_int ( p_str VARCHAR2 )
RETURN VARCHAR2 ------- Y / N
IS
lv_data NUMBER;
BEGIN
lv_data := TO_NUMBER(p_str);
IF lv_data>0 AND MOD(lv_data,1)=0 THEN
RETURN 'Y';
ELSE
RETURN 'N';
END IF;
EXCEPTION
WHEN VALUE_ERROR THEN
RETURN 'N';
END;
A AS (SELECT 1 a FROM DUAL)
SELECT *
FROM a
WHERE f_check_int(a.a) = 'Y'

combine results from calling same function multiple times

I have a function func1(integer);
it returns rows of: partid,qty
for example:
select * from func1(1);
partid,qty
10 50
20 30
select * from func1(2);
partid,qty
10 5
20 30
11 10
I need to write a function that calls func1 with array and group by the results. func2(integer[]);
for example:
select * from func2(array[1,2]); should give:
partid,qty
10 55
20 60
11 10
I wrote this:
CREATE OR REPLACE FUNCTION func2(listx integer[])
RETURNS SETOF records_d AS
$BODY$
declare
item integer;
begin
foreach item in array listx loop
select * from func1(item);
end loop;
end;
$BODY$
LANGUAGE plpgsql VOLATILE
record_d is type (integer,integer)
This function doesn't work... I don't know how to combine the results from diffrent iterations of func1() and then return them.
You don't need another function for this, you can do that in a single SQL statement:
select f.partid, sum(f.qty)
from unnest(array[1,2]) i, func1(i) f
group by f.partid;
SQLFiddle example: http://sqlfiddle.com/#!15/8b083/1
As it is often with postgres, it have RETURN NEXT clause, which is just what You want.
I think You will have to adjust the code to cast results of innes select into records_d type. But RETURN NEXT is what You want.
CREATE OR REPLACE FUNCTION getAllFoo() RETURNS SETOF foo AS
$BODY$
DECLARE
r foo%rowtype;
BEGIN
FOR r IN SELECT * FROM foo
WHERE fooid > 0
LOOP
-- can do some processing here
RETURN NEXT r; -- return current row of SELECT
END LOOP;
RETURN;
END
$BODY$
LANGUAGE 'plpgsql' ;

How get all matching positions in a string?

I have a column flag_acumu in a table in PostgreSQL with values like:
'SSNSSNNNNNNNNNNNNNNNNNNNNNNNNNNNNSNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN'
I need to show all positions with an 'S'. With this code, I only get the first such position, but not the later ones.
SELECT codn_conce, flag_acumu, position('S' IN flag_acumu) AS the_pos
FROM dh12
WHERE position('S' IN flag_acumu) != 0
ORDER BY the_pos ASC;
How to get all of them?
In Postgres 9.4 or later you can conveniently use unnest() in combination with WITH ORDINALITY:
SELECT *
FROM dh12 d
JOIN unnest(string_to_array(d.flag_acumu, NULL))
WITH ORDINALITY u(elem, the_pos) ON u.elem = 'S'
WHERE d.flag_acumu LIKE '%S%' -- optional, see below
ORDER BY d.codn_conce, u.the_pos;
This returns one row per match.
WHERE d.flag_acumu LIKE '%S%' is optional to quickly eliminate source rows without any matches. Pays if there are more than a few such rows.
Detailed explanation and alternatives for older versions:
PostgreSQL unnest() with element number
Since you didn't specify your needs to a point in which one could answer properly, I'm going with my assumption that you want a list of positions of occurence of a substring (can be more than 1 character long).
Here's the function to do that using:
FOR .. LOOP control structure,
function substr(text, int, int).
CREATE OR REPLACE FUNCTION get_all_positions_of_substring(text, text)
RETURNS text
STABLE
STRICT
LANGUAGE plpgsql
AS $$
DECLARE
output_text TEXT := '';
BEGIN
FOR i IN 1..length($1)
LOOP
IF substr($1, i, length($2)) = $2 THEN
output_text := CONCAT(output_text, ';', i);
END IF;
END LOOP;
-- Remove first semicolon
output_text := substr(output_text, 2, length(output_text));
RETURN output_text;
END;
$$;
Sample call and output
postgres=# select * from get_all_positions_of_substring('soklesocmxsoso','so');
get_all_positions_of_substring
--------------------------------
1;6;11;13
This works too. And a bit faster I think.
create or replace function findAllposition(_pat varchar, _tar varchar)
returns int[] as
$body$
declare _poslist int[]; _pos int;
begin
_pos := position(_pat in _tar);
while (_pos>0)
loop
if array_length(_poslist,1) is null then
_poslist := _poslist || (_pos);
else
_poslist := _poslist || (_pos + _poslist[array_length(_poslist,1)] + 1);
end if;
_tar := substr(_tar, _pos + 1, length(_tar));
_pos := position(_pat in _tar);
end loop;
return _poslist;
end;
$body$
language plpgsql;
Will return a position list which is an int array.
{position1, position2, position3, etc.}

How can I retrieve a column value of a specific row

I'm using PostgreSQL 9.3.
The table partner.partner_statistic contains the following columns:
id reg_count
serial integer
I wrote the function convert(integer):
CREATE FUNCTION convert(d integer) RETURNS integer AS $$
BEGIN
--Do something and return integer result
END
$$ LANGUAGE plpgsql;
And now I need to write a function returned array of integers as follows:
CREATE FUNCTION res() RETURNS integer[] AS $$
<< outerblock >>
DECLARE
arr integer[]; --That array of integers I need to fill in depends on the result of query
r partner.partner_statistic%rowtype;
table_name varchar DEFAULT 'partner.partner_statistic';
BEGIN
FOR r IN
SELECT * FROM partner.partner_statistic offset 0 limit 100
LOOP
--
-- I need to add convert(r[reg_count]) to arr where r[id] = 0 (mod 5)
--
-- How can I do that?
END LOOP;
RETURN;
END;
$$ LANGUAGE plpgsql;
You don't need (and shouldn't use) PL/PgSQL loops for this. Just use an aggregate. I'm kind of guessing about what you mean by "where r[id] = 0 (mod 5) but I'm assuming you mean "where id is evenly divisible by 5". (Note that this is NOT the same thing as "every fifth row" because generated IDs have gaps).
Something like:
SELECT array_agg(r.reg_count)
FROM partner.partner_statistic
WHERE id % 5 = 0
LIMIT 100
probably meets your needs.
If you want to return the value, use RETURN QUERY SELECT ... or preferably use a simple sql language function.
If you want a dynamic table name, use:
RETURN QUERY EXECUTE format('
SELECT array_agg(r.reg_count)
FROM %I
WHERE id % 5 = 0
LIMIT 100', table_name::regclass);