Postgresql function (upsert and delete): how to pass a set of rows of table type to function call - sql

I have a table
CREATE TABLE items(
id SERIAL PRIMARY KEY,
group_id INT NOT NULL,
item_id INT NOT NULL,
name TEXT,
.....
.....
);
I am creating a function that
takes a set of row values for a single group_id, and fails if multiple group_ids are present in the input rows
compares it with matching values in the table (only for that group_id)
updates changed values (only for the input group_id)
inserts new values
deletes table rows that are absent in the row input (compare rows with group_id and item_id)(only for the input group_id)
this is my function definition
CREATE OR REPLACE FUNCTION update_items(rows_input items[]) RETURNS boolean as $$
DECLARE
rows items[];
group_id_input integer;
BEGIN
-- get single group_id from input rows, fail if multiple group_id's present in input
-- read items of that group_id in table
-- compare input rows and table rows (of the same group_id)
-- create transaction
-- delete absent rows
-- upsert
-- return success of transaction (boolean)
END;
$$ LANGUAGE plpgsql;
I am trying to call the function in a query
select update_items(
(38,1,1283,"Name1"),
(39,1,1471,"Name2"),
(40,1,1333,"Name3")
);
I get the following error
Failed to run sql query: column "Name1" does not exist
I tried removing the id column values: that gives me the same error
What is the correct way to pass row values to a function that accepts table type array as arguments?

updates changed values
inserts new values deletes table rows that are
absent in the row input (compare rows with group_id and item_id)
If you want to upsert, you must do so against a unique constraint.
Here there are two unique constraints: the primary key (id) and (group_id, item_id).
INSERT ... ON CONFLICT needs to take both of these unique constraints into account.
Since you want to pass an items[] value to the function, this also means that any id that is not among the function's input arguments will be deleted.
drop table if exists items cascade;
begin;
CREATE TABLE items(
id bigint GENERATED BY DEFAULT as identity PRIMARY KEY,
group_id INT NOT NULL,
item_id INT NOT NULL,
name TEXT
,unique(group_id,item_id)
);
insert into items values
(38,1,1283,'original_38'),
(39,1,1471,'original_39'),
(40,1,1333,'original_40'),
(42,1,1332,'original_42');
end;
main function:
-- Synchronises table "items" with the rows supplied in in_items:
--   1. every stored row whose id does not occur in in_items is deleted
--      (regardless of its group_id);
--   2. each input row is inserted unless a row with the same id exists;
--   3. each input row is then inserted again keyed on (group_id, item_id),
--      which overwrites "name" when that pair is already stored.
-- Emits one NOTICE per input row and always returns TRUE.
CREATE OR REPLACE FUNCTION update_items (in_items items[])
    RETURNS boolean
    AS $FUNC$
DECLARE
    item_row items;
    keep_ids bigint[];
BEGIN
    -- ids that must survive the purge
    keep_ids := ARRAY (
        SELECT src.id
        FROM unnest(in_items) AS src
    );

    DELETE FROM items
    WHERE id <> ALL (keep_ids);

    FOREACH item_row IN ARRAY in_items LOOP
        -- first pass: add the row when its id is not stored yet
        INSERT INTO items
        SELECT item_row.*
        ON CONFLICT (id) DO NOTHING;

        -- second pass: refresh the name of the stored (group_id, item_id) row
        INSERT INTO items
        SELECT item_row.*
        ON CONFLICT (group_id, item_id)
            DO UPDATE SET name = EXCLUDED.name;

        RAISE NOTICE 'rec.groupid: %, rec.items_id:%', item_row.group_id, item_row.item_id;
    END LOOP;

    RETURN TRUE;
END
$FUNC$
LANGUAGE plpgsql;
call it:
SELECT
*
FROM
update_items ('{"(38, 1, 1283, Name1) "," (39, 1, 1471, Name2) "," (40, 1, 1333, Name3)"}'::items[]);
references:
Iterating over integer[] in PL/pgSQL
How to match elements in an array of composite type?
IN vs ANY operator in PostgreSQL

Here's how I achieved UPSERT with DELETE missing rows, if anyone is looking to do the same.
CREATE OR REPLACE FUNCTION update_items(in_rows items[]) RETURNS INT AS $$
-- Replaces the stored items of ONE group with the rows supplied in in_rows.
--   * All input rows must carry the same group_id; otherwise an exception
--     is raised and nothing is changed.
--   * Stored rows of that group whose item_id is absent from in_rows are
--     deleted; rows of other groups are never touched.
--   * Every input row is inserted, or updates the stored row that has the
--     same (group_id, item_id).
-- Returns the processed group_id, or NULL when in_rows is NULL or empty
-- (nothing is modified in that case).
DECLARE
    in_groups   INTEGER[];
    in_group_id INTEGER;
    in_item_ids INTEGER[];
BEGIN
    -- get single group id from input rows, fail if multiple group ids present in input
    in_groups := ARRAY (SELECT DISTINCT r.group_id FROM UNNEST(in_rows) AS r);
    IF ARRAY_LENGTH(in_groups, 1) IS NULL THEN
        -- no input rows: there is no group to synchronise
        RETURN NULL;
    END IF;
    IF ARRAY_LENGTH(in_groups, 1) > 1 THEN
        RAISE EXCEPTION 'Multiple group_ids found in input items: %', in_groups;
    END IF;
    in_group_id := in_groups[1];

    -- delete items of this group that are absent in in_rows.
    -- "<> ALL" is required here: "<> ANY" is true as soon as the array holds
    -- any value different from item_id, which would delete almost everything.
    in_item_ids := ARRAY (SELECT r.item_id FROM UNNEST(in_rows) AS r);
    DELETE FROM items
    WHERE group_id = in_group_id
      AND item_id <> ALL (in_item_ids);

    -- upsert in_rows
    -- NOTE(review): the columns in the SET list come from the author's full
    -- table definition (elided in the CREATE TABLE shown); adjust to the
    -- actual non-key columns of "items".
    INSERT INTO items
    SELECT * FROM UNNEST(in_rows)
    ON CONFLICT (group_id, item_id)
    DO UPDATE SET
        parent_group_id = EXCLUDED.parent_group_id,
        mat_centre_id   = EXCLUDED.mat_centre_id,
        name            = EXCLUDED.name,
        opening_date    = EXCLUDED.opening_date;

    -- report which group was synchronised
    RETURN in_group_id;
END;
$$ LANGUAGE plpgsql;
This function removes rows that are missing from your in_rows

Related

Postgres UNIQUE CONSTRAINT/INDEX for string array

I'm trying to prevent the user from inserting more than one copy of the same array of strings into the table.
I have created a Unique Constraint on the array: CONSTRAINT users_uniq UNIQUE(usersArray),
but the user can still insert the same values to the array but in a different order.
My table:
id
usersArray
1
{011,123}
2
{123,011} // should not be possible
Input : {011,123} --> error unique // the right error
Input : {123,011} --> Worked // Should have return an error instead
How can I make the value {123,011} and {011,123} considered the same?
The trigger solution is not transparent, as it actually modifies the data. Here is an alternative: create an array_sort helper function (it might be useful for other cases too) and a unique index that uses it.
-- Returns the elements of arr sorted ascending (NULL elements last).
-- An empty array yields an empty array, so that a unique index built on
-- array_sort(arr) also rejects a second empty array; aggregating with
-- array_agg would return NULL there, and NULLs never collide in a unique index.
-- STRICT keeps a NULL input mapping to NULL.
create or replace function array_sort (arr anyarray) returns anyarray immutable strict as
$$
select array(select x from unnest(arr) as x order by x);
$$ language sql;
create table t (arr integer[]);
create unique index tuix on t (array_sort(arr));
Demo
insert into t values ('{1,2,3}'); -- OK
insert into t values ('{2,1,3}'); -- unique violation
select * from t;
arr
{1,2,3}
A trigger which enforces the order of the items in the array could be one approach. Here's an example:
The fiddle
CREATE TABLE test ( arr int ARRAY, unique (arr) );
-- Trigger function: stores the array with its elements sorted ascending so
-- that the plain UNIQUE (arr) constraint treats {1,2} and {2,1} as equal.
CREATE FUNCTION test_insert_trig_func()
RETURNS trigger AS $$
BEGIN
  NEW.arr := ARRAY(SELECT u FROM unnest(NEW.arr) AS u ORDER BY u);
  RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Fire on UPDATE as well as INSERT; otherwise an UPDATE could store an
-- unsorted array and slip past the uniqueness rule.
CREATE TRIGGER test_insert_trig
BEFORE INSERT OR UPDATE ON test
FOR EACH ROW
EXECUTE PROCEDURE test_insert_trig_func()
;
INSERT INTO test VALUES ('{1, 2}');
INSERT INTO test VALUES ('{2, 1}'); -- Generates a unique constraint violation
SELECT * FROM test;
The result:
arr
{1,2}

What are the different ways of adding a constraint so that only items that are available on the order date can be inserted?

order.date must be between item.date_from and item.date_to... what are the different ways of doing that?
CREATE TABLE "item" (
"id" SERIAL PRIMARY KEY,
"date_from" DATE NOT NULL,
"date_to" DATE NOT NULL
);
CREATE TABLE "order" (
"id" SERIAL PRIMARY KEY,
"date" DATE NOT NULL
);
CREATE TABLE "order_item" (
"order" INTEGER NOT NULL REFERENCES "order",
"item" INTEGER NOT NULL REFERENCES "item"
);
Check constraints work on simple expressions. For example, a simple sanity check on the order: check( date > '2010-01-01'). There's also exclusion constraints which check no two rows have the same value as defined by the exclusion. But, with the exception of foreign key constraints, constraints don't query other tables.
You can solve this with a trigger on insert and update, and I'll go into that below, but it's better to solve this sort of problem with referential integrity. However, I can't think of a way to do that.
You can make a view of available items for the order. Here $1 is the date of the order.
-- Items that may be put on an order.
-- $1 is a placeholder for the order's date: substitute a concrete date (or
-- use this SELECT as a prepared statement) since a view cannot take parameters.
create temporary view items_available_to_order as
select *
-- pluralize table names to avoid conflicting with keywords and columns
from items
-- date_from and date_to has become a single daterange when_available;
-- "@>" tests that the range contains the given date
where items.when_available @> $1;
Then only insert items from that view.
If you want to go the trigger route (you can do both) write a function which checks whether an order's item is valid. It either raises an exception or returns a trigger. new is the inserted row, or the row after an update.
I changed some of the table and column names and types to avoid common pitfalls.
-- Row-level trigger function for item_orders.
-- Accepts the new row only when the referenced item's availability range
-- (items.when_available) contains the referenced order's date
-- (orders.ordered_on); otherwise raises an exception.
create function check_item_order_is_valid()
returns trigger
language plpgsql
as $body$
declare
  item_is_available boolean;
begin
  -- Look the item and the order up directly by the new row's keys. The row
  -- being inserted is not in item_orders yet, so that table must not be
  -- part of the query.
  select items.when_available @> orders.ordered_on
    into item_is_available
  from items
  join orders on orders.id = new.order_id
  where items.id = new.item_id;

  -- "is not true" also rejects the NULL case (item or order not found, or a
  -- NULL range/date); a bare "not" would let those rows through.
  if item_is_available is not true then
    raise exception 'Item #% is not available for order #%',
      new.item_id, new.order_id;
  end if;
  return new;
end
$body$;
Then define a trigger to call the function when rows are inserted or updated in the item/order table.
create trigger check_item_orders
before insert or update
on item_orders
for each row
execute function check_item_order_is_valid();
Demonstration.
What if the valid range of an item changes? You need an update trigger on item to check that its orders are still valid. Maybe. Depends on your business logic.
A test example:
-- Row-level trigger function for order_item.
-- Reads the order's date and the item's [date_from, date_to] interval,
-- reports them with a NOTICE, and lets the row through only when the order
-- date lies inside the (inclusive) interval. Returning NULL from a BEFORE
-- row trigger silently skips the insert/update of that row.
CREATE OR REPLACE FUNCTION public.item_date()
RETURNS trigger
LANGUAGE plpgsql
AS $function$
DECLARE
    order_date date;
    from_date date;
    to_date date;
BEGIN
    select into order_date "date" from "order" where id = new.order;
    select into from_date, to_date date_from, date_to from item where id = new.item;
    raise notice 'Order date %, from_date %, to_date %', order_date, from_date, to_date;
    --Use date range to test whether order date is in item date range.
    -- "<@" is the "is contained by" operator; '[]' makes both bounds inclusive.
    if order_date <@ daterange(from_date, to_date, '[]') then
        return new;
    else
        return null;
    end if;
END;
$function$;
create trigger item_date_check before insert or update on order_item for each row execute function item_date();
insert into item values (1, '09/01/2021', '10/31/2021');
insert into item values (2, '07/01/2021', '08/31/2021');
insert into "order" values (1, '09/05/2021');
insert into order_item values (1, 1);
NOTICE: Order date 2021-09-05, from_date 2021-09-01, to_date 2021-10-31
INSERT 0 1
--Returning NULL causes the INSERT not to happen.
insert into order_item values (1, 2);
NOTICE: Order date 2021-09-05, from_date 2021-07-01, to_date 2021-08-31
INSERT 0 0
Note that I had to quote "order", as it is also a reserved word. You might want to take a look at Key (reserved) Words. For range functions/operators see Range Functions. For general information on ranges see Range Types.

Is SELECT "faster" than function with nested INSERT?

I'm using a function that inserts a row to a table if it doesn't exist, then returns the id of the row.
Whenever I put the function inside a SELECT statement, with values that don't exist in the table yet, e.g.:
SELECT * FROM table WHERE id = function(123);
... it returns an empty row. However, running it again with the same values will return the row with the values I want to see.
Why does this happen? Is the INSERT running behind the SELECT speed? Or does PostgreSQL cache the table when it didn't exist, and at next run, it displays the result?
Here's a ready to use example of how this issue can occur:
CREATE TABLE IF NOT EXISTS test_table (
    id     INTEGER,
    tvalue boolean
);
-- Returns user_id if a row with that id is stored in test_table; otherwise
-- inserts (user_id, TRUE) and returns the id of the new row.
CREATE OR REPLACE FUNCTION test_function(user_id INTEGER)
    RETURNS integer
    LANGUAGE plpgsql
AS $$
DECLARE
    found_id INTEGER;
BEGIN
    -- look for an existing row (first column of the result lands in found_id)
    EXECUTE format('SELECT * FROM test_table WHERE id = $1')
        INTO found_id
        USING user_id;

    IF found_id IS NOT NULL THEN
        RETURN found_id;
    END IF;

    -- not stored yet: create it and hand back its id
    INSERT INTO test_table(id, tvalue)
    VALUES (user_id, TRUE)
    RETURNING id
    INTO found_id;

    RETURN found_id;
END;
$$;
Call:
SELECT * FROM test_table WHERE id = test_function(4);
To reproduce the issue, pass any integer that doesn't exist in the table, yet.
The example is broken in multiple places.
No need for dynamic SQL with EXECUTE.
SELECT * in the function is wrong.
Your table definition should have a UNIQUE or PRIMARY KEY constraint on (id).
Most importantly, the final SELECT statement is bound to fail. Since the function is VOLATILE (has to be), it is evaluated once for every existing row in the table. Even if that worked, it would be a performance nightmare. But it does not. As @user2864740 commented, there is also a problem with visibility. Postgres checks every existing row against the result of the function, which in turn adds 1 or more rows, and those rows are not yet in the snapshot the SELECT is operating on.
SELECT * FROM test_table WHERE id = test_function(4);
This would work (but see below!):
-- Demo table; the primary key on id is what ON CONFLICT below relies on.
CREATE TABLE test_table (
id int PRIMARY KEY --!
, tvalue bool
);
-- Returns the test_table row for _user_id, creating it (tvalue = TRUE) when
-- it is not stored yet.
CREATE OR REPLACE FUNCTION test_function(_user_id int)
RETURNS test_table LANGUAGE sql AS
$func$
-- Try to insert; on a key conflict nothing is inserted and "ins" is empty.
WITH ins AS (
INSERT INTO test_table(id, tvalue)
VALUES (_user_id, TRUE)
ON CONFLICT DO NOTHING
RETURNING *
)
-- Branch 1: the freshly inserted row, if any.
TABLE ins
UNION ALL
-- Branch 2: the row that was already stored before this statement ran.
SELECT * FROM test_table WHERE id = _user_id
-- At most one row is returned.
LIMIT 1
$func$;
And replace your SELECT with just:
SELECT * FROM test_function(1);
db<>fiddle here
Related:
Return a value if no record is found
How to use RETURNING with ON CONFLICT in PostgreSQL?
There is still a race condition for concurrent calls. If that can happen, consider:
Is SELECT or INSERT in a function prone to race conditions?

How insert rows with max(order_field) + 1 transactionally in PostgreSQL

I need to insert in a PostgreSQL table a row with a column containing the max value + 1 for this same column on a subset of the rows of the table. That column is used to ordering the rows in that subset.
I'm trying to update the column value in an after insert trigger, but I'm getting duplicate values for this column in different rows.
What's the best way to do that while avoiding duplicate values for the ordering column within the subset, in a concurrent environment with a lot of inserts in a short time?
Thanks in advance
EDIT:
The subset is defined by another column of the same table: this column has the same value for all the related rows.
If that column is used only for ordering then use a sequence:
create table t (
column1 integer,
ordering_column serial
);
http://www.postgresql.org/docs/current/static/datatype-numeric.html#DATATYPE-NUMERIC-TABLE
New transactional-safe answer:
To make it in a transactional-safe way you could use this trigger, which creates sequences for each different "set_id" value:
-- BEFORE INSERT row trigger function.
-- Assigns NEW.my_index from a sequence dedicated to the row's my_set_id
-- (named 'my_indexer_name_<my_set_id>'), creating that sequence on first use.
-- Raises an exception when NEW.my_set_id is NULL, because no sequence name
-- can be derived from it.
create or replace function calculate_index() returns trigger
as $$
declare
  my_indexer_name text;
begin
  if NEW.my_set_id is null then
    raise exception 'calculate_index: my_set_id must not be null';
  end if;
  my_indexer_name := 'my_indexer_name_' || NEW.my_set_id;
  -- to_regclass resolves the name through the search_path, the same way
  -- nextval will, instead of matching any relation in any schema.
  if to_regclass(quote_ident(my_indexer_name)) is null then
    -- %I quotes the identifier; IF NOT EXISTS tolerates a sequence that
    -- another session created in the meantime.
    execute format('create sequence if not exists %I', my_indexer_name);
  end if;
  NEW.my_index := nextval(quote_ident(my_indexer_name)::regclass);
  return new;
end
$$
language plpgsql;
CREATE TRIGGER my_indexer_trigger
BEFORE INSERT ON my_table FOR EACH ROW
EXECUTE PROCEDURE calculate_index();
Also you could create manually sequences named 'my_indexer_name_1', 'my_indexer_name_2', etc. if your set_id possible values are known beforehand, then you could eliminate the if-then from the trigger function above.
This was my initial and not transactional-safe answer:
I would create a new helper table let's call it set_indexes:
create table set_indexes( set_id integer, max_index integer );
each record has the set_id and the max index value of that set. e.g.:
set_id, max_index
1 53
2 12
3 43
in the trigger code you would:
-- Fragment of a BEFORE INSERT trigger body (not safe under concurrency):
-- derive the next index of the row's set from the helper table set_indexes.
select max_index + 1 from set_indexes where set_indexes.set_id = NEW.my_set_id
into NEW.my_index;
-- Check if the set_id is new:
if NEW.my_index is null then
    -- first row of this set: its index and the stored maximum are both 1
    insert into set_indexes( set_id, max_index) values (NEW.my_set_id, 1);
    NEW.my_index = 1;
else
    -- remember the index just handed out as the new maximum of the set
    update set_indexes set max_index = NEW.my_index where set_indexes.set_id = NEW.my_set_id;
end if;

Loop in function does not work as expected

Using PostgreSQL 9.0.4
Below is a very similar structure of my table:
-- Department hierarchy: master_fk points at the parent department
-- (NULL for a root department).
CREATE TABLE departamento
(
id bigserial NOT NULL,
master_fk bigint,
nome character varying(100) NOT NULL,
CONSTRAINT departamento_pkey PRIMARY KEY (id),
CONSTRAINT departamento_master_fk_fkey FOREIGN KEY (master_fk)
REFERENCES departamento (id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION
);
And the function I created:
-- Intended to return the given department ids together with the ids of all
-- their ancestors (following master_fk up to the root), collected through
-- the temporary table tbl_temp_dptos.
-- Parameter $1: array of starting department ids.
CREATE OR REPLACE FUNCTION fn_retornar_dptos_ate_raiz(bigint[])
RETURNS bigint[] AS
$BODY$
DECLARE
lista_ini_dptos ALIAS FOR $1;
dp_row departamento%ROWTYPE;
dpto bigint;
-- NOTE(review): retorno_dptos is declared but never used below.
retorno_dptos bigint[];
BEGIN
-- Probe the temp table; create it when the probe fails because it does not exist.
BEGIN
PERFORM id FROM tbl_temp_dptos;
EXCEPTION
WHEN undefined_table THEN
EXECUTE 'CREATE TEMPORARY TABLE tbl_temp_dptos (id bigint NOT NULL) ON COMMIT DELETE ROWS';
END;
-- Visit every element of the input array.
FOR i IN array_lower(lista_ini_dptos, 1)..array_upper(lista_ini_dptos, 1) LOOP
-- NOTE(review): dp_row has the full row type of departamento, while only two
-- columns are selected; they are assigned to the row's fields by position.
SELECT id, master_fk INTO dp_row FROM departamento WHERE id=lista_ini_dptos[i];
IF dp_row.id IS NOT NULL THEN
EXECUTE 'INSERT INTO tbl_temp_dptos VALUES ($1)' USING dp_row.id;
WHILE dp_row.master_fk IS NOT NULL LOOP
dpto := dp_row.master_fk;
-- NOTE(review): dpto was just set to the parent id, but this lookup still
-- filters on lista_ini_dptos[i], i.e. it re-reads the starting department.
SELECT id, master_fk INTO dp_row FROM departamento WHERE id=lista_ini_dptos[i];
EXECUTE 'INSERT INTO tbl_temp_dptos VALUES ($1)' USING dp_row.id;
END LOOP;
END IF;
END LOOP;
-- Everything currently stored in the temp table is returned.
RETURN ARRAY(SELECT id FROM tbl_temp_dptos);
END;
$BODY$
LANGUAGE plpgsql VOLATILE
Any questions about the names I can translate ..
What is the idea of the function? I first check if the temporary table already exists (perform), and when the exception occurs I create a temporary table.
Then I take each element in the array and use it to fetch the id and master_fk of a department. If the search is successful (check if id is not null, it is even unnecessary) I insert the id in the temporary table and start a new loop.
The second loop is intended to get all parents of that department which was previously found by performing the previous steps (ie, pick a department and insert it into the temporary table).
At the end of the second loop returns to the first. When this one ends I return bigint[] refers to what was recorded in the temporary table.
My problem is that the function returns me the same list I provide. What am I doing wrong?
There is a lot I would do differently, and to great effect.
Table definition
Starting with the table definition and naming conventions. These are mostly just opinions:
CREATE TEMP TABLE conta (conta_id bigint primary key, ...);
CREATE TEMP TABLE departamento (
dept_id serial PRIMARY KEY
, master_id int REFERENCES departamento (dept_id)
, conta_id bigint NOT NULL REFERENCES conta (conta_id)
, nome text NOT NULL
);
Major points
Are you sure you need a bigserial for departments? There are hardly that many on this planet. A plain serial should suffice.
I hardly ever use character varying with a length restriction. Unlike with some other RDBMS there is no performance gain whatsoever by using a restriction. Add a CHECK constraint if you really need to enforce a maximum length. I just use text, mostly and save myself the trouble.
I suggest a naming convention where the foreign key column shares the name with the referenced column, so master_id instead of master_fk, etc. Also allows to use USING in joins.
And I rarely use the non-descriptive column name id. Using dept_id instead here.
PL/pgSQL function
It can be largely simplified to:
-- Returns the ids of the given departments plus the ids of their ancestors,
-- found by following master_id upwards; ids are collected in the temporary
-- table tbl_temp_dptos and returned as an array.
-- Parameter lista_ini_depts: starting department ids (VARIADIC).
CREATE OR REPLACE FUNCTION f_retornar_plpgsql(lista_ini_depts VARIADIC int[])
RETURNS int[] AS
$func$
DECLARE
_row departamento; -- %ROWTYPE is just noise
BEGIN
-- Create the temp table only when this session does not have it yet.
IF NOT EXISTS ( -- simpler in 9.1+, see below
SELECT FROM pg_catalog.pg_class
WHERE relnamespace = pg_my_temp_schema()
AND relname = 'tbl_temp_dptos') THEN
CREATE TEMP TABLE tbl_temp_dptos (dept_id bigint NOT NULL)
ON COMMIT DELETE ROWS;
END IF;
FOR i IN array_lower(lista_ini_depts, 1) -- simpler in 9.1+, see below
.. array_upper(lista_ini_depts, 1) LOOP
SELECT * INTO _row -- since rowtype is defined, * is best
FROM departamento
WHERE dept_id = lista_ini_depts[i];
-- Ids that match no department are skipped.
CONTINUE WHEN NOT FOUND;
INSERT INTO tbl_temp_dptos VALUES (_row.dept_id);
-- Climb from the current department to its parent until no parent row is found.
LOOP
SELECT * INTO _row
FROM departamento
WHERE dept_id = _row.master_id;
EXIT WHEN NOT FOUND;
-- Record the ancestor only if it is not in the temp table already.
INSERT INTO tbl_temp_dptos
SELECT _row.dept_id
WHERE NOT EXISTS (
SELECT FROM tbl_temp_dptos
WHERE dept_id =_row.dept_id);
END LOOP;
END LOOP;
RETURN ARRAY(SELECT dept_id FROM tbl_temp_dptos);
END
$func$ LANGUAGE plpgsql;
Call:
SELECT f_retornar_plpgsql(2, 5);
Or:
SELECT f_retornar_plpgsql(VARIADIC '{2,5}');
ALIAS FOR $1 is outdated syntax and discouraged. Use function parameters instead.
The VARIADIC parameter makes it more convenient to call. Related:
Pass multiple values in single parameter
You don't need EXECUTE for queries without dynamic elements. Nothing to gain here.
You don't need exception handling to create a table. Quoting the manual here:
Tip: A block containing an EXCEPTION clause is significantly more
expensive to enter and exit than a block without one. Therefore, don't
use EXCEPTION without need.
Postgres 9.1 or later has CREATE TEMP TABLE IF NOT EXISTS. I use a workaround for 9.0 to conditionally create the temp table.
Postgres 9.1 also offers FOREACH to loop through an array.
All that said, here comes the bummer: you don't need most of this.
SQL function with rCTE
Even in Postgres 9.0, a recursive CTE makes this a whole lot simpler:
-- Returns the distinct ids of the given departments and of all their
-- ancestors (following master_id), computed with a recursive CTE.
-- Parameter lista_ini_depts: starting department ids (VARIADIC), read as $1.
CREATE OR REPLACE FUNCTION f_retornar_sql(lista_ini_depts VARIADIC int[])
RETURNS int[] AS
$func$
WITH RECURSIVE cte AS (
-- Anchor: input ids that exist in departamento.
SELECT dept_id, master_id
FROM unnest($1) AS t(dept_id)
JOIN departamento USING (dept_id)
UNION ALL
-- Recursive step: the parent of every department found so far.
SELECT d.dept_id, d.master_id
FROM cte
JOIN departamento d ON d.dept_id = cte.master_id
)
SELECT ARRAY(SELECT DISTINCT dept_id FROM cte) -- distinct values
$func$ LANGUAGE sql;
Same call.
Closely related answer with explanation:
Tree Structure and Recursion
SQL Fiddle demonstrating both.
I managed to fix my code. At the end of this response is its final form, but if you have any suggestions for improvement are welcome. Here are the changes:
1 - I had provided only the essential structure of my table; in reality it is much bigger. Before the master_fk field there is a field called account_fk, and because the variable is declared as `dp_row departamento%ROWTYPE`, the entire structure of my table is copied to the variable. So if I fill only the first two fields, i.e. id and account_fk, then master_fk, which is the third field, will be null.
2 - @Nicolas was right, and I ended up using the variable dpto for the second loop. I had forgotten to fill it inside the loop, and to use it in the search done within the loop.
3 - I added an if statement to make sure that would not have duplicates in the temporary table.
Correction in the structure of my table:
CREATE TABLE departamento
(
id bigserial NOT NULL,
account_fk bigint NOT NULL,
master_fk bigint,
nome character varying(100) NOT NULL,
CONSTRAINT departamento_pkey PRIMARY KEY (id),
CONSTRAINT departamento_account_fk_fkey FOREIGN KEY (account_fk)
REFERENCES conta (id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION,
CONSTRAINT departamento_master_fk_fkey FOREIGN KEY (master_fk)
REFERENCES departamento (id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION
)
My function as it is now:
-- Returns the ids of the given departments together with the ids of all
-- their ancestors (following master_fk up to the root).
--   $1: array of starting department ids; ids that match no department are
--       ignored, and a NULL or empty array adds nothing.
-- Ids are collected in the session's temporary table tbl_temp_dptos
-- (created on first use, emptied at commit) and each id is stored once.
-- NOTE(review): rows left in tbl_temp_dptos by an earlier call in the same
-- transaction are part of the result as well - confirm this is intended.
CREATE OR REPLACE FUNCTION fn_retornar_dptos_ate_raiz(bigint[]) RETURNS bigint[] AS
$BODY$
DECLARE
    lista_ini_dptos ALIAS FOR $1;
    cur_id bigint;  -- id of the department row just read
    dpto   bigint;  -- id of the next department to visit (NULL past the root)
BEGIN
    -- Create the scratch table when this session does not have it yet.
    BEGIN
        PERFORM id FROM tbl_temp_dptos;
    EXCEPTION
        WHEN undefined_table THEN
            EXECUTE 'CREATE TEMPORARY TABLE tbl_temp_dptos (id bigint NOT NULL) ON COMMIT DELETE ROWS';
    END;

    -- array_lower() is NULL for a NULL or empty array, and a FOR loop cannot
    -- take NULL bounds.
    IF array_lower(lista_ini_dptos, 1) IS NOT NULL THEN
        FOR i IN array_lower(lista_ini_dptos, 1)..array_upper(lista_ini_dptos, 1) LOOP
            -- Walk from the starting department up to the root. Only the two
            -- needed columns are read into scalars, so the result does not
            -- depend on the column order of departamento.
            dpto := lista_ini_dptos[i];
            WHILE dpto IS NOT NULL LOOP
                SELECT d.id, d.master_fk
                  INTO cur_id, dpto
                FROM departamento AS d
                WHERE d.id = dpto;
                EXIT WHEN NOT FOUND;
                -- A department that is already recorded was reached by an
                -- earlier walk, which also recorded its ancestors; stopping
                -- here avoids duplicates and ends the loop on cyclic data.
                EXIT WHEN EXISTS (SELECT 1 FROM tbl_temp_dptos AS t WHERE t.id = cur_id);
                INSERT INTO tbl_temp_dptos (id) VALUES (cur_id);
            END LOOP;
        END LOOP;
    END IF;

    RETURN ARRAY(SELECT id FROM tbl_temp_dptos);
END;
$BODY$
LANGUAGE plpgsql VOLATILE;