Postgres UNIQUE CONSTRAINT/INDEX for string array - sql

I'm trying to prevent the user from inserting the same array of strings into the table more than once.
I have created a Unique Constraint on the array: CONSTRAINT users_uniq UNIQUE(usersArray),
but the user can still insert the same values to the array but in a different order.
My table:
id
usersArray
1
{011,123}
2
{123,011} // should not be possible
Input : {011,123} --> error unique // the right error
Input : {123,011} --> Worked // Should have return an error instead
How can I make the value {123,011} and {011,123} considered the same?

The trigger solution is not transparent, as it actually modifies the data. Here is an alternative: create an array_sort helper function (it might be useful in other cases too) and a unique index that uses it.
-- array_sort(arr): returns the elements of arr in ascending order.
--
-- Intended for use in a unique expression index so that arrays holding the
-- same elements in a different order are treated as duplicates.
--
--   * STRICT: a NULL array yields NULL.
--   * An empty array yields an empty array. ARRAY(subquery) is used instead of
--     array_agg() because array_agg() over zero rows returns NULL, and NULL
--     index entries never collide, so several '{}' rows would pass the
--     unique index.
create or replace function array_sort (arr anyarray) returns anyarray
immutable strict as
$$
select array(select x from unnest(arr) x order by x);
$$ language sql;
-- Demo table holding a single integer-array column.
CREATE TABLE t (
    arr integer[]
);

-- Uniqueness is enforced on the sorted form of the array, not on the stored
-- value, so the stored data is left untouched.
CREATE UNIQUE INDEX tuix
    ON t (array_sort(arr));
Demo
INSERT INTO t (arr) VALUES ('{1,2,3}'); -- OK
INSERT INTO t (arr) VALUES ('{2,1,3}'); -- unique violation
SELECT arr FROM t;
arr
{1,2,3}

A trigger which enforces the order of the items in the array could be one approach. Here's an example:
The fiddle
-- Table whose array column carries a plain UNIQUE constraint; the constraint
-- compares arrays element by element, in order.
CREATE TABLE test (
    arr integer[],
    UNIQUE (arr)
);
-- Row-level trigger function: replaces NEW.arr with its elements in
-- ascending order before the row is stored, so the UNIQUE (arr) constraint
-- sees a canonical ordering.
CREATE FUNCTION test_insert_trig_func()
    RETURNS trigger
    LANGUAGE plpgsql
AS $$
BEGIN
    NEW.arr := ARRAY(
        SELECT elem
        FROM unnest(NEW.arr) AS elem
        ORDER BY elem
    );
    RETURN NEW;
END;
$$;
-- Normalise the array order on every write. UPDATE is included as well as
-- INSERT; otherwise an UPDATE could store an unsorted array and slip a
-- reordered duplicate past the UNIQUE (arr) constraint.
CREATE TRIGGER test_insert_trig
BEFORE INSERT OR UPDATE ON test
FOR EACH ROW
EXECUTE PROCEDURE test_insert_trig_func()
;
INSERT INTO test (arr) VALUES ('{1, 2}');
INSERT INTO test (arr) VALUES ('{2, 1}'); -- Generates a unique constraint violation
SELECT arr FROM test;
The result:
arr
{1,2}

Related

Postgresql function (upsert and delete): how to pass a set of rows of table type to function call

I have a table
CREATE TABLE items(
id SERIAL PRIMARY KEY,
group_id INT NOT NULL,
item_id INT NOT NULL,
name TEXT,
.....
.....
);
I am creating a function that
takes a set of row values for a single group_id, failing if multiple group_ids are present in the input rows
compares it with matching values in the table (only for that group_id)
updates changed values (only for the input group_id)
inserts new values
deletes table rows that are absent in the row input (compare rows with group_id and item_id)(only for the input group_id)
this is my function definition
-- update_items(rows_input): skeleton only. The body contains no executable
-- statements yet; the comments below list the intended steps.
--   rows_input  array of rows of the items table type
--   returns     boolean (no RETURN statement is implemented yet)
CREATE OR REPLACE FUNCTION update_items(rows_input items[]) RETURNS boolean as $$
DECLARE
-- declared for the planned implementation; not used yet
rows items[];
group_id_input integer;
BEGIN
-- get single group_id from input rows, fail if multiple group_id's present in input
-- read items of that group_id in table
-- compare input rows and table rows (of the same group_id)
-- create transaction
-- delete absent rows
-- upsert
-- return success of transaction (boolean)
END;
$$ LANGUAGE plpgsql;
I am trying to call the function in a query
-- NOTE: this is the failing call from the question, kept as written.
-- The double-quoted tokens are read as column identifiers, which is what
-- produces the 'column "Name1" does not exist' error. The call also passes
-- three separate row arguments, while update_items is declared with a
-- single items[] parameter.
select update_items(
(38,1,1283,"Name1"),
(39,1,1471,"Name2"),
(40,1,1333,"Name3")
);
I get the following error
Failed to run sql query: column "Name1" does not exist
I tried removing the id column values: that gives me the same error
What is the correct way to pass row values to a function that accepts table type array as arguments?
updates changed values
inserts new values deletes table rows that are
absent in the row input (compare rows with group_id and item_id)
If you want to upsert, there must be a unique constraint to upsert against.
Here there are two unique constraints: the primary key (id) and (group_id, item_id).
INSERT ... ON CONFLICT needs to take both of these unique constraints into account.
Since you want to pass an items[] value to the function, this also means that any row whose id is not among the function's input rows will be deleted.
-- Rebuild the items table from scratch and seed it with four rows of group 1.
DROP TABLE IF EXISTS items CASCADE;

BEGIN;

CREATE TABLE items (
    id       bigint GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY,
    group_id int NOT NULL,
    item_id  int NOT NULL,
    name     text,
    -- second unique key, used as the upsert conflict target
    UNIQUE (group_id, item_id)
);

INSERT INTO items (id, group_id, item_id, name)
VALUES
    (38, 1, 1283, 'original_38'),
    (39, 1, 1471, 'original_39'),
    (40, 1, 1333, 'original_40'),
    (42, 1, 1332, 'original_42');

COMMIT;
main function:
-- update_items(in_items): synchronise the items table with the given rows.
--
--   1. Deletes every row of items whose id does not occur in in_items.
--      NOTE: the delete is not restricted to a group_id; rows of any group
--      are removed when their id is absent from the input.
--   2. Upserts each input row on (group_id, item_id): a new key is inserted,
--      an existing key gets its name overwritten.
--
-- The former extra "INSERT ... ON CONFLICT (id) DO NOTHING" was removed. It
-- never changed the final result, but it raised a unique violation on
-- (group_id, item_id) when an input row carried a new id for an existing
-- (group_id, item_id) pair.
--
-- Returns TRUE on completion; any error aborts the whole call.
CREATE OR REPLACE FUNCTION update_items (in_items items[])
RETURNS boolean
AS $FUNC$
DECLARE
    iter items;
    saved_ids bigint[];
BEGIN
    -- ids that must survive the delete
    saved_ids := ARRAY(SELECT (unnest(in_items)).id);

    DELETE FROM items
    WHERE NOT (id = ANY (saved_ids));

    -- row-by-row, so that a key repeated within in_items is simply updated again
    FOREACH iter IN ARRAY in_items LOOP
        INSERT INTO items
        SELECT
            iter.*
        ON CONFLICT (group_id, item_id)
        DO UPDATE SET
            name = EXCLUDED.name;
        RAISE NOTICE 'rec.groupid: %, rec.items_id:%', iter.group_id, iter.item_id;
    END LOOP;
    RETURN TRUE;
END
$FUNC$
LANGUAGE plpgsql;
call it:
-- Build the argument with row constructors rather than a quoted array
-- literal: inside a composite literal, whitespace after a comma becomes part
-- of a text field, so '(38, 1, 1283, Name1)' would store the name ' Name1'
-- with a leading space.
SELECT
    *
FROM
    update_items (ARRAY[
        (38, 1, 1283, 'Name1'),
        (39, 1, 1471, 'Name2'),
        (40, 1, 1333, 'Name3')
    ]::items[]);
references:
Iterating over integer[] in PL/pgSQL
How to match elements in an array of composite type?
IN vs ANY operator in PostgreSQL
Here's how I achieved UPSERT with DELETE missing rows, if anyone is looking to do the same.
-- update_items(in_rows): upsert the rows of one group and delete that
-- group's rows that are missing from the input.
--
--   in_rows  rows of the items table type; all must share one group_id
--   returns  the group_id that was processed (NULL for an empty input)
--   raises   an exception when in_rows contains more than one group_id
CREATE OR REPLACE FUNCTION update_items(in_rows items[]) RETURNS INT AS $$
DECLARE
    in_groups INTEGER[];
    in_group_id INTEGER;
    in_item_ids INTEGER[];
BEGIN
    -- get single group id from input rows, fail if multiple group ids present in input
    in_groups := ARRAY(SELECT DISTINCT group_id FROM UNNEST(in_rows));
    IF ARRAY_LENGTH(in_groups, 1) > 1 THEN
        RAISE EXCEPTION 'Multiple group_ids found in input items: %', in_groups;
    END IF;
    in_group_id := in_groups[1];

    -- delete items of this group that are absent in in_rows
    -- "<> ALL" means "equal to none of the listed ids"; "<> ANY" would match
    -- every row as soon as the list holds two different ids.
    in_item_ids := ARRAY(SELECT item_id FROM UNNEST(in_rows));
    DELETE FROM items
    WHERE group_id = in_group_id
      AND item_id <> ALL (in_item_ids);

    -- upsert in_rows
    -- NOTE(review): parent_group_id, mat_centre_id and opening_date are not in
    -- the items definitions shown in this file; presumably they belong to the
    -- author's full table -- confirm and trim the SET list to the real columns.
    INSERT INTO items
    SELECT * FROM UNNEST(in_rows)
    ON CONFLICT (group_id, item_id)
    DO UPDATE SET
        parent_group_id = EXCLUDED.parent_group_id,
        mat_centre_id = EXCLUDED.mat_centre_id,
        NAME = EXCLUDED.NAME,
        opening_date = EXCLUDED.opening_date;

    -- return the processed group id; a failure raises and rolls the call back
    RETURN in_group_id;
END;
$$ LANGUAGE plpgsql;
This function removes rows that are missing from your in_rows

Dynamic query that uses CTE gets "syntax error at end of input"

I have a table that looks like this:
-- Lookup table: generated UUID key plus a mandatory, unique name.
CREATE TABLE label (
    hid  uuid DEFAULT uuid_generate_v4() PRIMARY KEY,
    name text NOT NULL UNIQUE
);
I want to create a function that takes a list of names and inserts multiple rows into the table, ignoring duplicate names, and returns an array of the IDs generated for the rows it inserted.
This works:
-- insert_label(nms): inserts into label every name from nms that is not
-- already present and returns the hid values generated for the new rows.
-- The names are staged in a temporary table that is dropped before returning.
CREATE OR REPLACE FUNCTION insert_label(nms TEXT[])
    RETURNS UUID[]
    LANGUAGE plpgsql
AS $$
DECLARE
    new_ids UUID[];
BEGIN
    CREATE TEMP TABLE tmp_names(name TEXT);

    INSERT INTO tmp_names
    SELECT UNNEST(nms);

    WITH inserted AS (
        INSERT INTO label(name)
        SELECT staged.name
        FROM tmp_names AS staged
        WHERE NOT EXISTS (
            SELECT 1
            FROM label AS existing
            WHERE existing.name = staged.name
        )
        RETURNING hid
    )
    SELECT ARRAY_AGG(inserted.hid)
    INTO new_ids
    FROM inserted;

    DROP TABLE tmp_names;
    RETURN new_ids;
END;
$$;
I have many tables with the exact same columns as the label table, so I would like to have a function that can insert into any of them. I'd like to create a dynamic query to do that. I tried that, but this does not work:
-- insert_label(h_tbl, nms): attempted dynamic version, kept as posted.
-- NOTE: this version fails at EXECUTE with "syntax error at end of input".
-- The string built below contains only the WITH clause; the outer statement
-- that should consume new_names is issued separately afterwards, so the
-- dynamic statement is incomplete.
CREATE OR REPLACE FUNCTION insert_label(h_tbl REGCLASS, nms TEXT[])
RETURNS UUID[]
AS $$
DECLARE
ids UUID[];
query_str TEXT;
BEGIN
CREATE TEMP TABLE tmp_names(name TEXT);
INSERT INTO tmp_names SELECT UNNEST(nms);
-- builds the CTE only, with the target table substituted twice
query_str := FORMAT('WITH new_names AS ( INSERT INTO %1$I(name) SELECT tn.name FROM tmp_names tn WHERE NOT EXISTS(SELECT 1 FROM %1$I h WHERE h.name = tn.name) RETURNING hid)', h_tbl);
EXECUTE query_str;
-- new_names exists only inside the dynamic statement above, not here
SELECT ARRAY_AGG(hid) INTO ids FROM new_names;
DROP TABLE tmp_names;
RETURN ids;
END;
$$ LANGUAGE PLPGSQL;
This is the output I get when I run that function:
psql=# select insert_label('label', array['how', 'now', 'brown', 'cow']);
ERROR: syntax error at end of input
LINE 1: ...SELECT 1 FROM label h WHERE h.name = tn.name) RETURNING hid)
^
QUERY: WITH new_names AS ( INSERT INTO label(name) SELECT tn.name FROM tmp_names tn WHERE NOT EXISTS(SELECT 1 FROM label h WHERE h.name = tn.name) RETURNING hid)
CONTEXT: PL/pgSQL function insert_label(regclass,text[]) line 19 at EXECUTE
The query generated by the dynamic SQL looks like it should be exactly the same as the query from static SQL.
I got the function to work by changing the return value from an array of UUIDs to a table of UUIDs and not using CTE:
-- insert_label(h_tbl, nms): inserts into the table h_tbl every name from nms
-- that the table does not contain yet and returns the generated hid values
-- as a set. The names are staged in a temporary table that is dropped at
-- the end.
CREATE OR REPLACE FUNCTION insert_label(h_tbl REGCLASS, nms TEXT[])
    RETURNS TABLE (hid UUID)
    LANGUAGE plpgsql
AS $$
DECLARE
    -- INSERT statement with the target table name substituted twice
    insert_stmt TEXT := FORMAT('INSERT INTO %1$I(name) SELECT tn.name FROM tmp_names tn WHERE NOT EXISTS(SELECT 1 FROM %1$I h WHERE h.name = tn.name) RETURNING hid', h_tbl);
BEGIN
    CREATE TEMP TABLE tmp_names(name TEXT);

    INSERT INTO tmp_names
    SELECT UNNEST(nms);

    RETURN QUERY EXECUTE insert_stmt;

    DROP TABLE tmp_names;
    RETURN;
END;
$$;
I don't know if one way is better than the other, returning an array of UUIDs or a table of UUIDs, but at least I got it to work one of those ways. Plus, possibly not using a CTE is more efficient, so it may be better to stick with the version that returns a table of UUIDs.
What I would like to know is why the dynamic query did not work when using a CTE. The query it produced looked like it should have worked.
If anyone can let me know what I did wrong, I would appreciate it.
... why the dynamic query did not work when using a CTE. The query it produced looked like it should have worked.
No, it was only the CTE without (required) outer query. (You had SELECT ARRAY_AGG(hid) INTO ids FROM new_names in the static version.)
There are more problems, but just use this query instead:
-- Insert every name from nms; names that already exist are skipped, and only
-- the hid values of rows actually inserted are returned.
INSERT INTO label (name)
SELECT new_name
FROM unnest(nms) AS new_name
ON CONFLICT DO NOTHING
RETURNING hid;
label.name is defined UNIQUE NOT NULL, so this simple UPSERT can replace your function insert_label() completely.
It's much simpler and faster. It also defends against possible duplicates from within your input array that you didn't cover, yet. And it's safe under concurrent write load - as opposed to your original, which might run into race conditions. Related:
How to use RETURNING with ON CONFLICT in PostgreSQL?
I would just use the simple query and replace the table name.
But if you still want a dynamic function:
-- insert_label(_tbl, _nms): inserts the names in _nms into table _tbl,
-- skipping conflicting rows, and returns the hid of each row inserted.
--   _tbl  target table; substituted into the statement text with %s
--   _nms  names to insert; passed as a bind value ($1) via USING
CREATE OR REPLACE FUNCTION insert_label(_tbl regclass, _nms text[])
    RETURNS TABLE (hid uuid)
AS
$func$
DECLARE
    _stmt_template constant text := $$
INSERT INTO %s(name)
SELECT unnest($1)
ON CONFLICT DO NOTHING
RETURNING hid
$$;
BEGIN
    RETURN QUERY EXECUTE format(_stmt_template, _tbl)
    USING _nms;
END
$func$ LANGUAGE plpgsql;
If you don't need an array as result, stick with the set (RETURNS TABLE ...). Simpler.
Pass values (_nms) to EXECUTE in a USING clause.
The tablename (_tbl) is type regclass, so the format specifier %I for format() would be wrong. Use %s instead. See:
Table name as a PostgreSQL function parameter

How to impose this exclusion constraint?

I have a key-value table.
-- Key-value store: key is mandatory, value may be NULL.
CREATE TABLE keyvalues (
    key   text NOT NULL,
    value text
)
I want to impose a constraint that if a key has an entry with NULL value, it cannot have any other entries.
How do I do that?
To clarify:
I want to allow ("key1", "value1"), ("key1", "value2"). But if I have ("key2", NULL), I want to not allow ("key2", "value3").
You can use a trigger, like this:
-- trigger_function(): row-level trigger function for keyvalues enforcing
-- "a key that has a NULL-valued entry cannot have any other entries".
--
-- Rejects the new row when
--   * the key already has an entry whose value is NULL, or
--   * the new value is NULL and the key already has any entry.
--
-- NOTE(review): the checks query the table as stored, so on UPDATE the row
-- being changed is itself visible to them; keyvalues has no key to exclude
-- it. Verify that this is acceptable for UPDATE statements.
CREATE OR REPLACE FUNCTION trigger_function()
RETURNS trigger
LANGUAGE plpgsql
AS $function$
begin
    -- the key is already "closed" by a NULL-valued entry
    if exists (select 1 from keyvalues where key = new.key and value is null) then
        RAISE EXCEPTION 'Key-value not allowed';
    end if;
    -- converse case: a NULL value may only be stored for a key without entries
    if new.value is null
       and exists (select 1 from keyvalues where key = new.key) then
        RAISE EXCEPTION 'Key-value not allowed';
    end if;
    RETURN new;
end;
$function$
;
Then you create the trigger on the table
-- Run trigger_function() for each row before it is inserted or updated.
CREATE TRIGGER trigger_on_table
    BEFORE INSERT OR UPDATE ON keyvalues
    FOR EACH ROW
    EXECUTE PROCEDURE trigger_function();
And test it:
insert INTO keyvalues
SELECT 'a','a'
OK
insert INTO keyvalues
SELECT 'a','b'
OK
insert INTO keyvalues
SELECT 'b',null
OK
insert INTO keyvalues
SELECT 'b','b'
ERROR: Key-value not allowed

PostgreSQL transactional DDL and to_regclass

Following the suggestion at this question, I'm using the to_regclass function to check if a table exists, creating it if it doesn't. However, it appears that if the table was created in the current transaction, to_regclass still returns null.
Is this behaviour expected? Or is this a bug?
Detail
Here's a short example of where this goes wrong:
-- Reproduction script, kept as posted: a parent table test.test whose BEFORE
-- INSERT trigger routes each row into a per-category child table, creating
-- that child table first when to_regclass() reports it as missing.
begin;
create schema test;
create table test.test ( id serial, category integer );
create or replace function test.test_insert () returns trigger as $$
declare
child_table_name text;
table_id text;
begin
-- schema-qualified name as one string, e.g. test.test_1
child_table_name = concat('test.test_', text(new.category));
-- looks the name up as schema "test", table "test_<category>"
table_id = to_regclass(child_table_name::cstring);
if table_id is null then
-- NOTE(review): %I quotes the whole dotted string as a single identifier,
-- so this names a table "test.test_<category>" rather than test_<category>
-- in schema test -- not the object the to_regclass() lookup above checks.
execute format('create table %I ( primary key (id), check ( category = %L ) ) inherits (test.test)', child_table_name, new.category);
end if;
execute format ('insert into %I values ($1.*)', child_table_name) using new;
-- returning null from a BEFORE row trigger skips the insert into test.test itself
return null;
end;
$$ language plpgsql;
create trigger test_insert before insert on test.test for each row execute procedure test.test_insert();
insert into test.test (category) values (1);
insert into test.test (category) values (1);
insert into test.test (category) values (1);
commit;
You're using the %I format specifier incorrectly.
If your category is 1, then you end up calling to_regclass('test.test_1'), i.e. checking for the table test_1 in schema test.
However, format('create table %I', 'test.test_1') will treat the format argument as a single identifier and quote it accordingly, evaluating to 'create table "test.test_1"'. This will create a table called "test.test_1" in your default schema (probably public).
Instead, you need to treat your schema and table names as separate identifiers. Define your table name as:
child_table_name = format('test.%I', 'test_' || new.category);
... and when building your SQL strings, just substitute this value directly (i.e. with %s rather than %I).

How to return a record from function, executed by INSERT/UPDATE rule (trigger)?

I have the following schema for my database:
-- Sequence that supplies the ids for data_table.
create sequence data_sequence;
-- Base table behind data_view. The column list is written with parentheses
-- and comma separators; braces and semicolons are not valid here.
create table data_table
(
id integer primary key,
field varchar(100)
);
-- View exposing both columns of data_table unchanged.
create view data_view as
select
    dt.id,
    dt.field
from data_table as dt;
-- data_insert(_new): stores _new.field in data_table under a fresh id taken
-- from data_sequence and returns the resulting row as read back through
-- data_view.
create function data_insert(_new data_view) returns data_view
language plpgsql as
$$
declare
    _next_id integer := nextval('data_sequence');
    _row data_view%rowtype;
begin
    insert into data_table(id, field)
    values(_next_id, _new.field);

    select v.*
    into _row
    from data_view as v
    where v.id = _next_id;

    return _row;
end;
$$;
-- Redirect INSERTs on data_view to data_insert(). Kept as posted: the INSTEAD
-- action is a SELECT of the function result, so the client is shown one
-- composite column named data_insert rather than separate id/field columns.
create rule insert as on insert to data_view do instead
select data_insert(new);
Then type in psql:
insert into data_view(field) values('abc');
Would like to see something like:
id | field
----+---------
1 | abc
Instead see:
data_insert
-------------
(1, "abc")
Is it possible to fix this somehow?
Thanks for any ideas.
Ultimate idea is to use this in other functions, so that I could obtain id of just inserted record without selecting for it from scratch. Something like:
insert into data_view(field) values('abc') returning id into my_variable
would be nice but doesn't work with error:
ERROR: cannot perform INSERT RETURNING on relation "data_view"
HINT: You need an unconditional ON INSERT DO INSTEAD rule with a RETURNING clause.
I don't really understand that HINT. I use PostgreSQL 8.4.
What you want to do is already built into postgres. It allows you to include a RETURNING clause on INSERT statements.
-- The id is generated by the serial column; RETURNING hands the stored row
-- back from the INSERT itself, so no follow-up SELECT is needed.
CREATE TABLE data_table (
    id    serial CONSTRAINT data_table_pkey PRIMARY KEY,
    field varchar(100)
);

INSERT INTO data_table (field)
VALUES ('testing')
RETURNING
    id,
    field;
If you feel you must use a view, check this thread on the postgres mailing list before going any further.