So far I have come up with this solution that needs further refinement (big thanks to #postgresql on freenode).
The problem I am trying to solve is finding an efficient way to store DNS records while maintaining some sort of history. The issue I am currently having is with the writable CTE (wCTE): it inserts new records and marks old records as dead correctly, but it does not re-add records (i.e. mark a previously dead record as alive again when it reappears). The wCTE is:
-- Reconcile the stored records of TLD $1 with the contents of record_temp.
-- deltas keeps only the join rows whose two sides differ, i.e. rows found in
-- just one of the two inputs; o and n are whole-row values of each side.
WITH deltas AS (
SELECT o, n FROM (
SELECT id, name, domain_id, class_id, addr FROM record WHERE tld_id = $1
) AS o FULL OUTER JOIN record_temp n
ON (
o.name = n.name AND
o.domain_id = n.domain_id AND
o.class_id = n.class_id AND
o.addr = n.addr
)
-- Drop pairs that matched on all four columns; only differences remain.
WHERE (o.name, o.domain_id, o.class_id, o.addr)
IS DISTINCT FROM (n.name, n.domain_id, n.class_id, n.addr)
), mark_dead AS (
-- Flag every stored record that survived the filter above, i.e. has no
-- counterpart in record_temp.
UPDATE record SET alive = FALSE
WHERE id IN (
SELECT (o).id FROM deltas WHERE (o).id IS NOT NULL
) RETURNING *
)
-- Insert the rows that exist only in record_temp (all five fields present).
-- NOTE(review): nothing in this statement sets alive = TRUE for a dead record
-- that shows up in record_temp again; such rows match in the join and are
-- filtered out of deltas.
INSERT INTO record (name, domain_id, tld_id, class_id, addr)
SELECT (n).name, (n).domain_id, (n).tld_id, (n).class_id, (n).addr
FROM deltas WHERE
(n).name IS NOT NULL AND
(n).domain_id IS NOT NULL AND
(n).tld_id IS NOT NULL AND
(n).class_id IS NOT NULL AND
(n).addr IS NOT NULL
;
The o result has all the old records that do not exist in record_temp, n has all the records that are new and need to be inserted. I expect I need to add another join which pulls in (an inner join?) results that exist on both tables (which if marked as dead, need to be marked as alive).
The rest of the schema for reference is:
-- One DNS record; alive marks whether it was present in the latest load.
CREATE TABLE record (
    id        serial PRIMARY KEY,
    name      character varying(255),
    domain_id integer CONSTRAINT fk1 REFERENCES domain (id) MATCH SIMPLE,
    tld_id    integer CONSTRAINT fk2 REFERENCES tld (id) MATCH SIMPLE,
    class_id  integer,
    addr      inet,
    alive     boolean DEFAULT TRUE,
    -- natural key used when comparing against record_temp
    UNIQUE (name, domain_id, class_id, addr)
);
-- Audit trail: one row per 'added' / 'deleted' event of a record.
CREATE TABLE record_history (
    id           serial PRIMARY KEY,
    record_id    integer CONSTRAINT fk1 REFERENCES record (id) MATCH SIMPLE,
    history_type record_history_type,
    stamp        timestamp DEFAULT CURRENT_TIMESTAMP
);
-- Staging table for the freshly loaded record set; dropped at commit.
CREATE TEMPORARY TABLE record_temp (
    name      character varying(255),
    domain_id integer,
    tld_id    integer,
    class_id  integer,
    addr      inet,
    UNIQUE (name, domain_id, class_id, addr)
) ON COMMIT DROP;
record_history is populated by trigger functions and is being filled the way I expect. These are the functions and triggers:
-- Trigger function: log an 'added' history entry for every newly inserted record.
CREATE FUNCTION add_insert_record_history()
RETURNS TRIGGER AS $$
BEGIN
    INSERT INTO record_history (record_id, history_type) VALUES (NEW.id, 'added');
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Trigger function: log a history entry whenever a record's alive flag flips
-- ('added' when it becomes TRUE, 'deleted' when it becomes FALSE).
CREATE FUNCTION add_update_record_history()
RETURNS TRIGGER AS $$
BEGIN
    -- Nothing to log when alive did not change.
    IF NEW.alive = OLD.alive THEN
        RETURN NEW;
    END IF;
    IF NEW.alive THEN
        INSERT INTO record_history (record_id, history_type) VALUES (NEW.id, 'added');
    END IF;
    IF NOT NEW.alive THEN
        INSERT INTO record_history (record_id, history_type) VALUES (NEW.id, 'deleted');
    END IF;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- NOTE(review): the CREATE TRIGGER header lines were missing from the original
-- text; the trigger names and the AFTER timing below are reconstructed.
-- AFTER INSERT is required so that record_history.record_id can reference the
-- new row; confirm the names against the live schema.
CREATE TRIGGER record_insert_history
    AFTER INSERT ON record FOR EACH ROW EXECUTE PROCEDURE
    add_insert_record_history();
CREATE TRIGGER record_update_history
    AFTER UPDATE ON record FOR EACH ROW EXECUTE PROCEDURE
    add_update_record_history();
I seem to have it working how I want with the following query, which I feel is incredibly unoptimized:
-- Reconcile the stored records of TLD $1 with record_temp in one statement.
-- The two inputs are joined exactly once; every action below reads that result:
--   old_id set,  new_name NULL  -> record vanished        -> mark dead
--   old_id set,  new_name set   -> record present again   -> mark alive
--   old_id NULL                 -> record is new          -> insert
-- (A matched pair always has a non-NULL new_name because the join uses "=".)
WITH deltas AS (
    SELECT
        o.id        AS old_id,
        n.name      AS new_name,
        n.domain_id AS new_domain_id,
        n.tld_id    AS new_tld_id,
        n.class_id  AS new_class_id,
        n.addr      AS new_addr
    FROM (
        SELECT id, name, domain_id, class_id, addr
        FROM record
        WHERE tld_id = $1
    ) AS o
    FULL OUTER JOIN record_temp AS n
        ON  o.name      = n.name
        AND o.domain_id = n.domain_id
        AND o.class_id  = n.class_id
        AND o.addr      = n.addr
), mark_dead AS (
    -- Skip rows that are already dead so they are not rewritten needlessly.
    UPDATE record SET alive = FALSE
    WHERE alive IS DISTINCT FROM FALSE
      AND id IN (
          SELECT old_id FROM deltas
          WHERE old_id IS NOT NULL AND new_name IS NULL
      )
    RETURNING id
), mark_alive AS (
    -- Revive dead records that appear in the new load again.
    UPDATE record SET alive = TRUE
    WHERE alive = FALSE
      AND id IN (
          SELECT old_id FROM deltas
          WHERE old_id IS NOT NULL AND new_name IS NOT NULL
      )
    RETURNING id
)
-- Insert the rows that exist only in record_temp (all five fields present).
INSERT INTO record (name, domain_id, tld_id, class_id, addr)
SELECT new_name, new_domain_id, new_tld_id, new_class_id, new_addr
FROM deltas
WHERE old_id IS NULL
  AND new_name      IS NOT NULL
  AND new_domain_id IS NOT NULL
  AND new_tld_id    IS NOT NULL
  AND new_class_id  IS NOT NULL
  AND new_addr      IS NOT NULL
;
Related
This is the statement I ran and the error it produced:
postgres=# INSERT INTO cs222p_interchange.Ad(ad_id, plan, content, pic_num, item_id, seller_user_id, placed_date)
postgres-# VALUES ('ADT32457', 'Gold', 'New games available!', 1, 'F7E1N', '4Z5VC', '2022-11-06');
ERROR: record "new" has no field "user_id"
CONTEXT: SQL statement "INSERT INTO TargetedAds(ad_id, user_id)
SELECT NEW.ad_id, NEW.user_id WHERE (
SELECT category
FROM cs222p_interchange.item i
JOIN cs222p_interchange.ad a ON i.item_id = a.item_id
WHERE ad_id = NEW.ad_id AND (buyer_user_id = NEW.user_id OR seller_user_id = NEW.user_id) )
LIKE (
SELECT category
FROM cs222p_interchange.User u
JOIN cs222p_interchange.Categories c ON u.user_id = c.user_id
WHERE user_id = NEW.user_id)
ON CONFLICT DO NOTHING"
PL/pgSQL function addad() line 1 at SQL statement
This is the trigger
-- Trigger function for cs222p_interchange.Ad: after an ad is inserted, add a
-- TargetedAds row for the ad's seller when the advertised item's category is
-- one of the categories registered for that seller.
CREATE FUNCTION AddAd() RETURNS Trigger AS
$$
BEGIN
    -- The Ad row has no user_id column; the user involved is NEW.seller_user_id
    -- (TargetedAds.user_id references Seller(user_id)).
    -- EXISTS replaces the scalar-subquery comparison so that several matching
    -- category rows cannot raise "more than one row returned".
    -- NOTE(review): assumes Item(item_id, category) and
    -- Categories(user_id, category), as implied by the original query; confirm.
    INSERT INTO TargetedAds (ad_id, user_id)
    SELECT NEW.ad_id, NEW.seller_user_id
    WHERE EXISTS (
        SELECT 1
        FROM cs222p_interchange.Item AS i
        INNER JOIN cs222p_interchange.Categories AS c
            ON c.category = i.category
        WHERE i.item_id = NEW.item_id
          AND c.user_id = NEW.seller_user_id
    )
    ON CONFLICT DO NOTHING;
    RETURN NEW;
END;
$$
LANGUAGE plpgsql;
CREATE TRIGGER TargetedAdsLogger AFTER INSERT ON cs222p_interchange.Ad FOR EACH ROW EXECUTE FUNCTION AddAd();
This is the Table
-- Pairs an ad with a user it is targeted at; rows vanish with either parent.
CREATE TABLE TargetedAds (
    ad_id   text REFERENCES cs222p_interchange.Ad (ad_id) ON DELETE CASCADE,
    user_id text REFERENCES cs222p_interchange.Seller (user_id) ON DELETE CASCADE,
    PRIMARY KEY (ad_id, user_id)
);
This is what i need to insert
-- Sample ad row used to exercise the TargetedAdsLogger trigger.
INSERT INTO cs222p_interchange.Ad (
    ad_id,
    plan,
    content,
    pic_num,
    item_id,
    seller_user_id,
    placed_date
)
VALUES (
    'ADT32457',
    'Gold',
    'New games available!',
    1,
    'F7E1N',
    '4Z5VC',
    '2022-11-06'
);
This is the definition of the Ad table:
-- An advertisement placed by a seller for one picture of one item.
CREATE TABLE cs222p_interchange.Ad (
    ad_id          text NOT NULL PRIMARY KEY,
    plan           text NOT NULL,
    content        text,
    pic_num        integer NOT NULL,
    item_id        text NOT NULL
                   REFERENCES cs222p_interchange.Item (item_id) ON DELETE CASCADE,
    seller_user_id text NOT NULL
                   REFERENCES cs222p_interchange.Seller (user_id) ON DELETE CASCADE,
    placed_date    date NOT NULL,
    FOREIGN KEY (pic_num, item_id)
        REFERENCES cs222p_interchange.Picture (pic_num, item_id) ON DELETE CASCADE
);
I have checked multiple times and all of my tables and columns are existing.
I need to create a procedure that handles this case in the most performant way possible (there is an extremely large amount of data).
I have a table called ORDER_A that every day receives a full load (its truncated, and all records are inserted again).
I have a table called ORDER_B which is a copy of ORDER_A, containing the same data and some additional control dates.
I also have a table MANAGER to save start and finish date, and if the procedure is running.
After all inserts into ORDER_A are done, I want to execute a procedure that, for each record in ORDER_A, looks for a record with the same identifier (primary key: order_id) in ORDER_B.
If a record exists with the same order_id, and any of the other columns have changed, an update must be performed on table B
If a record exists with the same order_id, and no values in the other columns have been modified, nothing should be performed, the record must remain the same in table B.
If there is no record with the same order_id, it must be inserted in table B.
If there is a record on ORDER_B that no longer exists on ORDER_A (it was deleted), the column "flag_deleted" must be updated to "1".
My tables are like this
-- Daily full-load staging table (truncated and reloaded every day).
CREATE TABLE ORDER_A (
    ORDER_ID       NUMBER        NOT NULL PRIMARY KEY,
    ORDER_CODE     VARCHAR2(50),
    ORDER_STATUS   VARCHAR2(20),
    ORDER_USER_ID  NUMBER,
    ORDER_DATE     TIMESTAMP(6),
    CHECKSUM_CODE  VARCHAR2(40)
);
-- Persistent copy of ORDER_A plus control columns maintained by the sync job.
CREATE TABLE ORDER_B
(
    ORDER_ID       NUMBER NOT NULL,
    ORDER_CODE     VARCHAR2(50),
    ORDER_STATUS   VARCHAR2(20),
    ORDER_USER_ID  NUMBER,
    ORDER_DATE     TIMESTAMP(6),  -- the comma was missing here, which made the DDL invalid
    INSERT_AT      TIMESTAMP(6) DEFAULT CURRENT_TIMESTAMP,  -- first time the row was copied
    UPDATED_AT     TIMESTAMP(6),                            -- last time the row was changed
    CHECKSUM_CODE  VARCHAR2(40),                            -- hash of the business columns
    FLAG_DELETED   NUMBER(1),                               -- 1 once the row is gone from ORDER_A
    PRIMARY KEY (ORDER_ID)
);
-- index on checksum column for both tables
-- (Oracle index names must be unique within a schema, so each table gets its own name)
CREATE INDEX idx_order_a_cksum ON ORDER_A (CHECKSUM_CODE ASC);
CREATE INDEX idx_order_b_cksum ON ORDER_B (CHECKSUM_CODE ASC);
-- Manager table: one row per managed procedure, with its last start/finish
-- timestamps and a flag telling whether it is running right now.
CREATE TABLE MANAGER (
    TABLE_NAME      VARCHAR2(40),
    PROCEDURE_NAME  VARCHAR2(50),
    START_TS        TIMESTAMP(6),
    FINISH_TS       TIMESTAMP(6),
    IS_RUNNING      NUMBER(1)
);
I'm thinking of something like the procedure below, but I'm not sure whether it's the best approach or how to deal with the delete case:
create or replace procedure MERGE_DATA_ORDER
IS
    -- Synchronises ORDER_B with the freshly loaded ORDER_A:
    --   * new ORDER_IDs are inserted,
    --   * rows whose checksum changed (or that were flagged deleted) are updated,
    --   * rows missing from ORDER_A are flagged with FLAG_DELETED = 1.
    -- MANAGER.IS_RUNNING guards against concurrent runs; ex_running propagates
    -- to the caller when another run is already in progress.
    c_proc_name   CONSTANT VARCHAR2(50) := 'MERGE_DATA_ORDER';
    v_is_running  NUMBER;  -- named differently from the MANAGER.IS_RUNNING column
    ex_running    EXCEPTION;
BEGIN
    SELECT IS_RUNNING INTO v_is_running FROM MANAGER WHERE PROCEDURE_NAME = c_proc_name;

    IF v_is_running = 1 THEN
        RAISE ex_running;
    END IF;

    -- Update the flag on manager table
    UPDATE MANAGER SET IS_RUNNING = 1, START_TS = SYSTIMESTAMP WHERE PROCEDURE_NAME = c_proc_name;
    COMMIT;

    BEGIN
        -- Checksum over the business columns. A delimiter keeps ('AA','BBB')
        -- and ('AAB','BB') from hashing identically, and the explicit
        -- timestamp format makes the hash independent of session NLS settings.
        UPDATE ORDER_A
        SET CHECKSUM_CODE =
            STANDARD_HASH
            (
                ORDER_ID      || '|' ||
                ORDER_CODE    || '|' ||
                ORDER_STATUS  || '|' ||
                ORDER_USER_ID || '|' ||
                TO_CHAR(ORDER_DATE, 'YYYY-MM-DD HH24:MI:SS.FF6'),
                'MD5'
            );

        -- Insert new rows; update rows whose checksum differs or that had been
        -- flagged deleted and are back. ORDER_ID is the join key and therefore
        -- must not appear in the SET list.
        MERGE INTO ORDER_B b
        USING ORDER_A a
        ON (a.ORDER_ID = b.ORDER_ID)
        WHEN MATCHED THEN
            UPDATE
            SET
                b.ORDER_CODE    = a.ORDER_CODE,
                b.ORDER_STATUS  = a.ORDER_STATUS,
                b.ORDER_USER_ID = a.ORDER_USER_ID,
                b.ORDER_DATE    = a.ORDER_DATE,
                b.CHECKSUM_CODE = a.CHECKSUM_CODE,
                b.UPDATED_AT    = SYSTIMESTAMP,
                b.FLAG_DELETED  = 0
            WHERE b.CHECKSUM_CODE IS NULL
               OR b.CHECKSUM_CODE <> a.CHECKSUM_CODE
               OR b.FLAG_DELETED = 1
        WHEN NOT MATCHED THEN
            INSERT (
                ORDER_ID,
                ORDER_CODE,
                ORDER_STATUS,
                ORDER_USER_ID,
                ORDER_DATE,
                CHECKSUM_CODE,
                FLAG_DELETED
            )
            VALUES (
                a.ORDER_ID,
                a.ORDER_CODE,
                a.ORDER_STATUS,
                a.ORDER_USER_ID,
                a.ORDER_DATE,
                a.CHECKSUM_CODE,
                0
            );

        -- Delete case: rows that no longer exist in ORDER_A are only flagged.
        UPDATE ORDER_B b
        SET b.FLAG_DELETED = 1,
            b.UPDATED_AT   = SYSTIMESTAMP
        WHERE (b.FLAG_DELETED IS NULL OR b.FLAG_DELETED <> 1)
          AND NOT EXISTS (SELECT 1 FROM ORDER_A a WHERE a.ORDER_ID = b.ORDER_ID);

        COMMIT;
    EXCEPTION
        WHEN OTHERS THEN
            -- Undo the partial sync and release the running flag, otherwise
            -- every later run would be rejected with ex_running.
            ROLLBACK;
            UPDATE MANAGER SET IS_RUNNING = 0, FINISH_TS = SYSTIMESTAMP WHERE PROCEDURE_NAME = c_proc_name;
            COMMIT;
            RAISE;
    END;

    -- set the flag to 0
    UPDATE MANAGER SET IS_RUNNING = 0, FINISH_TS = SYSTIMESTAMP WHERE PROCEDURE_NAME = c_proc_name;
    COMMIT;
END MERGE_DATA_ORDER;
/
I need some help to complete this code, performance tips and deal with the delete issue;
I think you can do this as a single statement as part of the data load. Let's assume that ORDER_A has been loaded (I will comment on that later). You can then define the result of the insert/update by doing a full outer join between ORDER_A and ORDER_B, and then use a CASE expression to project the "correct" value from ORDER_A or ORDER_B. Similarly, you can project FLAG_DELETED. It would look something like this. In this example I am skipping the MD5, but it could be added if really needed (more on that later, too):
-- Project the new contents of ORDER_B from a full outer join of both tables.
-- A row that exists in ORDER_A always supplies the current values, changed or
-- not; a row that exists only in ORDER_B keeps its stored values and is
-- flagged as deleted.
select
    case when ( a.order_id is not null ) then a.order_id
         else b.order_id
    end as newOrder_id
  , case when ( a.order_id is not null ) then a.order_code
         else b.order_code
    end as newOrder_code
  , case when ( a.order_id is not null ) then a.order_status
         else b.order_status
    end as newOrder_status
/* etc... ( Repeat for all projected columns )
Then for the flag_deleted column */
  , case when ( a.order_id is null ) then 1  -- gone from ORDER_A
         else 0                              -- present in ORDER_A (new or existing)
    end as newFlag_deleted
from Order_b b
full outer join Order_a a
on b.order_id = a.order_id
It may be possible that ORDER_A could be an external table, so then you would just need to prepend this with a
CREATE TABLE NEW_ORDER_A as select....
And then you have the results you need.
Where you are hemorrhaging performance in your example is the update of ORDER_A. You are generating redo and undo, and losing any compression benefits. You are also maintaining indexes, although the indexes are not needed.
Assuming you have the resources, you can now use DIRECT PATH and parallelism, and this would scale pretty well.
Lastly, if you really do need the MD5, you need to add a separator character between the columns; otherwise the hash input will be ambiguous. For example, the following two rows would have the same MD5:
COL1 COL2
AA BBB
AAB BB
I am implementing a library management system in SQL. I have the following table structure and some values inserted in them:
-- Catalogue of books that can be borrowed.
create table books (
    IdBook    number(5) primary key,
    NameBook  varchar2(35)
);
-- Library members.
create table users (
    IdUsers   number(5) primary key,
    NameUser  varchar2(20)
);
-- One row per loan; DateReturned stays NULL until the book is brought back.
create table borrowed (
    IdBorrowed    number(5),
    IdUsers       number(5) constraint fk_borrowed  references users (IdUsers),
    IdBook        number(5) constraint fk_borrowed2 references books (IdBook),
    DueDate       date,
    DateReturned  date
);
-- Sample data. Column lists make the statements robust against column
-- reordering; ANSI date literals avoid depending on the session's
-- NLS_DATE_FORMAT / NLS_DATE_LANGUAGE.
insert into books (IdBook, NameBook) values (0, 'FairyTale');
insert into books (IdBook, NameBook) values (1, 'Crime and Punishment');
insert into books (IdBook, NameBook) values (2, 'Anna Karenina');
insert into books (IdBook, NameBook) values (3, 'Norwegian Wood');
insert into users (IdUsers, NameUser) values (1, 'Robb Dora');
insert into users (IdUsers, NameUser) values (2, 'Pop Alina');
insert into users (IdUsers, NameUser) values (3, 'Grozavescu Teodor');
insert into users (IdUsers, NameUser) values (4, 'Popa Alin');
insert into borrowed (IdBorrowed, IdUsers, IdBook, DueDate, DateReturned)
    values (10, 2, 3, date '2017-01-22', null);
insert into borrowed (IdBorrowed, IdUsers, IdBook, DueDate, DateReturned)
    values (11, 1, 1, date '2017-01-25', date '2016-12-19');
insert into borrowed (IdBorrowed, IdUsers, IdBook, DueDate, DateReturned)
    values (12, 1, 3, date '2017-01-22', null);
insert into borrowed (IdBorrowed, IdUsers, IdBook, DueDate, DateReturned)
    values (13, 4, 2, date '2017-01-22', date '2016-12-13');
What I want now is for my database to allow "borrowing" books (i.e. inserting into the borrowed table) only for users that have no unreturned books (i.e. every book they borrowed has a non-null DateReturned); if a user has unreturned books, I want to abandon the whole process. I thought of implementing this in the following way:
-- Intended behaviour: insert a loan into borrowed, guarded by a check on
-- existing loans.
-- NOTE(review): as written this does not look like valid PL/SQL. There is no
-- "if exists (subquery)" construct, and the IF has no THEN / END IF.
-- NOTE(review): the parameters carry the same names as the table columns, and
-- the subquery is not restricted to the IdUsers argument, so the check is not
-- tied to the user being passed in.
create or replace procedure borrowBook(IdBorrowed in number,IdUsers number,IdBook number,DueDate date,DateReturned date) as begin
if exists (SELECT u.IdUsers, u.NameUser, b.DateReturned
FROM users u, borrowed b
WHERE u.IDUSERS = b.IdUsers and DateReturned is not null),
insert into borrowed values(IdBorrowed,IdUsers,IdBook,DueDate,DateReturned);
end borrowBook;
The above procedure does not check if the parameter I pass to this function is the same as the one in my select and I do not know how to do this and correctly insert a value in my table.
Any help would be much appreciated. Thank in advance!
You should not name your parameters the same as columns also used inside the procedure.
You can also simplify your procedure to a single INSERT statement, no IF required:
create or replace procedure borrowBook(p_idborrowed in number, p_idusers number, p_idbook number, p_duedate date, p_datereturned date)
as
begin
  -- Record the loan only when this user has no unreturned book.
  -- The check is restricted to p_idusers, and "unreturned" means
  -- datereturned IS NULL.
  insert into borrowed (idborrowed, idusers, idbook, duedate, datereturned)
  select p_idborrowed, p_idusers, p_idbook, p_duedate, p_datereturned
  from dual
  where not exists (select 1
                    from borrowed b
                    where b.idusers = p_idusers
                      and b.datereturned is null);
end borrowBook;
It's also good coding style to explicitly list the columns for an INSERT statement. And you should get used to the explicit JOIN operator instead of using implicit joins in the where clause.
What about this one:
create or replace procedure borrowBook( p_IdBorrowed    in number ,
                                        p_IdUsers          number ,
                                        p_IdBook           number ,
                                        p_DueDate          date   ,
                                        p_DateReturned     date   )
as
  v_unreturned number;  -- number of books this user still has out
begin
  -- PL/SQL does not allow a subquery inside an IF condition, so the count is
  -- fetched into a variable first.
  select count(*)
    into v_unreturned
    from borrowed
   where IdUsers = p_IdUsers
     and DateReturned is null;

  -- Only lend when the user has nothing outstanding.
  if v_unreturned = 0 then
    insert into borrowed (IdBorrowed, IdUsers, IdBook, DueDate, DateReturned)
    values (p_IdBorrowed ,
            p_IdUsers    ,
            p_IdBook     ,
            p_DueDate    ,
            p_DateReturned );
  end if ;
end borrowBook;
You would seem to want something like this:
create or replace procedure borrowBook (
    in_IdBorrowed in number,
    in_IdUsers number,
    in_IdBook number,
    in_DueDate date,
    in_DateReturned date
) as
    v_flag number;  -- 1 when the user still has an unreturned book, else 0
begin
    -- "Unreturned" means DateReturned IS NULL for one of the user's loans.
    select (case when exists (select 1
                              from borrowed b
                              where b.IdUsers = in_IdUsers and b.DateReturned is null
                             )
                 then 1 else 0
            end)
    into v_flag
    from dual;

    -- Lend only when nothing is outstanding.
    if (v_flag = 0) then
        insert into borrowed (IdBorrowed, IdUsers, IdBook, DueDate, DateReturned)
        values (in_IdBorrowed, in_IdUsers, in_IdBook, in_DueDate, in_DateReturned);
    end if;
end borrowBook;
I have a fully working SQL query. However, it is very very slow. I am looking for a way to optimize it.
-- One line geometry per trajectory, built from its points.
CREATE TABLE trajectory_geom (
    id            serial,
    trajectory_id bigint,
    user_id       bigint,
    geom          geometry(LineString, 4326),
    PRIMARY KEY (id)
);
-- Build one line per trajectory from its points.
-- user_id has to be part of the GROUP BY: it is selected but neither
-- aggregated nor functionally dependent on trajectory_id.
-- The aggregate is given an explicit order because the order in which an
-- unordered aggregate receives its rows is unspecified.
-- NOTE(review): assumes the points should be connected chronologically
-- (date, "time", then id as tie-breaker); confirm.
INSERT INTO trajectory_geom (trajectory_id, user_id, geom)
SELECT
    p.trajectory_id,
    p.user_id,
    ST_Transform(ST_MakeLine(p.geom ORDER BY p.date, p."time", p.id), 4326)
FROM point AS p
GROUP BY
    p.trajectory_id,
    p.user_id
;
-- For every user, compare each of the user's trajectories (vrow) against the
-- start polygons of that user's rows in trajectory_start_end_geom (urow) and
-- record the intersecting pairs.
DO $$
DECLARE
urow record;
vrow record;
wrow record;
BEGIN
-- Outer loop: one iteration per distinct user_id found in point.
FOR wrow IN
SELECT DISTINCT(p.user_id) FROM point p
LOOP
raise notice 'User id: %', wrow.user_id;
-- Middle loop: each distinct trajectory_id of the current user.
FOR vrow IN
SELECT DISTINCT(p.trajectory_id) FROM point p WHERE p.user_id = wrow.user_id
LOOP
-- Inner loop: this user's rows whose start_geom intersects the line stored in
-- trajectory_geom for the current trajectory.
-- NOTE(review): the scalar subquery presumably relies on trajectory_geom
-- holding at most one row per trajectory_id; verify.
FOR urow IN
SELECT
analyzed_tr.*
FROM trajectory_start_end_geom analyzed_tr
WHERE
analyzed_tr.user_id = wrow.user_id
AND
ST_Intersects (
(
analyzed_tr.start_geom
)
,
(
SELECT g.geom
FROM trajectory_geom g
WHERE g.trajectory_id = vrow.trajectory_id
)
) = TRUE
LOOP
-- Store the pair (line trajectory, start-polygon trajectory) unless both
-- sides are the same trajectory.
INSERT INTO trajectories_intercepting_with_starting_point (initial_trajectory_id, mathced_trajectory_id, user_id)
SELECT
vrow.trajectory_id,
urow.trajectory_id,
wrow.user_id
WHERE urow.trajectory_id <> vrow.trajectory_id
;
END LOOP;
END LOOP;
END LOOP;
END;
$$;
It has 3 loops...how can I avoid them?
Basically, I am looping over all user IDs and, for each user, over all of that user's trajectories, checking whether the trajectory intersects the starting area of any other trajectory of the same user.
Schema:
-- Start and end areas (polygons) of each trajectory.
CREATE TABLE public.trajectory_start_end_geom (
    id            integer DEFAULT nextval('trajectory_start_end_geom_id_seq'::regclass) NOT NULL
                  CONSTRAINT trajectory_start_end_geom_pkey PRIMARY KEY,
    trajectory_id bigint,
    user_id       bigint,
    start_geom    geometry(Polygon, 4326),
    end_geom      geometry(Polygon, 4326)
)
WITH (OIDS = FALSE);
-- Line geometry of each trajectory.
CREATE TABLE public.trajectory_geom (
    id            integer DEFAULT nextval('trajectory_geom_id_seq'::regclass) NOT NULL
                  CONSTRAINT trajectory_geom_pkey PRIMARY KEY,
    trajectory_id bigint,
    user_id       bigint,
    geom          geometry(LineString, 4326)
)
WITH (OIDS = FALSE);
-- Raw position fixes; several points share one trajectory_id.
CREATE TABLE public.point (
    id            integer DEFAULT nextval('point_id_seq'::regclass) NOT NULL
                  CONSTRAINT point_pkey PRIMARY KEY,
    user_id       bigint,
    date          date,
    "time"        time without time zone,
    lat           double precision,
    lon           double precision,
    trajectory_id integer,
    geom          geometry(Geometry, 4326)
)
WITH (OIDS = FALSE);
Try this SQL query. Hope this helps.
-- Set-based replacement for the nested loops: pair every trajectory line with
-- the start polygons of the same user's *other* trajectories that it intersects.
-- "<>" (not "<") is required: the test is not symmetric (polygon of one
-- trajectory vs. line of the other), so both orderings of a pair must be checked.
-- initial_trajectory_id is the trajectory whose line is tested and
-- mathced_trajectory_id the one whose start polygon it hits.
INSERT INTO trajectories_intercepting_with_starting_point
    (initial_trajectory_id, mathced_trajectory_id, user_id)
SELECT
    tg.trajectory_id  AS initial_trajectory_id,
    tse.trajectory_id AS matched_trajectory_id,
    tg.user_id
FROM trajectory_geom AS tg
INNER JOIN trajectory_start_end_geom AS tse
    ON  tse.user_id = tg.user_id
    AND tse.trajectory_id <> tg.trajectory_id
WHERE ST_Intersects(tse.start_geom, tg.geom);
This should do the trick:
-- Build the trajectory lines and, in the same statement, record which of them
-- intersect the start polygon of another trajectory of the same user.
WITH vrow AS (
    INSERT INTO trajectory_geom (trajectory_id, user_id, geom)
    SELECT
        p.trajectory_id,
        p.user_id,
        -- NOTE(review): assumes points are to be connected chronologically; confirm.
        ST_Transform(ST_MakeLine(p.geom ORDER BY p.date, p."time", p.id), 4326) AS geom
    FROM point AS p
    -- user_id is selected without an aggregate, so it must be grouped as well
    GROUP BY
        p.trajectory_id,
        p.user_id
    RETURNING trajectory_id, user_id, geom
)
INSERT INTO trajectories_intercepting_with_starting_point (initial_trajectory_id, mathced_trajectory_id, user_id)
SELECT
    vrow.trajectory_id,
    urow.trajectory_id,
    vrow.user_id
FROM trajectory_start_end_geom AS urow
INNER JOIN vrow
    ON  urow.user_id = vrow.user_id
    AND urow.trajectory_id <> vrow.trajectory_id
    AND ST_Intersects(urow.start_geom, vrow.geom);
If you don't need the insert into trajectory_geom, eliminating it (and the CTE) will speed the query up.
Simple intro:
I have a database with users and groups.
Every user might be a member of one or more groups.
Every group might have one or more parent groups.
Schema:
-- Application accounts, keyed by user name.
CREATE TABLE users (
    username character varying(64) NOT NULL,
    password character varying(64) NOT NULL,
    enabled  boolean NOT NULL,
    PRIMARY KEY (username)
);
-- Named groups that users can belong to.
CREATE TABLE groups (
    id         bigserial NOT NULL,
    group_name character varying(64) NOT NULL,
    PRIMARY KEY (id)
);
-- Edge list of the group hierarchy: group_id inherits from parent_group_id.
CREATE TABLE groups_inheritance (
    group_id        bigint NOT NULL
                    CONSTRAINT fk_group_inheritance_group REFERENCES groups (id),
    parent_group_id bigint NOT NULL
                    CONSTRAINT fk_group_inheritance_group_2 REFERENCES groups (id),
    CONSTRAINT unique_uk_groups_inheritance UNIQUE (group_id, parent_group_id)
);
-- Direct memberships of users in groups.
CREATE TABLE group_members (
    id       bigint,
    username character varying(64) NOT NULL
             CONSTRAINT fk_group_members_username REFERENCES users (username),
    group_id bigint NOT NULL
             CONSTRAINT fk_group_members_group REFERENCES groups (id),
    PRIMARY KEY (id)
);
I'm looking for a PL/pgSQL function which finds all groups (their names) particular user belongs to.
Example:
group name: People,
group parent: null
group name: Students,
group parent: People
group name: Football_players,
group parent: People
group name: Basketball_players,
group parent: People
user name: Maciej,
groups : Students, Football_players
f("Maciej") = {"Students", "People", "Football_players"}
He belongs to "People" just because he belongs to "Students" or "Football_players". He is not a direct member of "People" group.
Thanks in advance!
-- Every (group, user) pair reachable from a direct membership by walking up
-- the group hierarchy. UNION (not UNION ALL) removes repeated pairs.
WITH RECURSIVE group_ancestry (group_id, username) AS (
    -- seed: direct memberships
    SELECT gm.group_id, gm.username
    FROM group_members AS gm
  UNION
    -- step: parents of every group found so far
    SELECT gi.parent_group_id, ga.username
    FROM group_ancestry AS ga
    INNER JOIN groups_inheritance AS gi
        ON gi.group_id = ga.group_id
)
SELECT ga.username, ga.group_id
FROM group_ancestry AS ga
If you have just one level of inheritance (as in example), then you could use such query:
-- Names of the user's direct groups plus their immediate parents (one level).
WITH direct_groups AS (
    SELECT gm.group_id
    FROM group_members AS gm
    WHERE gm.username LIKE 'Maciej'
),
direct_and_parents AS (
    SELECT dg.group_id
    FROM direct_groups AS dg
    UNION
    SELECT DISTINCT gi.parent_group_id
    FROM groups_inheritance AS gi
    INNER JOIN direct_groups AS dg
        ON dg.group_id = gi.group_id
)
SELECT grp.group_name
FROM direct_and_parents AS dap
INNER JOIN groups AS grp
    ON grp.id = dap.group_id;
Result:
group_name
------------------
People
Students
Football_players
(3 rows)
PL/pgSQL function:
DROP FUNCTION IF EXISTS f(varchar(64));
-- Returns the names of the groups the given user belongs to: the direct
-- groups plus their immediate parents (one level of inheritance), without
-- duplicates.
CREATE FUNCTION f(username varchar(64))
RETURNS text[] AS $$
DECLARE
    gId bigint;
    pgId bigint;
    gName text;
    result text[] := '{}';
BEGIN
    -- The parameter shares its name with the column, so both are qualified
    -- (f.username is the parameter) to avoid an ambiguous reference.
    -- "=" is used instead of LIKE so that "_" or "%" inside a user name is
    -- not treated as a wildcard.
    FOR gId IN
        SELECT gm.group_id FROM group_members AS gm WHERE gm.username = f.username
    LOOP
        SELECT g.group_name INTO gName FROM groups AS g WHERE g.id = gId;
        -- A direct group may already be present as the parent of an earlier one.
        IF NOT (result @> ARRAY[gName]) THEN
            result := result || gName;
        END IF;
        FOR pgId IN
            SELECT gi.parent_group_id FROM groups_inheritance AS gi WHERE gi.group_id = gId
        LOOP
            SELECT g.group_name INTO gName FROM groups AS g WHERE g.id = pgId;
            -- "@>" is the array "contains" operator.
            IF NOT (result @> ARRAY[gName]) THEN
                result := result || gName;
            END IF;
        END LOOP;
    END LOOP;
    RETURN result;
END $$
LANGUAGE plpgsql;
Result:
SELECT f('Maciej');
f
------------------------------------
{Students,People,Football_players}
(1 row)
However for nested parent groups I think that recursion should be suitable.
EDIT:
Here is recursion-based variant for nested parent groups:
-- Returns the ids of all ancestors of gIdParam that are not already listed in
-- resultArrayParam (the groups collected so far), parents first.
CREATE OR REPLACE FUNCTION f_recursive(gIdParam bigint, resultArrayParam bigint[])
RETURNS bigint[] AS $$
DECLARE
    pgId bigint;
    -- Everything known so far: what the caller passed in plus what this call finds.
    -- Carrying it down the recursion prevents duplicates and endless recursion
    -- on cyclic inheritance.
    visited bigint[] := COALESCE(resultArrayParam, '{}');
    resultArray bigint[] := '{}';  -- only the ids discovered by this call
    found bigint[];
BEGIN
    FOR pgId IN SELECT parent_group_id FROM groups_inheritance WHERE group_id = gIdParam
    LOOP
        -- "@>" is the array "contains" operator.
        IF NOT (visited @> ARRAY[pgId]) THEN
            visited := visited || pgId;
            resultArray := resultArray || pgId;
            found := f_recursive(pgId, visited);
            visited := visited || found;
            resultArray := resultArray || found;
        END IF;
    END LOOP;
    RETURN resultArray;
END $$
LANGUAGE plpgsql;
-- Returns the names of all groups the user belongs to, directly or through
-- any depth of group inheritance, in discovery order and without duplicates.
CREATE OR REPLACE FUNCTION f(usernameParam varchar(64))
RETURNS text[] AS $$
DECLARE
    gId bigint;
    resultArray bigint[] := '{}';
BEGIN
    -- "=" instead of LIKE: "_" and "%" inside a user name must not act as wildcards.
    FOR gId IN
        SELECT gm.group_id FROM group_members AS gm WHERE gm.username = usernameParam
    LOOP
        -- Skip a direct group that was already reached as an ancestor of an
        -- earlier one; its own ancestors have been collected already.
        IF NOT (resultArray @> ARRAY[gId]) THEN
            resultArray := resultArray || gId;
            resultArray := resultArray || f_recursive(gId, resultArray);
        END IF;
    END LOOP;
    -- WITH ORDINALITY keeps the names in the order the ids were collected.
    RETURN (
        SELECT array_agg(g.group_name ORDER BY u.ord)
        FROM unnest(resultArray) WITH ORDINALITY AS u (group_id, ord)
        INNER JOIN groups AS g
            ON g.id = u.group_id
    );
END $$
LANGUAGE plpgsql;
Example insert:
-- Sample hierarchy: People <- Workers <- {Programmers <- AI-Programmers,
-- Administators, Managers}; Maciej is a direct member of groups 4 and 5.
INSERT INTO groups (id, group_name)
VALUES
    (1, 'People'),
    (2, 'Workers'),
    (3, 'Programmers'),
    (4, 'AI-Programmers'),
    (5, 'Administators'),
    (6, 'Managers');

INSERT INTO groups_inheritance (group_id, parent_group_id)
VALUES
    (2, 1),
    (3, 2),
    (4, 3),
    (5, 2),
    (6, 2);

INSERT INTO users (username, password, enabled)
VALUES
    ('Maciej', '12345', true);

INSERT INTO group_members (id, username, group_id)
VALUES
    (1, 'Maciej', 4),
    (2, 'Maciej', 5);
Result:
SELECT f('Maciej');
f
-----------------------------------------------------------
{AI-Programmers,Programmers,Workers,People,Administators}
(1 row)
Another way is to use a WITH query with the RECURSIVE modifier, as @araqnid showed.