PostgreSQL: Slow performance of user-defined function - sql

My function named stat() reads from 2 tables on PostgreSQL 11.
Table T has ~1,000,000 rows, the table D has ~3,000 rows.
My function stat() takes 1.5 seconds to run, which is too slow for my use case:
select * from stat('2019-01-01', '2019-10-01','UTC');
To improve performance I tried to create different indexes (code below), but it did not help.
I was able to improve performance when I put the hard-coded literals '2019-01-01' and '2019-10-01' in the body of stat() instead of the parameters time_start and time_end.
In this case it runs 0.5 sec. But this is not the solution.
-- Table T: rows counted by stat(), filtered by time and matched to D via ext_id.
CREATE TABLE T (
    id     serial PRIMARY KEY,
    time   timestamptz NOT NULL,
    ext_id integer
);

-- Table D: rows looked up by ext_id and filtered by time.
CREATE TABLE D (
    id     serial PRIMARY KEY,
    time   timestamptz NOT NULL,
    ext_id integer NOT NULL
);

-- Single-column indexes on the filter column (time) and the join column (ext_id).
CREATE INDEX t_time_idx ON T (time);
CREATE INDEX d_time_idx ON D (time);
CREATE INDEX t_ext_idx  ON T (ext_id);
CREATE INDEX d_ext_idx  ON D (ext_id);
-- stat(time_start, time_end, tz)
--   Returns one row: the number of T rows whose wall-clock time in zone tz lies
--   in [time_start, time_end] and whose ext_id has at least one D row earlier
--   than time_end, divided by (capacity * time_points).
--   time_start / time_end are timestamp literals passed as text; tz is a time
--   zone name such as 'UTC'.
--   NOTE: the predicates deliberately keep their original shape (AT TIME ZONE
--   applied to the column, parameters cast inside the query). That shape is
--   what makes this version slow.
CREATE OR REPLACE FUNCTION stat(time_start varchar, time_end varchar, tz varchar)
RETURNS TABLE (result float)
AS $$
DECLARE
    time_points INTEGER;
    capacity INTEGER;
BEGIN
    time_points := 1000;
    capacity := 12;

    RETURN QUERY
    SELECT (total::float / (capacity * time_points))::float AS result
    FROM (
        SELECT count(*)::float AS total
        FROM T
        INNER JOIN (
            -- One row per ext_id present in D before time_end (rk = 1 keeps
            -- the most recent one), so the join cannot multiply T rows.
            SELECT InB.ext_id
            FROM (
                SELECT ext_id,
                       ROW_NUMBER() OVER (PARTITION BY ext_id ORDER BY time DESC) AS rk
                FROM D
                WHERE time AT TIME ZONE tz < time_end::timestamp
            ) InB
            WHERE InB.rk = 1
        ) D_INFO
            ON T.ext_id = D_INFO.ext_id
        WHERE T.time AT TIME ZONE tz BETWEEN time_start::timestamp AND time_end::timestamp
    ) B;
END;
$$
LANGUAGE plpgsql;
Usage:
select * from stat('2019-01-01', '2019-10-01','UTC'); --> takes 1.5 sec, too slow
What I tried:
ANALYZE T;
ANALYZE D;
I created different indexes for T and D tables
-- Keyed on ext_id, with time carried along as a non-key INCLUDE column.
CREATE INDEX covering_t_time_ext_idx ON t(ext_id) INCLUDE (time);
-- Keyed on time, with ext_id carried along as a non-key INCLUDE column.
CREATE INDEX t_time_ext_idx ON T(time) INCLUDE (ext_id);
-- Two-column key, time first.
CREATE INDEX t_time_ext_multicolumn_idx ON t(time, ext_id);
-- Two-column key, ext_id first.
CREATE INDEX t_time_ext_multicolumn2_idx ON t(ext_id, time);
but it did not help to improve performance.

Try this simplified version of the function, which replaces the window-function join with EXISTS:
-- stat(time_start, time_end, tz)
--   Returns one row: the number of T rows inside [time_start, time_end] whose
--   ext_id has at least one D row earlier than time_end, divided by
--   (capacity * time_points).
--   time_start / time_end are timestamp literals passed as text, interpreted
--   as wall-clock time in zone tz (e.g. 'UTC').
CREATE OR REPLACE FUNCTION stat(time_start varchar, time_end varchar, tz varchar)
RETURNS TABLE (result float)
AS $$
DECLARE
    time_points CONSTANT integer := 1000;
    capacity    CONSTANT integer := 12;
    -- Convert the text bounds to absolute instants once, up front. Comparing
    -- the bare timestamptz column against these values leaves the predicate
    -- usable with a plain index on the time column, and avoids casting the
    -- parameters for every row inside the query.
    -- NOTE(review): this matches the per-row "time AT TIME ZONE tz" comparison
    -- except when a bound falls in a DST-ambiguous or skipped local hour.
    range_start timestamptz := time_start::timestamp AT TIME ZONE tz;
    range_end   timestamptz := time_end::timestamp AT TIME ZONE tz;
BEGIN
    RETURN QUERY
    SELECT (count(*)::float / (capacity * time_points))::float AS result
    FROM T
    WHERE T.time BETWEEN range_start AND range_end
      AND EXISTS (
          SELECT 1
          FROM D
          WHERE D.ext_id = T.ext_id
            AND D.time < range_end
      );
END;
$$
LANGUAGE plpgsql;

I solved this by casting the input parameters:
(time_start varchar, time_end varchar)
into intermediate variables with type timestamp:
DECLARE
start_time timestamp;
end_time timestamp;
BEGIN
start_time := time_start::timestamp;
end_time := time_end::timestamp;
and then using these intermediate variables in the SQL instead of doing the casting inside the query.

Related

ERROR: function pg_catalog.extract(unknown, integer) does not exist

I am writing an SQL query for creating the partitions which looks like:
DO
$$
-- Anonymous block that tries to create one list partition per day between the
-- smallest and largest create_date found in db.table.
DECLARE
table_name text := 'table_1';
start_date date := (SELECT MIN(create_date)
FROM db.table);
end_date date := (SELECT MAX(create_date)
FROM db.table);
partition_interval interval := '1 day';
partition_column_value text;
BEGIN
-- NOTE: end_date and start_date are both of type date, so the subtraction
-- below yields an integer, and extract() has no variant that accepts an
-- integer. This is the expression that raises the reported error.
FOR partition_column_value IN SELECT start_date +
(generate_series * extract(day from partition_interval)::integer)::date
FROM generate_series(0, extract(day from end_date - start_date::date) /
extract(day from partition_interval))
LOOP
-- %1$s is table_name and %2$s is the partition value; both are interpolated
-- with %s, i.e. without identifier or literal quoting.
EXECUTE format(
'create table if not exists %1$s_%2$s partition of %1$s for values in (%2$s) partition by list (create_date)',
table_name, partition_column_value::date);
END LOOP;
END
$$;
I get an error:
[42883] ERROR: function pg_catalog.extract(unknown, integer) does not exist
Hint: No function matches the given name and argument types. You might need to add explicit type casts.
Where: PL/pgSQL function inline_code_block line 9 at FOR over SELECT rows
The immediate cause of the error msg is this:
extract(day from end_date - start_date::date)
It's nonsense to cast start_date::date, start_date being type date to begin with. More importantly, date - date yields integer (not interval like you might assume). And extract() does not operate on integer input.
I removed more confusion and noise to arrive at this:
DO
$do$
-- Creates one list partition of table_name per day between the smallest and
-- largest create_date found in db.table. Does nothing when db.table is empty:
-- both bounds are then NULL and generate_series() returns no rows.
DECLARE
   table_name             text    := 'table_1';
   partition_interval     integer := 1;  -- given in days!!
   start_date             date;
   end_date               date;
   partition_column_value date;  -- date, not text: to_char() below needs a date/time argument
BEGIN
   SELECT INTO start_date, end_date  -- two assignments for the price of one
          min(create_date), max(create_date)
   FROM   db.table;

   FOR partition_column_value IN
      SELECT start_date + g * partition_interval              -- date + int → date
      FROM   generate_series(0, (end_date - start_date)       -- date - date → int
                                / partition_interval) g
   LOOP
      -- %1$I = partition name, %2$I = parent table, %3$L = partition value.
      -- The parent must be argument 2: using %1$I in both places would declare
      -- the new table a partition of itself.
      EXECUTE format(
         'CREATE TABLE IF NOT EXISTS %1$I PARTITION OF %2$I
          FOR VALUES IN (%3$L) PARTITION BY LIST (create_date)'
       , table_name || to_char(partition_column_value, '"_"yyyymmdd')  -- !
       , table_name
       , partition_column_value::text  -- only covers single day!!
      );
   END LOOP;
END
$do$;
This should work.
But it only makes sense for the example interval of '1 day'. For longer intervals, concatenate the list of days per partition or switch to range partitioning ...

How to compare two values in PL/pgSQL?

I have this code where I try to load a dimension table if the date of the current day has not been loaded yet. It is not loading any record, is my comparison wrong?
-- load_dimDate()
--   Inserts today's date row (as reported by get_fecha / get_year / get_month /
--   get_day) unless the newest date already stored equals it.
CREATE OR REPLACE PROCEDURE load_dimDate()
LANGUAGE plpgsql AS
$$
DECLARE
   _date  date := get_fecha();
   _year  int  := get_year();
   _month int  := get_month();
   _day   int  := get_day();
BEGIN
   -- max() over an empty table is NULL, and "_date <> NULL" evaluates to NULL,
   -- so with <> the IF branch is never taken and the first row is never
   -- loaded. IS DISTINCT FROM treats NULL as an ordinary, comparable value.
   IF _date IS DISTINCT FROM (SELECT max(date) FROM dimDate) THEN
      -- NOTE(review): the check reads dimDate but the insert targets
      -- dimfechas; confirm that both names refer to the same table.
      INSERT INTO dimfechas(date, year, month, day)
      VALUES (_date, _year, _month, _day);
   END IF;
END
$$;
Both the variable _date and the select statement are of type DATE.

Parse returned record type into multiple columns in PostgreSql

I have a PostgreSQL function which returns multiple values as a record. I need to take that record and split it into multiple columns.
I call function inside select statement like below:
SELECT bank_id, myfunction(document_id,'DF') FROM timeline_chronicle WHERE document_id = 102;
Above statement returns like this:
--------------------------------------
bank_id | record
----------------------------------------
9006 | (DOCUMENT_IS_ACCEPTED,"2020-07-03 16:37:28","2020-07-03 16:41:58",270)
----------------------------------------------------------------------------------
But i need below version
--------------------------------------
bank_id | event_type | start_time | end_time | difference |
-----------------------------------------------------------------------
9006 | DOCUMENT_IS_ACCEPTED | 2020-07-03 16:37:28 | 2020-07-03 16:41:58 | 270
----------------------------------------
In addition, I tried the version below, but it does not work either:
SELECT bank_id, myfunction(document_id,'DF') as (event_type text,start_time TIMESTAMP,end_time TIMESTAMP,difference integer) FROM timeline_chronicle WHERE document_id = 102;
The PostgreSQL version is 9.6.18. My PostgreSQL function is below:
-- myfunction(doc_id, oper_name2)
--   For document doc_id, returns an anonymous record
--     (event_type text, start_time timestamp, end_time timestamp, difference integer)
--   where start_time is the earliest created_at of the document, end_time and
--   event_type come from its first DOCUMENT_IS_DENY / DOCUMENT_IS_ACCEPTED
--   event, and difference is end_time - start_time in seconds.
--   created_at is passed through to_timestamp(), i.e. treated as an epoch value.
--   oper_name2 is accepted for interface compatibility but is not used.
--   NOTE: "table" is the placeholder name used for the events table; replace
--   it with the real table name.
CREATE OR REPLACE FUNCTION myfunction(doc_id integer,oper_name2 text)
RETURNS RECORD AS $$
DECLARE
    start_tim  timestamp(0) without time zone;
    end_tim    timestamp(0) without time zone;
    event_typo text;
    tim_diff   integer;
    ret        RECORD;
BEGIN
    -- Filter on the doc_id argument; the document id was previously
    -- hard-coded, so the argument had no effect on the result.
    SELECT to_timestamp(MIN(created_at))::timestamp
    INTO start_tim
    FROM table
    WHERE document_id = doc_id;

    -- First terminal (deny / accept) event of the document.
    SELECT event_type, to_timestamp(created_at)::timestamp
    INTO event_typo, end_tim
    FROM table
    WHERE document_id = doc_id
      AND event_type IN ('DOCUMENT_IS_DENY','DOCUMENT_IS_ACCEPTED')
    ORDER BY created_at ASC
    LIMIT 1;

    tim_diff := extract(epoch from (end_tim - start_tim));

    SELECT event_typo, start_tim, end_tim, tim_diff INTO ret;
    RETURN ret;
END;
$$ LANGUAGE plpgsql;
You need a typed record in order to see column names. For that you need to create a proper return type:
-- Composite type naming the four fields of the record returned by myfunction,
-- so that callers can expand it into columns.
create type function_result as
(
event_type text,
start_time TIMESTAMP,
end_time TIMESTAMP,
-- NOTE(review): the function computes this value in an integer variable
-- (tim_diff); bigint here is wider than that — confirm the intended type.
difference bigint
);
Then in your function use:
return (event_typo,start_tim,end_tim,tim_diff)::function_result;
(no need for the ret variable)
Then you can expand the record in the query into columns using:
SELECT bank_id, (myfunction(document_id,'DF')).*
FROM timeline_chronicle
WHERE document_id = 102;
Note that:
tim_diff := (select extract(epoch from (end_tim-start_tim)));
can be simplified to:
tim_diff := extract(epoch from (end_tim-start_tim));

PostgreSQL function Return table

I want to set up a function in PostgreSQL which returns a table. This is the source code of the function:
-- feiertag(inDate)
--   Returns exactly one row: inDate itself plus five 0/1 flags telling whether
--   table feiertage contains a row for inDate (f_heute) and for inDate + 1, 2,
--   3 and 5 days (f_1, f_2, f_3, f_5).
--   Call it as "SELECT * FROM feiertag(...)" to get separate columns.
CREATE OR REPLACE FUNCTION feiertag(inDate Date)
RETURNS TABLE (eingabeDatum DATE, f_heute INT, f_1 INT, f_2 INT, f_3 INT, f_5 INT)
AS $$
BEGIN
    -- One statement instead of five SELECT INTO round-trips, and no local
    -- variables shadowing the output columns. "date + integer" stays of type
    -- date, whereas "date + interval" would widen to timestamp.
    RETURN QUERY
    SELECT inDate,
           (EXISTS (SELECT 1 FROM feiertage AS f WHERE f.datum = inDate))::int,
           (EXISTS (SELECT 1 FROM feiertage AS f WHERE f.datum = inDate + 1))::int,
           (EXISTS (SELECT 1 FROM feiertage AS f WHERE f.datum = inDate + 2))::int,
           (EXISTS (SELECT 1 FROM feiertage AS f WHERE f.datum = inDate + 3))::int,
           (EXISTS (SELECT 1 FROM feiertage AS f WHERE f.datum = inDate + 5))::int;
END;
$$ LANGUAGE plpgsql;
Calling the function returns only one column with ',' separated values:
psql (9.5.12)
Type "help" for help.
tarec=> select feiertag('2017-01-01');
feiertag
------------------------
(2017-01-01,1,0,0,0,0)
(1 row)
I expected different columns (one for each value, as specified in the RETURNS TABLE clause at the beginning of the function) and not a single column containing all values. Does anybody know why this is happening and how I could fix it?
Thanks
Timo
Use
SELECT *
FROM feiertag('2017-01-01');
instead of
SELECT feiertag('2017-01-01');
to get the result as a table.
(Treat the function as if it were a table.)

Return multiple fields as a record in PostgreSQL with PL/pgSQL

I am writing a SP, using PL/pgSQL.
I want to return a record, comprised of fields from several different tables. Could look something like this:
CREATE OR REPLACE FUNCTION get_object_fields(name text)
RETURNS RECORD AS $$
BEGIN
-- fetch fields f1, f2 and f3 from table t1
-- fetch fields f4, f5 from table t2
-- fetch fields f6, f7 and f8 from table t3
-- return fields f1 ... f8 as a record
END
$$ language plpgsql;
How may I return the fields from different tables as fields in a single record?
[Edit]
I have realized that the example I gave above was slightly too simplistic. Some of the fields I need to be retrieving, will be saved as separate rows in the database table being queried, but I want to return them in the 'flattened' record structure.
The code below should help illustrate further:
-- "user" is a reserved word in PostgreSQL, so the table name must be
-- double-quoted here and wherever the table is referenced.
CREATE TABLE "user" (
    id        int,
    school_id int,
    name      varchar(32)
);

-- Flattened result row holding the id and name of two users.
CREATE TYPE my_type AS (
    user1_id   int,
    user1_name varchar(32),
    user2_id   int,
    user2_name varchar(32)
);
-- get_two_users_from_school(schoolid)
--   Returns one my_type row holding id and name of the two users of the given
--   school with the lowest ids. Fields stay NULL when the school has fewer
--   than two users.
CREATE OR REPLACE FUNCTION get_two_users_from_school(schoolid int)
RETURNS my_type AS $$
DECLARE
    result my_type;
BEGIN
    -- A PL/pgSQL row variable holds a single row and cannot be indexed like an
    -- array, so each user is fetched separately, straight into the matching
    -- fields of the result. ORDER BY makes "first" and "second" well defined.
    SELECT u.id, u.name
    INTO result.user1_id, result.user1_name
    FROM "user" AS u
    WHERE u.school_id = schoolid
    ORDER BY u.id
    LIMIT 1;

    SELECT u.id, u.name
    INTO result.user2_id, result.user2_name
    FROM "user" AS u
    WHERE u.school_id = schoolid
    ORDER BY u.id
    OFFSET 1
    LIMIT 1;

    RETURN result;
END
$$ LANGUAGE plpgsql;
Don't use CREATE TYPE to return a polymorphic result. Use and abuse the RECORD type instead. Check it out:
-- test_ret(a, b)
--   Returns an anonymous record whose shape depends on the input:
--     (TRUE,  a || b, 'a shorter than b')  when a is shorter than b
--     (FALSE, b || a)                      otherwise
CREATE FUNCTION test_ret(a TEXT, b TEXT) RETURNS RECORD AS $$
DECLARE
    outcome RECORD;
BEGIN
    -- Three-column answer, returned early.
    IF char_length(a) < char_length(b) THEN
        SELECT TRUE, a || b, 'a shorter than b' INTO outcome;
        RETURN outcome;
    END IF;

    -- Two-column answer for every other case.
    SELECT FALSE, b || a INTO outcome;
    RETURN outcome;
END;
$$ LANGUAGE plpgsql;
Pay attention to the fact that it can optionally return two or three columns depending on the input.
test=> SELECT test_ret('foo','barbaz');
test_ret
----------------------------------
(t,foobarbaz,"a shorter than b")
(1 row)
test=> SELECT test_ret('barbaz','foo');
test_ret
----------------------------------
(f,foobarbaz)
(1 row)
This does wreak havoc on code, so do use a consistent number of columns, but it's ridiculously handy for returning optional error messages with the first parameter returning the success of the operation. Rewritten using a consistent number of columns:
-- test_ret(a, b)
--   Returns an anonymous record that always has three columns
--   (success boolean, combined text, message text):
--     (TRUE,  a || b, 'a shorter than b')  when a is shorter than b
--     (FALSE, b || a, NULL)                otherwise
--   Callers expand it with a column definition list, e.g.
--     SELECT * FROM test_ret('x', 'yz') AS (a BOOL, b TEXT, c TEXT);
--   OR REPLACE lets this definition supersede an existing test_ret(text, text)
--   instead of failing because the function already exists.
CREATE OR REPLACE FUNCTION test_ret(a TEXT, b TEXT) RETURNS RECORD AS $$
DECLARE
    ret RECORD;
BEGIN
    -- Note the CASTING being done for the 2nd and 3rd elements of the RECORD:
    -- it pins the column types so that both branches produce the same row type.
    IF LENGTH(a) < LENGTH(b) THEN
        ret := (TRUE, (a || b)::TEXT, 'a shorter than b'::TEXT);
    ELSE
        ret := (FALSE, (b || a)::TEXT, NULL::TEXT);
    END IF;
    RETURN ret;
END;
$$ LANGUAGE plpgsql;
Almost to epic hotness:
test=> SELECT test_ret('foobar','bar');
test_ret
----------------
(f,barfoobar,)
(1 row)
test=> SELECT test_ret('foo','barbaz');
test_ret
----------------------------------
(t,foobarbaz,"a shorter than b")
(1 row)
But how do you split that out into multiple columns so that your ORM layer of choice can convert the values into your language of choice's native data types? The hotness:
test=> SELECT a, b, c FROM test_ret('foo','barbaz') AS (a BOOL, b TEXT, c TEXT);
a | b | c
---+-----------+------------------
t | foobarbaz | a shorter than b
(1 row)
test=> SELECT a, b, c FROM test_ret('foobar','bar') AS (a BOOL, b TEXT, c TEXT);
a | b | c
---+-----------+---
f | barfoobar |
(1 row)
This is one of the coolest and most underused features in PostgreSQL. Please spread the word.
You need to define a new type and define your function to return that type.
-- Result row type. It must declare every field the function assigns to,
-- including f3.
CREATE TYPE my_type AS (f1 varchar(10), f2 varchar(10), f3 varchar(10) /* , ... */ );

-- get_object_fields(name)
--   Builds a single my_type row from fields read from table1 and table2.
--   The lookup keys (42 and 24) are example values; the name argument is not
--   used by this sample body.
CREATE OR REPLACE FUNCTION get_object_fields(name text)
RETURNS my_type
AS
$$
DECLARE
    result_record my_type;
BEGIN
    SELECT f1, f2, f3
    INTO result_record.f1, result_record.f2, result_record.f3
    FROM table1
    WHERE pk_col = 42;

    -- NOTE(review): this overwrites the f3 value read from table1 just above;
    -- confirm whether a different target field was intended.
    SELECT f3
    INTO result_record.f3
    FROM table2
    WHERE pk_col = 24;

    RETURN result_record;
END
$$ LANGUAGE plpgsql;
If you want to return more than one record you need to define the function as returns setof my_type
Update
Another option is to use RETURNS TABLE() instead of creating a TYPE which was introduced in Postgres 8.4
CREATE OR REPLACE FUNCTION get_object_fields(name text)
RETURNS TABLE (f1 varchar(10), f2 varchar(10) /* , ... */ )
...
To return a single row
Simpler with OUT parameters:
-- get_object_fields(_school_id)
--   Always returns exactly one row through its OUT parameters:
--     user1_id, user1_name : one user of the given school
--     user2_id             : user1_id + 1
--     user2_name           : name of the user whose id equals user2_id
--   Parameters stay NULL when the corresponding lookup finds nothing.
CREATE OR REPLACE FUNCTION get_object_fields(_school_id int
                                           , OUT user1_id int
                                           , OUT user1_name varchar(32)
                                           , OUT user2_id int
                                           , OUT user2_name varchar(32)) AS
$func$
BEGIN
    -- LIMIT 1 guarantees at most one row; without ORDER BY the pick is arbitrary.
    SELECT usr.id, usr.name
    INTO user1_id, user1_name
    FROM users AS usr
    WHERE usr.school_id = _school_id
    LIMIT 1;

    -- Derived value: the id immediately after the first user's id.
    user2_id := user1_id + 1;

    SELECT nxt.name
    INTO user2_name
    FROM users AS nxt
    WHERE nxt.id = user2_id;

    -- No RETURN needed: the OUT parameters are returned automatically.
END
$func$ LANGUAGE plpgsql;
Call:
SELECT * FROM get_object_fields(1);
You don't need to create a type just for the sake of this plpgsql function. It may be useful if you want to bind multiple functions to the same composite type. Else, OUT parameters do the job.
There is no RETURN statement. OUT parameters are returned automatically with this form that returns a single row. RETURN is optional.
Since OUT parameters are visible everywhere inside the function body (and can be used just like any other variable), make sure to table-qualify columns of the same name to avoid naming conflicts! (Better yet, use distinct names to begin with.)
Simpler yet - also to return 0-n rows
Typically, this can be simpler and faster if queries in the function body can be combined. And you can use RETURNS TABLE() (since Postgres 8.4, long before the question was asked) to return 0-n rows.
The example from above can be written as:
-- get_object_fields2(_school_id)
--   Returns zero or one row: a user of the given school paired with the user
--   whose id is exactly one higher. No row is returned when no such pair exists.
CREATE OR REPLACE FUNCTION get_object_fields2(_school_id int)
  RETURNS TABLE (user1_id   int
               , user1_name varchar(32)
               , user2_id   int
               , user2_name varchar(32)) AS
$func$
BEGIN
    RETURN QUERY
    SELECT first_usr.id,
           first_usr.name,
           next_usr.id,
           next_usr.name
    FROM users AS first_usr
    INNER JOIN users AS next_usr
        ON next_usr.id = first_usr.id + 1
    WHERE first_usr.school_id = _school_id
    LIMIT 1;  -- may be optional
END
$func$ LANGUAGE plpgsql;
Call:
SELECT * FROM get_object_fields2(1);
RETURNS TABLE is effectively the same as having a bunch of OUT parameters combined with RETURNS SETOF record, just shorter.
The major difference: this function can return 0, 1 or many rows, while the first version always returns 1 row.
Add LIMIT 1 like demonstrated to only allow 0 or 1 row.
RETURN QUERY is a simple way to return the results of a query directly.
You can use multiple instances in a single function to add more rows to the output.
db<>fiddle here (demonstrating both)
Varying row-type
If your function is supposed to dynamically return results with a different row-type depending on the input, read more here:
Refactor a PL/pgSQL function to return the output of various SELECT queries
If you have a table with this exact record layout, use its name as a type, otherwise you will have to declare the type explicitly:
-- Sketch of a function that returns a single row typed after an existing
-- table (mytable). The "…" lines are elisions in this sketch, not SQL.
CREATE OR REPLACE FUNCTION get_object_fields
(
name text
)
RETURNS mytable
AS
$$
-- One local variable per field to be collected.
DECLARE f1 INT;
DECLARE f2 INT;
…
DECLARE f8 INT;
-- Row variable with the layout of mytable; this is the value returned.
DECLARE retval mytable;
BEGIN
-- fetch fields f1, f2 and f3 from table t1
-- fetch fields f4, f5 from table t2
-- fetch fields f6, f7 and f8 from table t3
-- Assemble the collected values into one row of type mytable.
retval := (f1, f2, …, f8);
RETURN retval;
END
$$ language plpgsql;
You can achieve this simply by declaring the function as RETURNS SETOF record and using RETURN QUERY.
-- schemaName.get_two_users_from_school(schoolid)
--   Returns (id, name) of every user of the given school as anonymous records.
--   Callers must supply a column definition list, e.g.
--     SELECT * FROM schemaName.get_two_users_from_school(1) AS x(a bigint, b varchar);
CREATE OR REPLACE FUNCTION schemaName.get_two_users_from_school(schoolid bigint)
  RETURNS SETOF record
  LANGUAGE plpgsql
AS $function$
BEGIN
    RETURN QUERY
    SELECT usr.id,
           usr.name
    FROM schemaName.user AS usr
    WHERE usr.school_id = schoolid;
END;
$function$;
And call this function as : select * from schemaName.get_two_users_from_school(schoolid) as x(a bigint, b varchar);
You can do this using OUT parameters and a CROSS JOIN:
-- get_object_fields(my_name)
--   Returns one row (f1, f2): the name column of table1 and of table2 for the
--   rows whose name equals my_name in both tables.
CREATE OR REPLACE FUNCTION get_object_fields(my_name text, OUT f1 text, OUT f2 text)
AS $$
    SELECT first_tbl.name,
           second_tbl.name
    FROM table1 AS first_tbl
    CROSS JOIN table2 AS second_tbl
    WHERE first_tbl.name = my_name
      AND second_tbl.name = my_name;
$$ LANGUAGE SQL;
then use it as a table:
select get_object_fields( 'Pending') ;
get_object_fields
-------------------
(Pending,code)
(1 row)
or
select * from get_object_fields( 'Pending');
f1 | f2
---------+---------
Pending | code
(1 row)
or
select (get_object_fields( 'Pending')).f1;
f1
---------
Pending
(1 row)
-- Sample data: four users in school 10 and one in school 11, with
-- deliberately non-consecutive user_id values.
CREATE TABLE users (
    user_id   int,
    school_id int,
    name      text
);

-- Explicit column list, so the statement does not depend on column order.
INSERT INTO users (user_id, school_id, name)
VALUES (1,  10, 'alice')
     , (5,  10, 'boy')
     , (13, 10, 'cassey')
     , (17, 10, 'delores')
     , (4,  11, 'elaine');
I set user_id to arbitrary integers. The function's input parameter is the school_id. So if the school_id is 10, you would expect the following result:
user_id | name | user_id | name
---------+-------+---------+------
1 | alice | 5 | boy
So your query should be something like:
with a as (
select u1.user_id,
u1.name from users u1
where school_id = 10 order by user_id limit 1),
b as
(select u2.user_id,u2.name from users u2
where school_id = 10 order by user_id limit 1 offset 1 )
select * from a cross JOIN b ;
So let's wrap the query to the plpgsql function.
-- get_object_fields2(_school_id)
--   Returns one row describing the two users of the given school with the
--   lowest user_id values:
--     * two or more users: (first id, first name, second id, second name)
--     * exactly one user : that user repeated in both halves of the row
--     * no users         : raises the exception 'not found'
CREATE OR REPLACE FUNCTION
get_object_fields2(_school_id int)
RETURNS TABLE (user1_id int
             , user1_name text
             , user2_id int
             , user2_name text)
LANGUAGE plpgsql AS
$func$
BEGIN
    -- A single ranked pass replaces the separate count(*) plus two LIMIT
    -- queries, so the first and second user always come from the same
    -- ordering (also when user_id values tie).
    RETURN QUERY
    WITH ranked AS (
        SELECT u.user_id,
               u.name,
               row_number() OVER (ORDER BY u.user_id) AS rn
        FROM users AS u
        WHERE u.school_id = _school_id
        ORDER BY u.user_id
        LIMIT 2
    )
    SELECT first_usr.user_id,
           first_usr.name,
           -- No second user: repeat the first one.
           CASE WHEN second_usr.rn IS NULL THEN first_usr.user_id ELSE second_usr.user_id END,
           CASE WHEN second_usr.rn IS NULL THEN first_usr.name ELSE second_usr.name END
    FROM ranked AS first_usr
    LEFT JOIN ranked AS second_usr
        ON second_usr.rn = 2
    WHERE first_usr.rn = 1;

    -- RETURN QUERY sets FOUND to true only if it produced at least one row.
    IF NOT FOUND THEN
        RAISE EXCEPTION 'not found';
    END IF;
END
$func$;