How to compare two values in PL/pgSQL? - sql

I have this code where I try to load a dimension table if the date of the current day has not been loaded yet. It is not loading any record, is my comparison wrong?
-- Inserts one row built from get_fecha(), get_year(), get_month() and get_day()
-- unless that date already equals the newest date found in dimDate.
CREATE OR REPLACE PROCEDURE load_dimDate()
LANGUAGE plpgsql AS
$$
DECLARE
    _date  date := get_fecha();
    _year  int  := get_year();
    _month int  := get_month();
    _day   int  := get_day();
BEGIN
    -- MAX(date) is NULL while dimDate is empty, and `_date <> NULL` evaluates
    -- to NULL, which IF treats as false, so nothing was ever inserted.
    -- IS DISTINCT FROM compares NULL like an ordinary value instead.
    -- NOTE(review): the check reads dimDate but the insert writes dimfechas;
    -- confirm both names are meant to refer to the same table.
    IF _date IS DISTINCT FROM (SELECT MAX(date) FROM dimDate) THEN
        INSERT INTO dimfechas (date, year, month, day)
        VALUES (_date, _year, _month, _day);
    END IF;
END
$$;
Both the variable _date and the select statement are of type DATE.

Related

ERROR: function pg_catalog.extract(unknown, integer) does not exist

I am writing an SQL query for creating the partitions which looks like:
-- Anonymous block that tries to create one list partition of table_1 for every
-- day between the smallest and largest create_date found in db.table.
-- NOTE(review): as written, this block fails with the error quoted below it.
DO
$$
DECLARE
table_name text := 'table_1';
start_date date := (SELECT MIN(create_date)
FROM db.table);
end_date date := (SELECT MAX(create_date)
FROM db.table);
partition_interval interval := '1 day';
partition_column_value text;
BEGIN
FOR partition_column_value IN SELECT start_date +
(generate_series * extract(day from partition_interval)::integer)::date
FROM generate_series(0, extract(day from end_date - start_date::date) / -- date - date yields an integer, which extract() does not accept
extract(day from partition_interval))
LOOP
EXECUTE format( -- %1$s = table_name, %2$s = the loop value cast to date
'create table if not exists %1$s_%2$s partition of %1$s for values in (%2$s) partition by list (create_date)',
table_name, partition_column_value::date);
END LOOP;
END
$$;
I get an error:
[42883] ERROR: function pg_catalog.extract(unknown, integer) does not exist
Hint: No function matches the given name and argument types. You might need to add explicit type casts.
Where: PL/pgSQL function inline_code_block line 9 at FOR over SELECT rows
The immediate cause of the error msg is this:
extract(day from end_date - start_date::date)
It's nonsense to cast start_date::date, start_date being type date to begin with. More importantly, date - date yields integer (not interval like you might assume). And extract() does not operate on integer input.
I removed more confusion and noise to arrive at this:
-- Anonymous block that creates one list partition of table_1 for every day
-- between the smallest and largest create_date found in db.table.
DO
$do$
DECLARE
   table_name             text    := 'table_1';
   partition_interval     integer := 1;  -- given in days!!
   start_date             date;
   end_date               date;
   -- Declared as date (not text): the loop query yields dates, and to_char()
   -- below has no variant that takes a text first argument.
   partition_column_value date;
BEGIN
   SELECT INTO start_date, end_date  -- two assignments for the price of one
          min(create_date), max(create_date)
   FROM   db.table;

   FOR partition_column_value IN
      SELECT start_date + g * partition_interval         -- date + int → date
      FROM   generate_series(0, (end_date - start_date)  -- date - date → int
                                / partition_interval) g
   LOOP
      -- %1$I = new partition name, %2$I = parent table, %3$L = partition value.
      -- The parent must be %2$I; using %1$I there would name the partition
      -- itself as its own parent.
      EXECUTE format(
         'CREATE TABLE IF NOT EXISTS %1$I PARTITION OF %2$I
          FOR VALUES IN (%3$L) PARTITION BY LIST (create_date)'
       , table_name || to_char(partition_column_value, '"_"yyyymmdd')  -- !
       , table_name
       , partition_column_value::text  -- only covers single day!!
      );
   END LOOP;
END
$do$;
This should work.
But it only makes sense for the example interval of '1 day'. For longer intervals, concatenate the list of days per partition or switch to range partitioning ...

How to create a function in Vertica?

I have started learning Vertica. I tried to create a custom function, but I am getting an error. Please help me.
My SQL code:
-- Appears intended to return the status of the status_warn_level row with the
-- smallest `days` value that is >= day_count, falling back to 'RED'.
-- NOTE(review): the function is created in schema itog_report_status, but the
-- queries read from itog_reports_status — confirm which spelling is correct.
-- NOTE(review): the $function$ body opened below is never closed in this snippet.
create or REPLACE FUNCTION itog_report_status.get_warn_lvl(day_count NUMERIC)
returns varchar
AS $function$
DECLARE res varchar ;
BEGIN
-- NOTE(review): this SELECT has no INTO clause, so res is never assigned.
select status::varchar
from itog_reports_status.status_warn_level
where days = (
select MIN(days)
from itog_reports_status.status_warn_level
where day_count <= days);
-- NOTE(review): the RETURN statement below has no terminating semicolon.
RETURN COALESCE(res, 'RED')
END;

PostgreSQL: Slow performance of user-defined function

My function named stat() reads from 2 tables on PostgreSQL 11.
Table T has ~1,000,000 rows, the table D has ~3,000 rows.
My function stat() runs 1.5 secs and it is slow for my use-case:
SELECT result FROM stat('2019-01-01', '2019-10-01', 'UTC');
To improve performance I tried to create different indexes (code below), but it did not help.
I was able to improve performance when I put the hardcoded values '2019-01-01' and '2019-10-01' in place of time_start and time_end in the body of stat().
In this case it runs 0.5 sec. But this is not the solution.
-- Table t: about 1,000,000 rows according to the description above.
CREATE TABLE t (
    id     serial      PRIMARY KEY,
    time   timestamptz NOT NULL,
    ext_id integer
);

-- Table d: about 3,000 rows according to the description above.
CREATE TABLE d (
    id     serial      PRIMARY KEY,
    time   timestamptz NOT NULL,
    ext_id integer     NOT NULL
);

-- Single-column indexes on the time and ext_id columns of both tables.
CREATE INDEX t_time_idx ON t (time);
CREATE INDEX d_time_idx ON d (time);
CREATE INDEX t_ext_idx  ON t (ext_id);
CREATE INDEX d_ext_idx  ON d (ext_id);
-- Returns one row: the number of T rows inside [time_start, time_end]
-- (evaluated in time zone tz) whose ext_id has at least one D row earlier than
-- time_end, divided by capacity * time_points.
--   time_start, time_end : text that is cast to timestamp
--   tz                   : time zone name applied to the stored timestamps
CREATE OR REPLACE FUNCTION stat(time_start varchar, time_end varchar, tz varchar)
RETURNS TABLE (result float)
AS $$
DECLARE
    time_points INTEGER;
    capacity INTEGER;
BEGIN
    time_points := 1000;
    capacity := 12;
    RETURN QUERY
    SELECT (total::float / (capacity * time_points))::float AS result
    FROM (
        SELECT count(*)::float AS total
        FROM T
        INNER JOIN (
            -- Keep only the latest D row per ext_id (rk = 1), so the join
            -- cannot multiply T rows.
            SELECT InB.ext_id
            FROM (
                -- Was `ext _id, name`: the stray space made it column "ext"
                -- aliased "_id", and D has no "name" column.
                SELECT ext_id,
                       ROW_NUMBER() OVER (PARTITION BY ext_id ORDER BY time DESC) AS rk
                FROM D
                WHERE time AT TIME ZONE tz < time_end::timestamp
            ) InB
            WHERE InB.rk = 1
        ) D_INFO
            ON T.ext_id = D_INFO.ext_id
        WHERE T.time AT TIME ZONE tz BETWEEN time_start::timestamp AND time_end::timestamp
    ) B;
END;
$$
LANGUAGE plpgsql;
Usage:
SELECT result FROM stat('2019-01-01', '2019-10-01', 'UTC');  -- takes 1.5 sec, too slow
What I tried:
-- Refresh planner statistics for both tables.
ANALYZE t;
ANALYZE d;
I created different indexes for T and D tables
-- Index variants tried on t, combining time and ext_id in different ways.
CREATE INDEX covering_t_time_ext_idx
    ON t (ext_id) INCLUDE (time);
CREATE INDEX t_time_ext_idx
    ON t (time) INCLUDE (ext_id);
CREATE INDEX t_time_ext_multicolumn_idx
    ON t (time, ext_id);
CREATE INDEX t_time_ext_multicolumn2_idx
    ON t (ext_id, time);
but it did not help to improve performance.
Try this simplified version of the function:
-- Returns one row: the number of t rows inside [time_start, time_end]
-- (evaluated in time zone tz) for which some d row with the same ext_id lies
-- before time_end, divided by 12 * 1000.
CREATE OR REPLACE FUNCTION stat(time_start varchar, time_end varchar, tz varchar)
RETURNS TABLE (result float)
LANGUAGE plpgsql
AS $func$
DECLARE
    n_points   integer := 1000;
    n_capacity integer := 12;
BEGIN
    RETURN QUERY
    -- An aggregate without GROUP BY always yields exactly one row, so no
    -- wrapping subquery is needed to compute the ratio.
    SELECT (count(*)::float / (n_capacity * n_points))::float AS result
    FROM   t
    WHERE  t.time AT TIME ZONE tz BETWEEN time_start::timestamp AND time_end::timestamp
    AND    EXISTS (
               SELECT 1
               FROM   d
               WHERE  d.ext_id = t.ext_id
               AND    d.time AT TIME ZONE tz < time_end::timestamp
           );
END;
$func$;
I solved this by casting the input parameters:
(time_start varchar, time_end varchar)
into intermediate variables with type timestamp:
-- Fragment of the stat() body: the varchar parameters are cast once into
-- timestamp variables before the query runs.
DECLARE
start_time timestamp; -- holds time_start as a timestamp
end_time timestamp; -- holds time_end as a timestamp
BEGIN
start_time := time_start::timestamp;
end_time := time_end::timestamp;
and using these intermediate variables in the SQL instead of doing the casting inside the SQL.

Finding Highest Value

I have a function which takes two values and returns setof record, as my assignment requires.
Now I have a different question, and I want to take this function but instead of returning a setof record, I want one of these options, whatever is easier:
Return a table instead of a setof.
Create a table before calling the function (table called "temp"), fill it with values.
Here's my function, which takes a month and a year as parameters and returns the payment a lawyer has to be given on the files he's worked on that month and year:
-- Yields one record (fid, lname, totpay) for every billing row dated in the
-- given month and year; totpay is the lawyer's hbilling times the billed hours.
-- NATURAL JOIN is kept as-is because the shared join columns are not visible here.
CREATE OR REPLACE FUNCTION calcbilling(cmonth int, cyear int) RETURNS SETOF record
AS $$
DECLARE
    billing_row record;
BEGIN
    FOR billing_row IN
        SELECT billing.fid,
               billing.lname,
               (lawyer.hbilling * billing.hours) AS totpay
        FROM   billing NATURAL JOIN lawyer
        WHERE  date_part('year', billing.bdate) = cyear
        AND    date_part('month', billing.bdate) = cmonth
    LOOP
        RETURN NEXT billing_row;
    END LOOP;
END;
$$ LANGUAGE plpgsql;
The function returning table is simpler (and much more handy of course) than original one:
-- Table-returning variant: one (fid, lname, totpay) row per billing entry dated
-- in the given month and year.
CREATE OR REPLACE FUNCTION calc_billing(cmonth int, cyear int)
RETURNS TABLE (fid integer, lname text, totpay numeric)
LANGUAGE plpgsql
AS $func$
BEGIN
    RETURN QUERY
    SELECT b.fid,
           b.lname,
           l.hbilling * b.hours AS totpay
    FROM   billing b NATURAL JOIN lawyer l
    WHERE  date_part('year', b.bdate) = cyear
    AND    date_part('month', b.bdate) = cmonth;
END;
$func$;

-- Example call:
SELECT fid, lname, totpay FROM calc_billing(1, 2016);
The sql (instead of plpgsql) variant is even simpler:
-- Plain-SQL variant: the function body is the query itself.
CREATE OR REPLACE FUNCTION calc_billing(cmonth int, cyear int)
RETURNS TABLE (fid integer, lname text, totpay numeric)
LANGUAGE sql
AS $func$
    SELECT b.fid,
           b.lname,
           l.hbilling * b.hours AS totpay
    FROM   billing b NATURAL JOIN lawyer l
    WHERE  date_part('year', b.bdate) = cyear
    AND    date_part('month', b.bdate) = cmonth;
$func$;
Note: I have changed the function name to make it easier to test (without name collisions). Change the column types if necessary to fit your model.

procedure to check if dates of new row overlaps with existing dates in the table

I am trying to write a procedure to check if parameters given (dates) lie between any of the existing dates in the table. And if not insert new row.
-- Attempts to insert (date1, date2) into dataHolder unless an existing
-- fdate/tdate pair encloses the new range, then prints 'success' or 'invalid'.
CREATE OR REPLACE PROCEDURE test(date1 IN DATE, date2 IN DATE) AS
ddate1 DATE;
ddate2 DATE;
quer VARCHAR2(50);
BEGIN
-- This SELECT has no WHERE clause and fetches into scalar variables, so it
-- only succeeds when dataHolder returns exactly one row.
SELECT fdate, tdate INTO ddate1, ddate2 FROM dataHolder;
-- 'invalid' when the fetched range strictly encloses (date1, date2).
IF (ddate1 < date1) AND (ddate2 > date2) THEN
quer := 'invalid';
ELSE
INSERT INTO dataHolder VALUES (date1, date2);
quer := 'success';
END IF;
DBMS_OUTPUT.PUT_LINE(quer);
END;
/
I have tried something like this but when executed I get this error:
ORA-01422: exact fetch returns more than requested number of rows
You are getting that error because your select statement returns more than one record. To simplify the process you could use merge statement and rewrite your procedure as follows:
-- Inserts (date1, date2) into dataHolder unless an existing row's range
-- strictly encloses it, then prints 'success' or 'invalid'.
CREATE OR REPLACE PROCEDURE test(date1 IN DATE, date2 IN DATE) AS
BEGIN
    -- DUAL supplies a single source row. It counts as "matched" when any
    -- existing range encloses the new one, which is the same test the original
    -- procedure applied (fdate < date1 AND tdate > date2). The previous ON
    -- clause compared date1 in the opposite direction.
    MERGE INTO dataHolder dh
    USING dual
    ON (dh.fdate < date1 AND dh.tdate > date2)
    WHEN NOT MATCHED THEN
        INSERT (dh.fdate, dh.tdate)
        VALUES (date1, date2);

    -- SQL%ROWCOUNT is the number of rows the MERGE inserted.
    IF SQL%ROWCOUNT > 0 THEN
        dbms_output.put_line('success');
    ELSE
        dbms_output.put_line('invalid');
    END IF;
END;
/
Your select statement fetches more than one record, whereas your code expects only one, since you're fetching into single-value variables. You could use BULK COLLECT and collect all the dates into a collection of dates, but I think you can improve on it with the code below:
-- Inserts (date1, date2) into dataHolder unless an existing row's range
-- strictly encloses it, then prints 'success' or 'invalid'.
CREATE OR REPLACE PROCEDURE test(date1 IN DATE, date2 IN DATE) AS
    invalidRecords NUMBER := 0;   -- number of existing ranges enclosing the new one
    quer           VARCHAR2(50);  -- message printed at the end
BEGIN
    -- An aggregate query returns exactly one row, so this SELECT ... INTO
    -- cannot raise ORA-01422 or NO_DATA_FOUND.
    SELECT COUNT(1)
    INTO   invalidRecords
    FROM   dataHolder
    WHERE  fdate < date1
    AND    tdate > date2;

    IF invalidRecords > 0 THEN
        quer := 'invalid';
    ELSE
        -- Explicit column list, so the insert does not depend on the table's
        -- column order.
        INSERT INTO dataHolder (fdate, tdate) VALUES (date1, date2);
        quer := 'success';
    END IF;

    DBMS_OUTPUT.PUT_LINE(quer);
END;
/
Since COUNT(1) will always return just one record, it will never throw an ORA-01422 error. Also, it will always return data, so you don't need to worry about NO_DATA_FOUND, as the value 0 will be fetched if there are no invalid records.
A small optimization of Nuno Guerreiro's answer:
-- Sets invalidRecords to 1 when at least one enclosing range exists, else 0.
SELECT COUNT(*)
INTO   invalidRecords
FROM   dual
WHERE  EXISTS (
           SELECT 1
           FROM   dataHolder
           WHERE  fdate < date1
           AND    tdate > date2
       );
This way the query can stop at the first matching row instead of counting all of them.