How to create dynamic partition table in postgres with bigint column?

How to create dynamic partition table in postgres with bigint column? - sql

I have a master table such as
-- Parent (master) table for Firebase user events.
-- "timestamp" is stored as a bigint (presumably Unix epoch seconds -- confirm).
CREATE TABLE public.user_event_firebase (
    user_id     varchar(32)  COLLATE pg_catalog."default" NOT NULL,
    event_name  varchar(255) COLLATE pg_catalog."default" NOT NULL,
    "timestamp" bigint       NOT NULL,
    platform    varchar(255) COLLATE pg_catalog."default" NOT NULL,
    created_at  timestamp    DEFAULT now()
)
WITH (OIDS = FALSE)
TABLESPACE pg_default;
GOAL
I want to partition this table by year and month of the "timestamp" column, into child tables such as user_event_firebase_2018_04, user_event_firebase_2018_05, user_event_firebase_2018_06. Inserted rows should be redirected automatically to the matching partition table based on their timestamp.
I created function create partition such as:
-- Intended to create one child table of user_event_firebase (plus an index on
-- its "timestamp" column) for every month between $1 and $2.
--
-- NOTE(review): as written this fails at run time. Both arguments are bigint,
-- so generate_series() resolves to its integer form and rejects the
-- '1 month' step ("invalid input syntax for integer"). The TO_CHAR date
-- patterns and the "d + INTERVAL" arithmetic likewise expect d to be a
-- timestamp, and the CHECK bounds are emitted as bigint '<YYYY-MM-DD>'
-- literals, which bigint cannot parse. Converting the arguments with
-- to_timestamp() and emitting numeric bounds in the CHECK is presumably what
-- is needed -- confirm against the intended unit of "timestamp".
CREATE OR REPLACE FUNCTION partition_uef_table( bigint, bigint )
returns void AS $$
DECLARE
create_query text; -- generated CREATE TABLE ... INHERITS statement
index_query text; -- generated CREATE INDEX statement for the same child table
BEGIN
-- Build both DDL strings for each generated value d, then execute them in order.
FOR create_query, index_query IN SELECT
'create table user_event_firebase_'
|| TO_CHAR( d, 'YYYY_MM' )
|| ' ( check( timestamp >= bigint '''
|| TO_CHAR( d, 'YYYY-MM-DD' )
|| ''' and timestamp < bigint '''
|| TO_CHAR( d + INTERVAL '1 month', 'YYYY-MM-DD' )
|| ''' ) ) inherits ( user_event_firebase );',
'create index user_event_firebase_'
|| TO_CHAR( d, 'YYYY_MM' )
|| '_time on user_event_firebase_'
|| TO_CHAR( d, 'YYYY_MM' )
|| ' ( timestamp );'
FROM generate_series( $1, $2, '1 month' ) AS d
LOOP
-- The child table must exist before its index can be created.
EXECUTE create_query;
EXECUTE index_query;
END LOOP;
END;
$$
language plpgsql;
-- Row-level trigger function for user_event_firebase.
--
-- Redirects each incoming row to the monthly child table
-- user_event_firebase_YYYY_MM and returns NULL, so that (when fired BEFORE
-- INSERT) the row is not also stored in the parent table.
--
-- NEW."timestamp" is a bigint, so it is converted with to_timestamp() -- i.e.
-- treated as Unix epoch seconds -- before the year/month suffix is formatted.
-- Calling to_char() directly on the bigint applies *numeric* formatting and
-- never produces a YYYY_MM suffix. The month is evaluated in the session
-- time zone.
--
-- Raises an error if the target child table does not exist.
CREATE OR REPLACE FUNCTION test_partition_function_uef()
RETURNS TRIGGER AS $$
DECLARE
    target_table text;
BEGIN
    target_table := 'user_event_firebase_'
        || to_char(to_timestamp(NEW."timestamp"), 'YYYY_MM');

    -- Explicit column list: the insert no longer depends on column order.
    -- created_at is left out on purpose so the column default applies,
    -- exactly as with the previous four-value positional insert.
    EXECUTE format(
        'INSERT INTO %I (user_id, event_name, "timestamp", platform) '
        || 'VALUES ($1, $2, $3, $4)',
        target_table
    )
    USING NEW.user_id, NEW.event_name, NEW."timestamp", NEW.platform;

    RETURN NULL;
END;
$$
LANGUAGE plpgsql;
with trigger
-- Run the routing function once for every row inserted into the parent table,
-- before the row is stored.
CREATE TRIGGER test_partition_trigger_uef
    BEFORE INSERT ON user_event_firebase
    FOR EACH ROW
    EXECUTE PROCEDURE test_partition_function_uef();
I trying with example
SELECT partition_uef_table(1518164237,1520583437) ;
PROBLEM :
ERROR: invalid input syntax for integer: "1 month"
LINE 14: FROM generate_series( $1, $2, '1 month' ) AS d
^
QUERY: SELECT
'create table user_event_firebase_'
|| TO_CHAR( d, 'YYYY_MM' )
|| ' ( check( timestamp >= bigint '''
|| TO_CHAR( d, 'YYYY-MM-DD' )
|| ''' and timestamp < bigint '''
|| TO_CHAR( d + INTERVAL '1 month', 'YYYY-MM-DD' )
|| ''' ) ) inherits ( user_event_firebase );',
'create index user_event_firebase_'
QUESTION:
How can I make generate_series step by '1 month'? Using an int or bigint step does not work well, because months have different lengths (for example, February has 28 days while other months have 30 or 31).
Thank you.

The answer to your second question would be opinion-based (so I'll skip it), but the answer to the first would be:
-- Convert both epoch values to timestamps and step through them one month
-- at a time, showing each value together with its YYYY_MM suffix.
WITH args (a1, a2) AS (
    VALUES (1518164237, 1520583437)
)
SELECT
    d,
    to_char(d, 'YYYY_MM')
FROM args
CROSS JOIN LATERAL generate_series(
    to_timestamp(a1),
    to_timestamp(a2),
    interval '1 month'
) AS d;
gives result:
d | to_char
------------------------+---------
2018-02-09 08:17:17+00 | 2018_02
2018-03-09 08:17:17+00 | 2018_03
(2 rows)
Use
generate_series(start, stop, step interval) timestamp or timestamp with time zone

Related

ERROR: function pg_catalog.extract(unknown, integer) does not exist

I am writing an SQL query for creating the partitions which looks like:
-- Intended to create one list partition of table_1 per partition_interval
-- between the smallest and largest create_date in db.table.
--
-- NOTE(review): this block fails with
-- "function pg_catalog.extract(unknown, integer) does not exist";
-- see the comment on the generate_series() line below.
DO
$$
DECLARE
table_name text := 'table_1';
start_date date := (SELECT MIN(create_date)
FROM db.table);
end_date date := (SELECT MAX(create_date)
FROM db.table);
partition_interval interval := '1 day';
partition_column_value text;
BEGIN
FOR partition_column_value IN SELECT start_date +
(generate_series * extract(day from partition_interval)::integer)::date -- NOTE(review): casts an integer product to date; looks invalid as well -- confirm
FROM generate_series(0, extract(day from end_date - start_date::date) / -- fails here: date - date yields an integer, and extract() does not accept integer input
extract(day from partition_interval))
LOOP
EXECUTE format(
'create table if not exists %1$s_%2$s partition of %1$s for values in (%2$s) partition by list (create_date)',
table_name, partition_column_value::date);
END LOOP;
END
$$;
I get an error:
[42883] ERROR: function pg_catalog.extract(unknown, integer) does not exist
Hint: No function matches the given name and argument types. You might need to add explicit type casts.
Where: PL/pgSQL function inline_code_block line 9 at FOR over SELECT rows

The immediate cause of the error msg is this:
extract(day from end_date - start_date::date)
It's nonsense to cast start_date::date, start_date being type date to begin with. More importantly, date - date yields integer (not interval like you might assume). And extract() does not operate on integer input.
I removed more confusion and noise to arrive at this:
-- Creates one list partition of table_1 per step of partition_interval days
-- between the smallest and largest create_date found in db.table
-- (db.table is the placeholder name used in the question).
-- Each partition is named table_1_yyyymmdd and holds exactly one date value.
DO
$do$
DECLARE
   table_name             text    := 'table_1';
   partition_interval     integer := 1;  -- given in days!!
   start_date             date;
   end_date               date;
   -- Declared as date (not text): to_char() below needs a date/timestamp
   -- argument; there is no to_char(text, text).
   partition_column_value date;
BEGIN
   SELECT INTO start_date, end_date  -- two assignments for the price of one
          min(create_date), max(create_date)
   FROM   db.table;

   -- If the source table is empty both bounds are NULL and the loop body
   -- simply never runs.
   FOR partition_column_value IN
      SELECT start_date + g * partition_interval          -- date + int → date
      FROM   generate_series(0, (end_date - start_date)   -- date - date → int
                              / partition_interval) g
   LOOP
      -- %1$I = new partition name, %2$I = parent table, %3$L = partition value.
      -- The parent must be %2$I; using %1$I twice would declare the new
      -- table a partition of itself.
      EXECUTE format(
         'CREATE TABLE IF NOT EXISTS %1$I PARTITION OF %2$I
          FOR VALUES IN (%3$L) PARTITION BY LIST (create_date)'
       , table_name || to_char(partition_column_value, '"_"yyyymmdd')  -- !
       , table_name
       , partition_column_value::text  -- only covers single day!!
      );
   END LOOP;
END
$do$;
This should work.
But it only makes sense for the example interval of '1 day'. For longer intervals, concatenate the list of days per partition or switch to range partitioning ...

Query in PostgreSQL with large quantity of squid access requests

Hello people, I'm using a log daemon (https://github.com/paranormal/blooper) with Squid Proxy to store the access log in PostgreSQL, and I wrote a trigger function:
-- Body of the trigger function that routes rows of public.accesses into a
-- per-user schema and a per-month child table "<user>.accesses_<month>_<year>"
-- (the CREATE FUNCTION header is not part of this snippet).
--
-- Schema, child table and both indexes are created on demand with
-- IF NOT EXISTS, so these DDL statements run for every processed row.
-- When NEW.user_name is NULL nothing is inserted and NULL is returned.
--
-- All identifiers and the user name literal are passed through format()
-- (%I / %L) instead of being concatenated into the SQL text, because
-- user_name originates from the proxy log and must not be able to alter the
-- generated statements.
DECLARE
    newtime     varchar := EXTRACT (MONTH FROM NEW."time")::varchar;
    newyear     varchar := EXTRACT (YEAR FROM NEW."time")::varchar;
    -- lower() reproduces the case folding an unquoted identifier receives,
    -- so schema names stay the same as those created by plain concatenation.
    schema_name varchar := lower(REPLACE (NEW.user_name, '.', '_'));
    partname    varchar := newtime || '_' || newyear;
    part_table  varchar := 'accesses_' || partname;
BEGIN
    IF NEW.user_name IS NOT NULL THEN
        EXECUTE format('CREATE SCHEMA IF NOT EXISTS %I', schema_name);

        -- newtime / newyear are numeric strings produced by EXTRACT, hence %s.
        EXECUTE format(
            'CREATE TABLE IF NOT EXISTS %I.%I ('
            || 'CHECK (user_name = %L AND EXTRACT(MONTH FROM "time") = %s AND EXTRACT (YEAR FROM "time") = %s)'
            || ') INHERITS (public.accesses)',
            schema_name, part_table, NEW.user_name, newtime, newyear
        );

        EXECUTE format(
            'CREATE INDEX IF NOT EXISTS %I ON %I.%I (user_name)',
            'access_index_' || partname || '_user_name', schema_name, part_table
        );
        EXECUTE format(
            'CREATE INDEX IF NOT EXISTS %I ON %I.%I ("time")',
            'access_index_' || partname || '_time', schema_name, part_table
        );

        -- The whole row is passed as a parameter and expanded column by column.
        EXECUTE format('INSERT INTO %I.%I SELECT ($1).*', schema_name, part_table)
        USING NEW;
    END IF;
    RETURN NULL;
END;
Its main purpose is to partition the data by user_name and by month/year of the access, with every partition inheriting from a clean master table:
-- Master table for Squid access-log entries; the per-user / per-month child
-- tables inherit from it.
CREATE TABLE public.accesses (
    id                   int           NOT NULL DEFAULT nextval('accesses_id_seq'::regclass),
    "time"               timestamptz   NOT NULL,
    time_response        int,
    mac_source           macaddr,
    ip_source            inet          NOT NULL,
    ip_destination       inet,
    user_name            varchar(40),
    http_status_code     numeric(3,0)  NOT NULL,
    http_reply_size      bigint        NOT NULL,
    http_request_method  varchar(15)   NOT NULL,
    http_request_url     varchar(4166) NOT NULL,
    http_content_type    varchar(100),
    squid_hier_code      varchar(20),
    squid_request_status varchar(50),
    user_id              int,

    -- Lookup-table references; updates and deletes of referenced rows are
    -- not cascaded.
    CONSTRAINT accesses_http_request_method_fkey
        FOREIGN KEY (http_request_method)
        REFERENCES public.http_requests (method) MATCH SIMPLE
        ON UPDATE NO ACTION ON DELETE NO ACTION,
    CONSTRAINT accesses_http_status_code_fkey
        FOREIGN KEY (http_status_code)
        REFERENCES public.http_statuses (code) MATCH SIMPLE
        ON UPDATE NO ACTION ON DELETE NO ACTION,
    CONSTRAINT accesses_user_id_fkey
        FOREIGN KEY (user_id)
        REFERENCES public.users (id) MATCH SIMPLE
        ON UPDATE NO ACTION ON DELETE NO ACTION
)
The main problem is get the sum of http_reply_size grouping by user_name and time, my query is:
-- Total reply size per ("time", user_name) for requests whose epoch time
-- lies between 1516975122 and 1516996722 (both inclusive).
SELECT
    a."time",
    a.user_name,
    sum(a.http_reply_size)
FROM accesses AS a
WHERE extract(epoch FROM a."time") BETWEEN 1516975122 AND 1516996722
GROUP BY
    a."time",
    a.user_name
But this query is very slow on the server (3,237,976 rows accumulated in only 2 days). Does PostgreSQL offer a way to optimize a query like this, or do I need to use another SQL or NoSQL system?

Try to include a range CHECK constraint on each partition, so that the planner doesn't have to scan all tables.
In my case is like this:
CREATE TABLE IF NOT EXISTS ' || table_name || '(
CONSTRAINT ' || pk || ' PRIMARY KEY (avl_id),
CHECK ( event_time >= ''' || begin_time || ''' AND event_time < ''' || end_time || ''' )
) INHERITS (avl_db.avl);
Also, don't filter on extract(epoch from "time"): it has to be calculated for every row and cannot use the index you created on "time".
Compare the column directly instead, like this, to take advantage of the index:
WHERE "time" >= '2018-01-01'::timestamp with time zone
and "time" < '2018-02-01'::timestamp with time zone

PostgreSQL 9.5 Insert/Update while partitioning table

I need to achieve updating (via ON CONFLICT()) row in a partitioned tables.
So far, my tries:
Table creation:
-- Master table; goid is the primary key, the other two columns are arrays.
CREATE TABLE public.my_tbl (
    goid       varchar(255) NOT NULL,
    timestamps timestamp[],
    somenumber numeric[],
    CONSTRAINT my_tbl_pkey PRIMARY KEY (goid)
)
WITH (OIDS = FALSE);

ALTER TABLE public.my_tbl OWNER TO postgres;
Table Sequence:
-- Sequence counting upwards from 1 in steps of 1, up to the bigint maximum.
CREATE SEQUENCE public.fixations_data_pkey_seq
    START WITH 1
    INCREMENT BY 1
    MINVALUE 1
    MAXVALUE 9223372036854775807
    CACHE 1;

ALTER SEQUENCE public.fixations_data_pkey_seq OWNER TO postgres;
Table partition trigger, which creates new table with name "table_YYYY_MM_DD", where "YYYY_MM_DD" - current date (query execution date):
-- Row-level trigger function for my_tbl.
--
-- Routes every inserted row into a per-day child table named
-- my_tbl_<year>_<month>_<day>, taken from now() (date parts are not
-- zero-padded). The child table and its index are created the first time a
-- row arrives on a given day. Returns NULL, so when fired BEFORE INSERT the
-- row is not also stored in the master table.
--
-- Identifiers are quoted through format(%I), and the row is passed as a
-- parameter (USING NEW) instead of being serialised with quote_literal() and
-- cast back via the deprecated TG_RELNAME.
CREATE OR REPLACE FUNCTION public.my_tbl_insert_trigger()
  RETURNS trigger AS
$BODY$
DECLARE
  table_master varchar(255) := 'my_tbl';
  table_part   varchar(255) := '';
BEGIN
  -- Partition table name --------------------------------------------------
  table_part := table_master
    || '_' || DATE_PART( 'year',  NOW() )::TEXT
    || '_' || DATE_PART( 'month', NOW() )::TEXT
    || '_' || DATE_PART( 'day',   NOW() )::TEXT;

  -- Check if partition exists ---------------------------------------------
  -- NOTE(review): pg_class is searched by name only, across all schemas.
  PERFORM 1
  FROM    pg_class
  WHERE   relname = table_part
  LIMIT   1;

  -- If not exist, create new one ------------------------------------------
  IF NOT FOUND THEN
    -- Create partition, which inherits master table -----------------------
    EXECUTE format(
      'CREATE TABLE %I
       (
         goid character varying(255) NOT NULL DEFAULT nextval(%L::regclass),
         CONSTRAINT %I PRIMARY KEY (goid)
       )
       INHERITS ( %I )
       WITH ( OIDS=FALSE )',
      table_part,
      table_master || '_pkey_seq',
      table_part || '_pkey',
      table_master
    );

    -- Create indices for current table ------------------------------------
    -- NOTE(review): the primary key above already indexes goid, so this
    -- index looks redundant; kept to preserve the existing schema.
    EXECUTE format(
      'CREATE INDEX %I ON %I USING btree (goid)',
      table_part || '_adid_date_index',
      table_part
    );
  END IF;

  -- Insert row into table (without ON CONFLICT) ---------------------------
  EXECUTE format('INSERT INTO %I SELECT ($1).*', table_part)
  USING NEW;

  RETURN NULL;
END;
$BODY$
  LANGUAGE plpgsql VOLATILE
  COST 100;
ALTER FUNCTION public.my_tbl_insert_trigger() OWNER TO postgres;

-- Route every row inserted into my_tbl through the partitioning function
-- before it is stored.
CREATE TRIGGER my_tbl_insert_trigger
    BEFORE INSERT ON my_tbl
    FOR EACH ROW
    EXECUTE PROCEDURE my_tbl_insert_trigger();
After this I can insert new rows into table:
INSERT INTO my_tbl (goid, timestamps, somenumber)
VALUES ('qwe123SSsssa3', '{"2016-11-16 00:00:00", "2016-11-16 01:00:00"}', '{3, 12333}')
But when I'm trying to do UPSERT:
INSERT INTO my_tbl (goid, timestamps, somenumber)
VALUES ('qwe123SSsssa3', '{"2016-11-16 02:00:00"}', '{999}')
ON CONFLICT (goid)
DO UPDATE
SET timestamps=array_append(my_tbl.timestamps::timestamp[], '2016-11-16 02:00:00'),
somenumber=array_append(my_tbl.somenumber,'999');
I'm geting DUPLICATE PKEY error.
I guess, that I have to add ON CONFLICT to third EXECUTE in trigger function. But how should I do this?

Well , I've changed my third EXECUTE to :
-- Insert row into table (with ON CONFLICT)--------------------------------------------
-- On a goid collision the incoming arrays are appended to the stored ones.
-- EXCLUDED refers to the row that was proposed for insertion, so no values
-- have to be spliced into the SQL text; the row itself is passed as a
-- parameter. This also keeps the statement valid when NEW.timestamps or
-- NEW.somenumber is NULL (quote_literal(NULL) would turn the whole query
-- string into NULL and make EXECUTE fail).
EXECUTE format(
  'INSERT INTO %1$I AS part
   SELECT ($1).*
   ON CONFLICT (goid)
   DO UPDATE
   SET timestamps = part.timestamps || EXCLUDED.timestamps,
       somenumber = part.somenumber || EXCLUDED.somenumber',
  table_part
)
USING NEW;
RETURN NULL;
Now, when I execute query:
INSERT INTO my_tbl (goid, timestamps, somenumber)
VALUES ('potato_1', ARRAY['2016-11-16 12:00:00', '2016-11-16 15:00:00']::timestamp[], ARRAY[223, 211]::numeric[]);
there are no any errors, and it extends array-type columns as I expected
I can admit that this is a dirty solution, but it seems that it works.
If someone has a better solution, I'll glad to look at it.

PostgreSQL: Function with multiple date parameter

I'm trying to create a function with multiple parameter as below:
-- Returns start_date and the three name columns of table1 rows that match
-- either the name filters or the applied-date range.
--
-- Parameters:
--   f_name, m_name, l_name  substrings searched for (LIKE '%...%') in
--                           first_name, middle_name and last_name
--   start_date, end_date    bounds for date_applied (BETWEEN, both inclusive)
--
-- NOTE(review): AND binds tighter than OR, so the WHERE clause reads
--   (code IN (...) AND <name filters>) OR (date_applied BETWEEN ...);
-- the code filter therefore does not apply to rows matched by the date range.
-- NOTE(review): when start_date or end_date is NULL the BETWEEN test is
-- NULL, so the date branch never matches.
-- NOTE(review): the output column start_date has the same name as the IN
-- parameter start_date -- confirm how that name resolves inside the body.
CREATE OR REPLACE FUNCTION select_name_and_date (
IN f_name character,
IN m_name character,
IN l_name character,
IN start_date date,
IN end_date date )
RETURNS TABLE (
start_date date ,first_name character, middle_name character,last_name character ) AS $BODY$
BEGIN RETURN QUERY
select a.start_date, a.first_name, a.middle_name, a.last_name
FROM table1 a
where code in ('NEW', 'OLD')
-- all three name filters must match; an empty string matches any non-NULL value
and ( (a.first_name like '%' || f_name || '%' and a.middle_name like '%' || m_name || '%' and a.last_name like '%' || l_name || '%'))
-- alternative branch: applied date within the inclusive range
or ((a.date_applied) between start_date and end_date );
END;
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100;
When I tried to execute with date, it shows correct result.
select * from select_name_and_date ('Firstname','','','2016-06-27','2016-06-28');
When i tried to remove the value of date, it shows:
ERROR: invalid input syntax for type date: ""
select * from select_name_and_date ('Firstname','','','','');
When I tried to replace with NULL value of the date, it shows: 0 rows retrieved. (when it should have)
select * from select_name_and_date ('Firstname','','',NULL,NULL);
I want to have parameter that not depending on each parameter.

The BETWEEN operator yields NULL (which is treated as false) when either bound is NULL. If you want a NULL bound to mean "no limit", you'll need to handle it explicitly. E.g., you could rewrite the part of the condition that applies to a.date_applied as follows:
-- A NULL bound means "no limit on that side"; both bounds are inclusive,
-- matching BETWEEN. With both bounds NULL every row passes.
((start_date IS NULL OR a.date_applied >= start_date) AND
 (end_date IS NULL OR a.date_applied <= end_date))

Declaring timestamp in postgres

I have to insert a timestamp value into a table. I am inserting values by writing an stored procedure.
This is the code to my stored procedure.
-- Fills Schedule with generated rows: for every row of Channel it loops over
-- 7 days (i) and 24 hours (j) and inserts a row with a random program id
-- (1..1000) and a random duration of 1 to 3 hours. Returns true.
--
-- NOTE(review): the INSERT below does not parse. The "type 'string'" cast
-- notation (integer '...', interval '...') only accepts a string literal,
-- not an expression such as to_char(...) or a variable; use
-- (expr)::type or CAST(expr AS type) instead.
CREATE OR REPLACE FUNCTION dataInsert_Schedule() RETURNS boolean As
$$
DECLARE
i integer;
j integer;
dur integer;
tup Channel%rowtype;
BEGIN
FOR tup IN SELECT * FROM Channel
LOOP
for i in 0..6 LOOP --days
for j in 0..23 LOOP --hours
dur = round((random() * 2) + 1); -- random duration: 1, 2 or 3 hours
IF i + dur > 24 then -- NOTE(review): compares the day counter i against 24; the hour counter j may have been intended -- confirm
dur = 24 - i;
END IF;
INSERT INTO Schedule VALUES(tup.Channel_ID, round((random() * 999) + 1),( current_date + (integer to_char(i,'9')) )+ (interval to_char(j,'99') || ' hour'), (interval dur ||' hour')); -- syntax error: a type name followed by a non-literal expression
i = i + dur - 1; -- NOTE(review): assigns to the outer loop variable i; possibly meant to advance j -- confirm
END LOOP;
END LOOP;
END LOOP;
return true;
END
$$ LANGUAGE plpgsql;
When I write the query Select * From dataInsert_Schedule(); I got the following error :
ERROR: syntax error at or near "to_char"
LINE 1: ...d((random() * 999) + 1),( current_date + (integer to_char( $...
^
QUERY: INSERT INTO Schedule VALUES( $1 , round((random() * 999) + 1),( current_date + (integer to_char( $2 ,'9')) )+ (interval to_char( $3 ,'99') || ' hour'), (interval $4 ||' hour'))
CONTEXT: SQL statement in PL/PgSQL function "datainsert_schedule" near line 15
********** Error **********
ERROR: syntax error at or near "to_char"
SQL state: 42601
Context: SQL statement in PL/PgSQL function "datainsert_schedule" near line 15
I First tried this
INSERT INTO Schedule VALUES(tup.Channel_ID, round((random() * 999) + 1),( current_date + (integer ''||i) )+ (interval (j ||' hour')), (interval dur ||' hour'));
way of inserting, but I was getting the same kind of error.
Why I am getting this error?
And the schedule table is defined as following:
-- One row per (channel, program) pair, with its start time and duration.
CREATE TABLE Schedule (
    Channel_ID int       REFERENCES Channel (Channel_ID),
    Program_ID int       REFERENCES Program (Program_ID),
    Start_Time timestamp NOT NULL,
    Duration   interval  NOT NULL,
    CONSTRAINT Schedule_Key PRIMARY KEY (Channel_ID, Program_ID)
);

It works for me like this:
select (to_char(1,'99') || ' hour')::interval;
You don't need the to_char:
select (1 || ' hour')::interval;
interval
----------
01:00:00
So this would be it:
-- Column list spelled out so the statement does not depend on the column
-- order of Schedule. i, j and dur are the integer loop variables of the
-- surrounding function; string concatenation followed by ::interval is the
-- run-time cast form that accepts arbitrary expressions.
INSERT INTO Schedule (Channel_ID, Program_ID, Start_Time, Duration)
VALUES (
    tup.Channel_ID,
    round((random() * 999) + 1),
    (current_date + i) + (j || ' hour')::interval,  -- date + int -> date, + interval -> timestamp
    (dur || ' hour')::interval
)

A type name may be specified before a string constant to cast it into this type, but it applies only to constants. So integer '123' is fine but integer to_char(something) or interval column_name are not permitted, which is why your query fails.
This is explained in the SQL syntax chapter from the manual, specifically this paragraph: Constants of Other Types.
Excerpt:
A constant of an arbitrary type can be entered using any one of the
following notations:
type 'string'
'string'::type
CAST ( 'string' AS type )
and below:
The ::, CAST(), and function-call syntaxes can also be used to specify
run-time type conversions of arbitrary expressions
The point relevant to the question being that type 'string' notation is not included in the syntaxes that can accept arbitrary expressions, contrary to :: and cast().

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

How to create dynamic partition table in postgres with bigint column? - sql

Related

ERROR: function pg_catalog.extract(unknown, integer) does not exist

Query in PostgreSQL with large quantity of squid access requests

PostgreSQL 9.5 Insert/Update while partitioning table

PostgreSQL: Function with multiple date parameter

Declaring timestamp in postgres

Categories

Resources