I need to split the table into three partitions by the EVENT_TIME field: the first partition covers the week starting from today, the second covers the previous week, and the third is a history partition that holds any data that does not fall into the first two.
In addition, I need to add a trigger that will clear the history every two weeks.
CREATE TABLE EVENTS_LOG_TEST_PARTITION
(
ID NUMBER,
METHOD NVARCHAR2(100),
INPUT CLOB,
EVENT_TIME TIMESTAMP(6),
STATUS NVARCHAR2(100),
MESSAGE NVARCHAR2(200)
)
PARTITION BY RANGE (EVENT_TIME)
(
PARTITION CURRENT_WEEK VALUES LESS THAN (TO_DATE(TO_CHAR(CURRENT_TIMESTAMP), 'dd-MM-yyyy HH24:mi:ss'))
)
ENABLE ROW MOVEMENT;
I know this is not valid syntax, which is why I am asking for help.
It sounds like you want to keep a rolling two to three weeks' worth of data. In that case you can use interval partitioning and drop the oldest partition each week.
Interval partitioning creates a new partition whenever you insert a row with a value greater than the current highest partition boundary.
All you need to define is an initial partition and the time interval. You can choose any value in the past as the boundary for the initial partition.
For example:
create table events_log_test_partition (
id number,
method nvarchar2(100),
input clob,
event_time timestamp(6),
status nvarchar2(100),
message nvarchar2(200)
) partition by range (event_time)
interval ( interval '7' day ) (
partition p_init values less than ( date'2021-01-04' )
);
insert into events_log_test_partition
values ( 1, 'test', 'test', systimestamp - 14, 'test', 'test' );
insert into events_log_test_partition
values ( 2, 'test', 'test', systimestamp, 'test', 'test' );
select partition_name, high_value
from user_tab_partitions
where table_name = 'EVENTS_LOG_TEST_PARTITION';
/*
PARTITION_NAME HIGH_VALUE
P_INIT TIMESTAMP' 2021-01-04 00:00:00'
SYS_P6002 TIMESTAMP' 2021-08-23 00:00:00'
SYS_P6005 TIMESTAMP' 2021-09-06 00:00:00'
*/
select * from events_log_test_partition
partition for ( date'2021-08-18' );
/*
ID METHOD INPUT EVENT_TIME STATUS MESSAGE
1 test test 18-AUG-2021 13.09.17.000000000 test test
*/
select * from events_log_test_partition
partition for ( date'2021-09-01' );
/*
ID METHOD INPUT EVENT_TIME STATUS MESSAGE
2 test test 01-SEP-2021 13.09.17.516073000 test test
*/
alter table events_log_test_partition
drop partition for ( date'2021-08-18' );
select partition_name, high_value
from user_tab_partitions
where table_name = 'EVENTS_LOG_TEST_PARTITION';
/*
PARTITION_NAME HIGH_VALUE
P_INIT TIMESTAMP' 2021-01-04 00:00:00'
SYS_P6005 TIMESTAMP' 2021-09-06 00:00:00'
*/
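For the "clear the history every two weeks" requirement, a scheduler job is usually a better fit than a trigger. A minimal sketch, in line with the suggestion above to drop the oldest partition each week; the job name and the two-week retention window are illustrative only, and a production version should handle the case where no partition exists for the computed date:
begin
  dbms_scheduler.create_job (
    job_name        => 'purge_events_log_history',  -- hypothetical job name
    job_type        => 'PLSQL_BLOCK',
    job_action      => q'[
      begin
        -- drop the partition covering the date two weeks back
        execute immediate
          'alter table events_log_test_partition drop partition for ( date'''
          || to_char(sysdate - 14, 'yyyy-mm-dd') || ''' )';
      end;]',
    repeat_interval => 'FREQ=WEEKLY',
    enabled         => true
  );
end;
/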
create table TEST_TABLE_2
(
report_month DATE,
name varchar(128)
)
partition by list (REPORT_MONTH)
(
partition TEST_PART_2022_05_31 values (TO_DATE(' 2022-05-31 00:00:00', 'SYYYY-MM-DD HH24:MI:SS', 'NLS_CALENDAR=GREGORIAN'))
tablespace TEST_TABLESPACE,
partition MONTH_UNKNOWN values (default)
tablespace TEST_TABLESPACE
);
create table TEST_TABLE_1
(
report_month DATE,
name varchar(128)
)
partition by list (REPORT_MONTH)
(
partition TEST_PART_2022_05_31 values (TO_DATE(' 2022-05-31 00:00:00', 'SYYYY-MM-DD HH24:MI:SS', 'NLS_CALENDAR=GREGORIAN'))
tablespace TEST_TABLESPACE,
partition MONTH_UNKNOWN values (default)
tablespace TEST_TABLESPACE
);
Please advise: how can I exchange partition TEST_PART_2022_05_31 of TEST_TABLE_2 with partition TEST_PART_2022_05_31 of TEST_TABLE_1?
When I execute this script
ALTER TABLE ADS.test_table_1
EXCHANGE PARTITION TEST_PART_2022_05_31
WITH TABLE ADS.test_table_2
I get the error ORA-14095: ALTER TABLE EXCHANGE requires a non-partitioned, non-clustered table.
Are you looking for something like this? EXCHANGE PARTITION swaps a partition's segment with a non-partitioned table (hence the ORA-14095), so moving data between two partitioned tables goes through an intermediate non-partitioned table, which you can create with FOR EXCHANGE:
create table t (
c1, c2, c3
) partition by range ( c2 )
interval ( interval '1' month ) (
partition p0 values less than ( date'2022-02-01' )
)
as
select level, date'2022-01-01' + level, 'remove'
from dual
connect by level <= 100;
create table temp
for exchange with table t;
select count(*) from temp;
0
alter table t
exchange partition p0
with table temp;
select count(*) from temp;
100
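Applied to the two tables from the question, a sketch of the full swap could look like this (the staging table name is made up; three exchanges through one non-partitioned staging table swap the partition contents both ways):
create table exchange_stage
  for exchange with table ads.test_table_1;
-- 1) TEST_TABLE_1's partition -> staging table (TEST_TABLE_1's partition is now empty)
alter table ads.test_table_1
  exchange partition test_part_2022_05_31
  with table exchange_stage;
-- 2) TEST_TABLE_2's partition gets TEST_TABLE_1's data; staging table now holds TEST_TABLE_2's old data
alter table ads.test_table_2
  exchange partition test_part_2022_05_31
  with table exchange_stage;
-- 3) TEST_TABLE_1's partition gets TEST_TABLE_2's old data; staging table is empty again
alter table ads.test_table_1
  exchange partition test_part_2022_05_31
  with table exchange_stage;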
I have the following table :
CREATE TABLE "ICTPART_DAILY_SUMMARY"
(
"EVENT_START_DATE" DATE
"ACCOUNTING_METHOD" CHAR(1),
......etc
)
PARTITION BY RANGE ("EVENT_START_DATE")
(PARTITION "ICTPART_DAY_SUM_P20220218" VALUES LESS THAN (TO_DATE(' 2022-02-19 00:00:00', 'SYYYY-MM-DD HH24:MI:SS', 'NLS_CALENDAR=GREGORIAN'))
NOCOMPRESS LOGGING
TABLESPACE "TBS_ICTQUO_D" ,
PARTITION "ICTPART_DAY_SUM_P20220219" VALUES LESS THAN (TO_DATE(' 2022-02-20 00:00:00', 'SYYYY-MM-DD HH24:MI:SS', 'NLS_CALENDAR=GREGORIAN'))
NOCOMPRESS LOGGING
TABLESPACE "TBS_ICTQUO_D" ,
PARTITION "ICTPART_DAY_SUM_PMAX" VALUES LESS THAN (MAXVALUE)
NOCOMPRESS LOGGING
TABLESPACE "TBS_ICTQUO_D" ) ;
This table has been partitioned every day by an old ksh script, as you can see.
I want to continue the same work with a SQL script that runs on the first of every month, alters the table, and creates the same daily partitions for the whole month.
For example: on 01/03/2022, I want to add partitions for 01/03/2022, 02/03/2022, 03/03/2022, and so on up to 31/03/2022.
That SQL script will be placed in an ETL job that runs on the first of every month and creates those partitions.
Why not get rid of the ksh script and use INTERVAL partitioning? That way a new partition is created automatically when a row for a new day is inserted into the table.
CREATE TABLE t1 (
seq_num NUMBER GENERATED BY DEFAULT AS IDENTITY (START WITH 1) NOT NULL,
dt DATE
)
PARTITION BY RANGE (dt)
INTERVAL (NUMTODSINTERVAL(1,'DAY'))
(
PARTITION OLD_DATA values LESS THAN (TO_DATE('2022-01-01','YYYY-MM-DD'))
);
INSERT into t1 (dt)
with dt (dt, interv) as (
select date '2022-01-01', numtodsinterval(1,'DAY') from dual
union all
select dt.dt + interv, interv from dt
where dt.dt + interv < date '2022-02-01')
select dt from dt;
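To verify that new partitions are created automatically for each day inserted, you can query the data dictionary, for example:
SELECT partition_name, high_value
FROM user_tab_partitions
WHERE table_name = 'T1'
ORDER BY partition_position;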
I'm a newbie to BigQuery and trying to figure out a solution to this scenario:
Transaction data is ingested to BQ table that is partitioned on ingest_date
Business key for a transaction is trade_id but a transaction can also have versions so business key for a record is trade_id + trade_version
There may be duplicates on business key so the technical key is trade_id + trade_version_id + ingest_timestamp
Transaction contains a business timestamp when it was actually executed but this may differ from ingestion timestamp as data may arrive late up to several days or weeks
The data is to be enriched and transformed in several steps and finally end up in a table available for consumption downstream
Consumption table must be partitioned on business date for good performance on filtering and also contain a flag to enable filtering on latest version only
I have googled quite a lot on this but haven't found any clear solution or pattern to use, so I did some elaboration on my own and came up with a solution; it would be great to get some reviews and comments on it (I'm not 100% sure it works).
I have also read briefly about DBT and Dataform; do these tools have automated solutions for this?
Here it is:
The small data model I've worked with: https://i.stack.imgur.com/P1baT.jpg
---------------------------------------------------------------------------------------------
-- Table DDL's
CREATE OR REPLACE TABLE `<dataset>.raw`
(
raw_key STRING,
ingest_date DATE,
ingest_timestamp TIMESTAMP,
trade_id STRING,
trade_version STRING,
business_date DATE,
business_timestamp TIMESTAMP
)
PARTITION BY ingest_date
OPTIONS (
require_partition_filter=true
)
CREATE OR REPLACE TABLE `<dataset>.partition_ingest2business`
(
ingest_date DATE,
business_date DATE,
ingest_timestamp TIMESTAMP
)
CREATE OR REPLACE TABLE `<dataset>.consume`
(
trade_id STRING,
trade_version STRING,
business_date DATE,
latest_version BOOL,
raw_key STRING,
ingest_date DATE,
version_desc INT64
)
PARTITION BY business_date
OPTIONS (
require_partition_filter=true
)
CREATE OR REPLACE TABLE `<dataset>.consume_ctrl`
(
partition_ingest_timestamp TIMESTAMP
)
---------------------------------------------------------------------------------------------
-- Some test data
/*
delete from <dataset>.raw where ingest_date > '1899-01-01';
delete from <dataset>.transform where ingest_date > '1899-01-01';
delete from <dataset>.partition_ingest2business where ingest_date > '1899-01-01';
*/
insert into <dataset>.raw select 'raw_key_001', cast('2000-01-01' as date), cast('2000-01-01 00:00:01' as timestamp), 'trade_id_001', 'trade_version_001_01', cast('2000-01-01' as date), cast('2000-01-01 00:00:01' as timestamp);
insert into <dataset>.raw select 'raw_key_002', cast('2000-01-01' as date), cast('2000-01-01 00:00:02' as timestamp), 'trade_id_002', 'trade_version_002_01', cast('2000-01-01' as date), cast('2000-01-01 00:00:02' as timestamp);
insert into <dataset>.raw select 'raw_key_003', cast('2000-01-03' as date), cast('2000-01-03 00:00:01' as timestamp), 'trade_id_003', 'trade_version_003_01', cast('2000-01-03' as date), cast('2000-01-03 00:00:01' as timestamp);
insert into <dataset>.raw select 'raw_key_004', cast('2000-01-03' as date), cast('2000-01-03 00:00:02' as timestamp), 'trade_id_004', 'trade_version_004_01', cast('2000-01-03' as date), cast('2000-01-03 00:00:02' as timestamp);
insert into <dataset>.raw select 'raw_key_005', cast('2000-01-03' as date), cast('2000-01-03 00:00:03' as timestamp), 'trade_id_005', 'trade_version_005_01', cast('2000-01-03' as date), cast('2000-01-03 00:00:03' as timestamp);
insert into <dataset>.raw select 'raw_key_006', cast('2000-01-03' as date), cast('2000-01-03 00:00:04' as timestamp), 'trade_id_006', 'trade_version_006_01', cast('2000-01-03' as date), cast('2000-01-03 00:00:04' as timestamp);
---------------------------------------------------------------------------------------------
-- Incremental load of partition_ingest2business table
declare max_ingest_timestamp timestamp default
(
select max(ingest_timestamp)
from <dataset>.partition_ingest2business
);
if max_ingest_timestamp is null then
-- Destination table is empty, set partition filter from source table
set max_ingest_timestamp = cast('1899-01-01' as timestamp);
end if;
select max_ingest_timestamp, cast(max_ingest_timestamp as date);
select
ingest_date
,business_date
,max(ingest_timestamp)
from <dataset>.raw
where
ingest_date >= cast(max_ingest_timestamp as date)
group by
ingest_date
,business_date;
-- Merge into destination table
merge <dataset>.partition_ingest2business trgt
using
(
select
ingest_date
,business_date
,max(ingest_timestamp) as ingest_timestamp
from <dataset>.raw
where
ingest_date >= cast(max_ingest_timestamp as date)
and ingest_timestamp > max_ingest_timestamp
group by
ingest_date
,business_date
) src
on trgt.ingest_date = src.ingest_date and trgt.business_date = src.business_date
when not matched then
insert(ingest_date, business_date, ingest_timestamp) values (src.ingest_date, src.business_date, src.ingest_timestamp)
when matched and trgt.ingest_timestamp != src.ingest_timestamp then
update set ingest_timestamp = src.ingest_timestamp;
---------------------------------------------------------------------------------------------
-- Incremental load of consume table with partition overwrite
declare ingest_date_filter array<date>;
declare business_date_filter array<date>;
declare partition_ingest_timestamp timestamp default
(
select partition_ingest_timestamp
from <dataset>.consume_ctrl
);
declare max_ingest_timestamp timestamp default
(
select max(ingest_timestamp)
from <dataset>.partition_ingest2business
);
if partition_ingest_timestamp is null then
set partition_ingest_timestamp = cast('1899-01-01' as timestamp);
insert into <dataset>.consume_ctrl (partition_ingest_timestamp) values (cast('1899-01-01' as timestamp));
end if;
set business_date_filter =
(
select ARRAY_AGG(distinct business_date)
from <dataset>.partition_ingest2business
where
ingest_timestamp > partition_ingest_timestamp
and ingest_timestamp <= max_ingest_timestamp
);
set ingest_date_filter =
(
select ARRAY_AGG(distinct ingest_date)
from <dataset>.partition_ingest2business
where business_date in UNNEST(business_date_filter)
);
if ARRAY_LENGTH(business_date_filter) > 0 then
merge <dataset>.consume trgt using
(
select
trade_id
,trade_version
,business_date
,row_number() over (partition by trade_id order by trade_version desc, business_timestamp desc, ingest_timestamp desc) as version_desc
,raw_key
,ingest_date
from <dataset>.raw
where
ingest_date in unnest(ingest_date_filter)
and ingest_timestamp <= max_ingest_timestamp
) src
on false
when not matched and business_date in unnest(business_date_filter) then
insert(trade_id, trade_version, business_date, version_desc, raw_key, ingest_date) values(trade_id, trade_version, business_date, version_desc, raw_key, ingest_date)
when not matched by source and business_date in unnest(business_date_filter) then
delete;
-- Update consume_ctrl with max_ingest_timestamp to be used next execution
update <dataset>.consume_ctrl set partition_ingest_timestamp = max_ingest_timestamp where 1 = 1;
end if;
You haven't asked a specific question, but here are some responses...
As I think you are aware, you are using standard (column-based) partitioning, not ingestion-time partitioning, which would look like this:
PARTITION BY _PARTITIONDATE
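For example, a minimal sketch of an ingestion-time partitioned variant of the raw table (the table name is made up; the partitioning pseudo-column is maintained by BigQuery and is not declared in the column list):
CREATE OR REPLACE TABLE `<dataset>.raw_ingest_time`
(
raw_key STRING,
trade_id STRING,
trade_version STRING,
business_date DATE,
business_timestamp TIMESTAMP
)
PARTITION BY _PARTITIONDATE
OPTIONS (
require_partition_filter=true
)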
I am a bit wary of row-level operations like MERGE on big fact tables in BigQuery, although performance improvements have been released in recent months. Analytical columnar databases generally excel at append rather than merge use cases.
Due to current limitations of BigQuery materialized views (partitioning must be the same as the underlying table), you do indeed need separate tables if you want both a PARTITION BY ingest_date table and a PARTITION BY business_date table available.
For .consume you could add clustering; it has performance and on-demand cost benefits, sometimes massive query cost reductions, depending on the cardinality of the chosen columns:
PARTITION BY business_date
CLUSTER BY latest_version
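Applied to the consume table from the question, that could look like the sketch below (same columns as in your DDL; whether latest_version is a useful clustering key depends on your query patterns and its cardinality):
CREATE OR REPLACE TABLE `<dataset>.consume`
(
trade_id STRING,
trade_version STRING,
business_date DATE,
latest_version BOOL,
raw_key STRING,
ingest_date DATE,
version_desc INT64
)
PARTITION BY business_date
CLUSTER BY latest_version
OPTIONS (
require_partition_filter=true
)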
I'm more familiar with Dataform (than DBT), which does not automate complex transformations except where packages are provided and/or you write your own in JavaScript. Here is a link explaining how to solve a common complex transformation problem, SCDs, by utilising an SCD package provided by Dataform:
https://docs.dataform.co/packages/dataform-scd
Dataform is excellent for refactoring/documentation/assertions/testing/deployment of SQL transforms (using SQLX). New Dataform Cloud users have been put on a waitlist since May 12 after the acquisition by GCP; however, you can still install Dataform and use the Dataform CLI.
This is my insert trigger on Table_A, where I store parameters for my system. When I insert into the table, I want to change the end_date of the last record in order to keep record versioning.
create or replace trigger parameter_version
before insert
on parameters
for each row
declare
v_is_exist number := 0;
v_rowid rowid;
begin
select count(*) into v_is_exist from parameters where name = :new.name; -- check if the parameter exists
select rowid into v_rowid from parameters where name = :new.name and end_date is null; -- rowid of the record that should be changed
if v_is_exist <> 0 then
update parameters set end_date = :new.start_date - 1 where rowid = v_rowid;
end if;
end;
Situation in table before insert is:
| id | name | value | start_date | end_date |
-----------------------------------------------
| 1 |Par_A | 10 | 2016-09-01 | 2016-10-01 |
-----------------------------------------------
| 2 |Par_A | 20 | 2016-10-02 | 2016-10-03 |
-----------------------------------------------
| 3 |Par_A | 30 | 2016-10-05 | <null> |
-----------------------------------------------
The record with id=3 should get end_date = :new.start_date - 1 (closing that version), and the inserted record becomes the next parameter version with start_date = sysdate.
I get an ORA-04091 error: 'table name is mutating, trigger/function may not see it'.
I know this case is hard and probably impossible, but maybe someone knows a solution?
Or maybe there is another way to handle this case?
You can handle this with an After Statement trigger with the LEAD Analytic Function:
DROP TABLE demo;
CREATE TABLE demo( id NUMBER
, name VARCHAR2( 30 )
, VALUE NUMBER
, start_date DATE
, end_date DATE
);
INSERT INTO demo( id, name, VALUE, start_date, end_date )
VALUES ( 1, 'Par_A', 10, TO_DATE( '2016-09-01', 'YYYY-MM-DD' ), TO_DATE( '2016-10-01', 'YYYY-MM-DD' ) );
INSERT INTO demo( id, name, VALUE, start_date, end_date )
VALUES ( 2, 'Par_A', 20, TO_DATE( '2016-10-02', 'YYYY-MM-DD' ), TO_DATE( '2016-10-04', 'YYYY-MM-DD' ) );
INSERT INTO demo( id, name, VALUE, start_date )
VALUES ( 3, 'Par_A', 30, TO_DATE( '2016-10-05', 'YYYY-MM-DD' ) );
INSERT INTO demo( id, name, VALUE, start_date )
VALUES ( 4, 'Par_A', 40, TO_DATE( '2016-10-07', 'YYYY-MM-DD' ) );
INSERT INTO demo( id, name, VALUE, start_date )
VALUES ( 5, 'Par_A', 50, TO_DATE( '2016-10-11', 'YYYY-MM-DD' ) );
COMMIT;
SELECT id
, name
, start_date
, end_date
, LEAD( start_date ) OVER( PARTITION BY name ORDER BY start_date ) - 1 AS new_date
FROM demo
WHERE end_date IS NULL
ORDER BY id;
CREATE OR REPLACE TRIGGER demo_aius
AFTER INSERT OR UPDATE
ON demo
REFERENCING NEW AS new OLD AS old
DECLARE
CURSOR c_todo
IS
SELECT id, new_date
FROM (SELECT id
, name
, start_date
, end_date
, LEAD( start_date ) OVER( PARTITION BY name ORDER BY start_date ) - 1 AS new_date
FROM demo
WHERE end_date IS NULL)
WHERE new_date IS NOT NULL;
BEGIN
FOR rec IN c_todo
LOOP
UPDATE demo
SET end_date = rec.new_date
WHERE id = rec.id;
END LOOP;
END demo_aius;
/
INSERT INTO demo( id, name, VALUE, start_date )
VALUES ( 6, 'Par_A', 60, TO_DATE( '2016-10-15', 'YYYY-MM-DD' ) );
COMMIT;
SELECT id
, name
, start_date
, end_date
FROM demo
ORDER BY id;
As the script shows, such an update can even handle multiple missing end dates, in case the trigger was accidentally disabled. The "PARTITION BY name" part makes sure that it also works after complex insert statements.
BTW, I agree that autonomous transactions in triggers are a last resort. I try to avoid triggers in general by controlling the user interface and putting all such functionality in packages.
Try something like this:
create or replace trigger parameter_version
before insert
on parameters
for each row
begin
/* Don't care if there are 0 rows updated */
update parameters
set end_date = :new.start_date - 1
where name = :new.name and end_date is null;
:new.end_date := null;
end;
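Hypothetical usage, assuming the column layout from the question (id, name, value, start_date, end_date):
insert into parameters (id, name, value, start_date)
values (4, 'Par_A', 40, sysdate);
-- before the new row is inserted, the open version for 'Par_A' (id = 3, end_date is null)
-- gets end_date = :new.start_date - 1, i.e. sysdate - 1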
People work from 10:00 to 21:00, except on Sundays and public holidays.
Jobs are reserved for them at 15-minute intervals. Job duration is from 15 minutes to 4 hours. A whole job must fit within a single day.
How can I find the nearest free start times for a given duration which are not yet reserved, in Postgres 9.3, starting from the current date and time?
For example, Mary already has a reservation from 12:30 to 16:00, and
John already has a reservation from 12:00 to 13:00.
The reservat table contains reservations, the yksus2 table contains workers, and
the pyha table contains public holidays. Table structures are below. The reservat structure can be changed if that helps.
A query for the earliest start times for a duration of 1.5 hours should return:
John 2014-10-28 10:00
Mary 2014-10-28 10:00
John 2014-10-28 10:15
Mary 2014-10-28 10:15
John 2014-10-28 10:30
Mary 2014-10-28 10:30
Mary 2014-10-28 11:00
John 2014-10-28 13:00
Mary 2014-10-28 16:00
Mary 2014-10-28 16:15
Mary 2014-10-28 16:30
... etc., and also start times from the following days.
I tried a query based on the answer in How to return only work time from reservations in PostgreSql? (below), but it returns the wrong result:
MARY 2014-10-28 13:00:00
MARY 2014-10-29 22:34:40.850255
JOHN 2014-10-30 22:34:40.850255
MARY 2014-10-31 22:34:40.850255
MARY 2014-11-03 22:34:40.850255
Also, sliding start times 10:00, 10:30, etc. are not returned.
How can I get the proper first reservations?
The query which returns the wrong result is:
insert into reservat (objekt2, during) values
('MARY', '[2014-10-28 11:30:00,2014-10-28 13:00:00)'),
('JOHN', '[2014-10-28 10:00:00,2014-10-28 11:30:00)');
with gaps as (
select
yksus,
upper(during) as start,
lead(lower(during),1,upper(during)) over (ORDER BY during) - upper(during) as gap
from (
select
yksus2.yksus,
during
from reservat join yksus2 on reservat.objekt2=yksus2.yksus
where upper(during)>= current_date
union all
select
yksus2.yksus,
unnest(case
when pyha is not null then array[tsrange1(d, d + interval '1 day')]
when date_part('dow', d) in (0, 6) then array[tsrange1(d, d + interval '1 day')]
when d::date = current_Date then array[
tsrange1(d, current_timestamp ),
tsrange1(d + interval '20 hours', d + interval '1 day')]
else array[tsrange1(d, d + interval '8 hours'),
tsrange1(d + interval '20 hours', d + interval '1 day')]
end)
from yksus2, generate_series(
current_timestamp,
current_timestamp + interval '1 month',
interval '1 day'
) as s(d)
left join pyha on pyha = d::date
) as x
)
select yksus, start
from gaps
where gap >= interval'1hour 30 minutes'
order by start
limit 30
Schema:
CREATE EXTENSION btree_gist;
CREATE TABLE Reservat (
id serial primary key,
objekt2 char(10) not null references yksus2 on update cascade deferrable,
during tsrange not null check(
lower(during)::date = upper(during)::date
and lower(during) between current_date and current_date+ interval'1 month'
and (lower(during)::time >= '10:00'::time and upper(during)::time < '21:00'::time)
AND EXTRACT(MINUTE FROM lower(during)) IN (0, 15, 30,45)
AND EXTRACT(MINUTE FROM upper(during)) IN (0, 15, 30, 45)
and (date_part('dow', lower(during)) in (1,2,3,4,5,6)
and date_part('dow', upper(during)) in (1,2,3,4,5,6))
),
EXCLUDE USING gist (objekt2 WITH =, during WITH &&)
);
create or replace function holiday_check() returns trigger language plpgsql stable as $$
begin
if exists (select * from pyha where pyha in (lower(NEW.during)::date, upper(NEW.during)::date)) then
raise exception 'public holiday %', lower(NEW.during) ;
else
return NEW;
end if;
end;
$$;
create trigger holiday_check_i before insert or update on Reservat for each row execute procedure holiday_check();
CREATE OR REPLACE FUNCTION public.tsrange1(start timestamp with time zone,
finish timestamp with time zone ) RETURNS tsrange AS
$BODY$
SELECT tsrange(start::timestamp without time zone, finish::timestamp without time zone );
$BODY$ language sql immutable;
-- Workers
create table yksus2( yksus char(10) primary key);
insert into yksus2 values ('JOHN'), ('MARY');
-- public holidays
create table pyha( pyha date primary key);
Also posted to the pgsql-general mailing list.
Adapted schema
CREATE EXTENSION btree_gist;
CREATE TYPE timerange AS RANGE (subtype = time); -- create type once
-- Workers
CREATE TABLE worker(
worker_id serial PRIMARY KEY
, worker text NOT NULL
);
INSERT INTO worker(worker) VALUES ('JOHN'), ('MARY');
-- Holidays
CREATE TABLE pyha(pyha date PRIMARY KEY);
-- Reservations
CREATE TABLE reservat (
reservat_id serial PRIMARY KEY
, worker_id int NOT NULL REFERENCES worker ON UPDATE CASCADE
, day date NOT NULL CHECK (EXTRACT('isodow' FROM day) < 7)
, work_from time NOT NULL -- including lower bound
, work_to time NOT NULL -- excluding upper bound
, CHECK (work_from >= '10:00' AND work_to <= '21:00'
AND work_to - work_from BETWEEN interval '15 min' AND interval '4 h'
AND EXTRACT('minute' FROM work_from) IN (0, 15, 30, 45)
AND EXTRACT('minute' FROM work_to) IN (0, 15, 30, 45)
)
, EXCLUDE USING gist (worker_id WITH =, day WITH =
, timerange(work_from, work_to) WITH &&)
);
INSERT INTO reservat (worker_id, day, work_from, work_to) VALUES
(1, '2014-10-28', '10:00', '11:30') -- JOHN
, (2, '2014-10-28', '11:30', '13:00'); -- MARY
-- Trigger for volatile checks
CREATE OR REPLACE FUNCTION holiday_check()
RETURNS trigger AS
$func$
BEGIN
IF EXISTS (SELECT 1 FROM pyha WHERE pyha = NEW.day) THEN
RAISE EXCEPTION 'public holiday: %', NEW.day;
ELSIF NEW.day < now()::date OR NEW.day > now()::date + 31 THEN
RAISE EXCEPTION 'day out of range: %', NEW.day;
END IF;
RETURN NEW;
END
$func$ LANGUAGE plpgsql STABLE; -- can be "STABLE"
CREATE TRIGGER insupbef_holiday_check
BEFORE INSERT OR UPDATE ON reservat
FOR EACH ROW EXECUTE PROCEDURE holiday_check();
Major points
Don't use char(n). Rather varchar(n), or better yet, varchar or just text.
Any downsides of using data type "text" for storing strings?
Don't use the name of a worker as primary key. It's not necessarily unique and can change. Use a surrogate primary key instead, best a serial. Also makes entries in reservat smaller, indexes smaller, queries faster, ...
Update: for cheaper storage (8 bytes instead of 22) and simpler handling, I now save start and end as time and construct a range on the fly for the exclusion constraint:
EXCLUDE USING gist (worker_id WITH =, day WITH =
, timerange(work_from, work_to) WITH &&)
Since your ranges can never cross the date border by definition, it would be more efficient to have a separate date column (day in my implementation) and a time range. The type timerange is not shipped in default installations, but easily created. This way you can largely simplify your check constraints.
Use EXTRACT('isodow' FROM ...) to simplify excluding Sundays:
The day of the week as Monday(1) to Sunday(7)
I assume you want to allow the upper border of '21:00'.
Bounds are assumed to be inclusive for the lower and exclusive for the upper bound.
The check whether new / updated days lie within a month from "now" is not IMMUTABLE. Moved it from the CHECK constraint to the trigger - else you might run into problems with dump / restore! Details:
Disable all constraints and table checks while restoring a dump
Aside
Besides simplifying input and check constraints, I expected timerange to save 8 bytes of storage compared to tsrange, since time only occupies 4 bytes. But it turns out timerange occupies 22 bytes on disk (25 in RAM), just like tsrange (or tstzrange). So you might go with tsrange as well. The principle of the query and the exclusion constraint is the same.
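One way to compare the footprint of the two range types yourself (purely illustrative; pg_column_size() reports the size of the value passed to it):
SELECT pg_column_size(timerange(time '10:00', time '11:30'))        AS timerange_bytes
     , pg_column_size(tsrange(timestamp '2014-10-28 10:00'
                            , timestamp '2014-10-28 11:30'))        AS tsrange_bytes;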
Query
Wrapped into an SQL function for convenient parameter handling:
CREATE OR REPLACE FUNCTION f_next_free(_start timestamp, _duration interval)
RETURNS TABLE (worker_id int, worker text, day date
, start_time time, end_time time) AS
$func$
SELECT w.worker_id, w.worker
, d.d AS day
, t.t AS start_time
,(t.t + _duration) AS end_time
FROM (
SELECT _start::date + i AS d
FROM generate_series(0, 31) i
LEFT JOIN pyha p ON p.pyha = _start::date + i
WHERE p.pyha IS NULL -- eliminate holidays
) d
CROSS JOIN (
SELECT t::time
FROM generate_series (timestamp '2000-1-1 10:00'
, timestamp '2000-1-1 21:00' - _duration
, interval '15 min') t
) t -- times
CROSS JOIN worker w
WHERE d.d + t.t > _start -- rule out past timestamps
AND NOT EXISTS (
SELECT 1
FROM reservat r
WHERE r.worker_id = w.worker_id
AND r.day = d.d
AND timerange(r.work_from, r.work_to) && timerange(t.t, t.t + _duration)
)
ORDER BY d.d, t.t, w.worker, w.worker_id
LIMIT 30 -- could also be parameterized
$func$ LANGUAGE sql STABLE;
Call:
SELECT * FROM f_next_free('2014-10-28 12:00'::timestamp, '1.5 h'::interval);
SQL Fiddle on Postgres 9.3 now.
Explain
The function takes a _start timestamp as the minimum starting time and a _duration interval. Be careful to only rule out earlier times on the starting day, not on the following days. The simplest way is to just add day and time: d + t > _start.
To book a reservation starting "now", just pass now()::timestamp:
SELECT * FROM f_next_free(now()::timestamp, '1.5 h'::interval);
Subquery d generates days starting from the input value _start. Holidays are excluded.
Days are cross-joined with possible time ranges generated in subquery t.
That is cross-joined to all available workers w.
Finally, eliminate all candidates that collide with existing reservations using a NOT EXISTS anti-semi-join, in particular the overlaps operator &&.
Related:
How do you do date math that ignores the year? (for date math example)
Preventing adjacent/overlapping entries with EXCLUDE in PostgreSQL
Calculate working hours between 2 dates in PostgreSQL
Thom Brown on the pgsql-general mailing list recommends the following solution.
It is more readable, but Erwin's answer looks more optimized.
I have 10 workers and one month of reservations with 15-minute offsets from 8:00 to 20:00, so performance is hopefully not an issue.
Which one should I use? Which solution is better?
create table pyha (pyha date primary key);
insert into pyha(pyha) values('2014-10-29');
create table yksus2(yksus char(10) primary key);
insert into yksus2 values ('JOHN'),('MARY');
CREATE EXTENSION btree_gist;
CREATE TABLE reservat
(
reservat_id serial primary key,
objekt2 char(10) not null references yksus2 on update cascade deferrable,
during tstzrange not null,
EXCLUDE USING gist (objekt2 WITH =, during WITH &&),
CONSTRAINT same_date
CHECK (lower(during)::date = upper(during)::date),
CONSTRAINT max_1month_future
CHECK (lower(during) between current_date and current_date+ interval'1 month' ),
CONSTRAINT time_between_1000_and_2100
CHECK (lower(during)::time >= '10:00'::time and upper(during)::time < '21:00'::time),
CONSTRAINT lower_bound_included
CHECK (lower_inc(during)),
CONSTRAINT upper_bound_excluded
CHECK (not upper_inc(during)),
CONSTRAINT start_time_at_15minute_offset
CHECK (EXTRACT(MINUTE FROM lower(during)) IN (0, 15, 30,45)),
-- or (extract(epoch from lower(during)::time)::int % (60*15) = 0)
CONSTRAINT end_time_at_15minute_offset
CHECK (EXTRACT(MINUTE FROM upper(during)) IN (0, 15, 30,45)),
CONSTRAINT duration_between_15min_and_4hours
CHECK (upper(during) - lower(during) between '15 mins'::interval and '4 hours'::interval),
CONSTRAINT exclude_sundays
CHECK (date_part('dow', lower(during)) in (1,2,3,4,5,6) )
);
create or replace function holiday_check() returns trigger language plpgsql stable as $$
begin
if exists (select * from pyha where pyha between lower(NEW.during)::date and upper(NEW.during)::date) then
raise exception 'public holiday %', lower(NEW.during) ;
else
return NEW;
end if;
end;
$$;
create trigger holiday_check_i before insert or update on Reservat for each row execute procedure holiday_check();
INSERT INTO reservat (objekt2, during)
VALUES ('MARY','[2014-10-29 11:30+2,2014-10-29 13:00+2)'::tstzrange);
INSERT INTO reservat (objekt2, during)
VALUES ('JOHN','[2014-10-29 10:00+2,2014-10-29 11:30+2)'::tstzrange);
SELECT yksus2.yksus, times.period
FROM generate_series(now()::date::timestamptz, now()::date::timestamptz + '3 months'::interval, '15 mins'::interval) times(period)
CROSS JOIN yksus2
LEFT JOIN reservat ON tstzrange(times.period,times.period + '1 hour 30 mins'::interval, '[)') && reservat.during
AND yksus2.yksus = reservat.objekt2
LEFT JOIN pyha ON times.period::date = pyha.pyha::date
WHERE reservat.during IS NULL
AND pyha.pyha IS NULL
AND times.period::timetz BETWEEN '10:00'::timetz AND '21:00'::timetz - '1 hour 30 mins'::interval
AND times.period >= now()
AND EXTRACT(isoDOW FROM times.period) != 7 -- exclude sundays
ORDER BY 2, 1
LIMIT 300;