Update date range in Postgres table - sql

I have a table with dates:
-- List the dates currently stored for range 1.
-- (The FROM keyword was missing, which made "date_ranges" a column alias.)
select id, date from date_ranges where range_id = 1;
1 2016-04-12
2 2016-04-13
3 2016-04-14
also i have an array:
example:
array('2016-04-11','2016-04-12','2016-04-13','2016-04-14','2016-04-15')
or
array('2016-04-13','2016-04-14','2016-04-15')
How can I insert the new values from the array into my table without changing the existing table values?
And if I have the second array, how can I delete the value 2016-04-12 from the table?
Please help; I need to do this in a single query.

-- Synchronise the dates stored for range_id = 1 with a target set of dates:
-- dates that are no longer wanted are deleted, missing dates are inserted,
-- and rows that already match are left untouched.
WITH current_values AS (
    -- Target set of dates for the range.
    SELECT generate_series('2016-04-13'::DATE, '2016-04-17'::DATE, '1 day')::DATE AS "date"
),
deleted_values AS (
    -- Only touch the range being synchronised; without the range_id filter
    -- the rows of every other range would be deleted as well.
    DELETE FROM date_ranges
    WHERE range_id = 1
      AND "date" NOT IN (SELECT "date" FROM current_values)
    RETURNING id
),
new_values AS (
    -- Anti-join: keep the target dates that range 1 does not contain yet.
    -- The range_id condition lives in ON so that the same date stored for
    -- another range does not suppress the insert.
    SELECT new."date"
    FROM current_values AS new
    LEFT JOIN date_ranges AS old
        ON old."date" = new."date"
       AND old.range_id = 1
    WHERE old.id IS NULL
)
INSERT INTO date_ranges ("date", range_id)
SELECT "date", 1
FROM new_values;

Related

SQL - Select date ranges without overlapping

I have the following table (Oracle database):
ID
valid_from
valid_to
1
01.01.22
28.02.22
1
01.03.22
30.06.22
1
01.07.22
31.12.22
1
01.01.23
null
2
01.01.22
31.03.22
2
01.04.22
null
How do I best extract now all date ranges without overlaps over both IDs? The final result set should look like:
valid_from
valid_to
01.01.22
28.02.22
01.03.22
31.03.22
01.04.22
30.06.22
01.07.22
31.12.22
01.01.23
null
Null stands for max_date (PL / SQL Oracle Max Date).
Moreover, I should only select the values valid for the current year (let's assume we are already in 2022).
Thanks for your help in advance!
You can use the following SELECT statement:
with
-- main table: parse the dd.mm.yy strings into real dates once
t1 AS (SELECT w, q1, q2, to_date(q1,'dd.mm.yy') q1d, to_date(q2,'dd.mm.yy') q2d FROM www)
-- custom year in YYYY format
, t0 AS (SELECT '2022' y FROM dual)
-- pair every start date with every end date that is not before it;
-- a start date with no such end date keeps a single row with NULL q2
, t2 AS (SELECT t1.q1, t1.q1d, s2.q2, s2.q2d
         FROM t1
         LEFT JOIN t1 s2 ON t1.q1d <= s2.q2d)
-- number the candidate end dates of each start date, nearest end date first.
-- The window must be ordered by q2d: ordering by the partition key q1d makes
-- every row a tie, so the row that receives r = 1 would be arbitrary.
, t3 AS (SELECT t2.*,
                row_number() OVER (PARTITION BY t2.q1d ORDER BY t2.q2d NULLS LAST) r
         FROM t2)
-- keep the nearest end date per start date, restricted to the custom year
SELECT q1, q2 FROM t3
JOIN t0 ON 1=1
WHERE r = 1
-- for the custom year (string comparison of two YYYY values)
AND t0.y <= to_char(q1d, 'yyyy')
ORDER BY q1d;
Demo
In my example table the dates are stored as varchar2 values in dd.mm.yy format. If your table columns already have the DATE datatype, you don't need to apply to_date() to those two fields.
Used table sample:
-- Sample table: w is the ID, q1 / q2 hold valid_from / valid_to as dd.mm.yy text.
CREATE TABLE www (
    w  INTEGER,
    q1 VARCHAR2(30),
    q2 VARCHAR2(30)
);
-- Rows for ID 1; the empty string marks an open-ended valid_to.
INSERT INTO www (w, q1, q2) VALUES (1, '01.01.22', '28.02.22');
INSERT INTO www (w, q1, q2) VALUES (1, '01.03.22', '30.06.22');
INSERT INTO www (w, q1, q2) VALUES (1, '01.07.22', '31.12.22');
INSERT INTO www (w, q1, q2) VALUES (1, '01.01.23', '');
-- Rows for ID 2.
INSERT INTO www (w, q1, q2) VALUES (2, '01.01.22', '31.03.22');
INSERT INTO www (w, q1, q2) VALUES (2, '01.04.22', '');
If your table has more rows with a null value in the valid_to field whose valid_from dates do not fall within any range, for example:
insert into www values (1, '01.01.24', '');
then the previous solution will produce additional rows with a null value at the end.
In that case you can use this more complex solution:
...
-- NOTE: this fragment continues a WITH clause; it references t0 and t3,
-- which are not defined in the fragment itself.
-- t4: join the custom year value and keep the first row (r = 1) of every
-- start date, carrying the parsed start date q1d along for later use
, t4 as (SELECT q1, q2, q1d FROM t3
JOIN t0 on 1=1
WHERE r = 1 AND
-- for the custom year
t0.y <= to_char(q1d, 'yyyy')
ORDER BY q1d)
-- t5: the rows that have an end date (q2 is not null)
, t5 as ( SELECT q1, q2 FROM t4 WHERE Q2 IS NOT NULL )
-- t6: collapse all rows without an end date into a single row that carries
-- the latest start date, formatted back to dd.mm.yy
, t6 as ( SELECT to_char(MAX(Q1D),'dd.mm.yy') q1, q2
FROM t4
WHERE Q2 IS NULL
GROUP BY q2)
-- final result: rows with an end date followed by the single open-ended row
SELECT * FROM t5
UNION ALL
SELECT * FROM t6;
Demo

Select min date record from duplicates in table

Let's say that I have this table "contract", which has duplicated values in the "END" column for the same ID.
ID
Begin
End
20
2016-01-01
9999-12-31
20
2020-01-01
9999-12-31
30
2018-01-01
2019-02-28
30
2019-03-01
9999-12-31
30
2020-02-01
9999-12-31
10
2019-01-01
2019-06-30
10
2019-07-01
2020-02-29
10
2020-03-01
9999-12-31
I want to get the oldest date in the "Begin" column for all the ID's that have duplicated records in the "END" column with the date "9999-12-31". So for this example I expect to get:
ID
Begin
20
2016-01-01
30
2019-03-01
I made an SQL script, but there should be a better way.
-- Oldest Begin per ID among the open-ended rows (END = '9999-12-31'),
-- restricted to the IDs that own more than one such row.
select ID, MIN(Begin) from
(
select * from contract m where exists
(
-- the ID must have at least two open-ended rows (not exactly two, so that
-- IDs with three or more duplicates are reported as well)
select 1 from contract v where END = '9999-12-31' and v.ID = m.ID
having count(ID) > 1
)
and END = '9999-12-31'
)a
-- group on the selected key; the original grouped on FUN_ID, a column that
-- is neither selected nor part of the table shown
group by ID
If it is a big table, you really want to use EXISTS for finding duplicates because it will short-circuit. Here are two ways to use EXISTS that might help with what you are trying to do.
-- Rebuild the temp table that holds the sample contract rows.
DROP TABLE IF EXISTS #Test;

CREATE TABLE #Test (
    ID      INT  NOT NULL,
    [Begin] DATE NOT NULL,
    [End]   DATE NOT NULL
);

-- Sample rows: IDs 20 and 30 each have two rows ending on 9999-12-31.
INSERT INTO #Test (ID, [Begin], [End])
VALUES
    (20, '2016-01-01', '9999-12-31'),
    (20, '2020-01-01', '9999-12-31'),
    (30, '2018-01-01', '2019-02-28'),
    (30, '2019-03-01', '9999-12-31'),
    (30, '2020-02-01', '9999-12-31'),
    (10, '2019-01-01', '2019-06-30'),
    (10, '2019-07-01', '2020-02-29'),
    (10, '2020-03-01', '9999-12-31');
--See all duplicates with OldestBegin for context
-- Every row that shares its (ID, [End]) pair with at least one other row,
-- shown together with the earliest [Begin] of that pair for context.
SELECT
    cur.ID,
    cur.[Begin],
    cur.[End],
    MIN(cur.[Begin]) OVER (PARTITION BY cur.ID, cur.[End]) AS OldestBegin
FROM #Test AS cur
WHERE EXISTS (
    -- another row with the same ID and [End] but a different [Begin]
    SELECT 1
    FROM #Test AS other
    WHERE other.ID = cur.ID
      AND other.[End] = cur.[End]
      AND other.[Begin] <> cur.[Begin]
);
--Get only oldest duplicate
-- One row per duplicated (ID, [End]) pair, carrying its earliest [Begin].
SELECT
    cur.ID,
    cur.[End],
    MIN(cur.[Begin]) AS [Begin]
FROM #Test AS cur
WHERE EXISTS (
    -- another row with the same ID and [End] but a different [Begin]
    SELECT 1
    FROM #Test AS other
    WHERE other.ID = cur.ID
      AND other.[End] = cur.[End]
      AND other.[Begin] <> cur.[Begin]
)
GROUP BY
    cur.ID,
    cur.[End];
Perhaps this will help:
-- Table variable holding the sample contract rows.
-- T-SQL table variables must be named with a leading @; the # prefix is for
-- temp tables created with CREATE TABLE and is not valid in DECLARE.
DECLARE @Tab TABLE (ID INT, [Begin] DATE, [End] DATE);

INSERT INTO @Tab (ID, [Begin], [End])
VALUES
    (20, '2016-01-01', '9999-12-31'),
    (20, '2020-01-01', '9999-12-31'),
    (30, '2018-01-01', '2019-02-28'),
    (30, '2019-03-01', '9999-12-31'),
    (30, '2020-02-01', '9999-12-31'),
    (10, '2019-01-01', '2019-06-30'),
    (10, '2019-07-01', '2020-02-29'),
    (10, '2020-03-01', '9999-12-31');

-- Keep only the open-ended rows, then report the oldest [Begin] of every ID
-- that has more than one of them.
WITH cte AS (
    SELECT ID, [Begin]
    FROM @Tab
    WHERE [End] = '9999-12-31'
)
SELECT ID, MIN([Begin]) AS [Begin]
FROM cte
GROUP BY ID
HAVING COUNT(*) > 1;
Try this:
-- Oldest "Begin" per ID among the rows ending on 9999-12-31, for the IDs that
-- have more than one such row.
-- "Begin" and "End" are quoted because they collide with SQL keywords, and
-- the aggregate is aliased because every CTE column needs a name.
WITH test AS (
    SELECT
        COUNT(*)     AS open_row_count,
        MIN("Begin") AS "Begin",
        ID
    FROM contract
    WHERE "End" = '9999-12-31'
    GROUP BY ID
    HAVING COUNT(*) > 1
)
SELECT ID, "Begin"
FROM test;

Query populating dates

query that generates records to hold a future calculated value.
Hi, I am trying to write a query against the tables below to populate a collection. I want the t2 values when the dates match; when there is no match, I want the date to be populated with null values (they will be filled in later with a calculated value). The number of records for an unmatched date should equal the number of records of the most recent date that did match. So in the example, each day after 7/1 should have 3 records, and each day after 7/5 should have just 2. I am trying to do this in one query, but I am not sure it is possible. Any help on creating this and getting it into a collection would be much appreciated.
-- Calendar table: one row per day from 07/01/2019 through 07/10/2019,
-- excluding Saturdays and Sundays.
create table t1 as
WITH DATA AS
(SELECT to_date('07/01/2019', 'MM/DD/YYYY') date1,
to_date('07/10/2019', 'MM/DD/YYYY') date2
FROM dual
)
-- LEVEL runs from 1 to the number of days in the range (see CONNECT BY)
SELECT date1+LEVEL-1 the_date,
TO_CHAR(date1+LEVEL-1, 'DY','NLS_DATE_LANGUAGE=AMERICAN') day
FROM DATA
-- day abbreviations are forced to American English so the filter does not
-- depend on the session language
WHERE TO_CHAR(date1+LEVEL-1, 'DY','NLS_DATE_LANGUAGE=AMERICAN')
NOT IN ('SAT', 'SUN')
CONNECT BY LEVEL <= date2-date1+1;
-- the terminator above was missing, so this statement ran into the next one
-- Amounts recorded per date; a date may carry several amounts.
create table t2
(cdate date,
camount number);
-- ANSI DATE literals replace the 'DD-MON-YYYY' / 'DD-MON-YY' strings, whose
-- implicit conversion depended on the session NLS_DATE_FORMAT / language.
insert into t2 (cdate, camount) values
(DATE '2019-07-01', 10);
insert into t2 (cdate, camount) values
(DATE '2019-07-01', 20);
insert into t2 (cdate, camount) values
(DATE '2019-07-01', 30);
insert into t2 (cdate, camount) values
(DATE '2019-07-05', 50);
insert into t2 (cdate, camount) values
(DATE '2019-07-05', 20);
expected results:
01-JUL-19 10
01-JUL-19 20
01-JUL-19 30
02-JUL-19 null
02-JUL-19 null
02-JUL-19 null
03-JUL-19 null
03-JUL-19 null
03-JUL-19 null
04-JUL-19 null
04-JUL-19 null
04-JUL-19 null
05-JUL-19 50
05-JUL-19 20
08-JUL-19 null
08-JUL-19 null
09-JUL-19 null
09-JUL-19 null
10-JUL-19 null
10-JUL-19 null
One approach to this kind of problem is to build the result set incrementally in a few steps:
Count matches that each THE_DATE in T1 has in T2.
Apply the rule you outlined in the question to each THE_DATE that has zero matches: carry forward (across dates in ascending order) the number of matches of the last THE_DATE that did have matches.
Generate the extra rows in T1 for the THE_DATE that have zero matches. (e.g. If it is supposed to have three null records, duplicate up to this number)
Outer join to T2 to get the CAMOUNT where it is available.
Here's an example (the three named subqueries correspond to steps 1, 2 and 3 above):
-- Step 1: count the T2 rows matching every T1 date (COUNT ignores the NULL
-- CDATE of unmatched dates, giving 0) and number the dates in ascending
-- order so that later steps can look backwards by ROWKEY.
WITH DATE_MATCH_COUNT AS (
SELECT T1.THE_DATE,
COUNT(T2.CDATE) AS MATCH_COUNT,
ROW_NUMBER() OVER (PARTITION BY NULL ORDER BY T1.THE_DATE ASC) AS ROWKEY
FROM T1
LEFT OUTER JOIN T2
ON T1.THE_DATE = T2.CDATE
GROUP BY T1.THE_DATE),
-- Step 2: for every date, take the larger of its own match count and the
-- match count of the latest date at or before it that had any matches
-- (the correlated subquery keeps the row with the highest ROWKEY).
ADJUSTED_MATCH_COUNT AS (
SELECT THE_DATE,
MATCH_COUNT AS ACTUAL_MATCH_COUNT,
GREATEST(MATCH_COUNT,
(SELECT MAX(MATCH_COUNT) KEEP ( DENSE_RANK LAST ORDER BY ROWKEY ASC )
FROM DATE_MATCH_COUNT SCALAR_MATCH_COUNT
WHERE SCALAR_MATCH_COUNT.ROWKEY <= DATE_MATCH_COUNT.ROWKEY AND
SCALAR_MATCH_COUNT.MATCH_COUNT > 0)) AS FORCED_MATCH_COUNT
FROM DATE_MATCH_COUNT),
-- Step 3: emit FORCED_MATCH_COUNT rows for a date without matches and a
-- single row for a date with matches (its real rows come from the join below).
GENERATED_MATCH_ROW AS (
SELECT THE_DATE, FORCED_MATCH_COUNT, MATCH_KEY
FROM ADJUSTED_MATCH_COUNT CROSS APPLY (SELECT LEVEL AS MATCH_KEY
FROM DUAL CONNECT BY LEVEL <= DECODE(ACTUAL_MATCH_COUNT,0,FORCED_MATCH_COUNT,1)))
-- Step 4: outer join back to T2 so matched dates show their amounts and the
-- generated rows of unmatched dates show NULL.
SELECT THE_DATE, CAMOUNT
FROM GENERATED_MATCH_ROW
LEFT OUTER JOIN T2
ON GENERATED_MATCH_ROW.THE_DATE = T2.CDATE
ORDER BY THE_DATE, CAMOUNT ASC;
Result:
THE_DATE CAMOUNT
____________ __________
01-JUL-19 10
01-JUL-19 20
01-JUL-19 30
02-JUL-19
02-JUL-19
02-JUL-19
03-JUL-19
03-JUL-19
03-JUL-19
04-JUL-19
04-JUL-19
04-JUL-19
05-JUL-19 20
05-JUL-19 50
08-JUL-19
08-JUL-19
09-JUL-19
09-JUL-19
10-JUL-19
10-JUL-19

Determining consecutive and independent PTO days

Based on feedback, I am restructuring my question.
I am working with SQL on a Presto database.
My objective is to report on employees that take consecutive days of PTO or Sick Time since the beginning of 2018. My desired output would have the individual islands of time taken by employee with the start and end dates, along the lines of:
The main table I am using is d_employee_time_off
There are only two time_off_type_name: PTO and Sick Leave.
The ds is a datestamp and I use the latest ds (usually the current date)
I have access to a date table named d_date
I can join the tables on d_employee_time_off.time_off_date = d_date.full_date
I hope that I have structured this question in a fashion that is understandable.
I believe the need here is to join the day off material to a calendar table.
In the example solution below I am generating this "on the fly", but I think you already have your own solution for this. Also, in my example I have used the string 'Monday' and moved backward from that (alternatively, you could use 'Friday' and move forward). I'm not keen on language-dependent solutions, but as I'm not a Presto user I wasn't able to test anything on Presto. So the example below uses some of your own logic, but in SQL Server syntax, which I trust you can translate to Presto:
Query:
-- Report, per employee, every unbroken run ("island") of time-off days as a
-- start/end pair. A time-off day that falls on a Monday also claims the two
-- calendar days before it, so a run is not broken by the weekend.
;WITH
-- Digits: the numbers 0 through 9
Digits AS (
SELECT 0 AS digit UNION ALL
SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL
SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL
SELECT 9
)
-- cal: calendar generated on the fly, 1000 consecutive days (numbers 0-999)
-- starting at 2018-01-01, each with its weekday name
, cal AS (
SELECT
ca.number
, dateadd(day,ca.number,'20180101') as cal_date
, datename(weekday,dateadd(day,ca.number,'20180101')) weekday
FROM Digits [1s]
CROSS JOIN Digits [10s]
CROSS JOIN Digits [100s] /* add more like this as needed */
cross apply (
SELECT
[1s].digit
+ [10s].digit * 10
+ [100s].digit * 100 /* add more like this as needed */
AS number
) ca
)
-- time_off: calendar days covered by time off. A non-Monday calendar day
-- matches the time-off row of the same date; a time-off row dated on a Monday
-- matches the calendar days from two days earlier through that Monday.
-- NOTE(review): the 'Monday' comparison depends on the session language.
, time_off AS (
select
*
from cal
inner join mytable t on (cal.cal_date = t.time_off_date and cal.weekday <> 'Monday')
or (cal.cal_date between dateadd(day,-2,t.time_off_date)
and t.time_off_date and datename(weekday,t.time_off_date) = 'Monday')
)
-- starting_points: covered days with no covered day immediately before them
-- for the same employee, numbered per employee in time_off_date order
, starting_points AS (
SELECT
employee_id,
cal_date,
dense_rank() OVER(partition by employee_id
ORDER BY
time_off_date
) AS rownum
FROM
time_off A
WHERE
NOT EXISTS (
SELECT
*
FROM
time_off B
WHERE
B.employee_id = A.employee_id
AND B.cal_date = DATEADD(day, -1, A.cal_date)
)
)
-- ending_points: covered days with no covered day immediately after them
-- for the same employee, numbered the same way as the starting points
, ending_points AS (
SELECT
employee_id,
cal_date,
dense_rank() OVER(partition by employee_id
ORDER BY
time_off_date
) AS rownum
FROM
time_off A
WHERE
NOT EXISTS (
SELECT
*
FROM
time_off B
WHERE
B.employee_id = A.employee_id
AND B.cal_date = DATEADD(day, 1, A.cal_date)
)
)
-- pair the n-th starting point of an employee with the n-th ending point
SELECT
S.employee_id,
S.cal_date AS start_range,
E.cal_date AS end_range
FROM
starting_points S
JOIN
ending_points E
ON E.employee_id = S.employee_id
AND E.rownum = S.rownum
order by employee_id
, start_range
Result:
employee_id start_range end_range
1 200035 02.01.2018 02.01.2018
2 200035 20.04.2018 27.04.2018
3 200037 27.01.2018 29.01.2018
4 200037 31.03.2018 02.04.2018
see: http://rextester.com/MISZ50793
-- Sample time-off data: one row per employee per day taken off.
CREATE TABLE mytable (
    ID               INT        NOT NULL,
    employee_id      INTEGER    NOT NULL,
    type             VARCHAR(3) NOT NULL,
    time_off_date    DATE       NOT NULL,
    time_off_in_days INT        NOT NULL
);

-- Same nine rows as before, loaded with a single multi-row INSERT.
INSERT INTO mytable (id, employee_id, type, time_off_date, time_off_in_days)
VALUES
    (1, 200035, 'PTO', '2018-01-02', 1),
    (2, 200035, 'PTO', '2018-04-20', 1),
    (3, 200035, 'PTO', '2018-04-23', 1),
    (4, 200035, 'PTO', '2018-04-24', 1),
    (5, 200035, 'PTO', '2018-04-25', 1),
    (6, 200035, 'PTO', '2018-04-26', 1),
    (7, 200035, 'PTO', '2018-04-27', 1),
    (8, 200037, 'PTO', '2018-01-29', 1),
    (9, 200037, 'PTO', '2018-04-02', 1);

How to combine tables based on timestamps

Imagine you have two tables of events. Both tables A and B have a single column, called timestamp, with multiple rows.
Now I'd like to combine these two tables into a table C with the following properties:
C has a row for every row in A
C has a timestamp column that perfectly reflects the contents of A
C has another column called near_event that is true if there is a row in B within 1s of the timestamp of this row, false otherwise
How might I do that efficiently?
mauro pointed me to this one, saying that Vertica could do better than that - and, indeed, it can, as it has a predicate that enables what we call the event series join. All you need to do is to run a non-inner join (left, right or full outer) and use INTERPOLATE PREVIOUS VALUE intelligently as the join predicate.
You might want to have a look on my LinkedIn post :
https://www.linkedin.com/pulse/verticas-event-series-join-joining-two-time-tables-marco-gessner/
.. which illustrates even a more complex use case.
Using the same tables as in that blog:
-- Sample "table A": four oil-pressure readings, ten seconds apart.
-- Local temporary table whose rows are kept across commits
-- (ON COMMIT PRESERVE ROWS); the column names come from the list in
-- parentheses and the rows from the SELECT ... UNION ALL chain.
CREATE LOCAL TEMPORARY TABLE oilpressure (
op_ts,op_psi
) ON COMMIT PRESERVE ROWS AS (
SELECT TIMESTAMP '2015-04-01 07:00:00', 25.356
UNION ALL SELECT TIMESTAMP '2015-04-01 07:00:10', 35.124
UNION ALL SELECT TIMESTAMP '2015-04-01 07:00:20', 47.056
UNION ALL SELECT TIMESTAMP '2015-04-01 07:00:30', 45.225
)
;
-- Sample "table B": four rev-speed readings at irregular timestamps
-- (07:00:00, :08, :15, :20), so only some line up with the oilpressure rows.
-- Local temporary table whose rows are kept across commits.
CREATE LOCAL TEMPORARY TABLE revspeed (
rs_ts,rpm
) ON COMMIT PRESERVE ROWS AS (
SELECT TIMESTAMP '2015-04-01 07:00:00', 2201
UNION ALL SELECT TIMESTAMP '2015-04-01 07:00:08', 3508
UNION ALL SELECT TIMESTAMP '2015-04-01 07:00:15', 6504
UNION ALL SELECT TIMESTAMP '2015-04-01 07:00:20', 6608
)
;
Let oilpressure be your table A, and revspeed be your table B.
Then what you would want (if you only want the timestamps) is this:
-- Event series join: every oilpressure row is kept (LEFT JOIN) and paired
-- with the revspeed timestamp that is equal to it or, failing that, the most
-- recent earlier one (INTERPOLATE PREVIOUS VALUE).
SELECT
    op.op_ts,
    rs.rs_ts
FROM oilpressure AS op
LEFT JOIN revspeed AS rs
    ON op.op_ts INTERPOLATE PREVIOUS VALUE rs.rs_ts;
op_ts |rs_ts
2015-04-01 07:00:00|2015-04-01 07:00:00
2015-04-01 07:00:10|2015-04-01 07:00:08
2015-04-01 07:00:20|2015-04-01 07:00:20
2015-04-01 07:00:30|2015-04-01 07:00:20
You may be able to do this, if you don't have too many duplicates. Here is the idea:
-- Flag every row of a with 1 when a row of b lies less than one second
-- before or after it, otherwise 0.
-- Approach: stack both tables, carry the nearest b timestamp forward and
-- backward over the combined ordering, then compare on the a rows only.
select timestamp,
       (case when timestamp < timestamp_add(second, 1, last_b_timestamp) or
                  timestamp > timestamp_add(second, -1, next_b_timestamp)
             then 1 else 0
        end) as flag
from (select timestamp, which,
             -- latest b timestamp at or before this row; IGNORE NULLS is what
             -- lets the value carry across the a rows, where the CASE is NULL
             last_value(case when which = 'b' then timestamp end ignore nulls)
                 over (order by timestamp) as last_b_timestamp,
             -- earliest b timestamp at or after this row (same idea, scanning
             -- in descending order)
             last_value(case when which = 'b' then timestamp end ignore nulls)
                 over (order by timestamp desc) as next_b_timestamp
      from ((select a.timestamp, 'a' as which from a) union all
            (select b.timestamp, 'b' as which from b)
           ) ab
     ) ab
where which = 'a';