Oracle SQL - left join record with closest datetime - sql

I have 2 tables.
table1:
item
end time
1
2022-11-23 08:12:00
1
2022-11-23 09:12:00
2
2022-11-22 13:12:00
3
2022-11-22 14:12:00
table2:
item
value
last_dt
1
11
2022-11-23 09:12:00
1
12
2022-11-23 08:30:00
1
13
2022-11-24 08:30:00
2
21
2022-11-22 13:12:00
3
31
2022-11-22 14:12:00
3
32
2022-11-22 14:30:00
I would like to left join table1 to table2 on item, picking for each table1 row the table2 row whose last_dt is closest to table1's end_time.
below is the expected result.
item
end time
value
1
2022-11-23 08:12:00
12
1
2022-11-23 09:12:00
11
2
2022-11-22 13:12:00
21
3
2022-11-22 14:12:00
31

You can use a lateral join with FETCH FIRST ROW ONLY to select the closest value per row. Note that it effectively performs a nested loop, which is fast when there is an index on (item, last_dt) and table1 is small.
-- For every table1 row, look up the single table2 row of the same item whose
-- last_dt is nearest to end_time. LEFT JOIN keeps table1 rows that have no
-- table2 row for their item (value is NULL for them).
SELECT *
FROM table1 t1
LEFT JOIN LATERAL (
    SELECT t2.value
    FROM table2 t2
    WHERE t2.item = t1.item
    ORDER BY ABS(t2.last_dt - t1.end_time)
    FETCH FIRST 1 ROW ONLY
) val
    ON 1 = 1
Alternatively, you can use the FIRST aggregate function, ordering by the time difference. This also works in old Oracle versions (at least from 10g).
-- Closest value per (item, end_time): among the joined table2 rows, KEEP only
-- those ranked first by time distance, then take MAX(value) of that set
-- (MAX only matters when several rows are equally close).
SELECT
    t1.item,
    t1.end_time,
    MAX(t2.value) KEEP (
        DENSE_RANK FIRST ORDER BY ABS(t2.last_dt - t1.end_time)
    ) AS value
FROM table1 t1
LEFT JOIN table2 t2
    ON t2.item = t1.item
GROUP BY
    t1.item,
    t1.end_time
For your sample data both will return this result:
ITEM
END_TIME
VALUE
1
2022-11-23 08:12:00
12
2
2022-11-22 13:12:00
21
3
2022-11-22 14:12:00
31
db<>fiddle

Preparing
-- ms-sql-syntax
-- Fixture for the queries below. Note that table2's timestamp column is named
-- end_time here (the question calls it last_dt) and the key column is item#.
-- table1: one row per item with the time to match against.
create table table1(item# int, end_time datetime);
insert table1 select 1, '2022-11-23T08:12:00';
insert table1 select 2, '2022-11-22T13:12:00';
insert table1 select 3, '2022-11-22T14:12:00';
-- table2: candidate values per item, each with its own timestamp.
create table table2 (item# int, value int, end_time datetime);
insert table2 select 1, 11, '2022-11-23T09:12:00';
insert table2 select 1, 12, '2022-11-23T08:30:00';
insert table2 select 1, 13, '2022-11-24T08:30:00';
insert table2 select 2, 21, '2022-11-22T13:12:00';
insert table2 select 3, 31, '2022-11-22T14:12:00';
insert table2 select 3, 32, '2022-11-22T14:30:00';
expected result
item# end_time value
1 2022-11-23 08:12:00 12
2 2022-11-22 13:12:00 21
3 2022-11-22 14:12:00 31
Second
You do not need a "left join"; you need "outer apply" or "cross apply".
https://oracle-base.com/articles/12c/lateral-inline-views-cross-apply-and-outer-apply-joins-12cr1#cross-apply-join
It should be something like this:
-- Oracle syntax (12c and later).
-- For each table1 row, CROSS APPLY picks the one table2 row of the same item#
-- whose end_time is nearest. Use OUTER APPLY instead to keep table1 rows that
-- have no table2 row for their item.
-- Assumes end_time is a DATE, so the subtraction yields a number of days;
-- TODO confirm: ABS does not accept the INTERVAL produced by TIMESTAMP subtraction.
SELECT
    t1.item#,
    t1.end_time,
    t2.value
FROM table1 t1                          -- Oracle does not allow AS before a table alias
CROSS APPLY (
    SELECT t2ca.value
    FROM table2 t2ca
    WHERE t2ca.item# = t1.item#         -- compare only rows of the same item
    ORDER BY ABS(t2ca.end_time - t1.end_time)
    FETCH FIRST 1 ROW ONLY              -- ROWNUM = 1 would be applied before the ORDER BY
) t2
-- ms-sql-syntax. exactly
-- For each table1 row, return the value of the table2 row of the same item#
-- whose end_time is nearest. Use OUTER APPLY instead of CROSS APPLY to keep
-- table1 rows that have no table2 row for their item.
SELECT
    t1.item#,
    t1.end_time,
    t2.value
FROM table1 AS t1
CROSS APPLY (
    SELECT TOP 1 t2ca.value
    FROM table2 AS t2ca
    WHERE t2ca.item# = t1.item#     -- without this the nearest row of ANY item is picked
    ORDER BY ABS(DATEDIFF(second, t2ca.end_time, t1.end_time))
) AS t2
TOP 1 WITH TIES -- ms-sql-syntax
-- TOP 1 WITH TIES -- ms-sql-syntax
-- ROW_NUMBER() numbers the candidate table2 rows of each table1 row by time
-- distance; ordering by that number and taking TOP 1 WITH TIES returns every
-- row numbered 1, i.e. the closest candidate per table1 row.
SELECT TOP 1 WITH TIES
    t1.item#
  , t1.end_time
  , t2.value
FROM table1 AS t1
-- Join on item# so that only rows of the same item are compared; LEFT JOIN
-- keeps table1 rows without any table2 row (value is NULL for them).
LEFT JOIN table2 AS t2
    ON t2.item# = t1.item#
ORDER BY
    ROW_NUMBER() OVER (
        PARTITION BY t1.item#, t1.end_time
        ORDER BY ABS(DATEDIFF(second, t2.end_time, t1.end_time))
    );
SUBQUERY and window-ROW_NUMBER() -- ms-sql-syntax
-- SUBQUERY and window-ROW_NUMBER() -- ms-sql-syntax
-- The inner query numbers the candidate table2 rows of each table1 row by
-- time distance; the outer query keeps only the closest one (__rn__ = 1).
SELECT
    item#
  , end_time
  , value
FROM (
    SELECT
        t1.item#
      , t1.end_time
      , t2.value
      , ROW_NUMBER() OVER (
            PARTITION BY t1.item#, t1.end_time
            ORDER BY ABS(DATEDIFF(second, t2.end_time, t1.end_time))
        ) AS __rn__
    FROM table1 AS t1
    -- Join on item# so that only rows of the same item are compared; LEFT JOIN
    -- keeps table1 rows without any table2 row (value is NULL for them).
    LEFT JOIN table2 AS t2
        ON t2.item# = t1.item#
) AS ordering
WHERE __rn__ = 1

Related

Left join event table to incident table where dates are closest for each device id

I have two table as follows:
T1
event_id device_id event_time var1 var2
1 A 2021-01-01 5 6
2 C 2021-01-02 8 7
3 B 2021-01-05 1 6
4 C 2021-01-07 7 7
5 D 2021-01-12 8 9
6 C 2021-01-18 3 4
7 B 2021-01-21 7 1
T2
device_id incident_time
B 2021-01-06
C 2021-01-17
I would like the Output to be
device_id incident_time event_id event_time var1 var2
B 2021-01-06 3 2021-01-05 1 6
C 2021-01-17 6 2021-01-18 3 4
So, I am trying to find event_id, event_time, var1 and var2 of a single event in T1 where the event time is closest to the incident_time for each device in T2, irrespective of if event_time is bigger or smaller than incident_time. My ugly (and incorrect) code so far:
select T2.device_id, T2.incident_time, (select distinct on (device_id) event_id
from T1
where device_id = T2.device_id
and event_time <= T2.incident_time
order by device_id, event_time desc) as event_id
from T2;
How can I accomplish this more elegantly using Join statement? Or is the select as statement in brackets correct? How can I include all the required columns? Thanks
Skip the subquery, do a join instead. Something like:
-- PostgreSQL: DISTINCT ON keeps the first row per device_id in ORDER BY order,
-- which is the event with the smallest distance to the incident time.
SELECT DISTINCT ON (inc.device_id)
    inc.device_id,
    inc.incident_time,
    ev.event_id,
    ev.event_time,
    ev.var1,
    ev.var2
FROM T2 AS inc
LEFT JOIN T1 AS ev
    ON ev.device_id = inc.device_id
ORDER BY
    inc.device_id,
    ABS(inc.incident_time - ev.event_time)
I would use "Fetch First 1 Rows With Ties" via "Left Join Lateral" to get all suitable events for each incident.
-- For each incident, fetch the closest event(s) of the same device; WITH TIES
-- returns every event that shares the smallest distance. Incidents without
-- any event are kept with NULL event columns (LEFT JOIN ... ON TRUE).
SELECT
    inc.device_id,
    inc.incident_time,
    ev.event_id,
    ev.event_time,
    ev.var1,
    ev.var2
FROM T2 AS inc
LEFT JOIN LATERAL (
    SELECT src.event_id, src.device_id, src.event_time, src.var1, src.var2
    FROM T1 AS src
    WHERE src.device_id = inc.device_id
    ORDER BY ABS(src.event_time - inc.incident_time)
    FETCH FIRST 1 ROWS WITH TIES
) AS ev
    ON TRUE
Schema and insert statements:
create table T1(event_id int, device_id varchar(2), event_time date, var1 int, var2 int);
insert into T1 values(1 ,'A' ,'2021-01-01', 5, 6);
insert into T1 values(2 ,'C' ,'2021-01-02', 8, 7);
insert into T1 values(3 ,'B' ,'2021-01-05', 1, 6);
insert into T1 values(4 ,'C' ,'2021-01-07', 7, 7);
insert into T1 values(5 ,'D' ,'2021-01-12', 8, 9);
insert into T1 values(6 ,'C' ,'2021-01-18', 3, 4);
insert into T1 values(7 ,'B' ,'2021-01-21', 7, 1);
create table T2 (device_id varchar(2), incident_time date);
insert into T2 values('B', '2021-01-06');
insert into T2 values('C', '2021-01-17');
Query 1 (using cte and row_number() window function):
-- ranked_events: every incident paired with the events of its device, numbered
-- per device by distance between incident_time and event_time (1 = closest).
WITH ranked_events AS (
    SELECT
        inc.device_id,
        inc.incident_time,
        ev.event_id,
        ev.event_time,
        ev.var1,
        ev.var2,
        ROW_NUMBER() OVER (
            PARTITION BY inc.device_id
            ORDER BY ABS(inc.incident_time - ev.event_time)
        ) AS rn
    FROM T2 AS inc
    LEFT JOIN T1 AS ev
        ON inc.device_id = ev.device_id
)
SELECT device_id, incident_time, event_id, event_time, var1, var2
FROM ranked_events
WHERE rn = 1
Output:
device_id
incident_time
event_id
event_time
var1
var2
B
2021-01-06
3
2021-01-05
1
6
C
2021-01-17
6
2021-01-18
3
4
db<>fiddle here

how to merge two table in sql server?

I have two table like this:
table 1
---------------
ID Name Fname
1 N1 FN1
2 N2 FN2
3 N3 FN3
table 2
---------------
TID Day Hour ID
1 30 14 1
2 30 14 2
and I want to show this result:
Result Table
---------------
ID Name Fname TID Day Hour
1 N1 FN1 1 30 14
2 N2 FN2 2 30 14
3 N3 FN3 --- --- ---
Note: ID in Table2 is a foreign key from Table1, and I do not use a join because I can't get all the rows from both tables with it.
What SQL command is needed to display the above table?
Thank a lot.
Use the following query to retrieve your desired result:
SELECT
Name,
FName,
(SELECT TID From Table2 WHERE ID=t1.ID) TID,
(SELECT [Day] From Table2 WHERE ID=t1.ID) [Day],
(SELECT [Hour] From Table2 WHERE ID=t1.ID) [Hour]
FROM Table1 t1
simple left join would work
-- All table1 rows, each with its table2 row when one exists (NULLs otherwise).
SELECT *
FROM table1 t1
LEFT JOIN table2 t2
    ON t1.id = t2.id
-- Every Table1 row with the requested columns; TID, Day and Hour are NULL for
-- Table1 rows that have no Table2 row.
SELECT t1.ID, t1.Name, t1.Fname, t2.TID, t2.[Day], t2.[Hour]
FROM Table1 t1
LEFT OUTER JOIN Table2 t2
    ON t1.ID = t2.ID

HiveQL equivalent of !> in SQL

I have currently been trying to extract those values from a table that do not exist in another table. However, as the joining value contains null values - the not in, not exists and left join option do not seem to be working.
Therefore, is there a way to apply the 'not greater than' condition in the HiveQL?
For reference, this is the query that I ran, and similarly with not exists and left join ..
with date_prob as
(
select distinct visit
from t1
where dt=20161124
and dt1!=orig_ts
),
ev_data as
(
select distinct visit
from t1
where dt=20161124
and visit is not null
and origts is not null
and uid is not null
),
fin_data as
(
select x.visit
from ev_data x
where x.visit not in
(
select distinct visit
from date_prob
and visit is not null
)
)
The query that I ran for a left join -
with date_prob as
(
select distinct id
from t1
where dt1='2016-11-24'
and dt1!=orig_ts
and (datediff(dt1,orig_ts) not in ('1','-1'))
),
ev_data as
(
select distinct id
from t1
where dt1='2016-11-24'
and id is not null
)
select x.id
from ev_data x
left join date_prob y
where y.id is null
;
The Data Example -
id dt1 orig_ts
1 2016-11-24 2016-11-10
2 2016-11-24 2016-11-24
3 2016-11-24 2010-01-01
4 2016-11-24 2017-01-01
5 2016-11-24 2016-11-24
6 2016-11-24 2016-11-25
7 2016-11-23 2016-11-23
Therefore, from this table I want to remove those IDs where the difference between dt1 and orig_ts is greater than one day. Thus, the query should return only the rows where the ID is 2, 5 or 6.
If you want to extract those values from a table that do not exist in another table, then you can use a left join and filter where second_table_key is null.
This will work even if there are NULLs in the keys:
-- Anti-join: returns the records of table a that have no partner row in b.
SELECT a.id
FROM a
LEFT JOIN b
    ON a.id = b.id
WHERE b.id IS NULL;  -- keep only the rows that did not join
Have fixed your example. it works:
drop table if exists t1;
create table t1 (id int,dt1 string, orig_ts string );
insert overwrite table t1
select 1 id, '2016-11-24' dt1, '2016-11-10' orig_ts union all
select 2 id, '2016-11-24' dt1, '2016-11-24' orig_ts union all
select 3 id, '2016-11-24' dt1, '2010-01-01' orig_ts union all
select 4 id, '2016-11-24' dt1, '2017-01-01' orig_ts union all
select 5 id, '2016-11-24' dt1, '2016-11-24' orig_ts union all
select 6 id, '2016-11-24' dt1, '2016-11-25' orig_ts union all
select 7 id, '2016-11-23' dt1, '2016-11-23' orig_ts;
with date_prob as
(
select distinct id
from t1
where dt1='2016-11-24'
and dt1!=orig_ts
and (datediff(dt1,orig_ts) not in ('1','-1'))
),
ev_data as
(
select distinct id
from t1
where dt1='2016-11-24'
and id is not null
)
select x.id
from ev_data x
left join date_prob y on x.id=y.id
where y.id is null
;
OK
2
5
6
Time taken: 14.166 seconds, Fetched: 3 row(s)
hive>
Works as expected

Display records from table1 and records from table2 which does not exist in table 1 [duplicate]

This question already has answers here:
How to find rows in one table that have no corresponding row in another table
(6 answers)
Closed 6 years ago.
I have two tables, each containing three columns. I want to display all records from table 1 and, from table 2, only the records which do not exist in table 1.
table 1
ID Percentage OrderDate
+----+------------+----------+
1 2.0 2015-05-08
1 5.0 2014-05-08
1 19.65 2013-05-08
1 5.06 2012-05-08
1 98.0 2011-05-08
1 8.56 2010-05-08
+----+------------+----------+
table 2
ID Percentage OrderDate
+----+------------+----------+
1 45.5 2015-05-08
1 45.23 2014-05-08
1 12.00 2013-05-08
1 6.45 2012-05-08
1 18.0 2011-05-08
1 5.2 2010-05-08
1 12.0 2009-05-08
1 22.78 2008-05-08
1 48.9 2007-05-08
1 7.89 2006-05-08
1 17.96 2005-05-08
1 11.3 2004-05-08
+----+------------+----------+
Depending on which columns you are comparing, you could use something like the query below.
-- All rows of table1, followed by the table2 rows that have no counterpart in
-- table1. A counterpart is a table1 row with the same ID and OrderDate; add or
-- remove columns in the NOT EXISTS predicate to change what counts as "the
-- same record". (An inner join on <> cannot express this: it pairs each table2
-- row with every differing table1 row instead of testing for absence.)
SELECT t1.ID, t1.Percentage, t1.OrderDate
FROM table1 t1
UNION ALL
SELECT t2.ID, t2.Percentage, t2.OrderDate
FROM table2 t2
WHERE NOT EXISTS (
    SELECT 1
    FROM table1 t1
    WHERE t1.ID = t2.ID
      AND t1.OrderDate = t2.OrderDate
)
If you want to remove duplicates, use union:
select ID, Percentage, OrderDate
from table1
union
select ID, Percentage, OrderDate
from table2
If you want all the rows from both tables, use union all:
select ID, Percentage, OrderDate
from table1
union all
select ID, Percentage, OrderDate
from table2
You can use EXCEPT keyword:
-- All rows of table1, plus the table2 rows that do not appear (as a whole
-- row) in table1. The EXCEPT branch is disjoint from table1 by construction,
-- so UNION ALL is safe and keeps every table1 record, duplicates included.
SELECT ID, Percentage, OrderDate
FROM table1
UNION ALL
(
    SELECT ID, Percentage, OrderDate
    FROM table2
    EXCEPT
    SELECT ID, Percentage, OrderDate
    FROM table1
)

How do I need to change my sql to get what I want in this case?

I have a table like following:
id value date
1 5 2015-01-10
2 5 2015-06-13
3 5 2015-09-05
4 11 2015-02-11
5 11 2015-01-10
6 11 2015-01-25
As can be seen, every value appears 3 times with different date. I want to write a query that returns the unique values that has the maximum date, which would be the following for the above table:
id value date
3 5 2015-09-05
4 11 2015-02-11
How could I do it?
This is the updated question:
The real question I am encountering is a little bit more complicated than the simplified version above. I thought I could move a step further once I knew the answer to the simplified version, but I guess I was wrong. So, I am updating the question here.
I have 2 tables like following:
Table 1
id id2 date
1 2 2015-01-10
2 5 2015-06-13
3 9 2015-09-05
4 10 2015-02-11
5 26 2015-01-10
6 65 2015-01-25
Table 2
id id2 data
1 2 A
2 5 A
3 9 A
4 10 B
5 26 B
6 65 B
Here, Table 1 and Table 2 are joined by id2
What I want to get is two records as follows:
id2 date data
9 2015-09-05 A
10 2015-02-11 B
You can use row_number to select the rows with the greatest date per value
-- Latest-dated row per data group. The tables are related through id2, so the
-- join must compare id2 with id2. Explicit outer columns keep the helper
-- column rn out of the result.
select id2, date, data
from (
    select t2.id2, t1.date, t2.data,
           row_number() over (partition by t2.data order by t1.date desc) rn
    from table1 t1
    join table2 t2 on t1.id2 = t2.id2
) t
where rn = 1
-- Row(s) carrying the latest date for each value. The derived table must be
-- grouped by value: grouping by id makes every row its own maximum, so all
-- rows would be returned.
select a.id, a.value, a.date
from mytable a
inner join (
    select value, max(date) as maxdate
    from mytable
    group by value
) b
    on a.value = b.value
   and a.date = b.maxdate;
Oracle Setup:
CREATE TABLE Table1 ( id, id2, "date" ) AS
SELECT 1, 2, DATE '2015-01-10' FROM DUAL UNION ALL
SELECT 2, 5, DATE '2015-06-13' FROM DUAL UNION ALL
SELECT 3, 9, DATE '2015-09-05' FROM DUAL UNION ALL
SELECT 4, 10, DATE '2015-02-11' FROM DUAL UNION ALL
SELECT 5, 26, DATE '2015-01-10' FROM DUAL UNION ALL
SELECT 6, 65, DATE '2015-01-25' FROM DUAL;
CREATE TABLE Table2 ( id, id2, data ) AS
SELECT 1, 2, 'A' FROM DUAL UNION ALL
SELECT 2, 5, 'A' FROM DUAL UNION ALL
SELECT 3, 9, 'A' FROM DUAL UNION ALL
SELECT 4, 10, 'B' FROM DUAL UNION ALL
SELECT 5, 26, 'B' FROM DUAL UNION ALL
SELECT 6, 65, 'B' FROM DUAL;
Query:
-- One row per data group: id and id2 are taken from the Table1 row(s) with
-- the latest "date" in the group (KEEP ... LAST), alongside that latest date.
SELECT
    MAX(src.id)  KEEP (DENSE_RANK LAST ORDER BY src."date") AS id,
    MAX(src.id2) KEEP (DENSE_RANK LAST ORDER BY src."date") AS id2,
    MAX(src."date") AS "date",
    grp.data
FROM Table1 src
INNER JOIN Table2 grp
    ON src.id = grp.id
   AND src.id2 = grp.id2
GROUP BY grp.data
Output:
ID ID2 date DATA
---------- ---------- ------------------- ----
3 9 2015-09-05 00:00:00 A
4 10 2015-02-11 00:00:00 B
Query 2:
-- ranked: Table1 rows joined to their Table2 data group and numbered within
-- the group from the latest "date" downwards (1 = latest).
WITH ranked AS (
    SELECT
        src.*,
        grp.data,
        ROW_NUMBER() OVER (
            PARTITION BY grp.data
            ORDER BY src."date" DESC
        ) AS rn
    FROM Table1 src
    INNER JOIN Table2 grp
        ON src.id = grp.id
       AND src.id2 = grp.id2
)
SELECT id, id2, "date", data
FROM ranked
WHERE rn = 1;
Output:
ID ID2 date DATA
---------- ---------- ------------------- ----
3 9 2015-09-05 00:00:00 A
4 10 2015-02-11 00:00:00 B