Get 'most recent' grouped record (with order by) - sql

I have a query like the below
SELECT
t1.Supplier,
t2.Product,
FROM
t1
INNER JOIN
t2 ON t1.ProductCode = t2.ProductCode
GROUP BY
t1.Supplier, t2.Product
On table t1, there are also columns called 'Timestamp' and 'Price' - I want to get the most recent price, i.e. SELECT Price ORDER BY Timestamp DESC. Can I do this with any aggregate functions, or would it have to be a subquery?

One standard way of doing this is to use ROW_NUMBER() to create an additional column in the source data, allowing you to identify which row is "first" within each "partition".
-- Number each supplier's rows per product, newest Timestamp first, so that
-- recency_id = 1 marks the most recent price row in every partition.
WITH supplier_sorted AS (
    SELECT
        t1.Supplier,
        t1.ProductCode,
        t1.Price,
        ROW_NUMBER() OVER (
            PARTITION BY t1.Supplier, t1.ProductCode
            ORDER BY t1.Timestamp DESC
        ) AS recency_id
    FROM t1
)
-- Keep only the newest row per (Supplier, ProductCode); no GROUP BY is needed
-- because the filter already leaves one row per partition.
SELECT
    s.Supplier,
    p.Product,
    s.Price
FROM supplier_sorted AS s
INNER JOIN t2 AS p
    ON s.ProductCode = p.ProductCode
WHERE s.recency_id = 1

You can use cross apply:
-- For every t2 row, attach the single t1 row with the newest TimeStamp for
-- the same ProductCode. CROSS APPLY drops t2 rows that have no t1 match.
SELECT
    t2.*,
    t1.*
FROM t2
CROSS APPLY (
    SELECT TOP (1) src.*
    FROM t1 AS src
    WHERE src.ProductCode = t2.ProductCode
    ORDER BY src.TimeStamp DESC
) AS t1;
So, GROUP BY is not necessary.

You can use ROW_NUMBER() partitioned by supplier and product and ordered by Timestamp descending to identify the latest record in each partition. You can then filter on that row number in the same query, without any aggregation, to get the desired result.
For questions like this, window functions are a better fit than GROUP BY.
-- Rank the joined rows newest-first within each (Supplier, Product) pair,
-- then keep only the first-ranked row of every pair.
WITH ranked AS (
    SELECT
        t1.Supplier,
        t2.Product,
        T1.Price,
        ROW_NUMBER() OVER (
            PARTITION BY t1.Supplier, t2.Product
            ORDER BY T1.[Timestamp] DESC
        ) AS row_num
    FROM t1
    INNER JOIN t2
        ON t1.ProductCode = t2.ProductCode
)
SELECT
    r.Supplier,
    r.Product,
    r.Price
FROM ranked AS r
WHERE r.row_num = 1
Tested using below added data.
-- Sample schema and rows for trying out the "latest price" queries.
CREATE TABLE t1
( Supplier varchar(100)
, ProductCode int
, Price decimal(10,2)
, [TimeStamp] datetime
)
CREATE TABLE t2
(
  ProductCode int
, Product varchar(100)
)
-- Each price row gets its own explicit timestamp. Stamping every row with
-- GetDate() can give identical values, which leaves "most recent" undefined.
-- With these values the latest prices are 80.00 (A / Pro1) and 500.00 (b / Pro2).
insert into t1 (Supplier, ProductCode, Price, [TimeStamp]) values ('A', 1, 100.00, '2023-01-01T09:00:00')
insert into t1 (Supplier, ProductCode, Price, [TimeStamp]) values ('A', 1, 80.00, '2023-01-02T09:00:00')
insert into t1 (Supplier, ProductCode, Price, [TimeStamp]) values ('b', 2, 190.00, '2023-01-01T09:00:00')
insert into t1 (Supplier, ProductCode, Price, [TimeStamp]) values ('b', 2, 500.00, '2023-01-02T09:00:00')
-- Product 3 has no price rows, so it exercises the "no match" case.
insert into t2 (ProductCode, Product) values (1, 'Pro1')
insert into t2 (ProductCode, Product) values (2, 'Pro2')
insert into t2 (ProductCode, Product) values (3, 'Pro3')

Related

Count rows from another table matching 2 ids and after a date in Postgresql

I have 2 tables in Postgresql 13 with the following sample structure:
table1
-------
client_id
member_id
email_count
last_date
table2
-------
client_id
member_id
created_at
I'm trying to update the email_count column for each record in table1 with a count of the rows from table2 where client_id and member_id match and the created_at date is later than the last_date column.
I've tried multiple approaches but can't seem to get the right combination. My latest approach, using a CTE, looks like this:
-- Asker's attempt, kept exactly as posted.
with counted as (
select t.client_id,
t.member_id,
t.last_date,
(select count(*)
from table2 t2
where t2.client_id = t.client_id
and t2.member_id = t.member_id
-- NOTE(review): the table1 layout listed above has last_date; no last_engagement_date column is shown
and t2.created_at > t.last_engagement_date
) as count
from (
select t1.client_id,
t1.member_id,
t1.last_date
from table1 t1
) t
)
update table1
-- NOTE(review): counted is referenced below but never appears in a FROM clause of this UPDATE
set email_count = counted.count
where table1.client_id = counted.client_id
and table1.member_id = counted.member_id;
But all of the counts are coming up as zero. Ive verified the data and should be getting counts as high as 200 in some cases.
Thanks in advance for any assistance!
EDIT
Example structure with additional data from first answer:
create table table1 (
client_id int,
member_id int,
email_count int,
last_date date
);
create table table2 (
client_id int,
member_id int,
created_at date
);
insert into table1
values (1, 1, null, '2021-06-01')
,(2, 3, null, '2021-05-01')
,(2, 4, null, '2021-04-01');
insert into table2
values (1, 1, '2021-05-01')
,(1, 1, '2021-07-01')
,(2, 3, '2021-06-01')
,(2, 3, '2021-07-01')
,(2, 4, '2021-04-01')
,(2, 4, '2021-05-01')
,(2, 4, '2021-06-01')
,(2, 4, '2021-07-01');
From this data Im expecting to get the following results in the email_count field:
client_id|member_id|email_count
1 | 1 | 1
2 | 3 | 2
2 | 4 | 3
Your code throws errors on dbfiddle.
This would be easier with a working fiddle to start with, including sample data.
create table table1 (
client_id int,
member_id int,
email_count int,
last_date date
);
create table table2 (
client_id int,
member_id int,
created_at date
);
insert into table1
values (1, 1, null, '2021-06-01');
insert into table2
values (1, 1, '2021-05-01')
, (1, 1, '2021-07-01');
-- Recompute email_count for every table1 row (intentionally no WHERE clause).
-- The subquery is correlated on client_id and member_id, so each row gets its
-- own count of table2 rows created after its last_date. A derived table with
-- no join condition would write one grand total into every row instead.
-- Rows with no later table2 entries get 0.
update table1
set email_count = (
    select count(*)
    from table2
    where table2.client_id = table1.client_id
      and table2.member_id = table1.member_id
      and table2.created_at > table1.last_date
)
;
select *
from table1
https://dbfiddle.uk/?rdbms=postgres_13&fiddle=71d887b17cd91793a719786ed829b58d
You can use the following code to update in postgresql.
-- Build one row per table1 record holding the number of table2 rows created
-- after that record's last_date, then join it back on the two id columns.
UPDATE table1
SET email_count = result.count
FROM (
    SELECT
        src.client_id,
        src.member_id,
        (
            SELECT count(*)
            FROM table2 t2
            WHERE t2.client_id = src.client_id
              AND t2.member_id = src.member_id
              AND t2.created_at > src.last_date
        ) AS count
    FROM table1 src
) result
WHERE table1.client_id = result.client_id
  AND table1.member_id = result.member_id
You can also compute the counts in a CTE and use that CTE to drive the update of the source table:
-- One row per table1 record with the number of table2 rows created after
-- that record's last_date.
with counted as (
    select t1.client_id,
           t1.member_id,
           (select count(*)
            from table2 t2
            where t2.client_id = t1.client_id
              and t2.member_id = t1.member_id
              and t2.created_at > t1.last_date) as count
    from table1 t1
)
-- PostgreSQL cannot UPDATE a CTE by name; update the base table and bring
-- the CTE in through FROM, matching rows on both id columns.
update table1
set email_count = counted.count
from counted
where table1.client_id = counted.client_id
  and table1.member_id = counted.member_id;
If this is so easy to query...
-- Per (client_id, member_id): number of table2 rows dated after the matching
-- table1 row's last_date. Pairs with no such rows do not appear (inner join).
select
    t2.client_id,
    t2.member_id,
    count(t2.*) email_count
from table1 t1
inner join table2 t2
    on t2.client_id = t1.client_id
   and t2.member_id = t1.member_id
   and t2.created_at > t1.last_date
group by
    t2.client_id,
    t2.member_id
https://dbfiddle.uk/?rdbms=postgres_13&fiddle=a0df6cef6916a321d97669fd4d0cec63
...why would you need to persist the value in email_count? That's a form of duplication that, if there isn't a reason to persist the data to improve system performance, you may not want.

Extract records modified in different date

I have a table called t1 with these fields: codLoan, codOp, codCau, action, and dateExec. The action field can take one of three values: 'I' (Inserted), 'M' (Modified) or 'C' (Cancelled).
My records can be modified in different dates, so I can have two records with the same codLoan but with different value for dateExec.
I have to extract all the records that have the same codLoan and different Action (I or M) in different dateExec.
For instance:
codLoan=1
dateExec= '2018/08/08'
action='I'
codLoan=1
dateExec= '2018/08/08'
action='M'
codLoan=2
dateExec= '2018/08/07'
action='I'
codLoan=2
dateExec= '2018/08/08'
action='M'
Result: codLoan=2, dateExec= '2018/08/08'
I tried this query, but it extracts all the records with Action='I' and Action='M'.
select codLoan, dateExec
from t1
where Action in ('I','M');
How can I fix my code?
Perhaps you want :
-- Rows of t1 (the asker's table) whose loan also has a row with a different
-- action on a different date. Both sides are limited to 'I' and 'M' because
-- the question excludes cancelled ('C') records. The real table name is used
-- because TABLE is a reserved word and cannot stand in as an identifier.
select t.*
from t1 t
where t.action in ('I','M')
  and exists (select 1
              from t1 other
              where other.codLoan = t.codLoan
                and other.action in ('I','M')
                and other.dateExec <> t.dateExec
                and other.action <> t.action
             );
Use a join:
-- Pair each I/M row with every later-dated I/M row of the same loan that
-- carries a different action, and return the later row of the pair.
select later.*
from mytable earlier
join mytable later
  on later.codLoan = earlier.codLoan
 and later.dateExec > earlier.dateExec
 and later.action <> earlier.action
where earlier.action in ('I','M')
  and later.action in ('I','M')
This returns the last action and date.
-- Table variable holding the sample rows from the question.
-- T-SQL table variables are declared with @; "declare #t table" does not parse.
declare @t table
(
    codLoan int,
    dateExec date,
    [action] char(1)
);
insert into @t (codLoan, dateExec, [action]) values
(1, '2018-08-08', 'I'),
(1, '2018-08-08', 'M'),
(2, '2018-08-07', 'I'),
(2, '2018-08-08', 'M');
with diff as
(
    select x.*
    from
    (
        select
            *,
            -- rows sharing this loan and date
            cnt1 = count(*) over (partition by codLoan, dateExec
                                  order by codLoan),
            -- rows sharing this loan, date and action
            cnt2 = count(*) over (partition by codLoan, [action], dateExec
                                  order by codLoan),
            -- 1 = latest date of the loan, numbered before the filter below
            rnum = row_number() over (partition by codLoan
                                      order by dateExec desc)
        from @t
    ) x
    -- equal counts mean every row of the loan on that date has the same action
    where cnt1 = cnt2
)
select codLoan, dateExec
from diff
where rnum = 1
order by codLoan, dateExec;
Just another option
-- Loans whose I/M records span more than one date, reported with the latest
-- of those dates. The question's expected output for codLoan 2 is 2018/08/08,
-- which is the later date, so max() is used rather than min().
select codLoan
     , dateExec = max(dateExec)
from t1
where Action in ('I','M')
Group By codLoan
Having min(dateExec) <> max(dateExec)

Finding closest expiration date

I have a table with columns ItemCode , ItemCount, ExpiredDate in which the expired date of items are saved when stocks of items increase. I have a View which shows current stocks of my items and it's columns are ItemCode and ItemStock.
Table
ItemCode, ItemCount, ExpiredDate
1001 , 200 , 2010/01/01
1001 , 100 , 2020/02/01
1001 , 200 , 2021/03/01
1002 , 150 , 2020/03/01
View
ItemCode, ItemStock
1001 , 250
1002 , 40
1003 , 50
1004 , 60
I want a query that returns closest expired date according to items stock.
Result
ItemCode, ClosestExpirationDate
1001 , 2020/02/01
1002 , 2020/03/01
1003 , -----
1004 , -----
Try using the absolute difference of dates:
-- For each view row, rank the table's expiry dates by absolute distance in
-- days from today and keep the nearest one. Items with no table rows survive
-- through the LEFT JOIN with a NULL date.
SELECT
    ranked.ItemCode,
    ranked.ExpiredDate AS ClosestExpirationDate
FROM (
    SELECT
        v.ItemCode,
        s.ExpiredDate,
        ROW_NUMBER() OVER (
            PARTITION BY v.ItemCode
            ORDER BY ABS(DATEDIFF(day, GETDATE(), COALESCE(s.ExpiredDate, GETDATE())))
        ) AS rn
    FROM [yourView] AS v
    LEFT JOIN [yourTable] AS s
        ON v.ItemCode = s.ItemCode
) AS ranked
WHERE ranked.rn = 1
ORDER BY ranked.ItemCode;
Demo
Note: I assumed you want expiry dates regardless of whether they occur in the past or future. If you only want future expiry dates, then the above query can be slightly modified.
Unfortunately, I cannot provide the exact query for you but I can tell you how I would solve that puzzle:
you need only your table. In the following lines, I will call that table 'Items'
numbering your items based on their expiration date with ROW_NUMBER(), PARTITION BY ItemCode ORDER BY ExpirationDate. MSDN: https://learn.microsoft.com/en-us/sql/t-sql/functions/row-number-transact-sql?view=sql-server-2017.
put the previous query into a CTE and keep only the first row of each item. MSDN: https://learn.microsoft.com/en-us/sql/t-sql/queries/with-common-table-expression-transact-sql?view=sql-server-2017
I cannot validate that right now but the solution is something like this:
-- Number each item's rows by ascending ExpirationDate, then keep the first
-- (earliest) row per ItemCode.
;with numberedItems as (
    select
        i.ItemCode,
        i.ExpirationDate,
        row_number() over (
            partition by i.ItemCode
            order by i.ExpirationDate
        ) as RowNo
    from Items as i
)
select
    n.ItemCode,
    n.ExpirationDate
from numberedItems as n
where n.RowNo = 1
The benefit of this solution is the SQL server will read your table only once, you don't have to do two queries to get the single resultset.
I hope it helps.
Use outer apply. I think you want the next date in the future:
-- For each view row, fetch that item's nearest expiry date in the future.
-- The subquery is correlated on ItemCode so each item sees only its own
-- dates, and it sorts ascending so TOP (1) is the next date rather than the
-- farthest one. OUTER APPLY keeps items with no future date (NULL result).
-- [yourView] / [yourTable] stand for the real object names.
select v.*, t.ExpiredDate
from [yourView] v outer apply
     (select top (1) t.*
      from [yourTable] t
      where t.ItemCode = v.ItemCode
        and t.ExpiredDate > getdate()
      order by t.ExpiredDate asc
     ) t;
If you want to include past dates as well, the structure is very similar:
-- For each view row, fetch that item's expiry date closest to today, past or
-- future. The subquery is correlated on ItemCode, and it sorts by the
-- absolute day distance; a signed datediff would always pick the oldest date.
-- [yourView] / [yourTable] stand for the real object names.
select v.*, t.ExpiredDate
from [yourView] v outer apply
     (select top (1) t.*
      from [yourTable] t
      where t.ItemCode = v.ItemCode
      order by abs(datediff(day, getdate(), t.ExpiredDate)) asc
     ) t;
Try this:
Sample data:
-- Sample data as local temporary tables. "declare #tbl1 table" is not valid
-- T-SQL; a #-prefixed name is a temp table and is created with CREATE TABLE.
create table #tbl1 (ItemCode int, ItemCount int, ExpiredDate date);
insert into #tbl1 (ItemCode, ItemCount, ExpiredDate) values
(1001, 200, '2010/01/01'),
(1001, 100, '2020/02/01'),
(1001, 200, '2021/03/01'),
(1002, 150, '2020/03/01');
create table #tbl2 (ItemCode int, ItemStock int);
insert into #tbl2 (ItemCode, ItemStock) values
(1001, 250),
(1002, 40),
(1003, 50),
(1004, 60);
T-SQL:
-- Running total of ItemCount per item in expiry-date order; the earliest
-- date whose running total exceeds the current stock is reported. Items with
-- no qualifying row are kept by the outer join with a NULL date.
select
    stock.ItemCode,
    min(batches.ExpiredDate) ClosestExpirationDate
from #tbl2 stock
left join (
    select
        ItemCode,
        ItemCount,
        ExpiredDate,
        SUM(ItemCount) over (partition by ItemCode order by ExpiredDate) CumSum
    from #tbl1
) batches
    on batches.ItemCode = stock.ItemCode
   and batches.CumSum > stock.ItemStock
group by stock.ItemCode
For SQL Server versions earlier than 2012 (which do not support ORDER BY inside SUM() OVER):
-- Same result without a windowed SUM: the running total is built by joining
-- each batch to all batches of the same item that expire on or before it.
select
    stock.ItemCode,
    min(batches.ExpiredDate) ClosestExpirationDate
from #tbl2 stock
left join (
    select
        cur.ItemCode,
        cur.ItemCount,
        cur.ExpiredDate,
        SUM(prior.ItemCount) CumSum
    from #tbl1 cur
    join #tbl1 prior
        on cur.ItemCode = prior.ItemCode
       and cur.ExpiredDate >= prior.ExpiredDate
    group by cur.ItemCode, cur.ItemCount, cur.ExpiredDate
) batches
    on batches.ItemCode = stock.ItemCode
   and batches.CumSum > stock.ItemStock
group by stock.ItemCode

how to use SQL group to filter rows with maximum date value

I have the following table
CREATE TABLE Test
(`Id` int, `value` varchar(20), `adate` varchar(20))
;
INSERT INTO Test
(`Id`, `value`, `adate`)
VALUES
(1, 100, '2014-01-01'),
(1, 200, '2014-01-02'),
(1, 300, '2014-01-03'),
(2, 200, '2014-01-01'),
(2, 400, '2014-01-02'),
(2, 30 , '2014-01-04'),
(3, 800, '2014-01-01'),
(3, 300, '2014-01-02'),
(3, 60 , '2014-01-04')
;
I want to achieve the result which selects only Id having max value of date. ie
Id ,value ,adate
1, 300,'2014-01-03'
2, 30 ,'2014-01-04'
3, 60 ,'2014-01-04'
how can I achieve this using group by? I have done as follows but it is not working.
Select Id,value,adate
from Test
group by Id,value,adate
having adate = MAX(adate)
Can someone help with the query?
Select the maximum dates for each id.
-- Latest adate for every id.
SELECT
    id,
    MAX(adate) AS max_date
FROM test
GROUP BY id
Join on that to get the rest of the columns.
-- Keep only the rows whose adate equals the latest adate of their id.
SELECT cur.*
FROM test AS cur
INNER JOIN (
    SELECT
        id,
        MAX(adate) AS max_date
    FROM test
    GROUP BY id
) AS latest
    ON cur.id = latest.id
   AND cur.adate = latest.max_date;
Please try:
-- Rows of tbl whose adate is the maximum adate among rows with the same Id.
SELECT *
FROM tbl a
WHERE a.adate = (
    SELECT MAX(b.adate)
    FROM tbl b
    WHERE b.Id = a.Id
)
If you are using a DBMS that has analytical functions you can use ROW_NUMBER:
-- Number each Id's rows newest-first and keep the first one.
SELECT
    T.Id,
    T.Value,
    T.ADate
FROM (
    SELECT
        src.ID,
        src.Value,
        src.ADate,
        ROW_NUMBER() OVER (
            PARTITION BY src.ID
            ORDER BY src.Adate DESC
        ) AS RowNum
    FROM Test AS src
) AS T
WHERE T.RowNum = 1;
Otherwise you will need to use a join to the aggregated max date by Id to filter the results from Test to only those where the date matches the maximum date for that Id
-- Join Test to its per-ID maximum date so only the latest rows remain.
SELECT
    cur.Id,
    cur.Value,
    cur.ADate
FROM Test AS cur
INNER JOIN (
    SELECT
        ID,
        MAX(ADate) AS ADate
    FROM Test
    GROUP BY ID
) AS MaxT
    ON MaxT.ID = cur.ID
   AND MaxT.ADate = cur.ADate;
I would try something like this
-- Rows of Test whose adate is the latest adate recorded for their id.
SELECT
    cur.Id,
    cur.value,
    cur.adate
FROM Test AS cur
WHERE cur.adate = (
    SELECT MAX(other.adate)
    FROM Test AS other
    WHERE other.id = cur.id
)

How to get the closest dates in Oracle sql

For example, I have 2 time tables:
T1
id time
1 18:12:02
2 18:46:57
3 17:49:44
4 12:19:24
5 11:00:01
6 17:12:45
and T2
id time
1 18:13:02
2 17:46:57
I need to get time from T1 that are the closest to time from T2. There is no relationship between this tables.
It should be something like this:
select T1.calldatetime
from T1, T2
where T1.calldatetime between
T2.calldatetime-(
select MIN(ABS(T2.calldatetime-T1.calldatetime))
from T2, T1)
and
T2.calldatetime+(
select MIN(ABS(T2.calldatetime-T1.calldatetime))
from T2, T1)
But I can't get it. Any suggestions?
You only have to use a single Cartesian join to solve your problem, unlike the other solutions, which use several. I assume time is stored as a VARCHAR2. If it is stored as a DATE (which I would highly recommend), you can remove the TO_DATE functions, but you will then have to strip the date portions so that only the times are compared.
I've made it slightly verbose so it's obvious what's going on.
-- pairs: every T1 row combined with every T2 row, plus the absolute gap
-- between the two times.
with pairs as (
    select t1.id
         , t1.tm
         , t2.id as t2id
         , abs( to_date(t1.tm, 'hh24:mi:ss')
              - to_date(t2.tm, 'hh24:mi:ss')) as difference
    from t1
    cross join t2
),
-- ranked: within each T2 id, rank the T1 rows by that gap (1 = closest).
ranked as (
    select id, tm
         , rank() over ( partition by t2id order by difference asc ) as rnk
    from pairs
)
select *
from ranked
where rnk = 1
Basically, this works out the absolute difference between every time in T1 and T2 then picks the smallest difference by T2 ID; returning the data from T1.
Here it is in SQL Fiddle format.
The less pretty (but shorter) format is:
-- Rank every T1 row against each T2 row by absolute time gap and keep the
-- closest T1 row(s) per T2 id; ties all receive rank 1.
SELECT *
FROM (
    SELECT
        t1.*,
        RANK() OVER (
            PARTITION BY t2.id
            ORDER BY ABS(TO_DATE(t1.tm, 'hh24:mi:ss') - TO_DATE(t2.tm, 'hh24:mi:ss'))
        ) AS rnk
    FROM t1
    CROSS JOIN t2
) a
WHERE rnk = 1
I believe this is the query you are looking for:
CREATE TABLE t1(id INTEGER, time DATE);
CREATE TABLE t2(id INTEGER, time DATE);
INSERT INTO t1 VALUES (1, TO_DATE ('18:12:02', 'HH24:MI:SS'));
INSERT INTO t1 VALUES (2, TO_DATE ('18:46:57', 'HH24:MI:SS'));
INSERT INTO t1 VALUES (3, TO_DATE ('17:49:44', 'HH24:MI:SS'));
INSERT INTO t1 VALUES (4, TO_DATE ('12:19:24', 'HH24:MI:SS'));
INSERT INTO t1 VALUES (5, TO_DATE ('11:00:01', 'HH24:MI:SS'));
INSERT INTO t1 VALUES (6, TO_DATE ('17:12:45', 'HH24:MI:SS'));
INSERT INTO t2 VALUES (1, TO_DATE ('18:13:02', 'HH24:MI:SS'));
INSERT INTO t2 VALUES (2, TO_DATE ('17:46:57', 'HH24:MI:SS'));
-- b holds, for each T2 row, the smallest absolute gap to any T1 row.
-- The join on b.id is required: without it a (t1, t2) pair would also be
-- returned whenever its gap happened to equal the minimum of a different
-- T2 row.
SELECT t1.*, t2.*
FROM t1
CROSS JOIN t2
INNER JOIN ( SELECT t2.id, MIN (ABS (t2.time - t1.time)) diff
             FROM t1
             CROSS JOIN t2
             GROUP BY t2.id) b
    ON b.id = t2.id
WHERE ABS (t2.time - t1.time) = b.diff;
Make sure that the time columns have the same date part, because the t2.time - t1.time part won't work otherwise.
EDIT: Thanks for the accept, but Ben's answer below is better. It uses Oracle analytic functions and will perform much better.
This one here selects that row(s) from T1, which has/have the smallest distance to any in T2:
-- T1 rows whose gap to some T2 row equals the smallest gap found over all
-- (T1, T2) pairs.
select T1.id, T1.calldatetime
from T1
cross join T2
where ABS(T2.calldatetime - T1.calldatetime) = (
    select MIN(ABS(T2.calldatetime - T1.calldatetime))
    from T1
    cross join T2
);
(tested it with mysql, hope you dont get an ORA from that)
Edit: according to the last comment, it should be like that:
drop table t1;
drop table t2;
create table t1(id int, t time);
create table t2(id int, t time);
insert into t1 values (1, '18:12:02');
insert into t1 values (2, '18:46:57');
insert into t1 values (3, '17:49:44');
insert into t1 values (4, '12:19:24');
insert into t1 values (5, '11:00:01');
insert into t1 values (6, '17:12:45');
insert into t2 values (1, '18:13:02');
insert into t2 values (2, '17:46:57');
-- For each T2 row, return the T1 row(s) at the smallest absolute distance.
-- TIME values are converted with TIME_TO_SEC before subtracting: in MySQL,
-- "t2.t - t1.t" subtracts the HHMMSS numbers (18:00:00 - 17:59:59 = 4041),
-- which is not the elapsed time.
select ot2.id, ot2.t, ot1.id, ot1.t
from t2 ot2
cross join t1 ot1
where abs(time_to_sec(ot2.t) - time_to_sec(ot1.t)) =
      (select min(abs(time_to_sec(t2.t) - time_to_sec(t1.t)))
       from t1
       cross join t2
       where t2.id = ot2.id)
Produces:
id t id t
1 18:13:02 1 18:12:02
2 17:46:57 3 17:49:44
Another one way of using analytic functions.
May be strange :)
-- For each T2 row, pick the closer of its nearest earlier and nearest later
-- T1 time. Times are 'hh24:mi:ss' strings, as in the original query.
select id, time,
       case
           -- only one neighbour exists: use it instead of returning NULL
           when prev_t1_time is null then next_t1_time
           when next_t1_time is null then prev_t1_time
           when to_date(time, 'hh24:mi:ss') - to_date(prev_t1_time, 'hh24:mi:ss')
              < to_date(next_t1_time, 'hh24:mi:ss') - to_date(time, 'hh24:mi:ss')
           then prev_t1_time
           else next_t1_time
       end closest_time
from (
    select id, tbl, time,
           -- nearest preceding row that comes from T1; IGNORE NULLS skips T2
           -- rows, so two adjacent T2 rows are never matched to each other
           last_value(case when tbl = 1 then time end ignore nulls)
               over (order by time
                     rows between unbounded preceding and 1 preceding) prev_t1_time,
           -- nearest following row that comes from T1
           first_value(case when tbl = 1 then time end ignore nulls)
               over (order by time
                     rows between 1 following and unbounded following) next_t1_time
    from
    (
        select id, time, 1 tbl from t1
        union all
        select id, time, 2 tbl from t2
    )
)
where tbl = 2
To SQLFiddle... and beyond!
Try this query. It's a little lengthy; I will try to optimize it.
-- T1 rows that are the closest match for at least one T2 row.
select *
from t1
where id in (
    select id1
    from (
        -- rank the T1 candidates of each T2 row by time gap (1 = closest)
        select id1, id2,
               rank() over (partition by id2 order by diff) rnk
        from (
            -- The gap is a fraction of a day and is left unrounded: rounding
            -- it to 2 decimals (0.01 day = 14.4 minutes) would turn all small
            -- gaps into ties. GROUP BY already yields one row per id pair,
            -- so DISTINCT is not needed.
            select t1.id id1, t2.id id2,
                   min(abs(to_date(t1.time,'HH24:MI:SS') - to_date(t2.time,'HH24:MI:SS'))) diff
            from t1
            cross join t2
            group by t1.id, t2.id
        )
    )
    where rnk = 1
);