DATEDIFF and COALESCE functions throwing an error in Redshift - sql

-- Keeps one row per a.number: the row ranked first by sys_updated_on desc,
-- then record_processed_datetime desc (rn = 1).
-- NOTE(review): the derived table below has no alias; presumably this is the
-- reported error — confirm against the engine's message.
select * from
(select
-- constant reference timestamp used as the start of the difference
convert (timestamp, '1970-01-01 00:00:00') as Fixed_Date,
-- seconds from Fixed_Date to detection; a NULL detection falls back to
-- Fixed_Date, which makes the difference 0
DATEDIFF (Second,
Fixed_Date::timestamp,
COALESCE(detection::timestamp, Fixed_date::timestamp)
)as TTD_seconds,
row_number() over(partition by a.number order by a.sys_updated_on desc,
a.record_processed_datetime desc ) as rn
from Table a
)
where rn = 1
The detection column has NULL values, and I want DATEDIFF to return 0 for those rows.
Entering 0 as the fallback throws an error as well.
I appreciate all the help.
error screenshot :

I think you need an alias for your subquery.
-- Latest record per a.number (rn = 1) together with the time to detection,
-- expressed as seconds elapsed since the fixed reference timestamp.
SELECT *
FROM (
    SELECT
        CONVERT(timestamp, '1970-01-01 00:00:00') AS Fixed_Date,
        -- A NULL detection time falls back to Fixed_Date, so the difference is 0.
        -- Fixed_Date is a non-NULL constant, so no further fallback is needed.
        DATEDIFF(
            second,
            Fixed_Date::timestamp,
            COALESCE(u_time_to_detection::timestamp, Fixed_Date::timestamp)
        ) AS TTD_seconds,
        ROW_NUMBER() OVER (
            PARTITION BY a.number
            ORDER BY a.sys_updated_on DESC, a.record_processed_datetime DESC
        ) AS rn
    FROM Table a
) AS AA  -- alias on the derived table: the fix this answer proposes
WHERE rn = 1

Related

Mariadb: Use result of window function LAG in WHERE clause

I am using the following query to get the difference between two timestamps:
-- Lists the readings of tracker 123456789 newest-first, each with the value
-- LAG returns over the same descending order and the minute difference.
SELECT tracker_id,
TIMESTAMP,
-- LAG over a descending order: the timestamp of the row listed just before
-- this one (NULL for the first row)
LAG(TIMESTAMP) OVER(ORDER BY TIMESTAMP DESC),
TIMESTAMPDIFF(MINUTE,
TIMESTAMP,
LAG(TIMESTAMP) OVER(ORDER BY TIMESTAMP DESC)) AS diff_in_minutes
FROM comm_telemetry
WHERE comm_telemetry.tracker_id = "123456789"
ORDER BY comm_telemetry.timestamp DESC;
I want to filter the result to only show rows where diff_in_minutes > 0. The problem is that window functions are not allowed in WHERE clauses.
Any suggestions on how to solve this?
You will need to first compute the lag in a subquery and then query that again to use it to filter.
-- Window functions cannot be used in WHERE, so the minute gap is computed in
-- a CTE first and filtered in the outer query.
WITH telemetry_gaps AS (
    SELECT
        tracker_id,
        TIMESTAMP,
        TIMESTAMPDIFF(
            MINUTE,
            TIMESTAMP,
            LAG(TIMESTAMP) OVER (ORDER BY TIMESTAMP DESC)
        ) AS diff_in_minutes
    FROM comm_telemetry
    WHERE tracker_id = '123456789'
)
SELECT
    tracker_id,
    TIMESTAMP,
    diff_in_minutes
FROM telemetry_gaps
WHERE diff_in_minutes > 0
ORDER BY TIMESTAMP DESC;
found a solution meanwhile:
-- For one tracker: each reading (ts), the value LAG returns over the
-- newest-first order (prev_ts), and the minute difference between the two.
-- Only rows with a positive difference are returned.
WITH tbl_diff_in_minutes AS (
    SELECT
        tracker_id,
        `timestamp` AS ts,
        LAG(`timestamp`) OVER (ORDER BY `timestamp` DESC) AS prev_ts,
        TIMESTAMPDIFF(
            MINUTE,
            `timestamp`,
            LAG(`timestamp`) OVER (ORDER BY `timestamp` DESC)
        ) AS diff_in_minutes
    FROM comm_telemetry
    -- single-quoted literal: double quotes denote identifiers under ANSI_QUOTES
    WHERE comm_telemetry.tracker_id = '123456789'
)
SELECT
    tracker_id,
    ts,
    prev_ts,
    diff_in_minutes
FROM tbl_diff_in_minutes
WHERE diff_in_minutes > 0
-- The sort belongs on the outer query: an ORDER BY inside the CTE does not
-- guarantee the order of the final result.
ORDER BY ts DESC;

Optimizing SQL query - finding a group with in a group

I have a working query and looking for ideas to optimize it.
Query explanation: Within each ID group (visitor_id), look for row where c_id != 0. From that row, show all consecutive rows within that ID group.
-- For every visitor_id that has a c_id '101' row dated 2021-08-01..2021-08-30,
-- returns that visitor's rows from the first row (by date) with c_id != 0 onward.
select t2.*
-- t2: all rows of the qualifying visitors, numbered per visitor by date
from (select *, row_number() OVER (PARTITION BY visitor_id ORDER BY date) as row_number
from "DB"."schema"."table"
where visitor_id in
(select distinct visitor_id
from (select * from "DB"."schema"."table" where date >= '2021-08-01' and date <= '2021-08-30')
where c_id in ('101')
)
) as t2
inner join
-- t1: per visitor, the smallest row number whose c_id is non-zero
(select visitor_id, min(rn) as row_number
from
-- same numbering and visitor filter as t2, computed a second time
(select *, row_number() OVER (PARTITION BY visitor_id ORDER BY date) as rn
from "DB"."schema"."table"
where visitor_id in
(select distinct visitor_id
from (select * from "DB"."schema"."table" where date >= '2021-08-01' and date <= '2021-08-30')
where c_id in ('101')
)
) as filtered_table
where c_id != 0
group by visitor_id) as t1
on t2.visitor_id = t1.visitor_id
-- keep the first non-zero row and everything after it
and t2.row_number >= t1.row_number
so you have a common sub expression
-- Visitors with at least one c_id '101' row dated 2021-08-01..2021-08-30 (both ends inclusive).
select distinct visitor_id
from (select * from "DB"."schema"."table" where date >= '2021-08-01' and date <= '2021-08-30')
where c_id in ('101')
so that can be moved to a CTE and run just once. like
WITH distinct_visitors AS (
SELECT DISTINCT visitor_id
FROM (SELECT * FROM "DB"."schema"."table" WHERE date >= '2021-08-01' and date <= '2021-08-30')
where c_id in ('101')
)
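But the sub-select's filter is equally valid as a top-level filter, and since it is an inclusive range filter, BETWEEN expresses it more directly (it is equivalent to the >= / <= pair, so any performance difference should be verified with the query plan).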
WITH distinct_visitors AS (
SELECT DISTINCT visitor_id
FROM "DB"."schema"."table"
WHERE date BETWEEN '2021-08-01' AND'2021-08-30'
AND c_id IN ('101')
)
then both uses of that CTE do the same ROW_NUMBER operation so that can be a CTE
and simplified as such
-- rw_rows: every row of the visitors that have a c_id '101' row dated
-- 2021-08-01..2021-08-30, numbered per visitor in date order.
WITH rw_rows AS (
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY visitor_id ORDER BY date) AS row_number
    FROM "DB"."schema"."table"
    WHERE visitor_id IN (
        SELECT DISTINCT visitor_id
        FROM "DB"."schema"."table"
        WHERE date BETWEEN '2021-08-01' AND '2021-08-30'
          AND c_id IN ('101')
    )
)
SELECT t2.*
FROM rw_rows AS t2
INNER JOIN (
    -- Per visitor, the row number of the first row with a non-zero c_id.
    -- The CTE exposes the numbering as row_number; there is no rn column here.
    SELECT
        visitor_id,
        MIN(row_number) AS first_nonzero_rn
    FROM rw_rows AS filtered_table
    WHERE c_id != 0
    GROUP BY visitor_id
) AS t1
    ON t2.visitor_id = t1.visitor_id
    -- keep the first non-zero row and everything after it
    AND t2.row_number >= t1.first_nonzero_rn
So we want to keep all rows from the first non-zero c_id onward, which a QUALIFY should be able to solve, like:
-- rw_rows: rows of the qualifying visitors, numbered per visitor by date.
WITH rw_rows AS (
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY visitor_id ORDER BY date) AS row_number
    FROM "DB"."schema"."table"
    WHERE visitor_id IN (
        SELECT DISTINCT visitor_id
        FROM "DB"."schema"."table"
        WHERE date BETWEEN '2021-08-01' AND '2021-08-30'
          AND c_id in ('101')
    )
)
SELECT
    t2.*,
    -- per visitor, the smallest row number whose c_id is non-zero
    MIN(IFF(c_id != 0, row_number, NULL)) OVER (PARTITION BY visitor_id) AS min_rn
FROM rw_rows AS t2
-- keep the first non-zero row and everything after it
QUALIFY t2.row_number >= min_rn
which, without having run it, feels like the MIN should also be able to be moved into the QUALIFY, like:
-- rw_rows: rows of the qualifying visitors, numbered per visitor by date.
WITH rw_rows AS (
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY visitor_id ORDER BY date) AS row_number
    FROM "DB"."schema"."table"
    WHERE visitor_id IN (
        SELECT DISTINCT visitor_id
        FROM "DB"."schema"."table"
        WHERE date BETWEEN '2021-08-01' AND '2021-08-30'
          AND c_id in ('101')
    )
)
SELECT t2.*
FROM rw_rows AS t2
-- keep rows at or after the visitor's first row with a non-zero c_id
QUALIFY t2.row_number >=
    MIN(IFF(c_id != 0, row_number, NULL)) OVER (PARTITION BY visitor_id)
At which point the CTE is not needed, as it's just used once, so could be moved back in, or not as they are the same.

Find continuous dates in PostgreSQL

I am trying to solve one query which I have already solved in SQL Server.
Write a SQL query to find runs of consecutive dates that contain at least three dates.
SQLfiddle
Table: orders
*------------*
| mdate |
*------------*
|'2012-05-01'|
|'2012-05-02'|
|'2012-05-03'|
|'2012-05-06'|
|'2012-05-07'|
|'2012-05-10'|
|'2012-05-11'|
*------------*
SQL Server:
-- Returns the dates that belong to a group of at least three rows sharing the same gap value.
select
mdate
from
(
select
mdate,
-- number of rows that share this gap value
count(gap) over (partition by gap) as total
from
(
select
mdate,
-- mdate minus its 1-based position in date order; consecutive dates map to
-- the same value (assuming mdate values are distinct)
dateadd(day, - row_number() over (order by mdate), mdate) as gap
from orders
) t
) tt
where total >= 3
Result:
*------------*
| mdate |
*------------*
|'2012-05-01'|
|'2012-05-02'|
|'2012-05-03'|
*------------*
I cannot use dateadd() function in PostgreSQL so how can I achieve same result in it?
In SQL Server, this would look like:
-- Dates whose group (mdate minus its row number in days) holds at least three rows.
select mdate
from (select o.*,
-- size of the group of rows sharing the same (mdate - seqnum days) value
count(*) over (partition by dateadd(day, - seqnum, mdate)) as cnt
from (select o.*,
-- 1-based position of the row in mdate order
row_number() over (order by mdate) as seqnum
from orders o
) o
) o
where cnt >= 3;
In Postgres:
-- Dates whose group (mdate minus its row number in days) holds at least three rows.
select mdate
from (select o.*,
-- the subtraction is done with interval arithmetic instead of dateadd()
count(*) over (partition by mdate - seqnum * interval '1 day') as cnt
from (select o.*,
-- 1-based position of the row in mdate order
row_number() over (order by mdate) as seqnum
from orders o
) o
) o
where cnt >= 3;
The only difference is the date arithmetic.
You can make an interval out of your ROW_NUMBER computation by CONCAT with ' day' and typecasting; then you can subtract that from mdate. Your query remains the same other than changing
dateadd(day, - row_number() over (order by mdate), mdate) as gap
to
mdate - concat(row_number() over (order by mdate), ' day')::interval as gap
Demo on SQLFiddle

Transact SQL : Only most recent changes if 2 columns value are the same

I'm trying to write a SQL query that fetches the users in my database, but here's the trick: if the same email appears twice, I only want to fetch the most recent one (I have a timestamp in my DB).
Here's the SQL CODE
-- Logins and opt-out flags of the rows whose insert_date falls inside the window below.
SELECT cs_login, cs_email_opt_out_did
FROM dbo.individual
-- NOTE(review): dateadd(day,1,'2016-12-05 23:59:59') is 2016-12-06 23:59:59, so the
-- window spans almost two days; if only 2016-12-05 is wanted the upper bound
-- would be '2016-12-06' — confirm the intent.
WHERE insert_date >= '2016-12-05 00:00:00' and insert_date < dateadd(day,1,'2016-12-05 23:59:59')
cs_login is the email address I want to check for duplicates.
cs_email_opt_out_did is a boolean whose value doesn't matter.
insert_date is the timestamp I want to use to find the latest row.
My problem is that I have the email address user#test.com twice, with different cs_email_opt_out_did values. I don't know how I could select the one with the most recent value in the insert_date column.
You could use the row_number() function inside a cte like this:
-- Most recent row per cs_login within the insert_date window:
-- rank each login's rows newest-first, then keep rank 1.
WITH ranked_logins AS (
    SELECT
        cs_login,
        cs_email_opt_out_did,
        ROW_NUMBER() OVER (
            PARTITION BY cs_login
            ORDER BY insert_date DESC
        ) AS rn
    FROM dbo.individual
    WHERE insert_date >= '2016-12-05 00:00:00'
      AND insert_date < DATEADD(day, 1, '2016-12-05 23:59:59')
)
SELECT
    cs_login,
    cs_email_opt_out_did
FROM ranked_logins
WHERE rn = 1;
Just get the record with the MAX(insert_date)
-- Most recent row per cs_login within the insert_date window.
SELECT
    i.cs_login,
    i.cs_email_opt_out_did
FROM dbo.individual AS i
INNER JOIN (
    -- The window filter has to be applied before taking MAX: otherwise a login
    -- whose newest row lies outside the window would match no in-window row
    -- and disappear from the result.
    SELECT
        cs_login,
        MAX(insert_date) AS dt
    FROM dbo.individual
    WHERE insert_date >= '2016-12-05 00:00:00'
      AND insert_date < DATEADD(day, 1, '2016-12-05 23:59:59')
    GROUP BY cs_login
) AS i2
    -- dt already lies inside the window, so no outer date filter is needed
    ON i2.dt = i.insert_date
    AND i2.cs_login = i.cs_login
You could also use the WITH TIES clause. No need for a sub-query or CTE.
-- Each login's newest row gets ROW_NUMBER 1; TOP (1) WITH TIES keeps every
-- row tied on that first ORDER BY value, i.e. one row per cs_login.
SELECT TOP (1) WITH TIES
    cs_login,
    cs_email_opt_out_did
FROM dbo.individual
WHERE insert_date >= '2016-12-05 00:00:00'
  AND insert_date < DATEADD(day, 1, '2016-12-05 23:59:59')
ORDER BY ROW_NUMBER() OVER (PARTITION BY cs_login ORDER BY insert_date DESC)
You can use Ranking windows function like DENSE_RANK
-- Rank each login's in-window rows newest-first and keep rank 1.
-- DENSE_RANK returns several rows per login when insert_date values tie.
SELECT
    ranked.cs_login,
    ranked.cs_email_opt_out_did
FROM (
    SELECT
        cs_login,
        cs_email_opt_out_did,
        insert_date,
        DENSE_RANK() OVER (PARTITION BY cs_login ORDER BY insert_date DESC) AS recency_rank
    FROM dbo.individual
    WHERE insert_date >= '2016-12-05 00:00:00'
      AND insert_date < DATEADD(day, 1, '2016-12-05 23:59:59')
) AS ranked
WHERE ranked.recency_rank = 1
you can also use other functions like RANK AND ROW_NUMBER

How to select the user with max count by day

I have a table with three columns
UserID, Count, Date
I'd like to be able to select the userid with the highest count for each date.
I've tried a few different variations of queries with inline select statements but none have worked 100%, and I'm not too fond of having a select with three inline selects.
Is doing inline selects the only way to go without using temp tables? Whats the best way to tackle this?
This solution will give you multiple records if there is a tie in Count but should work.
-- Per Date, the row(s) carrying that day's highest Count; a tie yields several rows.
SELECT
    t.Date,
    t.UserId,
    t.[Count]
FROM yourTable AS t
INNER JOIN (
    -- highest Count observed on each Date
    SELECT
        Date,
        MAX([Count]) AS max_count
    FROM yourTable
    GROUP BY Date
) AS day_max
    ON t.Date = day_max.Date
    AND t.[Count] = day_max.max_count
ORDER BY t.Date
If [Date] is in fact a [Date] column with no time component:
-- One row per [Date]: the row ranked first by [Count] descending.
;WITH ranked AS
(
    SELECT
        [Date],
        [Count],
        UserID,
        ROW_NUMBER() OVER (PARTITION BY [Date] ORDER BY [Count] DESC) AS rn
    FROM dbo.table
)
SELECT
    [Date],
    [Count],
    UserID
FROM ranked
WHERE rn = 1
ORDER BY [Date];
If [Date] is a DATETIME column with a time component, then:
-- One row per calendar day: [Date] is truncated to midnight by counting whole
-- days since 1900-01-01 and adding them back, then the row ranked first by
-- [Count] descending is kept.
;WITH ranked AS
(
    SELECT
        DATEADD(DAY, DATEDIFF(DAY, '19000101', [Date]), '19000101') AS [Date],
        [Count],
        UserID,
        ROW_NUMBER() OVER (
            PARTITION BY DATEADD(DAY, DATEDIFF(DAY, '19000101', [Date]), '19000101')
            ORDER BY [Count] DESC
        ) AS rn
    FROM dbo.table
)
SELECT
    [Date],
    [Count],
    UserID
FROM ranked
WHERE rn = 1
ORDER BY [Date];
If you want to pick a specific row in the event of a tie, you can add a tie-breaker to the ORDER BY within the over. If you want to include multiple rows in the case of ties, you can try changing ROW_NUMBER() to DENSE_RANK().
-- For each distinct Date, fetch the row(s) with the highest Count;
-- WITH TIES keeps every row tied for the top Count.
SELECT top_rows.*
FROM (
    SELECT Date
    FROM atable
    GROUP BY Date
) AS t
CROSS APPLY (
    SELECT TOP (1) WITH TIES
        UserID,
        Count,
        Date
    FROM atable
    WHERE Date = t.Date
    ORDER BY Count DESC
) AS top_rows
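If Date is a datetime type and can have a non-zero time component, change the t table like this (the WHERE inside the CROSS APPLY must then compare the same truncated date, otherwise only rows at exactly midnight match):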
…
FROM (
SELECT Date = DATEADD(DAY, DATEDIFF(DAY, 0, Date), 0)
FROM atable
GROUP BY DATEADD(DAY, DATEDIFF(DAY, 0, Date), 0)
) t
…
References:
TOP (Transact-SQL)
Using APPLY
for SQL 2k5
-- Per Date, the user with the highest Count (ties broken by the higher UserID).
-- Window functions are not allowed in a WHERE clause, so the rank is computed
-- in a derived table and filtered in the outer query.
SELECT
    ranked.UserID,
    ranked.[Count],
    ranked.[Date]
FROM (
    SELECT
        UserID,
        [Count],
        [Date],
        RANK() OVER (PARTITION BY [Date] ORDER BY [Count] DESC, UserID DESC) AS rnk
    FROM tb
) AS ranked
WHERE ranked.rnk = 1