I have a postgresql statement which is:
-- Branch 1: per-day SUM(count) for rows whose hour-of-day is 23 or later.
( select cast(start_time as date) as time , SUM(count) as count
from tbl_product
where ( cast(start_time as date) >= '2016-08-30 23:00:00' and cast(start_time as date) <= '2016-09-01 20:00:00' )
and ( extract(hour from start_time) >= 23 and extract(hour from start_time) <= 24)
group by time order by time limit 5 )
-- Branch 2: per-day SUM(count) for rows whose hour-of-day is below 20.
-- Each branch aggregates on its own, so a date matched by both branches can
-- appear as two separate rows in the combined result.
UNION ( select cast(start_time as date) as time , SUM(count) as count
from tbl_product
where ( cast(start_time as date) >= '2016-08-31 23:00:00' and cast(start_time as date) <= '2016-09-01 20:00:00' )
and ( extract(hour from start_time) >= 0 and extract(hour from start_time) < 20)
group by time order by time limit 5 )
But it returns the same data for the same date, because of a UNION statement
time count
date numeric
"2016-08-31" 543595
"2016-08-31" 3666277
"2016-09-01" 3365093
How can I add these data values like:
time count
date numeric
"2016-08-31" 4209872
"2016-09-01" 3365093
Thanks for helping.
You need to move the GROUP BY out of the individual queries. Something like this:
-- Gather the raw rows of both hour windows first, then aggregate once per day,
-- so a date that occurs in both windows ends up as a single summed row.
WITH hourly_rows AS (
    -- late window: hour-of-day 23 and above
    SELECT
        CAST(start_time AS date) AS time,
        count
    FROM tbl_product
    WHERE CAST(start_time AS date) >= '2016-08-30 23:00:00'
      AND CAST(start_time AS date) <= '2016-09-01 20:00:00'
      AND EXTRACT(hour FROM start_time) >= 23

    UNION ALL

    -- early window: hour-of-day 0 up to, but not including, 20
    SELECT
        CAST(start_time AS date) AS time,
        count
    FROM tbl_product
    WHERE CAST(start_time AS date) >= '2016-08-31 23:00:00'
      AND CAST(start_time AS date) <= '2016-09-01 20:00:00'
      AND EXTRACT(hour FROM start_time) >= 0
      AND EXTRACT(hour FROM start_time) < 20
)
SELECT
    time,
    SUM(count) AS count
FROM hourly_rows
GROUP BY time
ORDER BY time;
I've also changed the UNION to a UNION ALL, because it seems to make more sense in this case. Finally, the test extract(hour from start_time) <= 24 is always true, so it's redundant.
try this query:
-- Add up the per-day partial totals of both hour windows into one row per day.
select
    exe.time_,
    sum(exe.count_)
from
(
    -- Each branch is parenthesized so that its own ORDER BY / LIMIT is legal
    -- inside the set operation, and it groups/orders by the alias it actually
    -- defines (time_).
    (
        select cast(start_time as date) as time_, sum(count) as count_
        from tbl_product
        where ( cast(start_time as date) >= '2016-08-30 23:00:00' and cast(start_time as date) <= '2016-09-01 20:00:00' )
          and ( extract(hour from start_time) >= 23 and extract(hour from start_time) <= 24 )
        group by time_
        order by time_
        limit 5
    )
    -- UNION ALL rather than UNION: plain UNION removes duplicates, which would
    -- silently drop a partial total if both branches produced the same
    -- (date, sum) pair.
    union all
    (
        select cast(start_time as date) as time_, sum(count) as count_
        from tbl_product
        where ( cast(start_time as date) >= '2016-08-31 23:00:00' and cast(start_time as date) <= '2016-09-01 20:00:00' )
          and ( extract(hour from start_time) >= 0 and extract(hour from start_time) < 20 )
        group by time_
        order by time_
        limit 5
    )
) exe
group by exe.time_
order by exe.time_;
Related
How do I calculate the difference between two dates in business days in Google Bigquery?
I want to replicate this example below:
I have tried these examples but they do not give the expected results:
DATE_DIFF but only counting business days
I also used this logic, and it did not work:
-- Counts the dates from start_date through the day before end_date whose
-- DAYOFWEEK value, taken modulo 7, is greater than 1.
-- NOTE(review): the date array is always built from start_date towards
-- end_date and the count is never negated, so this presumably cannot yield the
-- negative results expected when the dates are in the other order; confirm.
CREATE TEMP FUNCTION BusinessDateDiff(start_date DATE, end_date DATE) AS (
(SELECT COUNTIF(MOD(EXTRACT(DAYOFWEEK FROM date), 7) > 1)
FROM UNNEST(GENERATE_DATE_ARRAY(
start_date, DATE_SUB(end_date, INTERVAL 1 DAY))) AS date)
);
Consider below
-- Signed weekday difference between two dates.
-- The dates from the earlier argument through the day before the later one are
-- enumerated, days whose DAYOFWEEK is 1 or 7 are skipped, and the remaining
-- count is returned: positive when delivery is after eta, negative otherwise.
CREATE TEMP FUNCTION BusinessDateDiff(delivery DATE, eta DATE) AS ((
  SELECT
    CASE WHEN delivery > eta THEN 1 ELSE -1 END * COUNT(*)
  FROM UNNEST(
    GENERATE_DATE_ARRAY(
      LEAST(delivery, eta),
      DATE_SUB(GREATEST(delivery, eta), INTERVAL 1 DAY)
    )
  ) AS calendar_day
  WHERE EXTRACT(DAYOFWEEK FROM calendar_day) NOT IN (1, 7)
));

-- Apply the function to every row of the source table.
SELECT
  *,
  BusinessDateDiff(DELIVERY_DATE, ORIGINAL_ETA_DATE) AS BUSINESS_DAYS
FROM your_table
if applied to sample data as in your question - output is
getting desired result as follows:
-- Negated count of the dates in [start_date, end_date) whose DAYOFWEEK is
-- neither 1 nor 7. Called when the delivery date precedes the ETA.
CREATE TEMP FUNCTION BusinessDateDiff(start_date DATE, end_date DATE) AS ((
  SELECT -COUNTIF(EXTRACT(DAYOFWEEK FROM d) NOT IN (1, 7))
  FROM UNNEST(
    GENERATE_DATE_ARRAY(start_date, DATE_SUB(end_date, INTERVAL 1 DAY))
  ) AS d
));

-- Positive counterpart: the same count over [end_date, start_date).
-- Called when the delivery date is after the ETA; note that the first
-- parameter is the earlier date even though it is named end_date.
CREATE TEMP FUNCTION BusinessDateDiff1(end_date DATE, start_date DATE) AS ((
  SELECT COUNTIF(EXTRACT(DAYOFWEEK FROM d) NOT IN (1, 7))
  FROM UNNEST(
    GENERATE_DATE_ARRAY(end_date, DATE_SUB(start_date, INTERVAL 1 DAY))
  ) AS d
));

-- Inline sample data (the 2022-06-30 / 2022-07-08 pair appears twice on purpose,
-- mirroring the original sample).
WITH OrdersTable AS (
  SELECT *
  FROM UNNEST([
    STRUCT(DATE '2022-06-28' AS DELIVERY_DATE, DATE '2022-08-17' AS ORIGINAL_ETA_DATE),
    (DATE '2022-07-01', DATE '2022-07-14'),
    (DATE '2022-06-30', DATE '2022-07-08'),
    (DATE '2022-06-30', DATE '2022-07-08'),
    (DATE '2022-06-29', DATE '2022-07-06'),
    (DATE '2022-06-27', DATE '2022-07-01'),
    (DATE '2022-06-30', DATE '2022-07-05'),
    (DATE '2022-06-30', DATE '2022-06-28')
  ])
)
SELECT
  DELIVERY_DATE,
  ORIGINAL_ETA_DATE,
  -- Pick the function according to the order of the two dates; equal dates give 0.
  CASE
    WHEN DELIVERY_DATE < ORIGINAL_ETA_DATE
      THEN BusinessDateDiff(DELIVERY_DATE, ORIGINAL_ETA_DATE)
    WHEN DELIVERY_DATE > ORIGINAL_ETA_DATE
      THEN BusinessDateDiff1(ORIGINAL_ETA_DATE, DELIVERY_DATE)
    ELSE 0
  END AS BUSINESS_DAYS
FROM OrdersTable
[![Desired Result][1]][1]
[1]: https://i.stack.imgur.com/efmw3.png
I have three queries executed consistently:
-- Site 18: group the February 2017 rows by calendar day, take max(value) per
-- day, and keep only the most recent day (TOP 1 ... ORDER BY day DESC).
-- The day is derived by adding t_stamp/1000 seconds to 1970-01-01
-- (presumably t_stamp holds epoch milliseconds; confirm).
-- NOTE(review): this statement reads t_stamp while the two below read stamp;
-- confirm the real column name.
SELECT TOP 1 max(value) FROM tableA
where site = 18
and (CAST(DATEADD(s,t_stamp/1000,'1970-01-01 00:00:00') as DATE) >= '2017-2-1'
and CAST(DATEADD(s,t_stamp/1000,'1970-01-01 00:00:00') as DATE) <= '2017-2-28')
Group by CAST(DATEADD(s,t_stamp/1000,'1970-01-01 00:00:00') as DATE)
order by CAST(DATEADD(s,t_stamp/1000,'1970-01-01 00:00:00') as DATE) DESC;
-- Site 3: same shape as above, using the stamp column.
SELECT TOP 1 max(value) FROM tableA
where site = 3
and (CAST(DATEADD(s,stamp/1000,'1970-01-01 00:00:00') as DATE) >= '2017-2-1'
and CAST(DATEADD(s,stamp/1000,'1970-01-01 00:00:00') as DATE) <= '2017-2-28')
Group by CAST(DATEADD(s,stamp/1000,'1970-01-01 00:00:00') as DATE)
order by CAST(DATEADD(s,stamp/1000,'1970-01-01 00:00:00') as DATE) DESC;
-- Site 4: same shape as above, using the stamp column.
SELECT TOP 1 max(value) FROM tableA
where site = 4
and (CAST(DATEADD(s,stamp/1000,'1970-01-01 00:00:00') as DATE) >= '2017-2-1'
and CAST(DATEADD(s,stamp/1000,'1970-01-01 00:00:00') as DATE) <= '2017-2-28')
Group by CAST(DATEADD(s,stamp/1000,'1970-01-01 00:00:00') as DATE)
order by CAST(DATEADD(s,stamp/1000,'1970-01-01 00:00:00') as DATE) DESC;
I want to combine these three queries into one and query sites 18, 3, and 4 with a single select, but I don't see how. Please advise how to merge these three queries into one.
Any help will be appreciated!
You seem to want the maximum value for three different sites on the last day in February that has their data.
If so, this is simpler:
-- For each requested site, return the maximum value recorded on the most recent
-- day inside the filtered window that has data for that site.
select site_id, max(value)
from (select t.*,
             -- Dividing by the number of milliseconds in a day buckets tstamp
             -- into days (assumes tstamp is an integer epoch-millisecond
             -- column; confirm). dense_rank gives every row of a site's
             -- latest day seqnum = 1.
             dense_rank() over (partition by site_id order by tstamp / (1000 * 24 * 60 * 60) desc) as seqnum
      from t
      -- datediff() returns int; the value is widened to bigint before the
      -- multiplication because seconds-since-1970 * 1000 does not fit in int.
      -- Only the constant side is manipulated, so tstamp is compared as stored.
      where tstamp >= cast(datediff(second, '1970-01-01', '2020-02-01') as bigint) * 1000 and
            tstamp < cast(datediff(second, '1970-01-01', '2020-02-29') as bigint) * 1000 and
            site_id in (18, 3, 4)
     ) t
where seqnum = 1
-- One output row per site; required because site_id is selected next to an aggregate.
group by site_id;
Actually, February in 2020 has 29 days. Perhaps you want the entire month; if so, then use '2020-03-01' for the second comparison.
Note that the manipulations on the date/time values are only on the "constant" side. This allows the query to use an index on tstamp if an appropriate index is available.
You can use the analytical function row_number in your existing query as follows:
-- Per site and day: max(value); rn numbers each site's days from newest to
-- oldest, so rn = 1 keeps the latest day that has data for each site.
Select *
from (SELECT max(value) as max_value,  -- derived-table columns must be named
             site,
             Row_number() over (partition by site
                                order by CAST(DATEADD(s,stamp/1000,'1970-01-01 00:00:00') as DATE) desc) as rn
      FROM tableA
      where site in (4,18,3)  -- the list is closed here, before the date filter
        and (CAST(DATEADD(s,stamp/1000,'1970-01-01 00:00:00') as DATE) >= '2017-2-1'
        and CAST(DATEADD(s,stamp/1000,'1970-01-01 00:00:00') as DATE) <= '2017-2-28')
      Group by CAST(DATEADD(s,stamp/1000,'1970-01-01 00:00:00') as DATE), site
     ) latest_per_site  -- SQL Server requires an alias on a derived table
Where rn = 1
I have a query that displays the total value (sum of amount) for each day.
The query:
-- Total amount per calendar day: the date column is cast to DATE and used as
-- the grouping key.
-- NOTE(review): BETWEEN is inclusive and the upper bound is midnight at the
-- start of 2019-12-31, so rows later on that day are presumably excluded;
-- confirm intent.
SELECT CAST(date AS DATE), SUM(amount) AS total_amount FROM table
WHERE date BETWEEN '2019-01-01 00:00:00' AND '2019-12-31 00:00:00'
GROUP BY CAST(date AS DATE)
The CAST is to abbreviate the datetime format to just a date.
Now I want to select only the day which has the highest sum with the max function.
To do this I tried writing the following aggregate query:
-- Attempt to keep only the day with the highest total (reported as not working).
-- s is the per-day totals subquery; its first column carries no alias.
-- NOTE(review): the final scalar subquery reads FROM table, not from the daily
-- totals, and s.total_amount inside it is a reference to the outer row of s,
-- so MAX() is not taken over the set of daily totals.
SELECT s.date, s.total_amount
FROM (SELECT CAST(date AS DATE), SUM(amount) AS total_amount FROM table
WHERE date BETWEEN '2019-01-01 00:00:00' AND '2019-12-31 00:00:00'
GROUP BY CAST(date AS DATE)) s
WHERE s.total_amount = (SELECT MAX(s.total_amount) FROM table)
This does not work. I know the problem is with the final WHERE clause, but I need help with making it work.
Use ORDER BY with LIMIT :
-- Day with the highest total: sort the daily totals in descending order and
-- keep the first row.
SELECT
    CAST(date AS DATE),
    SUM(amount) AS total_amount
FROM table
WHERE date BETWEEN '2019-01-01 00:00:00' AND '2019-12-31 00:00:00'
GROUP BY CAST(date AS DATE)
ORDER BY SUM(amount) DESC
LIMIT 1;
If you are working with SQL Server then you can use TOP :
-- SQL Server form: TOP (1) over the daily totals sorted from highest to lowest.
SELECT TOP (1)
    CAST(date AS DATE),
    SUM(amount) AS total_amount
FROM table
WHERE date BETWEEN '2019-01-01 00:00:00' AND '2019-12-31 00:00:00'
GROUP BY CAST(date AS DATE)
ORDER BY SUM(amount) DESC;
If you want ties then you can use window function :
-- Every day that ties for the highest total: RANK() assigns 1 to all days
-- sharing the maximum SUM(amount).
SELECT t.*
FROM (SELECT CAST(date AS DATE) AS date,  -- named: SQL Server rejects unnamed derived-table columns
             SUM(amount) AS total_amount,
             RANK() OVER (ORDER BY SUM(amount) DESC) AS Seq
      FROM table
      WHERE date BETWEEN '2019-01-01 00:00:00' AND '2019-12-31 00:00:00'
      GROUP BY CAST(date AS DATE)
     ) t
WHERE Seq = 1;
You can use CTE :
-- Daily totals are computed once in the CTE and then compared against their
-- own maximum; all days tying for the maximum are returned.
WITH CTE AS (
    SELECT CAST(date AS DATE) AS date,  -- named: SQL Server rejects unnamed CTE columns
           SUM(amount) AS total_amount
    FROM table
    WHERE date BETWEEN '2019-01-01 00:00:00' AND '2019-12-31 00:00:00'
    GROUP BY CAST(date AS DATE)
)
SELECT c.*
FROM CTE c
WHERE c.total_amount = (SELECT MAX(total_amount) FROM CTE);
Note : If your DBMS doesn't support CTEs, then you need to repeat the SELECT statement in a subquery.
-- CTE-free variant: keep the days whose total equals the highest daily total.
SELECT CAST(date AS DATE), SUM(amount) AS total_amount
FROM table
WHERE date BETWEEN '2019-01-01 00:00:00' AND '2019-12-31 00:00:00'
GROUP BY CAST(date AS DATE)
HAVING SUM(amount) = (SELECT MAX(daily.total_amount)
                      -- Only the total is needed here; leaving out the
                      -- day column avoids an unnamed derived-table column,
                      -- which SQL Server rejects.
                      FROM (SELECT SUM(amount) AS total_amount
                            FROM table
                            WHERE date BETWEEN '2019-01-01 00:00:00' AND '2019-12-31 00:00:00'
                            GROUP BY CAST(date AS DATE)
                           ) daily
                     );
If you are using SQL Server then you can use TOP
-- Highest daily total via TOP: sort the per-day sums descending, keep one row.
SELECT TOP (1)
    CAST(date AS DATE),
    SUM(amount) AS total_amount
FROM table
WHERE date BETWEEN '2019-01-01 00:00:00' AND '2019-12-31 00:00:00'
GROUP BY CAST(date AS DATE)
ORDER BY SUM(amount) DESC
Use window function row_number() - should work with MySQL 8.0, PostgreSQL, Oracle and SQL Server.
-- Number the daily totals from highest to lowest, then keep row 1.
-- row_number() returns exactly one day even when several days tie.
WITH ranked_days AS (
    SELECT
        CAST(date AS DATE) AS date,
        SUM(amount) AS total_amount,
        ROW_NUMBER() OVER (ORDER BY SUM(amount) DESC) AS rnk
    FROM table
    WHERE date BETWEEN '2019-01-01 00:00:00' AND '2019-12-31 00:00:00'
    GROUP BY CAST(date AS DATE)
)
SELECT
    date,
    total_amount
FROM ranked_days
WHERE rnk = 1
-- Days whose total equals the largest daily total. The daily-totals query is
-- written out twice: once as the row source, once to compute the maximum.
SELECT
    daily.dt,
    daily.total_amount
FROM (
    SELECT
        CAST(date AS DATE) AS dt,
        SUM(amount) AS total_amount
    FROM table
    WHERE CAST(date AS DATE) BETWEEN '2019-01-01' AND '2019-12-31'
    GROUP BY CAST(date AS DATE)
) daily
WHERE daily.total_amount = (
    SELECT MAX(peak.total_amount)
    FROM (
        SELECT
            CAST(date AS DATE) AS dt,
            SUM(amount) AS total_amount
        FROM table
        WHERE CAST(date AS DATE) BETWEEN '2019-01-01' AND '2019-12-31'
        GROUP BY CAST(date AS DATE)
    ) peak
)
I need to write an SQL query for the following scenario.
I am having start date as 2020-01-10 13:00:00.347 and end date as 2020-01-12 02:00:00.347, so I need data grouped as
Day Hours
---- -----
10-01-2020 11
11-01-2020 24
12-01-2020 2.30
which means 11 hours was for the first date and 24 hours in second day and 2.3 hours on 3rd day.
What would be the most efficient SQL query to fetch the data in the above-mentioned format? Thanks in advance.
You can use a recursive CTE to break the dates into ranges:
-- Split [start_date, end_date] into one slice per calendar day.
-- Anchor: from start_date to the next midnight (or to end_date if both fall on
-- the same day). Recursive step: continue from the previous slice's end until
-- a slice ends exactly at end_date.
WITH RECURSIVE day_slices AS (
    SELECT
        bounds.start_date AS day_start,
        (CASE
            WHEN DATE(bounds.start_date) = DATE(bounds.end_date) THEN bounds.end_date
            ELSE DATE(bounds.start_date) + INTERVAL 1 DAY
         END) AS day_end,
        bounds.end_date
    FROM (
        SELECT
            CAST('2020-01-10 13:00:00.347' AS datetime) AS start_date,
            CAST('2020-01-12 02:00:00.347' AS datetime) AS end_date
    ) bounds

    UNION ALL

    SELECT
        prev.day_end,
        (CASE
            WHEN DATE(prev.day_end) = DATE(prev.end_date) THEN prev.end_date
            ELSE DATE(prev.day_end) + INTERVAL 1 DAY
         END) AS day_end,
        prev.end_date
    FROM day_slices prev
    WHERE prev.day_end <> prev.end_date
)
-- Slice length in hours (seconds divided by 3600).
SELECT
    day_start,
    day_end,
    timestampdiff(second, day_start, day_end) / (60 * 60)
FROM day_slices;
Here is a db<>fiddle.
EDIT:
In SQL Server, this looks like:
-- SQL Server version of the per-day split.
-- Anchor: from start_date to the next midnight (or to end_date if both fall on
-- the same day). Recursive step: advance one day at a time from the previous
-- slice's end until a slice ends exactly at end_date.
WITH day_slices AS (
    SELECT
        bounds.start_date AS day_start,
        (CASE
            WHEN CAST(bounds.start_date AS date) = CAST(bounds.end_date AS date) THEN bounds.end_date
            ELSE DATEADD(day, 1, CAST(bounds.start_date AS date))
         END) AS day_end,
        bounds.end_date
    FROM (
        SELECT
            CAST('2020-01-10 13:00:00.347' AS datetime) AS start_date,
            CAST('2020-01-12 02:00:00.347' AS datetime) AS end_date
    ) bounds

    UNION ALL

    SELECT
        prev.day_end,
        (CASE
            WHEN CAST(prev.day_end AS date) = CAST(prev.end_date AS date) THEN prev.end_date
            ELSE DATEADD(day, 1, prev.day_end)
         END) AS day_end,
        prev.end_date
    FROM day_slices prev
    WHERE prev.day_end <> prev.end_date
)
-- Slice length in hours; the 60.0 keeps the division from being integer division.
SELECT
    day_start,
    day_end,
    datediff(second, day_start, day_end) / (60.0 * 60)
FROM day_slices;
Here is this db<>fiddle.
As the OP has asked for the most efficient method, and rCTEs are known to perform poorly, a more efficient approach would be to use a Tally.
This isn't anywhere near as easy to read for a beginner, however, does get the results you are after (with the exception of that 2020-01-12 has a value of 2.0 not 2.3, as your math is clearly wrong there):
-- Sample table holding two date ranges to split into per-day hour counts.
CREATE TABLE dbo.YourTable (StartDate datetime,
EndDate datetime);
INSERT INTO dbo.YourTable (StartDate,
EndDate)
VALUES('2020-01-10T13:00:00.347','2020-01-12T02:00:00.347'),
('2020-01-14T17:24:41.243','2020-01-19T09:17:12.997');
GO
-- N: ten placeholder rows used purely as a row source.
WITH N AS(
SELECT N
FROM (VALUES(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL))N(N)),
-- Tally: zero-based integers 0, 1, 2, ...; TOP limits the rows to the longest
-- range in the table, measured in days (+1 so the last day is included).
Tally AS(
SELECT TOP(SELECT MAX(DATEDIFF(DAY, StartDate, EndDate)+1) FROM dbo.YourTable)
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) -1 AS I
FROM N N1, N N2, N N3), -- 10 x 10 x 10 = at most 1000 rows, i.e. ranges of up to 1000 days
-- Dates: one row per range and day offset I. The first day (I = 0) starts at
-- StartDate itself, later days start at midnight; the last day of a range
-- (no following offset, detected via LEAD) ends at EndDate, earlier days end
-- at the next midnight.
-- NOTE(review): LEAD is partitioned by StartDate only, so two ranges sharing the
-- same StartDate would share a partition; confirm this cannot occur.
Dates AS(
SELECT DATEADD(DAY, T.I,CONVERT(date,YT.StartDate)) AS [Date],
CASE WHEN T.I = 0 THEN YT.StartDate ELSE DATEADD(DAY, T.I,CONVERT(date,YT.StartDate)) END AS StartingDateTime,
CASE WHEN LEAD(T.I) OVER (PARTITION BY YT.StartDate ORDER BY T.I) IS NULL THEN YT.EndDate ELSE DATEADD(DAY, T.I+1,CONVERT(date,YT.StartDate)) END AS EndingDateTime
FROM Tally T
JOIN dbo.YourTable YT ON T.I <= DATEDIFF(DAY, YT.StartDate, YT.EndDate))
-- Hours covered on each day: seconds between the slice bounds, divided by 3600
-- (the * 1.0 forces decimal rather than integer division).
SELECT D.[Date],
(DATEDIFF(SECOND,D.StartingDateTime,D.EndingDateTime) * 1.0) / 60 / 60 AS [Hours]
FROM Dates D;
GO
-- Clean up the sample table.
DROP TABLE dbo.YourTable;
DB<>Fiddle
I am able to do conditional aggregation for a single day using below SQL but wondering how can I accomplish it within a single query for multiple days. I am trying to do a cartesian product between logs_20190715 and dates but not able to think through further to solve this. Any inputs would be appreciated.
-- Query 1: per-city distinct-user counts relative to the reference day 2018-11-19.
SELECT CAST('2018-11-19' AS TIMESTAMP ) AS time_id,
city_id,
-- A: distinct users with a log entry on the reference day itself.
COUNT( DISTINCT CASE WHEN DATE_TRUNC('DAY',logged_at) = CAST( '2018-11-19' AS TIMESTAMP ) THEN user_id END ) AS A,
-- B: distinct users in the 7 days starting at the reference day (start inclusive, end exclusive).
COUNT( DISTINCT CASE WHEN logged_at >= CAST( '2018-11-19' AS TIMESTAMP )
AND logged_at < CAST( '2018-11-19' AS TIMESTAMP ) + interval '7' DAY
THEN user_id
END
) AS B,
-- C: distinct users in the 7 days before the reference day.
COUNT( DISTINCT CASE WHEN logged_at < CAST( '2018-11-19' AS TIMESTAMP )
AND logged_at >= CAST( '2018-11-19' AS TIMESTAMP ) - interval '7' DAY
THEN user_id
END
) AS C,
-- D: distinct users in the 28 days before the reference day.
COUNT( DISTINCT CASE WHEN logged_at < CAST( '2018-11-19' AS TIMESTAMP )
AND logged_at >= CAST( '2018-11-19' AS TIMESTAMP ) - interval '28' DAY
THEN user_id
END
) AS D,
-- Reference day repeated as an unnamed literal column.
'2018-11-19'
FROM logs_20190715
-- Pre-filter: only rows from 40 days before to 10 days after the reference day.
WHERE logged_at <= CAST('2018-11-19' AS TIMESTAMP) + interval '10' DAY
AND logged_at >= CAST('2018-11-19' AS TIMESTAMP) - interval '40' DAY
-- Positional grouping: 1 = time_id, 2 = city_id.
GROUP BY 1,2;
-- Query 2: the same per-city metrics, with 2018-11-18 as the reference day.
SELECT CAST('2018-11-18' AS TIMESTAMP ) AS time_id,
city_id,
-- A: distinct users with a log entry on the reference day itself.
COUNT( DISTINCT CASE WHEN DATE_TRUNC('DAY',logged_at) = CAST( '2018-11-18' AS TIMESTAMP ) THEN user_id END ) AS A,
-- B: distinct users in the 7 days starting at the reference day (start inclusive, end exclusive).
COUNT( DISTINCT CASE WHEN logged_at >= CAST( '2018-11-18' AS TIMESTAMP )
AND logged_at < CAST( '2018-11-18' AS TIMESTAMP ) + interval '7' DAY
THEN user_id
END
) AS B,
-- C: distinct users in the 7 days before the reference day.
COUNT( DISTINCT CASE WHEN logged_at < CAST( '2018-11-18' AS TIMESTAMP )
AND logged_at >= CAST( '2018-11-18' AS TIMESTAMP ) - interval '7' DAY
THEN user_id
END
) AS C,
-- D: distinct users in the 28 days before the reference day.
COUNT( DISTINCT CASE WHEN logged_at < CAST( '2018-11-18' AS TIMESTAMP )
AND logged_at >= CAST( '2018-11-18' AS TIMESTAMP ) - interval '28' DAY
THEN user_id
END
) AS D,
-- Reference day repeated as an unnamed literal column.
'2018-11-18'
FROM logs_20190715
-- Pre-filter: only rows from 40 days before to 10 days after the reference day.
WHERE logged_at <= CAST('2018-11-18' AS TIMESTAMP) + interval '10' DAY
AND logged_at >= CAST('2018-11-18' AS TIMESTAMP) - interval '40' DAY
-- Positional grouping: 1 = time_id, 2 = city_id.
GROUP BY 1,2;
How can I combine the above two queries into a single query and have the same results produced?( Have date dimension which has all dates in it. )