sum before joining two table - sql

-- Sample data for the question: individual sales rows keyed by calendar day.
-- 2012-01-06 appears twice (sales 6 and 5), so a day can have more than one row.
CREATE TABLE Daily
([DATE] datetime, [sales] int)
;
INSERT INTO Daily
([DATE], [sales])
VALUES
('2012-01-01 00:00:00', 1),
('2012-01-02 00:00:00', 2),
('2012-01-03 00:00:00', 3),
('2012-01-04 00:00:00', 4),
('2012-01-05 00:00:00', 5),
('2012-01-06 00:00:00', 6),
('2012-01-06 00:00:00', 5),
('2012-01-07 00:00:00', 7),
('2012-01-08 00:00:00', 8),
('2012-01-09 00:00:00', 9),
('2012-01-10 00:00:00', 10),
('2012-01-11 00:00:00', 11),
('2012-01-12 00:00:00', 12),
('2012-01-13 00:00:00', 13),
('2012-01-14 00:00:00', 14),
('2012-01-15 00:00:00', 15),
('2012-01-16 00:00:00', 16)
;
-- The weekly dates the asker wants totals for (seven days apart).
CREATE TABLE Weekly
([Weekly] datetime)
;
INSERT INTO Weekly
([Weekly])
VALUES
('2012-01-07 00:00:00'),
('2012-01-14 00:00:00'),
('2012-01-21 00:00:00')
;
i want the final output
Sales
1/7/2012 33
1/14/2012 77
any help on this would be appreciated. thanks in advance

I would strongly recommend against storing this in a table: if any of your daily data changes, your weekly data will need to be changed too, or it will be wrong. Instead, create a view as follows:
-- Weekly sales totals computed directly from Daily, so they can never drift out of sync.
-- DATEDIFF(WEEK, 0, d) counts the week boundaries between day 0 (1900-01-01) and d;
-- adding that many weeks (+1) to day -2 (1899-12-30, a Saturday) gives the Saturday
-- that ends d's week.
CREATE VIEW Weekly
AS
SELECT
    DATEADD(WEEK, DATEDIFF(WEEK, 0, d.[DATE]) + 1, -2) AS WeekEnd,
    SUM(d.Sales) AS Sales
FROM Daily AS d
GROUP BY
    DATEADD(WEEK, DATEDIFF(WEEK, 0, d.[DATE]) + 1, -2);
You can use this in the same way you would use the table you want to create, but it will always be in sync with the daily data. If you want to change your week start/end day (e.g. Monday–Sunday), you can change the -2 in the DATEADD function to alter this.
Example on SQL Fiddle
(Based on the [] around column names I am guessing this is SQL-Server.)

Related

SQL window function with date condition not returning expected value

I want to find the maximum value before the current date but within 1 year of a value using a window function. My attempt is not giving me the correct value and not sure why?
[MaxPrevious] is the desired result
[MaxPrevious2] is the window function result with the wrong value.
I need to use a window function as the final query is more complex but the date condition part is not working.
Desired Output:
Full table data and query:
--DROP TABLE [dbDelete].[dbo].[tblData]
--CREATE TABLE [dbDelete].[dbo].[tblData]
--([Date] datetime, [Part] varchar(10), [Tolerance] float);
--INSERT INTO [dbDelete].[dbo].[tblData] ([Date], [Part], [Tolerance])
--VALUES
--('2012-01-19 00:00:00', 'X1', 6.8),
--('2011-12-15 00:00:00', 'X1', 6.7),
--('2011-10-25 00:00:00', 'X1', 7.8),
--('2010-05-06 00:00:00', 'X1', 8.3),
--('2010-04-13 00:00:00', 'X1', 7.2),
--('2010-01-21 00:00:00', 'X1', 4.7),
--('2009-12-28 00:00:00', 'X1', 6.9),
--('2009-01-01 00:00:00', 'X1', 7.8),
--('2008-11-16 00:00:00', 'X1', 7.4),
--('2008-11-08 00:00:00', 'X1', 7.9),
--('2012-01-19 00:00:00', 'X2', 3.8),
--('2011-12-15 00:00:00', 'X2', 3.7),
--('2011-10-25 00:00:00', 'X2', 4.8),
--('2010-05-06 00:00:00', 'X2', 5.3),
--('2010-04-13 00:00:00', 'X2', 4.2),
--('2010-01-21 00:00:00', 'X2', 1.7),
--('2009-12-28 00:00:00', 'X2', 3.9),
--('2009-01-01 00:00:00', 'X2', 4.8),
--('2008-11-16 00:00:00', 'X2', 4.4),
--('2008-11-08 00:00:00', 'X2', 4.9)
--;
-- Compare two ways of computing, per row, the maximum earlier Tolerance for the same Part.
select t1.*
-- [MaxPrevious] (reference result): correlated subquery returning the highest Tolerance among
-- rows of the same Part dated strictly before this row and at most one year earlier.
,(select top (1) t2.[Tolerance] from [dbDelete].[dbo].[tblData] t2
where t2.[Date] < t1.[Date]
and t2.[Date] >= dateadd(year, -1, t1.[Date])
and t2.[Part] = t1.[Part]
order by t2.[Tolerance] desc) as [MaxPrevious]
-- [MaxPrevious2] (window attempt): the CASE compares each row's own [Date] with that same
-- [Date] minus one year, so it is true for every row and filters nothing. The frame
-- (unbounded preceding .. 1 preceding) therefore takes the max over ALL earlier rows of
-- the Part, not only those within one year of the current row.
,max(case when t1.[Date] >= dateadd(year, -1, t1.[Date]) then t1.[Tolerance] else 0 end) over
(partition by t1.[Part]
order by t1.[Date]
rows between unbounded preceding and 1 preceding
) as [MaxPrevious2]
from [dbDelete].[dbo].[tblData] t1
order by t1.[Part], t1.[Date] desc
--DROP TABLE [dbDelete].[dbo].[tblData]
--CREATE TABLE [dbDelete].[dbo].[tblData]
--([Date] datetime, [Part] varchar(10), [Tolerance] float);
--INSERT INTO [dbDelete].[dbo].[tblData] ([Date], [Part], [Tolerance])
--VALUES
--('2012-01-19 00:00:00', 'X1', 6.8),
--('2011-12-15 00:00:00', 'X1', 6.7),
--('2011-10-25 00:00:00', 'X1', 7.8),
--('2010-05-06 00:00:00', 'X1', 8.3),
--('2010-04-13 00:00:00', 'X1', 7.2),
--('2010-01-21 00:00:00', 'X1', 4.7),
--('2009-12-28 00:00:00', 'X1', 6.9),
--('2009-01-01 00:00:00', 'X1', 7.8),
--('2008-11-16 00:00:00', 'X1', 7.4),
--('2008-11-08 00:00:00', 'X1', 7.9),
--('2012-01-19 00:00:00', 'X2', 3.8),
--('2011-12-15 00:00:00', 'X2', 3.7),
--('2011-10-25 00:00:00', 'X2', 4.8),
--('2010-05-06 00:00:00', 'X2', 5.3),
--('2010-04-13 00:00:00', 'X2', 4.2),
--('2010-01-21 00:00:00', 'X2', 1.7),
--('2009-12-28 00:00:00', 'X2', 3.9),
--('2009-01-01 00:00:00', 'X2', 4.8),
--('2008-11-16 00:00:00', 'X2', 4.4),
--('2008-11-08 00:00:00', 'X2', 4.9)
--;
-- For every reading, report the highest Tolerance among readings of the same Part that are
-- dated before it but no more than one year earlier (NULL when there is no such reading).
;with readings as (
    select
        src.*,
        dateadd(year, -1, src.[Date]) as PrevDate  -- start of the one-year look-back window
    from [dbDelete].[dbo].[tblData] as src
)
select
    cur.[Date],
    cur.Part,
    cur.Tolerance,
    max(prior.Tolerance) as MaxPrevious
from readings as cur
-- keep every current reading even when nothing falls inside its look-back window
left join readings as prior
    on prior.Part = cur.Part
    and prior.[Date] >= cur.PrevDate
    and prior.[Date] < cur.[Date]
group by
    cur.[Date],
    cur.Part,
    cur.Tolerance
order by
    cur.Part,
    cur.[Date] desc
I am not sure whether this is doable using only window functions.

case statement in Where clause issue

I have quite a Query to write.
I have a File called FG500 which has a field called UTXT(Certification number). The key(Not Unique) for the file is Model Number(MODL).The file can have multiple rows with the model numbers. It has also create date(CRDT), create time(CRTM), change date(CHDT), Change time(CHTM)
I need to pull latest EU Certification Number(UTXT) based on Create Date& Time / Change Date& Time. That is, if the change date/Time is not 0, then pull the latest of that.. Else check the create date and pull the latest record from that.
I will have to use a case but not sure how. Any help is highly appreciated.
Sample Data:
Model Number L12G4AGAEA L12G4AGAEA
UTXT (Blank) E4*2002/24*0458
Create date 07/30/12 03/16/12
Create Time 08:32:22 08:32:22
Change Date 07/31/12 03/17/12
Change Time 08:32:22 08:32:22
Expected result would be a Blank as the highest change date/Time is the first record. Had the change date been blank, then I would go ahead with the create date/Time
Perhaps this will assist. I built the table from your first set of tabular data, and the query demonstrates use of ROW_NUMBER() to arrive at the "latest" row for each modelnumber. I have used MS SQL Server from this example as the query syntax for DB2 is similar.
This might not be a complete answer, and as I don't have access to DB2 I'm not able to demonstrate how to combine the date and time columns which really should be done.
SQL Fiddle
MS SQL Server 2014 Schema Setup:
-- Sample rows for a single model number. Dates are stored as datetime values (midnight)
-- and the matching times are kept separately as 8-character text.
CREATE TABLE Table1
([ModelNumber] varchar(10), [UTXT] varchar(15), [CREATEdate] datetime, [CREATEtime] varchar(8), [CHANGEdate] datetime, [CHANGEtime] varchar(8))
;
-- A blank UTXT from the question is represented here as NULL.
INSERT INTO Table1
([ModelNumber], [UTXT], [CREATEdate], [CREATEtime], [CHANGEdate], [CHANGEtime])
VALUES
('L12G4AGAEA', NULL, '2012-07-30 00:00:00', '08:32:22', '2012-07-31 00:00:00', '08:32:22'),
('L12G4AGAEA', 'E4*2002/24*0458', '2012-03-16 00:00:00', '08:32:22', '2012-03-17 00:00:00', '08:32:22'),
('L12G4AGAEA', 'E4*2002/24*0458', '2012-07-11 00:00:00', '08:32:22', '2012-07-12 00:00:00', '08:32:22'),
('L12G4AGAEA', NULL, '2012-07-25 00:00:00', '08:32:22', '2012-07-26 00:00:00', '08:32:22'),
('L12G4AGAEA', 'E4*2002/24*0458', '2012-07-11 00:00:00', '08:32:22', '2012-07-12 00:00:00', '08:32:22'),
('L12G4AGAEA', 'E4*2002/24*0458', '2012-05-22 00:00:00', '08:32:22', '2012-05-23 00:00:00', '08:32:22'),
('L12G4AGAEA', 'E4*2002/24*0458', '2012-08-03 00:00:00', '08:32:22', '2012-08-03 00:00:00', '08:32:22'),
('L12G4AGAEA', 'E4*2002/24*0458', '2012-05-22 00:00:00', '08:32:22', '2012-05-23 00:00:00', '08:32:22'),
('L12G4AGAEA', 'E4*2002/24*0458', '2012-05-15 00:00:00', '08:32:22', '2012-05-16 00:00:00', '08:32:22'),
('L12G4AGAEA', 'E4*2002/24*0458', '2012-07-20 00:00:00', '08:32:22', '2012-07-21 00:00:00', '08:32:22'),
('L12G4AGAEA', 'E4*2002/24*0458', '2012-05-11 00:00:00', '08:32:22', '2012-05-14 00:00:00', '08:32:22')
;
Query 1:
-- Latest row per ModelNumber.
-- "Latest" is decided by the change date when present, otherwise by the create date.
-- The matching time column is used as a tiebreaker so that two rows stamped on the same
-- day are ordered by time of day instead of arbitrarily.
-- NOTE(review): the time columns are text; this assumes zero-padded 24-hour 'HH:MM:SS'
-- values, which sort chronologically as strings - confirm against the real data.
select
    d.*
from (
    select
        t.*
        , row_number() over(
              partition by t.ModelNumber
              order by
                  coalesce(t.CHANGEdate, t.CREATEdate) DESC
                  -- take the time from the same source (change vs. create) as the date above
                  , case when t.CHANGEdate is not null then t.CHANGEtime else t.CREATEtime end DESC
          ) rn
    from table1 t
) d
where d.rn = 1
Results:
| ModelNumber | UTXT | CREATEdate | CREATEtime | CHANGEdate | CHANGEtime | rn |
|-------------|-----------------|----------------------|------------|----------------------|------------|----|
| L12G4AGAEA | E4*2002/24*0458 | 2012-08-03T00:00:00Z | 08:32:22 | 2012-08-03T00:00:00Z | 08:32:22 | 1 |

SQL Server 2014 - Use previous value when date not present

I asked a similar question yesterday but I was not very good in my description of what I wanted. This will be far clearer.
Lead/Lag is not getting me what I need. Its close, but not enough.
Using SQL Server 2014 for client, actual server built on SQL 2012.
Here is my code:
Creating Team Table
-- Team history: one row per user per team change, with the date the change took place.
-- Created as a global temporary table (## prefix).
CREATE TABLE ##TeamTable
([UserID] varchar(50), [CurrentTeam] varchar(5), [ChangeDate] datetime)
;
INSERT INTO ##TeamTable
([UserID], [CurrentTeam], [ChangeDate])
VALUES
('User1', 'Team1', '6/1/2016'),
('User1', 'Team2', '9/1/2016'),
('User1', 'Team3', '12/1/2016'),
('User2', 'Team1', '4/1/2016'),
('User2', 'Team2', '10/1/2016'),
('User2', 'Team3', '11/1/2016');
Now to create data table I need to join to
-- Monthly activity rows: one row per user, month and media type.
-- Created as a global temporary table (## prefix).
CREATE TABLE ##DataTable
([UserID] varchar(50), Month_sk datetime, Media varchar(50), NCO int)
;  -- terminate the CREATE explicitly, like every other statement in this script
INSERT INTO ##DataTable
([UserID] , Month_sk , Media , NCO )
VALUES
('User1', '2016-06-01 00:00:00', 'Fax', 100),
('User1', '2016-06-01 00:00:00', 'Voice', 120),
('User1', '2016-07-01 00:00:00', 'Voice', 90),
('User1', '2016-07-01 00:00:00', 'Email', 100),
('User1', '2016-08-01 00:00:00', 'Voice', 150),
('User1', '2016-08-01 00:00:00', 'Email', 100),
('User1', '2016-09-01 00:00:00', 'Voice', 100),
('User1', '2016-09-01 00:00:00', 'Email', 120),
('User1', '2016-10-01 00:00:00', 'Voice', 90),
('User1', '2016-10-01 00:00:00', 'Email', 100),
('User1', '2016-11-01 00:00:00', 'Voice', 150),
('User1', '2016-11-01 00:00:00', 'Email', 100),
('User1', '2016-12-01 00:00:00', 'Voice', 150),
('User1', '2016-12-01 00:00:00', 'Email', 100),
('User2', '2016-04-01 00:00:00', 'Fax', 100),
('User2', '2016-04-01 00:00:00', 'Voice', 120),
('User2', '2016-05-01 00:00:00', 'Fax', 100),
('User2', '2016-05-01 00:00:00', 'Voice', 120),
('User2', '2016-06-01 00:00:00', 'Fax', 100),
('User2', '2016-06-01 00:00:00', 'Voice', 120),
('User2', '2016-07-01 00:00:00', 'Voice', 90),
('User2', '2016-07-01 00:00:00', 'Email', 100),
('User2', '2016-08-01 00:00:00', 'Voice', 150),
('User2', '2016-08-01 00:00:00', 'Email', 100),
('User2', '2016-09-01 00:00:00', 'Voice', 100),
('User2', '2016-09-01 00:00:00', 'Email', 120),
('User2', '2016-10-01 00:00:00', 'Voice', 90),
('User2', '2016-10-01 00:00:00', 'Email', 100),
('User2', '2016-11-01 00:00:00', 'Voice', 150),
('User2', '2016-11-01 00:00:00', 'Email', 100),
('User2', '2016-12-01 00:00:00', 'Voice', 150),
('User2', '2016-12-01 00:00:00', 'Email', 100);
Here is a basic Select to show whats going on:
-- Every data row with the team that changed in exactly that month, if any.
-- Months without a team change come back with CurrentTeam = NULL.
SELECT
    d.UserID,
    d.Media,
    d.NCO,
    d.Month_sk,
    t.CurrentTeam
FROM ##DataTable AS d
LEFT JOIN ##TeamTable AS t
    ON t.UserID = d.UserID
    AND t.ChangeDate = d.Month_sk
ORDER BY
    d.UserID,
    d.Month_sk,
    d.Media
This gives me a result set that looks like this:
What I need is for where I have nulls, that it would be pulling in the previous team name that's not null. So in User1 case, those 4 nulls for months of July and August would say Team1 since that was the team he was last on. Same for the nulls after Team2, those should say Team2.
Lead/Lag is close or I'm not using it right. Hopefully with all this code, this makes someone's jobs way easier.
UPDATE:
Lag/Lead gives same results. Still need the nulls to fill in
-- Same join as the basic select, plus a LAG() column intended to carry the previous team forward.
SELECT b.UserID
,b.Media
,b.NCO
,Month_sk
,CurrentTeam
-- The window is partitioned by (a.userid, changedate), so every row in a partition shares the
-- same ChangeDate and hence the same team, and the default (third argument) is the row's own
-- CurrentTeam. Rows the outer join did not match have NULL in all three columns, so this
-- cannot fill the gaps.
,LAG(CurrentTeam,1, currentteam) OVER(PARTITION BY a.userid, changedate ORDER BY ChangeDate) as Lag
FROM ##DataTable b
LEFT OUTER JOIN ##TeamTable a on b.UserID = a.UserID and b.Month_sk = a.ChangeDate
order by UserID, Month_sk, media
(Moving update notes to end)
I think the easiest solution (conceptually) is to join against all months up to month_sk and then filter to get only the last match. This "feels" potentially inefficient, so you'd want to test it with realistic data volume and if there's a problem then look for something better. (But "something better" may involve changes to the physical data model...)
So:
-- For each data row, report the team the user belonged to in that month:
-- join the row to every team change dated on or before the month, then keep only
-- the most recent of those changes.
select userid, media, nco, month_sk, currentteam
from (SELECT b.UserID
           , b.Media
           , b.NCO
           , b.Month_sk
           , a.CurrentTeam
           -- Rank within one user AND one month. Partitioning by user alone would give
           -- rank 1 only to rows joined to the user's overall latest change, dropping
           -- every earlier month from the result. rank() (not row_number()) lets all
           -- media rows of the same month share rank 1.
           , rank() over(partition by b.UserID, b.Month_sk
                         order by a.ChangeDate desc) n
      FROM ##DataTable b
      INNER JOIN ##TeamTable a
         on b.UserID = a.UserID
        and b.Month_sk >= a.ChangeDate
     ) x
where n = 1
order by UserID, Month_sk, media
Note that in previous versions I used row_number() over() instead of rank() over()... and you can do that, but if you do then you have to include in the partitioning key any data from the b table that could cause a duplication of a row from the a table during the join. Using rank ensures that all such duplicates share their rank as they ought to.
UPDATE - After I initially wrote this, I deleted it because I thought I'd misread your question; but as I was writing a replacement realized I may have had it right in the first place. So here it is, with a caveat:
This assumes that the only reason you get the NULL value is the outer join. If the "right hand" table ever has a matching row whose column value is itself NULL, then getting the previous value for that column would require further work with subqueries or analytic functions. But even then lead/lag may not work, since they are position based. (I think something with LAST_VALUE might be more suitable, but I will leave out the details unless they are needed.)
UPDATE 2 - based on your description of the data model in below comments, I'm changing the query to show an inner join as it sounds like that will work (once you broaden the join criteria) and should be more efficient.
UPDATE 3 - I did misread your sample data and got the partitioning expression for calculating n wrong. Should be fixed assuming the values from the b table are unique. If not it's still fixable but requires more trickery...
You can do this with an APPLY and a sub query like this.
-- For each data row, look up the single most recent team change dated on or before
-- the row's month; rows with no such change keep NULL team columns (OUTER APPLY).
SELECT td.userid, td.media, td.nco, td.month_sk, team.currentteam
FROM ##DataTable AS td
OUTER APPLY (
    SELECT TOP (1) tt.CurrentTeam, tt.ChangeDate
    FROM ##TeamTable AS tt
    WHERE tt.UserID = td.UserID
      AND tt.ChangeDate <= td.Month_sk
    ORDER BY tt.ChangeDate DESC   -- newest qualifying change first
) AS team
ORDER BY td.UserID, td.Month_sk, td.media
In this version, I first identify the appropriate "linking" month in the CTE, and then use that as a lookup in the final join. (It got much easier once I realized Media and NCO played no real part in the join.)
-- Step 1: for each user/month, find the latest team change dated on or before that month.
WITH team_change_for_month AS (
    SELECT
        d.UserID,
        d.Month_sk,
        MAX(t.ChangeDate) AS ChangeDate
    FROM ##DataTable AS d
    LEFT JOIN ##TeamTable AS t
        ON t.UserID = d.UserID
        AND t.ChangeDate <= d.Month_sk
    GROUP BY
        d.UserID,
        d.Month_sk
)
-- Step 2: use that date to fetch the team name for every data row.
SELECT
    d.UserID,
    d.Media,
    d.NCO,
    d.Month_sk,
    t.CurrentTeam
FROM ##DataTable AS d
LEFT JOIN team_change_for_month AS lk
    ON lk.UserID = d.UserID
    AND lk.Month_sk = d.Month_sk
LEFT JOIN ##TeamTable AS t
    ON t.UserID = lk.UserID
    AND t.ChangeDate = lk.ChangeDate
ORDER BY
    d.UserID,
    d.Month_sk,
    d.Media

Select with formula that includes previous value

-- Sample sales data.
-- "DECLARE #sales TABLE" is not valid T-SQL (table variables must be named @...), so the
-- data is held in a local temporary table instead; the queries that read from #sales
-- work against it unchanged.
IF OBJECT_ID('tempdb..#sales') IS NOT NULL
    DROP TABLE #sales;  -- allow the script to be re-run in the same session
CREATE TABLE #sales
(
code VARCHAR(10) NOT NULL,
date1 DATE NOT NULL,
sales NUMERIC(10, 2) NOT NULL,
profits NUMERIC(10, 2) NOT NULL
);
INSERT INTO #sales(Code, Date1, sales, profits)
VALUES ('q', '20140708', 0.51,21),
('q', '20140712', 0.3,33),
('q', '20140710', 0.5,12),
('q', '20140711', 0.6,43),
('q', '20140712', 0.2,66),
('q', '20140713', 0.7,21),
('q', '20140714', 0.24,76),
('q', '20140714', 0.24,12),
('x', '20140709', 0.25,0),
('x', '20140710', 0.16,0),
('x', '20140711', 0.66,31),
('x', '20140712', 0.23,12),
('x', '20140712', 0.35,11),
('x', '20140714', 0.57,1),
('c', '20140712', 0.97,2),
('c', '20140714', 0.71,3);
-- Running total of sales for code 'q', ordered by date, limited to rows from the
-- last 21 months relative to the current date. date1 is rendered in style 104.
SELECT
    s.code,
    CONVERT(VARCHAR, s.date1, 104) AS SPH_DATE_FORMATO,
    CAST(SUM(s.sales) OVER (ORDER BY s.date1) AS NUMERIC(18, 2)) AS SPH_CLOSE
FROM #sales AS s
WHERE s.code = 'q'
  AND s.date1 > DATEADD(month, -21, GETDATE())
This select gives me the accumulated sales ordered by date for the 'q' code, and this is fine.
But now I would need an additional column that calculates:
(1+ previous day sales)*(1+ today sales) -1
also ordered by date for the 'q' code.
Can anyone help with this, please?
You can do this using a CTE; just change your select query like this:
-- Running total of sales per row for code 'q', together with the running total of the
-- row immediately before it (matched through consecutive row numbers).
-- NOTE(review): row numbers are assigned by date1 only, so the order of rows sharing a
-- date is not guaranteed.
;WITH running AS (
    SELECT
        s.code,
        CONVERT(varchar, s.date1, 104) AS SPH_DATE_FORMATO,
        CAST(SUM(s.sales) OVER (ORDER BY s.date1) AS numeric(18, 2)) AS SPH_CLOSE,
        ROW_NUMBER() OVER (ORDER BY s.date1 ASC) AS rowid
    FROM #sales AS s
    WHERE s.date1 > DATEADD(month, -21, GETDATE())
      AND s.code = 'q'
)
SELECT
    cur.code,
    cur.SPH_DATE_FORMATO,
    cur.SPH_CLOSE,
    prev.SPH_CLOSE AS Sales_Last_Day
FROM running AS cur
LEFT JOIN running AS prev
    ON prev.rowid = cur.rowid - 1

Calculate working hours between 2 dates in PostgreSQL

I am developing an algorithm with Postgres (PL/pgSQL) and I need to calculate the number of working hours between 2 timestamps, taking into account that weekends are non-working days and that on the remaining days only the hours from 8am to 3pm (15:00) count.
Examples:
From Dec 3rd at 14pm to Dec 4th at 9am should count 2 hours:
3rd = 1, 4th = 1
From Dec 3rd at 15pm to Dec 7th at 8am should count 8 hours:
3rd = 0, 4th = 8, 5th = 0, 6th = 0, 7th = 0
It would be great to consider hour fractions as well.
According to your question working hours are: Mo–Fr, 08:00–15:00.
Rounded results
For just two given timestamps
Operating on units of 1 hour. Fractions are ignored, therefore not precise but simple:
-- Count whole working hours between two timestamps: generate one row per hour started,
-- stop one hour before the end so that a trailing partial hour is not counted, and keep
-- only Monday-Friday slots that start between 08:00 and 14:00.
SELECT count(*) AS work_hours
FROM generate_series(
         timestamp '2013-06-24 13:30',
         timestamp '2013-06-24 15:29' - interval '1h',
         interval '1h'
     ) AS h
WHERE h::time >= '08:00'
  AND h::time <= '14:00'
  AND EXTRACT(ISODOW FROM h) < 6;
The function generate_series() generates one row if the end is greater than the start and another row for every full given interval (1 hour). This would count every hour entered into. To ignore fractional hours, subtract 1 hour from the end. And don't count hours starting after 14:00.
Use the field pattern ISODOW instead of DOW for EXTRACT() to simplify expressions. Returns 7 instead of 0 for Sundays.
A simple (and very cheap) cast to time makes it easy to identify qualifying hours.
Fractions of an hour are ignored, even if fractions at begin and end of the interval would add up to an hour or more.
For a whole table
-- Test ranges for the working-hours queries.
CREATE TABLE t (t_id int PRIMARY KEY, t_start timestamp, t_end timestamp);
-- Explicit column list so the insert does not depend on the table's column order.
INSERT INTO t (t_id, t_start, t_end) VALUES
(1, '2009-12-03 14:00', '2009-12-04 09:00')
, (2, '2009-12-03 15:00', '2009-12-07 08:00') -- examples in question
, (3, '2013-06-24 07:00', '2013-06-24 12:00')
, (4, '2013-06-24 12:00', '2013-06-24 23:00')
, (5, '2013-06-23 13:00', '2013-06-25 11:00')
, (6, '2013-06-23 14:01', '2013-06-24 08:59') -- max. fractions at begin and end
;
Query:
-- Whole working hours per row of t: expand each range into hourly slots, keep the
-- Monday-Friday slots starting between 08:00 and 14:00, and count them per t_id.
-- Rows of t without any qualifying slot do not appear in the result.
SELECT
    slots.t_id,
    count(*) AS work_hours
FROM (
    SELECT
        t_id,
        generate_series(t_start, t_end - interval '1h', interval '1h') AS h
    FROM t
) AS slots
WHERE slots.h::time >= '08:00'
  AND slots.h::time <= '14:00'
  AND EXTRACT(ISODOW FROM slots.h) < 6
GROUP BY slots.t_id
ORDER BY slots.t_id;
db<>fiddle here
Old sqlfiddle
More precision
To get more precision you can use smaller time units. 5-minute slices for instance:
-- Working time per row of t at 5-minute resolution: expand each range into 5-minute
-- slots, keep the Monday-Friday slots inside working hours, and scale the count back
-- to an interval.
SELECT
    slots.t_id,
    count(*) * interval '5 min' AS work_interval
FROM (
    SELECT
        t_id,
        generate_series(t_start, t_end - interval '5 min', interval '5 min') AS h
    FROM t
) AS slots
WHERE slots.h::time >= '08:00'
  AND slots.h::time <= '14:55' -- 15.00 - interval '5 min'
  AND EXTRACT(ISODOW FROM slots.h) < 6
GROUP BY slots.t_id
ORDER BY slots.t_id;
The smaller the unit the higher the cost.
Cleaner with LATERAL in Postgres 9.3+
In combination with the new LATERAL feature in Postgres 9.3, the above query can then be written as:
1-hour precision:
-- Whole working hours per row of t, computed by a per-row LATERAL subquery.
-- The aggregate always returns one row, so ranges with no working hours show 0.
SELECT
    t.t_id,
    w.work_hours
FROM t
LEFT JOIN LATERAL (
    SELECT count(*) AS work_hours
    FROM generate_series(t.t_start, t.t_end - interval '1h', interval '1h') AS slot
    WHERE slot::time >= '08:00'
      AND slot::time <= '14:00'
      AND EXTRACT(ISODOW FROM slot) < 6
) AS w ON TRUE
ORDER BY t.t_id;
5-minute precision:
-- Working time per row of t at 5-minute resolution, computed by a per-row LATERAL subquery.
SELECT
    t.t_id,
    w.work_interval
FROM t
LEFT JOIN LATERAL (
    SELECT count(*) * interval '5 min' AS work_interval
    FROM generate_series(t.t_start, t.t_end - interval '5 min', interval '5 min') AS slot
    WHERE slot::time >= '08:00'
      AND slot::time <= '14:55'
      AND EXTRACT(ISODOW FROM slot) < 6
) AS w ON TRUE
ORDER BY t.t_id;
This has the additional advantage that intervals containing zero working hours are not excluded from the result like in the above versions.
More about LATERAL:
Find most common elements in array with a group by
Insert multiple rows in one table based on number in another table
Exact results
Postgres 8.4+
Or you deal with start and end of the time frame separately to get exact results to the microsecond. Makes the query more complex, but cheaper and exact:
-- Exact working time per row of t: count hourly slots between the hour-truncated start and
-- end, then correct that count by the partial hours at t_start and t_end.
-- var holds the daily working window so it is written only once.
WITH var AS (SELECT '08:00'::time AS v_start
, '15:00'::time AS v_end)
SELECT t_id
, COALESCE(h.h, '0') -- add / subtract fractions
-- subtract the minutes t_start lies past its full hour, but only when t_start falls on
-- Mon-Fri strictly inside the working window
- CASE WHEN EXTRACT(ISODOW FROM t_start) < 6
AND t_start::time > v_start
AND t_start::time < v_end
THEN t_start - date_trunc('hour', t_start)
ELSE '0'::interval END
-- add the minutes t_end lies past its full hour, under the same condition
+ CASE WHEN EXTRACT(ISODOW FROM t_end) < 6
AND t_end::time > v_start
AND t_end::time < v_end
THEN t_end - date_trunc('hour', t_end)
ELSE '0'::interval END AS work_interval
FROM t CROSS JOIN var
LEFT JOIN ( -- count full hours, similar to above solutions
SELECT t_id, count(*)::int * interval '1h' AS h
FROM (
SELECT t_id, v_start, v_end
, generate_series (date_trunc('hour', t_start)
, date_trunc('hour', t_end) - interval '1h'
, interval '1h') AS h
FROM t, var
) sub
WHERE EXTRACT(ISODOW FROM h) < 6
AND h::time >= v_start
AND h::time <= v_end - interval '1h'
GROUP BY 1
) h USING (t_id)
ORDER BY 1;
db<>fiddle here
Old sqlfiddle
Postgres 9.2+ with tsrange
The new range types offer a more elegant solution for exact results in combination with the intersection operator *:
Simple function for time ranges spanning only one day:
-- Working time between _start and _end, which are expected to lie on the same calendar day.
-- Returns '0' when _start falls on a Saturday or Sunday (ISODOW >= 6); otherwise returns the
-- length of the intersection between the given time span and the hard-coded 08:00-15:00 window.
-- Both ranges are placed on the dummy date 2000-1-1 so that only the time of day matters.
-- NOTE(review): f_worktime calls this with _end = NULL; presumably a NULL bound makes the
-- range open-ended so the span runs to the end of the working window - confirm.
CREATE OR REPLACE FUNCTION f_worktime_1day(_start timestamp, _end timestamp)
RETURNS interval
LANGUAGE sql IMMUTABLE AS
$func$ -- _start & _end within one calendar day! - you may want to check ...
SELECT CASE WHEN extract(ISODOW from _start) < 6 THEN (
-- fall back to '0' when upper(h) - lower(h) is NULL
SELECT COALESCE(upper(h) - lower(h), '0')
FROM (
SELECT tsrange '[2000-1-1 08:00, 2000-1-1 15:00)' -- hours hard coded
* tsrange( '2000-1-1'::date + _start::time
, '2000-1-1'::date + _end::time ) AS h
) sub
) ELSE '0' END
$func$;
If your ranges never span multiple days, that's all you need.
Else, use this wrapper function to deal with any interval:
-- Working time between two arbitrary timestamps, returned through the OUT parameter work_time.
-- Splits the span by calendar day and delegates the first and last day to f_worktime_1day;
-- each weekday strictly in between counts as a full 7-hour working day.
CREATE OR REPLACE FUNCTION f_worktime(_start timestamp
, _end timestamp
, OUT work_time interval)
LANGUAGE plpgsql IMMUTABLE AS
$func$
BEGIN
CASE _end::date - _start::date -- spanning how many days?
WHEN 0 THEN -- all in one calendar day
work_time := f_worktime_1day(_start, _end);
WHEN 1 THEN -- wrap around midnight once
-- first day from _start onwards (NULL end), plus last day from midnight up to _end
work_time := f_worktime_1day(_start, NULL)
+ f_worktime_1day(_end::date, _end);
ELSE -- multiple days
work_time := f_worktime_1day(_start, NULL)
+ f_worktime_1day(_end::date, _end)
-- whole days between the first and the last day, weekdays only
+ (SELECT count(*) * interval '7:00' -- workday hard coded!
FROM generate_series(_start::date + 1
, _end::date - 1, '1 day') AS t
WHERE extract(ISODOW from t) < 6);
END CASE;
END
$func$;
Call:
SELECT t_id, f_worktime(t_start, t_end) AS worktime
FROM t
ORDER BY 1;
db<>fiddle here
Old sqlfiddle
How about this: create a small table with 24*7 rows, one row for each hour in a week.
-- Lookup table with one row per hour, each flagged as a working hour or not.
CREATE TABLE hours (
hour timestamp not null,
is_working boolean not null
);
-- Rows for a single day (2009-11-2). The ". . ." lines appear to stand for rows left out
-- of the listing, so this statement is a sketch and is not runnable as written.
INSERT INTO hours (hour, is_working) VALUES
('2009-11-2 00:00:00', false),
('2009-11-2 01:00:00', false),
. . .
('2009-11-2 08:00:00', true),
. . .
('2009-11-2 15:00:00', true),
('2009-11-2 16:00:00', false),
. . .
('2009-11-2 23:00:00', false);
Likewise add 24 rows for each of the other days. It doesn't matter what year or month you give, as you'll see in a moment. You just need to represent all seven days of the week.
-- Hours worked per row of mytable: join each range to the rows of the hours lookup table
-- whose day-of-week and hour-of-day lie between the range's start and end, then count
-- the matching rows flagged as working.
-- EXTRACT takes the column expression directly; the TIMESTAMP keyword may only prefix a
-- string literal, so "TIMESTAMP h.hour" was a syntax error.
SELECT t.id, t.start, t.end, SUM(CASE WHEN h.is_working THEN 1 ELSE 0 END) AS hours_worked
FROM mytable t JOIN hours h
ON (EXTRACT(DOW FROM h.hour) BETWEEN EXTRACT(DOW FROM t.start)
AND EXTRACT(DOW FROM t.end))
-- on the first day, only hours at or after the start hour
AND (EXTRACT(DOW FROM h.hour) > EXTRACT(DOW FROM t.start)
OR EXTRACT(HOUR FROM h.hour) >= EXTRACT(HOUR FROM t.start))
-- on the last day, only hours up to and including the end hour
AND (EXTRACT(DOW FROM h.hour) < EXTRACT(DOW FROM t.end)
OR EXTRACT(HOUR FROM h.hour) <= EXTRACT(HOUR FROM t.end))
GROUP BY t.id, t.start, t.end;
The following functions take these inputs:
working start time of the day
working end time of the day
start time
end time
-- helper function
-- Returns the number of seconds in which the span [sdt, edt] overlaps the working window
-- [swt, ewt] on the calendar day of sdt.
--   sdt, edt : start and end of the span (the caller keeps both within one day)
--   swt, ewt : daily working start and end times
-- The overlap is min(edt, work end) - max(sdt, work start), clamped at 0. This covers every
-- combination the former chain of IF statements handled, and additionally returns 0 (instead
-- of NULL) when sdt = edt = work start, and 0 (instead of a negative number) when the span
-- or the working window is reversed.
-- Returns NULL if any argument is NULL.
CREATE OR REPLACE FUNCTION get_working_time_in_a_day(sdt TIMESTAMP, edt TIMESTAMP, swt TIME, ewt TIME) RETURNS INT AS
$$
DECLARE
    swdt TIMESTAMP := sdt::DATE + swt; -- work start datetime for a day
    ewdt TIMESTAMP := sdt::DATE + ewt; -- work end datetime for a day
BEGIN
    -- GREATEST/LEAST skip NULL arguments, so reject NULL input explicitly
    IF sdt IS NULL OR edt IS NULL OR swt IS NULL OR ewt IS NULL
    THEN
        RETURN NULL;
    END IF;
    RETURN GREATEST(0, EXTRACT(EPOCH FROM (LEAST(edt, ewdt) - GREATEST(sdt, swdt))))::INT;
END;
$$
LANGUAGE plpgsql;
-- Get work time difference
-- Total working seconds between sdt and edt, given the daily working window swt-ewt.
-- Walks the span one calendar day at a time, skips Saturdays and Sundays, and adds up
-- the per-day result of get_working_time_in_a_day.
CREATE OR REPLACE FUNCTION get_working_time(sdt TIMESTAMP, edt TIMESTAMP, swt TIME, ewt TIME) RETURNS INT AS
$$
DECLARE
seconds INT = 0;
strst VARCHAR(9) = ' 00:00:00';
stret VARCHAR(9) = ' 23:59:59';
tend TIMESTAMP; tempEdt TIMESTAMP;
x int;
BEGIN
<<test>>
WHILE sdt <= edt LOOP
tend = sdt::DATE || stret; -- 23:59:59 on the day sdt falls on
-- cap the end of this day's slice at the overall end
IF edt >= tend
THEN
tempEdt = tend;
ELSE
tempEdt = edt;
END IF;
-- skip saturday and sunday
x = EXTRACT(DOW FROM sdt);
if (x > 0 AND x < 6)
THEN
seconds = seconds + get_working_time_in_a_day(sdt, tempEdt, swt, ewt);
ELSE
-- RAISE NOTICE 'MISSED A DAY';
END IF;
-- advance sdt to 00:00:00 of the following day
sdt = (sdt + (INTERVAL '1 DAY'))::DATE || strst;
END LOOP test;
--RAISE NOTICE 'diff in minutes = %', (seconds / 60);
RETURN seconds;
END;
$$
LANGUAGE plpgsql;
-- Table Definition
-- Fixture table: a start/end timestamp pair plus the daily working window to apply to it.
DROP TABLE IF EXISTS test_working_time;
CREATE TABLE test_working_time(
pk SERIAL PRIMARY KEY,
start_datetime TIMESTAMP,
end_datetime TIMESTAMP,
start_work_time TIME,
end_work_time TIME
);
-- Test data insertion
-- Explicit column list so the insert does not depend on the table's column order.
INSERT INTO test_working_time (pk, start_datetime, end_datetime, start_work_time, end_work_time) VALUES
(1, '2015-11-03 01:00:00', '2015-11-03 07:00:00', '08:00:00', '22:00:00'),
(2, '2015-11-03 01:00:00', '2015-11-04 07:00:00', '08:00:00', '22:00:00'),
(3, '2015-11-03 01:00:00', '2015-11-05 07:00:00', '08:00:00', '22:00:00'),
(4, '2015-11-03 01:00:00', '2015-11-06 07:00:00', '08:00:00', '22:00:00'),
(5, '2015-11-03 01:00:00', '2015-11-07 07:00:00', '08:00:00', '22:00:00'),
(6, '2015-11-03 01:00:00', '2015-11-03 08:00:00', '08:00:00', '22:00:00'),
(7, '2015-11-03 01:00:00', '2015-11-04 08:00:00', '08:00:00', '22:00:00'),
(8, '2015-11-03 01:00:00', '2015-11-05 08:00:00', '08:00:00', '22:00:00'),
(9, '2015-11-03 01:00:00', '2015-11-06 08:00:00', '08:00:00', '22:00:00'),
(10, '2015-11-03 01:00:00', '2015-11-07 08:00:00', '08:00:00', '22:00:00'),
(11, '2015-11-03 01:00:00', '2015-11-03 11:00:00', '08:00:00', '22:00:00'),
(12, '2015-11-03 01:00:00', '2015-11-04 11:00:00', '08:00:00', '22:00:00'),
(13, '2015-11-03 01:00:00', '2015-11-05 11:00:00', '08:00:00', '22:00:00'),
(14, '2015-11-03 01:00:00', '2015-11-06 11:00:00', '08:00:00', '22:00:00'),
(15, '2015-11-03 01:00:00', '2015-11-07 11:00:00', '08:00:00', '22:00:00'),
(16, '2015-11-03 01:00:00', '2015-11-03 22:00:00', '08:00:00', '22:00:00'),
(17, '2015-11-03 01:00:00', '2015-11-04 22:00:00', '08:00:00', '22:00:00'),
(18, '2015-11-03 01:00:00', '2015-11-05 22:00:00', '08:00:00', '22:00:00'),
(19, '2015-11-03 01:00:00', '2015-11-06 22:00:00', '08:00:00', '22:00:00'),
(20, '2015-11-03 01:00:00', '2015-11-07 22:00:00', '08:00:00', '22:00:00'),
(21, '2015-11-03 01:00:00', '2015-11-03 23:00:00', '08:00:00', '22:00:00'),
(22, '2015-11-03 01:00:00', '2015-11-04 23:00:00', '08:00:00', '22:00:00'),
(23, '2015-11-03 01:00:00', '2015-11-05 23:00:00', '08:00:00', '22:00:00'),
(24, '2015-11-03 01:00:00', '2015-11-06 23:00:00', '08:00:00', '22:00:00'),
(25, '2015-11-03 01:00:00', '2015-11-07 23:00:00', '08:00:00', '22:00:00'),
(26, '2015-11-03 08:00:00', '2015-11-03 11:00:00', '08:00:00', '22:00:00'),
(27, '2015-11-03 08:00:00', '2015-11-04 11:00:00', '08:00:00', '22:00:00'),
(28, '2015-11-03 08:00:00', '2015-11-05 11:00:00', '08:00:00', '22:00:00'),
(29, '2015-11-03 08:00:00', '2015-11-06 11:00:00', '08:00:00', '22:00:00'),
(30, '2015-11-03 08:00:00', '2015-11-07 11:00:00', '08:00:00', '22:00:00'),
(31, '2015-11-03 08:00:00', '2015-11-03 22:00:00', '08:00:00', '22:00:00'),
(32, '2015-11-03 08:00:00', '2015-11-04 22:00:00', '08:00:00', '22:00:00'),
(33, '2015-11-03 08:00:00', '2015-11-05 22:00:00', '08:00:00', '22:00:00'),
(34, '2015-11-03 08:00:00', '2015-11-06 22:00:00', '08:00:00', '22:00:00'),
(35, '2015-11-03 08:00:00', '2015-11-07 22:00:00', '08:00:00', '22:00:00'),
(36, '2015-11-03 08:00:00', '2015-11-03 23:00:00', '08:00:00', '22:00:00'),
(37, '2015-11-03 08:00:00', '2015-11-04 23:00:00', '08:00:00', '22:00:00'),
(38, '2015-11-03 08:00:00', '2015-11-05 23:00:00', '08:00:00', '22:00:00'),
(39, '2015-11-03 08:00:00', '2015-11-06 23:00:00', '08:00:00', '22:00:00'),
(40, '2015-11-03 08:00:00', '2015-11-07 23:00:00', '08:00:00', '22:00:00'),
(41, '2015-11-03 12:00:00', '2015-11-03 18:00:00', '08:00:00', '22:00:00'),
(42, '2015-11-03 12:00:00', '2015-11-04 18:00:00', '08:00:00', '22:00:00'),
(43, '2015-11-03 12:00:00', '2015-11-05 18:00:00', '08:00:00', '22:00:00'),
(44, '2015-11-03 12:00:00', '2015-11-06 18:00:00', '08:00:00', '22:00:00'),
(45, '2015-11-03 12:00:00', '2015-11-07 18:00:00', '08:00:00', '22:00:00'),
(46, '2015-11-03 12:00:00', '2015-11-03 22:00:00', '08:00:00', '22:00:00'),
(47, '2015-11-03 12:00:00', '2015-11-04 22:00:00', '08:00:00', '22:00:00'),
(48, '2015-11-03 12:00:00', '2015-11-05 22:00:00', '08:00:00', '22:00:00'),
(49, '2015-11-03 12:00:00', '2015-11-06 22:00:00', '08:00:00', '22:00:00'),
(50, '2015-11-03 12:00:00', '2015-11-07 22:00:00', '08:00:00', '22:00:00'),
(51, '2015-11-03 12:00:00', '2015-11-03 23:00:00', '08:00:00', '22:00:00'),
(52, '2015-11-03 12:00:00', '2015-11-04 23:00:00', '08:00:00', '22:00:00'),
(53, '2015-11-03 12:00:00', '2015-11-05 23:00:00', '08:00:00', '22:00:00'),
(54, '2015-11-03 12:00:00', '2015-11-06 23:00:00', '08:00:00', '22:00:00'),
(55, '2015-11-03 12:00:00', '2015-11-07 23:00:00', '08:00:00', '22:00:00'),
(56, '2015-11-03 22:00:00', '2015-11-03 23:00:00', '08:00:00', '22:00:00'),
(57, '2015-11-03 22:00:00', '2015-11-04 23:00:00', '08:00:00', '22:00:00'),
(58, '2015-11-03 22:00:00', '2015-11-05 23:00:00', '08:00:00', '22:00:00'),
(59, '2015-11-03 22:00:00', '2015-11-06 23:00:00', '08:00:00', '22:00:00'),
(60, '2015-11-03 22:00:00', '2015-11-07 23:00:00', '08:00:00', '22:00:00'),
(61, '2015-11-03 22:30:00', '2015-11-03 23:30:00', '08:00:00', '22:00:00'),
(62, '2015-11-03 22:30:00', '2015-11-04 23:30:00', '08:00:00', '22:00:00'),
(63, '2015-11-03 22:30:00', '2015-11-05 23:30:00', '08:00:00', '22:00:00'),
(64, '2015-11-03 22:30:00', '2015-11-06 23:30:00', '08:00:00', '22:00:00'),
(65, '2015-11-03 22:30:00', '2015-11-07 23:30:00', '08:00:00', '22:00:00');
-- select query to get work time difference
-- Working time for every fixture row.
-- get_working_time returns seconds, so the column is labelled accordingly
-- (it was previously mislabelled as minutes).
SELECT
start_datetime,
end_datetime,
start_work_time,
end_work_time,
get_working_time(start_datetime, end_datetime, start_work_time, end_work_time) AS diff_in_seconds
FROM
test_working_time;
This will give the difference of only the work hours in seconds between the start and end datetime