How to eliminate observations between two tables based on a formula? - sql

I have two tables:
First table has name, date, time and intraday price variables. It means there is an intraday price for each name in a specific date and time.
Second table has name, date and daily price and the daily price is intraday price aggregation for each name and date.
I try to write a program which performs the procedure below:
It can find same observations by name and date in two tables and then:
If both the first and the last intraday price of a name on a given date fall outside the range from 0.962 to 1.0398 times the previous day's daily price, then delete all rows for that name and date from table 1.
The statement is:
IF first AND last (intraday price for specific name & date) NOT IN [0.962*(daily price of yesterday), 1.0398*(daily price of yesterday)] THEN DELETE.
For instance, consider two tables which are below:
/* Build WORK.TABLE1: one row per name, date and time carrying the intraday price. */
data WORK.TABLE1;
/* DSD reads comma-delimited values; TRUNCOVER stops short lines from reading into the next record. */
infile datalines dsd truncover;
/* The colon modifier applies each informat while still stopping at the delimiter. */
input name:$3. date:DATE9. time:TIME8. intraday_price:32.;
format date DATE9. time TIME8.;
label name="name" date="date" time="time" intraday_price="intraday price";
/* DATALINES4: the data block runs until the line of four semicolons. */
datalines4;
A,07MAY2008,11:32:41,3
A,07MAY2008,12:32:41,2
A,07MAY2008,13:32:41,1
A,08MAY2008,11:32:41,3.95
A,08MAY2008,12:32:41,3
A,08MAY2008,13:32:41,6
A,08MAY2008,14:32:41,4.01
B,07MAY2008,11:32:41,3.1
B,07MAY2008,12:32:41,1
B,07MAY2008,13:32:41,4
B,07MAY2008,14:32:41,2.9
B,08MAY2008,11:32:41,6
B,08MAY2008,12:32:41,1
B,09MAY2008,11:32:41,5
B,09MAY2008,12:32:41,7
C,07MAY2008,11:32:41,3
C,07MAY2008,12:32:41,2
C,08MAY2008,11:32:41,6.1
C,08MAY2008,12:32:41,3
C,08MAY2008,13:32:41,2
C,09MAY2008,11:32:41,8
C,09MAY2008,12:32:41,2
C,09MAY2008,13:32:41,3
C,09MAY2008,14:32:41,2
;;;;
And the table 2 is:
/* Build WORK.TABLE2: one row per name and date carrying the daily price. */
data WORK.TABLE2;
/* DSD reads comma-delimited values; TRUNCOVER stops short lines from reading into the next record. */
infile datalines dsd truncover;
input name:$3. date:DATE9. daily_price:32.;
format date DATE9.;
label name="name" date="date" daily_price="daily price";
/* Rows are not in name/date order here; sort before any BY-group processing. */
datalines4;
A,05MAY2008,3
B,05MAY2008,6
C,05MAY2008,5
A,06MAY2008,5
A,07MAY2008,4
B,06MAY2008,3
B,07MAY2008,4
B,08MAY2008,3
C,06MAY2008,7
C,07MAY2008,6
C,08MAY2008,5
;;;;
Please consider that the daily price of yesterday should be used in formula.
So the result is:
+------+----------+----------+----------------+
| name | date | time | intraday price |
+------+----------+----------+----------------+
| B | 7-May-08 | 11:32:41 | 3.1 |
| B | 7-May-08 | 12:32:41 | 1 |
| B | 7-May-08 | 13:32:41 | 4 |
| B | 7-May-08 | 14:32:41 | 2.9 |
| A | 8-May-08 | 11:32:41 | 3.95 |
| A | 8-May-08 | 12:32:41 | 3 |
| A | 8-May-08 | 13:32:41 | 6 |
| A | 8-May-08 | 14:32:41 | 4.01 |
| C | 8-May-08 | 11:32:41 | 6.1 |
| C | 8-May-08 | 12:32:41 | 3 |
| C | 8-May-08 | 13:32:41 | 2 |
+------+----------+----------+----------------+
Would you please tell me how I can do that?
Thanks in advance.

Based on Shmuel and KurtBremser work in SAS community, the result is:
/*
 * Build WANT: TABLE1 without the (name, date) groups whose first AND last
 * intraday prices both lie outside [0.962, 1.0398] x yesterday's daily price.
 *
 * Changes from the earlier draft:
 *   - uses the real variable names daily_price and intraday_price;
 *   - matches each intraday date with the PREVIOUS day's daily price instead
 *     of joining on the same date, so a date that is missing from TABLE2
 *     (e.g. A 08MAY2008) is still evaluated against 07MAY2008;
 *   - WANT is derived from TABLE1 itself, so it keeps TABLE1's columns only.
 *
 * Assumption: "yesterday" means the previous calendar day (date - 1).
 * Groups with no daily price on the previous day cannot be tested and are kept.
 */
proc sort data=table1; by name date time; run;
proc sort data=table2; by name date; run;

/* Re-key every daily price to the day for which it is "yesterday's price". */
data table2_new(keep = name date lag_Price);
  set table2;
  lag_Price = daily_price;
  date = date + 1;  /* shifting every date by one day preserves the name/date sort order */
run;

/* One output row per (name, date) group that must be removed. */
data to_delete(keep = name date);
  merge table1     (in=in1)
        table2_new (in=in2);
  by name date;
  retain start_price;
  if in1 and in2;  /* only groups that have both intraday rows and a previous-day price */
  /* TABLE1 is sorted by time, so the first row of the group holds the opening price ... */
  if first.date then start_price = intraday_price;
  /* ... and the last row holds the closing price. */
  if last.date then do;
    min_price = 0.962  * lag_Price;
    max_price = 1.0398 * lag_Price;
    if not (min_price le start_price    le max_price)
       and not (min_price le intraday_price le max_price)
    then output;
  end;
run;

/* Anti-join: keep every TABLE1 row whose group is not flagged for deletion. */
data want;
  merge table1
        to_delete (in=indel);
  by name date;
  if not indel;
run;
SAS Community

This will identify the rows you do not want:
-- Intraday rows whose price lies outside 0.962x .. 1.0398x of the daily price
-- that table2 holds for the SAME name and date.
-- NOTE(review): this tests every row against the same-day price, not the
-- first/last price against the previous day's price.
SELECT intraday.*
FROM table1 AS intraday
INNER JOIN table2 AS daily
    ON daily.name = intraday.name
   AND daily.date = intraday.date
WHERE intraday.intraday_price NOT BETWEEN (daily.daily_price * 0.962)
                                      AND (daily.daily_price * 1.0398)
If you place that inside a subquery and then test for EXISTS in that subquery, you are identifying the rows you do not want.
Demo at: SQL Fiddle
-- Demo schema (SQL Server syntax): intraday prices, one row per name/date/time.
-- The time of day is kept as text in [time]; [date] carries a midnight datetime.
CREATE TABLE Table1
([name] varchar(1), [date] datetime, [time] varchar(8), [intraday_price] decimal(12,2))
;
INSERT INTO Table1
([name], [date], [time], [intraday_price])
VALUES
('A', '2008-05-07 00:00:00', '11:32:41', 3),
('A', '2008-05-07 00:00:00', '12:32:41', 2),
('A', '2008-05-07 00:00:00', '13:32:41', 1),
('A', '2008-05-08 00:00:00', '11:32:41', 3.95),
('A', '2008-05-08 00:00:00', '12:32:41', 3),
('A', '2008-05-08 00:00:00', '13:32:41', 6),
('A', '2008-05-08 00:00:00', '14:32:41', 4.01),
('B', '2008-05-07 00:00:00', '11:32:41', 3.1),
('B', '2008-05-07 00:00:00', '12:32:41', 1),
('B', '2008-05-07 00:00:00', '13:32:41', 4),
('B', '2008-05-07 00:00:00', '14:32:41', 2.9),
('B', '2008-05-08 00:00:00', '11:32:41', 6),
('B', '2008-05-08 00:00:00', '12:32:41', 1),
('B', '2008-05-09 00:00:00', '11:32:41', 5),
('B', '2008-05-09 00:00:00', '12:32:41', 7),
('C', '2008-05-07 00:00:00', '11:32:41', 3),
('C', '2008-05-07 00:00:00', '12:32:41', 2),
('C', '2008-05-08 00:00:00', '11:32:41', 6.1),
('C', '2008-05-08 00:00:00', '12:32:41', 3),
('C', '2008-05-08 00:00:00', '13:32:41', 2),
('C', '2008-05-09 00:00:00', '11:32:41', 8),
('C', '2008-05-09 00:00:00', '12:32:41', 2),
('C', '2008-05-09 00:00:00', '13:32:41', 3),
('C', '2008-05-09 00:00:00', '14:32:41', 2)
;
-- Daily prices, one row per name/date.
CREATE TABLE Table2
([name] varchar(1), [date] datetime, [daily_price] decimal(12,2))
;
INSERT INTO Table2
([name], [date], [daily_price])
VALUES
('A', '2008-05-05 00:00:00', 3),
('B', '2008-05-05 00:00:00', 6),
('C', '2008-05-05 00:00:00', 5),
('A', '2008-05-06 00:00:00', 5),
('A', '2008-05-07 00:00:00', 4),
('B', '2008-05-06 00:00:00', 3),
('B', '2008-05-07 00:00:00', 4),
('B', '2008-05-08 00:00:00', 3),
('C', '2008-05-06 00:00:00', 7),
('C', '2008-05-07 00:00:00', 6),
('C', '2008-05-08 00:00:00', 5)
;
Query 1:
-- Select, through a CTE, every Table1 row for which a row with the same
-- name/date/time has a price outside 0.962x .. 1.0398x of the same-day daily
-- price, then delete those rows by deleting from the CTE.
WITH doomed AS (
    SELECT *
    FROM Table1
    WHERE EXISTS (
        SELECT 1
        FROM table1 AS probe
        INNER JOIN table2 AS daily
            ON daily.name = probe.name
           AND daily.date = probe.date
        WHERE (probe.intraday_price NOT BETWEEN (daily.daily_price * 0.962)
                                            AND (daily.daily_price * 1.0398))
          AND probe.name = Table1.name
          AND probe.date = Table1.date
          AND probe.time = Table1.time
    )
)
DELETE FROM doomed
;
-- Show the rows that survive the delete.
SELECT * FROM table1
Results:
| name | date | time | intraday_price |
|------|----------------------|----------|----------------|
| A | 2008-05-08T00:00:00Z | 11:32:41 | 3.95 |
| A | 2008-05-08T00:00:00Z | 12:32:41 | 3 |
| A | 2008-05-08T00:00:00Z | 13:32:41 | 6 |
| A | 2008-05-08T00:00:00Z | 14:32:41 | 4.01 |
| B | 2008-05-07T00:00:00Z | 13:32:41 | 4 |
| B | 2008-05-09T00:00:00Z | 11:32:41 | 5 |
| B | 2008-05-09T00:00:00Z | 12:32:41 | 7 |
| C | 2008-05-09T00:00:00Z | 11:32:41 | 8 |
| C | 2008-05-09T00:00:00Z | 12:32:41 | 2 |
| C | 2008-05-09T00:00:00Z | 13:32:41 | 3 |
| C | 2008-05-09T00:00:00Z | 14:32:41 | 2 |

Rather than delete from source tables, create a new dataset filtered for the required records. Specifically, consider an exists subquery that selects records according to needed logic.
Below uses a self-join on table1 to align min and max time records within same name and date into one resultset and checks row-wise both intraday_price if they fall within price range.
/*
 * Build NEWTABLE: the TABLE1 rows of every (name, date) group whose first OR
 * last intraday price lies within [0.962, 1.0398] x the previous day's daily
 * price. A group is dropped only when BOTH prices are out of range.
 *
 * Changes from the earlier draft:
 *   - the date join now picks yesterday's daily price (t2.date + 1 day equals
 *     the intraday date); the previous offset of -1 selected tomorrow's price;
 *   - the two range tests are combined with OR, because a group is removed
 *     only when first AND last are both outside the range.
 */
proc sql;
  create table newtable as
  select *
  from work.table1 main
  where exists(
    select 1
    /* m1 = first row of the group (min time), m2 = last row (max time) */
    from work.table1 m1
    inner join work.table1 m2
      on m1.name = m2.name and m1.date = m2.date
    /* t2 = the daily price of the day before m1.date */
    inner join work.table2 t2
      on m1.name = t2.name and m1.date = intnx("day", t2.date, 1)
    inner join
      (select t.name, t.date, min(t.time) as min_time, max(t.time) as max_time
       from work.table1 t
       group by t.name, t.date
      ) agg
      on m1.name = agg.name and m1.date = agg.date
      and m1.time = agg.min_time and m2.time = agg.max_time
    where (   m1.intraday_price between (0.962 * t2.daily_price) and (1.0398 * t2.daily_price)
           or m2.intraday_price between (0.962 * t2.daily_price) and (1.0398 * t2.daily_price))
      and main.name = m1.name and main.date = m1.date);
quit;

Related

What SQL query can be used to limit continuous periods by parameter value, and then to calculate datediff inside them?

I have a table of phone calls consisting of user_id, call_date, city,
where city can be either A or B.
It looks like this:
user_id
call_date
city
1
2021-01-01
A
1
2021-01-02
B
1
2021-01-03
B
1
2021-01-05
B
1
2021-01-10
A
1
2021-01-12
B
1
2021-01-16
A
2
2021-01-17
A
2
2021-01-20
B
2
2021-01-22
B
2
2021-01-23
A
2
2021-01-24
B
2
2021-01-26
B
2
2021-01-30
A
For this table, we need to select for each user all the periods when he was in city B.
These periods are counted in days and start when the first call is made from city B, and end as soon as the next call is made from city A.
So for user_id = 1 the first period starts on 2021-01-02 and ends on 2021-01-10. There can be several such periods for each user.
The result should be the following table:
user_id
period_1
period_2
1
8
4
2
3
6
Can you please tell me how I can limit the periods according to the condition of the problem, and then calculate the datediff within each period?
Thank you
This is a typical gaps and islands problem. You need to group consecutive rows first, then find the first call_date of the next group. Sample code for Postgres is below, the same may be adapted to another DBMS by applying appropriate function to calculate the difference in days.
-- Sample calls: one row per user and call date, with the city of the call.
with a (user_id, call_date, city) as (
    values
        ('1', date '2021-01-01', 'A'),
        ('1', date '2021-01-02', 'B'),
        ('1', date '2021-01-03', 'B'),
        ('1', date '2021-01-05', 'B'),
        ('1', date '2021-01-10', 'A'),
        ('1', date '2021-01-12', 'B'),
        ('1', date '2021-01-16', 'A'),
        ('2', date '2021-01-17', 'A'),
        ('2', date '2021-01-20', 'B'),
        ('2', date '2021-01-22', 'B'),
        ('2', date '2021-01-23', 'A'),
        ('2', date '2021-01-24', 'B'),
        ('2', date '2021-01-26', 'B'),
        ('2', date '2021-01-30', 'A')
),
grp as (
    -- Tag runs of consecutive same-city calls: the rank within the user minus
    -- the rank within (user, city) stays constant while the city is unchanged.
    select
        a.*,
        dense_rank() over by_user - dense_rank() over by_user_city as grp,
        -- date of the user's next call; the last call falls back to its own date
        lead(call_date, 1, call_date) over by_user as next_dt
    from a
    window
        by_user      as (partition by user_id order by call_date asc),
        by_user_city as (partition by user_id, city order by call_date asc)
)
-- One row per city-B run: first B call up to the first call after the run.
select
    user_id,
    city,
    min(call_date) as dt_from,
    max(next_dt) as dt_to,
    max(next_dt) - min(call_date) as diff
from grp
where city = 'B'
group by user_id, grp, city
order by user_id, dt_from
user_id | city | dt_from | dt_to | diff
:------ | :--- | :--------- | :--------- | ---:
1 | B | 2021-01-02 | 2021-01-10 | 8
1 | B | 2021-01-12 | 2021-01-16 | 4
2 | B | 2021-01-20 | 2021-01-23 | 3
2 | B | 2021-01-24 | 2021-01-30 | 6
db<>fiddle here

Average Duration in Status - Gaps and Islands

I'm trying to calculate the average turnover time of a piece of equipment in REPAIR status.
I was able to create a query containing a list of equipments with their snapshotted status on each day.
+-----------------+--------------+--------+----------------------+------------+------------------+
| equipmentNumber | snapshotDate | status | previousSnapshotDate | prevStatus | statusChangeFlag |
+-----------------+--------------+--------+----------------------+------------+------------------+
| 123456 | 2018-04-29 | ONHIRE | 2018-04-28 | AVAILABLE | 1 |
| 123456 | 2018-04-30 | ONHIRE | 2018-04-29 | ONHIRE | 0 |
| 123456 | 2018-05-01 | ONHIRE | 2018-04-30 | ONHIRE | 0 |
| 123456 | 2018-05-02 | REPAIR | 2018-05-01 | ONHIRE | 1 |
| 123456 | 2018-05-03 | REPAIR | 2018-05-02 | REPAIR | 0 |
| 123456 | 2018-05-04 | ONHIRE | 2018-05-03 | REPAIR | 1 |
| 654321 | 2018-04-30 | REPAIR | 2018-04-29 | AVAILABLE | 1 |
| 654321 | 2018-05-01 | REPAIR | 2018-04-30 | REPAIR | 0 |
| 654321 | 2018-05-02 | REPAIR | 2018-05-01 | REPAIR | 0 |
+-----------------+--------------+--------+----------------------+------------+------------------+
So, in this example, we have 2 equipments, "123456" was in REPAIR status 2 days on 5/2 and 5/3, and "654321" was in REPAIR status 3 days on 4/30, 5/1, and 5/2. That would be an average repair turnaround time of (2+3) / 2 = 2.5 days.
I tried this algorithm (Detect consecutive dates ranges using SQL) but it doesn't seem to be quite working for my needs.
I attempt to answer Gaps and Islands using an Incrementing ID column, create one if one doesn't exist, and the ROW_NUMBER window function
-- Sample data (SQL Server syntax): one status snapshot per equipment and day,
-- with the previous snapshot's date/status and a flag set to 1 on rows whose
-- status differs from prevStatus.
CREATE TABLE T1
([equipmentNumber] int, [snapshotDate] datetime, [status] varchar(6), [previousSnapshotDate] datetime, [prevStatus] varchar(9), [statusChangeFlag] int)
;
INSERT INTO T1
([equipmentNumber], [snapshotDate], [status], [previousSnapshotDate], [prevStatus], [statusChangeFlag])
VALUES
(123456, '2018-04-29 00:00:00', 'ONHIRE', '2018-04-28 00:00:00', 'AVAILABLE', 1),
(123456, '2018-04-30 00:00:00', 'ONHIRE', '2018-04-29 00:00:00', 'ONHIRE', 0),
(123456, '2018-05-01 00:00:00', 'ONHIRE', '2018-04-30 00:00:00', 'ONHIRE', 0),
(123456, '2018-05-02 00:00:00', 'REPAIR', '2018-05-01 00:00:00', 'ONHIRE', 1),
(123456, '2018-05-03 00:00:00', 'REPAIR', '2018-05-02 00:00:00', 'REPAIR', 0),
(123456, '2018-05-04 00:00:00', 'ONHIRE', '2018-05-03 00:00:00', 'REPAIR', 1),
(654321, '2018-04-30 00:00:00', 'REPAIR', '2018-04-29 00:00:00', 'AVAILABLE', 1),
(654321, '2018-05-01 00:00:00', 'REPAIR', '2018-04-30 00:00:00', 'REPAIR', 0),
(654321, '2018-05-02 00:00:00', 'REPAIR', '2018-05-01 00:00:00', 'REPAIR', 0)
;
-- Step 1 (cteX): give every snapshot a sequential Id, ordered by equipment and date.
-- Step 2 (cteIsland): among REPAIR rows only, Id minus the row's position in the
-- filtered set is constant for rows whose Ids are consecutive, so it labels each run.
;WITH cteX AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY T.equipmentNumber, T.snapshotDate) AS Id,
        T.equipmentNumber,
        T.snapshotDate,
        T.[status],
        T.previousSnapshotDate,
        T.prevStatus,
        T.statusChangeFlag
    FROM dbo.T1 AS T
),
cteIsland AS (
    SELECT
        X.Id - ROW_NUMBER() OVER (ORDER BY X.Id) AS Island,
        X.*
    FROM cteX AS X
    WHERE X.[status] = 'REPAIR'
)
SELECT * FROM cteIsland
Note the Island Column
Island Id equipmentNumber status
3 4 123456 REPAIR
3 5 123456 REPAIR
4 7 654321 REPAIR
4 8 654321 REPAIR
4 9 654321 REPAIR
Using the Island Column you can get the answer you need with this TSQL
-- Average length, in snapshot rows, of the unbroken REPAIR runs.
-- cteX numbers all snapshots by equipment and date; cteIsland keeps the REPAIR
-- rows and labels each run of consecutive Ids with a constant Island value.
;WITH cteX
AS(
SELECT
Id = ROW_NUMBER()OVER(ORDER BY T.equipmentNumber, T.snapshotDate)
,T.equipmentNumber
,T.snapshotDate
,T.[status]
,T.previousSnapshotDate
,T.prevStatus
,T.statusChangeFlag
FROM dbo.T1 T
),cteIsland
AS(
SELECT
Island = X.Id - ROW_NUMBER()OVER(ORDER BY X.Id)
,*
FROM cteX X
WHERE X.[status] = 'REPAIR'
)
SELECT
-- * 1.0 forces decimal division instead of integer division
AvgDuration =SUM(Totals.IslandCounts) / (COUNT(Totals.IslandCounts) * 1.0)
FROM
(
SELECT
IslandCounts = COUNT(I.Island)
,I.equipmentNumber
FROM cteIsland I
-- Group by Island as well as equipment: grouping by equipment alone would add
-- two separate repair periods of the same equipment into one long period.
-- equipmentNumber stays in the key because adjacent equipments can share an
-- Island value (Island is computed across all equipment).
GROUP BY I.equipmentNumber, I.Island
) Totals
Answer
AvgDuration
2.50000000000000
Here's the SQLFiddle
That method should work to identify the repair periods:
-- First and last snapshot date of every unbroken REPAIR run per equipment.
-- snapshotDate minus the per-equipment row number is constant across a run of
-- consecutive daily snapshots, so it serves as the run's grouping key.
with numbered as (
    select
        t.*,
        row_number() over (partition by equipmentNumber order by snapshotDate) as seqnum
    from t
)
select
    numbered.equipmentNumber,
    min(numbered.snapshotDate),
    max(numbered.snapshotDate)
from numbered
where numbered.status = 'REPAIR'
group by
    numbered.equipmentNumber,
    dateadd(day, - numbered.seqnum, numbered.snapshotDate);
You can get the average using a subquery:
-- Average number of days per REPAIR run.
-- datediff(day, first, last) counts the day boundaries crossed, so a run
-- snapshotted on 5/2 and 5/3 yields 1; add 1 to count both end days
-- (2 days), which is how the question measures repair time.
select avg((datediff(day, minsd, maxsd) + 1) * 1.0)
from (select equipmentNumber, min(snapshotDate) as minsd, max(snapshotDate) as maxsd
from (select t.*,
row_number() over (partition by equipmentNumber order by snapshotDate) as seqnum
from t
) t
where status = 'REPAIR'
-- snapshotDate minus the row number is constant within a run of consecutive days
group by equipmentNumber, dateadd(day, - seqnum, snapshotDate)
) e;

modify output from 1 query

I need your suggestions, guys. I don't know what to title my question, but I have one query which gives an output like this picture:
and this is my query :
-- Bookings at location 'I' whose IS_DELETE and IDCANCEL are both NULL and whose
-- period overlaps today's 08:00-23:00 window, listed by room and start time.
-- The joins to the type, employee and location tables contribute no output
-- columns; as inner joins they still restrict the rows returned.
select
    to_char(mtg.DATE_AWAL, 'dd/mm/yyyy hh24:mi') DATE_AWAL,
    to_char(mtg.DATE_AKHIR, 'dd/mm/yyyy hh24:mi') DATE_AKHIR,
    to_char(mtg.DATE_AWAL, 'hh24:mi') TIME_AWAL,
    to_char(mtg.DATE_AKHIR, 'hh24:mi') TIME_AKHIR,
    room.NAMARUANG,
    mtg.IDMEETING
from TMEETING_ROOM mtg
inner join MMEETING_TYPE mtg_type
    on mtg.IDTYPE = mtg_type.IDMEETING
inner join MMEETING_ROOM room
    on mtg.IDMEETINGROOM = room.IDMEETINGROOM
inner join HR.VWKARYAWAN emp
    on mtg.IDPENGUSUL = emp.IDKARYAWAN
inner join HR.MLOKASI loc
    on mtg.IDLOKASI = loc.IDLOKASI
where mtg.IS_DELETE IS NULL
  and mtg.IDCANCEL IS NULL
  and wm_overlaps(
          wm_period(mtg.DATE_AWAL, mtg.DATE_AKHIR),
          wm_period(
              TO_DATE(TO_CHAR(trunc(sysdate) + 08/24, 'yyyy-mm-dd hh24:mi'), 'yyyy-mm-dd hh24:mi'),
              TO_DATE(TO_CHAR(trunc(sysdate) + 23/24, 'yyyy-mm-dd hh24:mi'), 'yyyy-mm-dd hh24:mi')
          )
      ) = 1
  and mtg.idlokasi = 'I'
order by room.NAMARUANG, mtg.DATE_AWAL;
Can anybody suggest how to change this query so that its output looks like this picture:
I'm a newbie at Oracle SQL.
Note: the time and room are dynamic.
Here is an example of how you might achieve a "generic" pivot table in MySQL.
The technique used requires row numbering (and in v8 of MySQL there will be an easier way to do this) but for now it requires using @variables.
Then, with each row number, we "transform" rows to columns using case expressions inside the max() function (conditional aggregates).
You will need to decide how many columns you need, and note that the order by inside the subquery t is vital to successfully arranging the data.
SQL Fiddle
MySQL 5.6 Schema Setup:
-- Stand-in for the result of the original query: one row per booking, with a
-- grouping code and two text values to show in each pivoted cell.
CREATE TABLE YourQueryHere
(`id` int, `date_column` datetime, `code_column` varchar(7), `data_for_cells1` varchar(5), `data_for_cells2` varchar(5))
;
INSERT INTO YourQueryHere
(`id`, `date_column`, `code_column`, `data_for_cells1`, `data_for_cells2`)
VALUES
(1, '2017-11-14 00:00:00', 'Bintang', '09:00', '10:30'),
(2, '2017-11-14 00:00:00', 'Bintang', '11:00', '12:30'),
(3, '2017-11-14 00:00:00', 'Bintang', '14:00', '17:00'),
(4, '2017-11-14 00:00:00', 'Sapporo', '11:30', '14:00'),
(5, '2017-11-14 00:00:00', 'Sapporo', '14:30', '15:00'),
(6, '2017-11-14 00:00:00', 'Tiger', '08:00', '09:30'),
(7, '2017-11-14 00:00:00', 'Tiger', '11:00', '12:00')
;
Query 1:
-- Pivot up to four rows per code_column into columns pivcol1..pivcol4.
-- The inner query numbers the rows of each code_column with MySQL user
-- variables (@counter restarts at 1 whenever code_column changes); the outer
-- query turns row number N into column N with conditional aggregation.
-- User variables are written with '@'; in MySQL a '#' starts a comment.
select
code_column
, max(case when RowNumber = 1 then concat(data_for_cells1, ' ', data_for_cells2) end) as pivcol1
, max(case when RowNumber = 2 then concat(data_for_cells1, ' ', data_for_cells2) end) as pivcol2
, max(case when RowNumber = 3 then concat(data_for_cells1, ' ', data_for_cells2) end) as pivcol3
, max(case when RowNumber = 4 then concat(data_for_cells1, ' ', data_for_cells2) end) as pivcol4
from (
select *
, @counter :=IF(@prev=code_column,@counter+1,1)AS RowNumber
, @prev := code_column
from YourQueryHere
cross join (select @counter:=0, @prev:= '') vars
-- id breaks ties between rows that share code_column and date_column, so the
-- numbering (and therefore the column each row lands in) is repeatable
order by
code_column, date_column, id
) t
group by
code_column
order by
code_column
;
Results:
| code_column | pivcol1 | pivcol2 | pivcol3 | pivcol4 |
|-------------|-------------|-------------|-------------|---------|
| Bintang | 09:00 10:30 | 11:00 12:30 | 14:00 17:00 | (null) |
| Sapporo | 11:30 14:00 | 14:30 15:00 | (null) | (null) |
| Tiger | 08:00 09:30 | 11:00 12:00 | (null) | (null) |

Calculate total time worked in a day with multiple stops and starts

I can use DATEDIFF to find the difference between one set of dates like this
DATEDIFF(MINUTE, @startdate, @enddate) -- minute boundaries crossed from @startdate to @enddate
but how would I find the total time span between multiple sets of dates? I don't know how many sets (stops and starts) I will have.
The data is on multiple rows with start and stops.
ID TimeStamp StartOrStop TimeCode
----------------------------------------------------------------
1 2017-01-01 07:00:00 Start 1
2 2017-01-01 08:15:00 Stop 2
3 2017-01-01 10:00:00 Start 1
4 2017-01-01 11:00:00 Stop 2
5 2017-01-01 10:30:00 Start 1
6 2017-01-01 12:00:00 Stop 2
This code would work assuming that your table only stores data from one person, and that the rows are in the order Start/Stop/Start/Stop
-- Total minutes worked: number the start rows and the stop rows separately in
-- time order, pair the Nth start with the Nth stop, and sum the differences.
-- Replace <<table>> with the real table name.
WITH StartTime AS (
SELECT
TimeStamp
-- a window function takes its ordering inside OVER (...)
, ROW_NUMBER() OVER (ORDER BY TimeStamp) AS RowNum
FROM
<<table>>
WHERE
TimeCode = 1
), StopTime AS (
SELECT
TimeStamp
, ROW_NUMBER() OVER (ORDER BY TimeStamp) AS RowNum
FROM
<<table>>
WHERE
TimeCode = 2
)
SELECT
SUM (DATEDIFF( MINUTE, StartTime.TimeStamp, StopTime.TimeStamp )) As TotalTime
FROM
StartTime
JOIN StopTime ON StartTime.RowNum = StopTime.RowNum
This will work if your starts and stops are reliable. Your sample has two starts in order - 10:00 and 10:30 starts. I assume in production you will have an employee id to group on, so I added this to the sample data in place of the identity column.
Also in production, the CTE sets will be reduced by using a parameter on date. If there are overnight shifts, you would want your stops CTE to use dateadd(day, 1, @startDate) as your upper bound when retrieving end date.
Set up sample:
-- Sample data held in a local temporary table named #temp.
-- A name starting with '#' denotes a temp table, which is created with
-- CREATE TABLE; DECLARE ... TABLE accepts only '@' table variables.
if object_id('tempdb..#temp') is not null drop table #temp;  -- allow re-runs in one session
create table #temp (
    EmpId int,
    TimeStamp datetime,
    StartOrStop varchar(55),
    TimeCode int  -- 1 on 'Start' rows, 2 on 'Stop' rows
);
insert into #temp (EmpId, TimeStamp, StartOrStop, TimeCode)
values
(1, '2017-01-01 07:00:00', 'Start', 1),
(1, '2017-01-01 08:15:00', 'Stop', 2),
(1, '2017-01-01 10:00:00', 'Start', 1),
(1, '2017-01-01 11:00:00', 'Stop', 2),
(2, '2017-01-01 10:30:00', 'Start', 1),
(2, '2017-01-01 12:00:00', 'Stop', 2);
Query:
-- Minutes worked per employee and work day.
-- starts/stops number each employee's start and stop rows in time order; the
-- Nth start is paired with the Nth stop and the paired minutes are summed.
;with starts as (
select t.EmpId,
t.TimeStamp as StartTime,
row_number() over (partition by t.EmpId order by t.TimeStamp asc) as rn
from #temp t
where Timecode = 1 --Start time code?
),
stops as (
select t.EmpId,
t.TimeStamp as EndTime,
row_number() over (partition by t.EmpId order by t.TimeStamp asc) as rn
from #temp t
where Timecode = 2 --Stop time code?
)
select cast(sub.StartTime as date) as WorkDay,
sub.EmpId as Employee,
min(sub.StartTime) as ClockIn,
-- latest stop of the day; min() here would report the first stop instead
max(sub.EndTime) as ClockOut,
sum(sub.MinutesWorked) as MinutesWorked
from
(
select strt.EmpId,
strt.StartTime,
stp.EndTime,
datediff(minute, strt.StartTime, stp.EndTime) as MinutesWorked
from starts strt
inner join stops stp
on strt.EmpId = stp.EmpId
and strt.rn = stp.rn
)sub
-- one row per employee per start date, so data spanning several days is not
-- collapsed into a single row
group by sub.EmpId, cast(sub.StartTime as date)
This works assuming your table has an incremental ID and interleaving start/stop records
--Data sample as provided, held in a table variable ('@' prefix)
declare @temp table (
Id int,
TimeStamp datetime,
StartOrStop varchar(55),
TimeCode int
);
insert into @temp (Id, TimeStamp, StartOrStop, TimeCode)
values
(1, '2017-01-01 07:00:00', 'Start', 1),
(2, '2017-01-01 08:15:00', 'Stop', 2),
(3, '2017-01-01 10:00:00', 'Start', 1),
(4, '2017-01-01 11:00:00', 'Stop', 2),
(5, '2017-01-01 10:30:00', 'Start', 1),
(6, '2017-01-01 12:00:00', 'Stop', 2);
--let's see every pair start/stop and discard stop/start
--(stop.timecode=2 also guards against a start that is followed by another start)
select start.timestamp start, stop.timestamp stop,
datediff(mi,start.timestamp,stop.timestamp) minutes
from @temp start inner join @temp stop
on start.id+1= stop.id and start.timecode=1 and stop.timecode=2
--Sum all for required result
select sum(datediff(mi,start.timestamp,stop.timestamp) ) totalMinutes
from @temp start inner join @temp stop
on start.id+1= stop.id and start.timecode=1 and stop.timecode=2
Results
+-------------------------+-------------------------+---------+
| start | stop | minutes |
+-------------------------+-------------------------+---------+
| 2017-01-01 07:00:00.000 | 2017-01-01 08:15:00.000 | 75 |
| 2017-01-01 10:00:00.000 | 2017-01-01 11:00:00.000 | 60 |
| 2017-01-01 10:30:00.000 | 2017-01-01 12:00:00.000 | 90 |
+-------------------------+-------------------------+---------+
+--------------+
| totalMinutes |
+--------------+
| 225 |
+--------------+
Maybe the tricky part is the join clause. We need to join the table with itself, offset by one ID; that is what on start.id+1= stop.id does.
On the other hand, to exclude stop/start pairs we use start.timecode=1. If there were no column with this information, something like stop.id%2=0 would work just as well.

Complex query with multiple conditions

The 'complex'-part of the title might be subjective, but for me, it is rather complex.
I have a table called Contracts (C) and one called FinancialYears (FY). A contract will have multiple financial years (one per year), created automatically, if a specific status is met (for example, cancelled contracts won't get new financial year records, but approved contracts will). It's the FY that has a specific status each year. For example:
--------------------FinancialYears-------------------
ContractID: 1 | 1 | 1
StatusID: 2 | 3 | 5
dStart: 01-01-2012 | 01-01-2013 | 01-01-2014
dEnd: 31-12-2012 | 31-12-2013 | 31-12-2014
Year: 2012 | 2013 | 2014
-----------------------------------------------------
(For example: StatusID (2, 3, 5), (Proposed, Approved, Cancelled))
Now assume a user wants to find out how many contracts are approved at this point of time, then the query should be looking at the most recent financial year of the contract, and that's what I'm having a hard time with.
I have to write a query that does the following:
-- Contracts joined to the financial year whose period contains the current
-- moment and whose status equals X (X is a placeholder for the StatusID sought).
SELECT *
FROM Contracts C
INNER JOIN FinancialYears FY ON FY.ContractID = C.ContractID
-- strict comparisons: a period that starts or ends exactly now does not match
WHERE StatusID = X AND (dStart < GETDATE() AND dEnd > GETDATE())
// This would search on the financial year of the contract which has its valid
period in-between today.
But since, for example, a cancelled contract will not get a new financial year in the following year, a query run today would never find a contract that was cancelled in 2014, so I need to add the following fallback condition to the query somehow:
// IF (dStart < GETDATE() AND dEnd > GETDATE()) RETURNS 0, THEN DO INSTEAD:
SELECT TOP 1
//
WHERE (dEnd < GETDATE)
ORDER BY ENDDATE DESC
// In other words: if there is no ongoing financial year for the given time interval,
then select the most recent financial year in the past.
Could anyone help me out here?
Thank you.
Here's a quick mock-up:
-- For every contract, return its most recent financial year that has already
-- started: the ongoing one if there is one, otherwise the latest past one.
-- CROSS APPLY takes no ON clause; the correlation to the outer contract goes
-- inside the applied subquery, which therefore runs per contract.
-- To keep only contracts currently in one status, append e.g.
--   WHERE F.StatusID = 3
SELECT C.*, F.StatusID, F.dStart, F.dEnd, F.[Year]
FROM Contracts C
CROSS APPLY (
    SELECT TOP (1) FY.StatusID, FY.dStart, FY.dEnd, FY.[Year]
    FROM FinancialYears FY
    WHERE FY.ContractID = C.ContractID
      AND FY.dStart < GETDATE()
    ORDER BY FY.dEnd DESC
) F
But you'll probably need some extra criteria to find all of the Contracts, for example customer code or something.
You just need to filter the contracts where the current date falls between the start and end date of contracts that are approved if I'm not mistaken.
Here's a demo SQL Fiddle
MS SQL Server Schema Setup:
-- Sample data: one row per contract and financial year, with that year's status.
-- NOTE(review): for ContractID 6 the [Year] values (2012-2014) are one less than
-- the year of dStart/dEnd (2013-2015) - looks like a copy/paste slip; confirm.
CREATE TABLE FinancialYearContracts
([ContractID] int, [StatusID] int, [dStart] datetime, [dEnd] datetime, [Year] int)
;
INSERT INTO FinancialYearContracts
([ContractID], [StatusID], [dStart], [dEnd], [Year])
VALUES
(1, 2, '2012-01-01 00:00:00', '2012-12-31 00:00:00', 2012),
(1, 3, '2013-01-01 00:00:00', '2013-12-31 00:00:00', 2013),
(1, 5, '2014-01-01 00:00:00', '2014-12-31 00:00:00', 2014),
(2, 2, '2013-01-01 00:00:00', '2013-12-31 00:00:00', 2013),
(2, 3, '2014-01-01 00:00:00', '2014-12-31 00:00:00', 2014),
(2, 3, '2015-01-01 00:00:00', '2015-12-31 00:00:00', 2015),
(3, 2, '2014-01-01 00:00:00', '2014-12-31 00:00:00', 2014),
(3, 3, '2015-01-01 00:00:00', '2015-12-31 00:00:00', 2015),
(4, 2, '2014-01-01 00:00:00', '2014-12-31 00:00:00', 2014),
(5, 2, '2013-01-01 00:00:00', '2013-12-31 00:00:00', 2013),
(5, 3, '2014-01-01 00:00:00', '2014-12-31 00:00:00', 2014),
(5, 3, '2015-01-01 00:00:00', '2015-12-31 00:00:00', 2015),
(6, 2, '2013-01-01 00:00:00', '2013-12-31 00:00:00', 2012),
(6, 3, '2014-01-01 00:00:00', '2014-12-31 00:00:00', 2013),
(6, 5, '2015-01-01 00:00:00', '2015-12-31 00:00:00', 2014)
;
Query to generate results:
-- Financial-year rows in the given status whose period contains @DateFilter.
-- T-SQL variables are written with '@'.
declare @DateFilter as datetime = GETDATE()
declare @Status as int = 3
SELECT *
FROM FinancialYearContracts
-- dEnd is stored as midnight of the last day, so BETWEEN would exclude every
-- moment of that day after 00:00; test against the start of the following day.
WHERE @DateFilter >= dStart
AND @DateFilter < DATEADD(day, 1, dEnd)
AND StatusID = @Status
Results:
| CONTRACTID | STATUSID | DSTART | DEND | YEAR |
|------------|----------|--------------------------------|---------------------------------|------|
| 2 | 3 | January, 01 2015 00:00:00+0000 | December, 31 2015 00:00:00+0000 | 2015 |
| 3 | 3 | January, 01 2015 00:00:00+0000 | December, 31 2015 00:00:00+0000 | 2015 |
| 5 | 3 | January, 01 2015 00:00:00+0000 | December, 31 2015 00:00:00+0000 | 2015 |
This shows contracts that are currently in the approved status based on the sample data I put together.