SQL query help to calculate max

I have a question regarding a query I need to write in SQL (I use BigQuery).
I have this table:
train_no, wagon_no, weight, length, date, startpoint(km), endpoint(km)
1, 123, 1000, 20, 20190101, 0, 7
1, 234, 2000, 20, 20190101, 1, 2
1, 345, 3000, 30, 20190101, 1, 5
1, 456, 1000, 40, 20190101, 1, 6
2, 987, 1000, 10, 20190101, 0, 8
2, 876, 2000, 20, 20190101, 1, 2
2, 765, 3000, 20, 20190101, 1, 5
2, 654, 1000, 20, 20190101, 1, 6
The table shows two trains with their wagons. For each wagon we see at which point it was added to the train. For train_no = 1 we see that wagon 234 was included in the train from startpoint = 1 (kilometre 1) to endpoint = 2 (kilometre 2), where it was removed. We also see that the max endpoint is 7 for train_no = 1, so the train's maximum travelled distance is 7 km.
The total train length and weight vary along the route, and I would like to calculate the maximum length and maximum weight reached along the way. How can I do this in SQL?
Any suggestion is appreciated.
Edit:
Adding a pic to clarify what I'm looking for.
As you can see in the attached pic, train_no = 1 reaches its max weight between points 1 and 2. The total weight there is 7000, which is the sum of all wagons on the train at that specific distance. Likewise, the total length is 110, which is the sum of the lengths of all those wagons.

Below is for BigQuery Standard SQL
#standardSQL
WITH temp AS (
  SELECT train_no, dt, MIN(startpoint) startpoint, MAX(endpoint) endpoint
  FROM `project.dataset.table`
  GROUP BY train_no, dt
)
SELECT train_no, dt, MAX(wagons) max_wagons, MAX(total_weight) AS max_total_weight, MAX(total_len) max_total_len
FROM (
  SELECT train_no, dt, point, COUNT(wagon_no) wagons, SUM(weight) total_weight, SUM(len) total_len
  FROM temp, UNNEST(GENERATE_ARRAY(startpoint, endpoint)) point
  LEFT JOIN `project.dataset.table` t
  USING (train_no, dt)
  WHERE point >= t.startpoint AND point < t.endpoint
  GROUP BY train_no, dt, point
)
GROUP BY train_no, dt
Applied to the sample data from your question, as in the example below:
#standardSQL
WITH `project.dataset.table` AS (
  SELECT 1 train_no, 123 wagon_no, 1000 weight, 20 len, '20190101' dt, 0 startpoint, 7 endpoint UNION ALL
  SELECT 1, 234, 2000, 20, '20190101', 1, 2 UNION ALL
  SELECT 1, 345, 3000, 30, '20190101', 1, 5 UNION ALL
  SELECT 1, 456, 1000, 40, '20190101', 1, 6 UNION ALL
  SELECT 2, 987, 1000, 10, '20190101', 0, 8 UNION ALL
  SELECT 2, 876, 2000, 20, '20190101', 1, 2 UNION ALL
  SELECT 2, 765, 3000, 20, '20190101', 1, 5 UNION ALL
  SELECT 2, 654, 1000, 20, '20190101', 1, 6
), temp AS (
  SELECT train_no, dt, MIN(startpoint) startpoint, MAX(endpoint) endpoint
  FROM `project.dataset.table`
  GROUP BY train_no, dt
)
SELECT train_no, dt, MAX(wagons) max_wagons, MAX(total_weight) AS max_total_weight, MAX(total_len) max_total_len
FROM (
  SELECT train_no, dt, point, COUNT(wagon_no) wagons, SUM(weight) total_weight, SUM(len) total_len
  FROM temp, UNNEST(GENERATE_ARRAY(startpoint, endpoint)) point
  LEFT JOIN `project.dataset.table` t
  USING (train_no, dt)
  WHERE point >= t.startpoint AND point < t.endpoint
  GROUP BY train_no, dt, point
)
GROUP BY train_no, dt
result is

Row  train_no  dt        max_wagons  max_total_weight  max_total_len
1    1         20190101  4           7000              110
2    2         20190101  4           7000              70
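Note that the predicate point >= t.startpoint AND point < t.endpoint treats each wagon's interval as half-open, so at kilometre 1 train 1 carries all four wagons: weight 1000 + 2000 + 3000 + 1000 = 7000 and length 20 + 20 + 30 + 40 = 110, which matches the expected picture.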

The following query returns the length at each km marker, sorted in descending order for each train.
with data as (
  select 1 as train_no, 123 as wagon_no, 1000 as weight, 20 as length, 20190101 as date, 0 as startpoint, 7 as endpoint union all
  select 1, 234, 2000, 20, 20190101, 1, 2 union all
  select 1, 345, 3000, 30, 20190101, 1, 5 union all
  select 1, 456, 1000, 40, 20190101, 1, 6 union all
  select 2, 987, 1000, 10, 20190101, 0, 8 union all
  select 2, 876, 2000, 20, 20190101, 1, 2 union all
  select 2, 765, 3000, 20, 20190101, 1, 5 union all
  select 2, 654, 1000, 20, 20190101, 1, 6
),
km_array as (
  select * from unnest(generate_array(0, 10)) km
),
joined as (
  select *
  from km_array
  cross join data
  where km between startpoint and endpoint
),
train_length_at_each_km as (
  select
    km,
    train_no,
    sum(length) as length
  from joined
  group by 1, 2
)
select
  train_no, length, km
from train_length_at_each_km
order by train_no, length desc
Getting the maximum weight would use logic similar to the train_length_at_each_km CTE, as sketched below.
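For instance, a minimal sketch of that weight variant (my own addition, not part of the original answer): add a comma after the train_length_at_each_km CTE above and append the following in place of the final SELECT.
train_weight_at_each_km as (
  select
    km,
    train_no,
    sum(weight) as weight  -- total train weight at each km marker
  from joined
  group by 1, 2
)
select
  train_no, max(weight) as max_weight
from train_weight_at_each_km
group by train_no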


get unique matches

How can I get unique matches played between two teams?
Game_Number, Team_A, Team_B, Date, Team_A_score, Team_B_score
1, IND, USA, 2020-01-01, 10, 20
2, USA, IND, 2020-01-02, 10, 20
3, AUS, IND, 2020-01-02, 30, 15
4, IND, AUS, 2020-01-03, 22, 34
5, UAE, AUS, 2020-01-04, 14, 41
Expected output
IND, USA
AUS, IND
UAE, AUS
In the above case, games 1 and 2 will be considered the same (one unique match).
If you do not mind the order of the teams then use DISTINCT with GREATEST and LEAST:
SELECT DISTINCT
LEAST(Team_A, Team_B) AS team_a,
GREATEST(Team_A, Team_B) AS team_b
FROM table_name
Which, for the sample data:
CREATE TABLE table_name (Game_Number, Team_A, Team_B, "DATE", Team_A_score, Team_B_score) AS
SELECT 1, 'IND', 'USA', DATE '2020-01-01', 10, 20 FROM DUAL UNION ALL
SELECT 2, 'USA', 'IND', DATE '2020-01-02', 10, 20 FROM DUAL UNION ALL
SELECT 3, 'AUS', 'IND', DATE '2020-01-02', 30, 15 FROM DUAL UNION ALL
SELECT 4, 'IND', 'AUS', DATE '2020-01-03', 22, 34 FROM DUAL UNION ALL
SELECT 5, 'UAE', 'AUS', DATE '2020-01-04', 14, 41 FROM DUAL;
Outputs:
TEAM_A  TEAM_B
IND     USA
AUS     IND
AUS     UAE
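Since LEAST/GREATEST put each pair into a canonical order, DISTINCT collapses (IND, USA) and (USA, IND) into the same row.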
If you want to respect the order of the teams and get the earliest instance of a pairing of teams then:
SELECT Team_A,
       Team_B
FROM (
  SELECT Team_A,
         Team_B,
         ROW_NUMBER() OVER (
           PARTITION BY LEAST(Team_A, Team_B), GREATEST(Team_A, Team_B)
           ORDER BY "DATE"
         ) AS rn
  FROM table_name
)
WHERE rn = 1;
Which outputs:
TEAM_A  TEAM_B
AUS     IND
UAE     AUS
IND     USA
db<>fiddle here

Complex query analyzing historical records

I am using Oracle and trying to retrieve the total number of days a person was out of the office during the year. I have 2 tables involved:
Statuses
1 - Active
2 - Out of the Office
3 - Other
ScheduleHistory
RecordID - primary key
PersonID
PreviousStatusID
NextStatusID
DateChanged
I can easily find when the person went on vacation and when they came back, using
SELECT DateChanged FROM ScheduleHistory WHERE PersonID=111 AND NextStatusID = 2
and
SELECT DateChanged FROM ScheduleHistory WHERE PersonID=111 AND PreviousStatusID = 2
But in case a person went on vacation more than once, how can I calculate the total number of days a person was out of the office? Is it possible to do this programmatically, given only the PersonID?
Here is some sample data:
RecordID  PersonID  PreviousStatusID  NextStatusID  DateChanged
---------------------------------------------------------------
1         111       1                 2             03/11/2020
2         111       2                 1             03/13/2020
3         111       1                 3             04/01/2020
4         111       3                 1             04/07/2020
5         111       1                 2             06/03/2020
6         111       2                 1             06/05/2020
7         111       1                 2             09/14/2020
8         111       2                 1             09/17/2020
So from the data above, for the year 2020 for PersonID 111 the query should return 7
Try this: the LAG gap between consecutive status changes gives the length of each stay, so summing the gaps on the rows where previousstatusid = 2 gives the days out of office (2 + 2 + 3 = 7 for the sample data):
with aux1 AS (
  SELECT a.*,
         to_date(datechanged, 'MM/DD/YYYY')
           - LAG(to_date(datechanged, 'MM/DD/YYYY')) OVER (
               PARTITION BY personid
               ORDER BY recordid
             ) lag_date
  FROM ScheduleHistory a
)
SELECT personid,
       SUM(lag_date) tot_days_ooo
FROM aux1
WHERE previousstatusid = 2
GROUP BY personid;
If you want total days (or weekdays) for each year (and to account for periods when it goes over the year boundary) then:
WITH date_ranges ( personid, status, start_date, end_date ) AS (
SELECT personid,
nextstatusid,
datechanged,
LEAD(datechanged, 1, datechanged) OVER(
PARTITION BY personid
ORDER BY datechanged
)
FROM table_name
),
split_year_ranges ( personid, year, start_date, end_date, max_date ) AS (
SELECT personid,
TRUNC( start_date, 'YY' ),
start_date,
LEAST(
end_date,
ADD_MONTHS( TRUNC( start_date, 'YY' ), 12 )
),
end_date
FROM date_ranges
WHERE status = 2
UNION ALL
SELECT personid,
end_date,
end_date,
LEAST( max_date, ADD_MONTHS( end_date, 12 ) ),
max_date
FROM split_year_ranges
WHERE end_date < max_date
)
SELECT personid,
EXTRACT( YEAR FROM year) AS year,
SUM( end_date - start_date ) AS total_days,
SUM(
( TRUNC( end_date, 'IW' ) - TRUNC( start_date, 'IW' ) ) * 5 / 7
+ LEAST( end_date - TRUNC( end_date, 'IW' ), 5 )
- LEAST( start_date - TRUNC( start_date, 'IW' ), 5 )
) AS total_weekdays
FROM split_year_ranges
GROUP BY personid, year
ORDER BY personid, year
Which, for the sample data:
CREATE TABLE table_name ( RecordID, PersonID, PreviousStatusID, NextStatusID, DateChanged ) AS
SELECT 1, 111, 1, 2, DATE '2020-03-11' FROM DUAL UNION ALL
SELECT 2, 111, 2, 1, DATE '2020-03-13' FROM DUAL UNION ALL
SELECT 3, 111, 1, 3, DATE '2020-04-01' FROM DUAL UNION ALL
SELECT 4, 111, 3, 1, DATE '2020-04-07' FROM DUAL UNION ALL
SELECT 5, 111, 1, 2, DATE '2020-06-03' FROM DUAL UNION ALL
SELECT 6, 111, 2, 1, DATE '2020-06-05' FROM DUAL UNION ALL
SELECT 7, 111, 1, 2, DATE '2020-09-14' FROM DUAL UNION ALL
SELECT 8, 111, 2, 1, DATE '2020-09-17' FROM DUAL UNION ALL
SELECT 9, 222, 1, 2, DATE '2019-12-31' FROM DUAL UNION ALL
SELECT 10, 222, 2, 2, DATE '2020-12-01' FROM DUAL UNION ALL
SELECT 11, 222, 2, 2, DATE '2021-01-02' FROM DUAL;
Outputs:
PERSONID  YEAR  TOTAL_DAYS  TOTAL_WEEKDAYS
111       2020  7           7
222       2019  1           1
222       2020  366         262
222       2021  1           1
db<>fiddle here
Provided no vacation crosses a year boundary
with grps as (
  SELECT sh.*,
         row_number() over (partition by PersonID, NextStatusID order by DateChanged) grp
  FROM ScheduleHistory sh
  WHERE NextStatusID in (1, 2) and 3 not in (NextStatusID, PreviousStatusID)
), durations as (
  SELECT PersonID, min(DateChanged) DateChanged, max(DateChanged) - min(DateChanged) duration
  FROM grps
  GROUP BY PersonID, grp
)
SELECT PersonID, sum(duration) days_out
FROM durations
GROUP BY PersonID;
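The row_number() numbers the departures (NextStatusID = 2) and the returns (NextStatusID = 1) separately, so grouping by PersonID and grp pairs the nth departure with the nth return; max(DateChanged) - min(DateChanged) is then the length of each stay.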
db<>fiddle
In the query below:
year_span is used to split an interval spanning two years into two different records
H1 adds a row number per PersonID to get the right sequence for each person
H2 gets the periods for each status change and extracts the 1st day of the year of the interval end
H3 splits records that span two years and calculates the right date_start and date_end for each interval
H calculates the days elapsed in each interval for each year
the final query sums up the records to get the output
EDIT
If you need workdays instead of total days, you should not use total_days/7*5 because it is a bad approximation and in some cases gives weird results.
I have posted a solution that jumps from Fridays to Mondays here
with
statuses (sid, sdescr) as (
  select 1, 'Active' from dual union all
  select 2, 'Out of the Office' from dual union all
  select 3, 'Other' from dual
),
ScheduleHistory (RecordID, PersonID, PreviousStatusID, NextStatusID, DateChanged) as (
  select 1, 111, 1, 2, date '2020-03-11' from dual union all
  select 2, 111, 2, 1, date '2020-03-13' from dual union all
  select 3, 111, 1, 3, date '2020-04-01' from dual union all
  select 4, 111, 3, 1, date '2020-04-07' from dual union all
  select 5, 111, 1, 2, date '2020-06-03' from dual union all
  select 6, 111, 2, 1, date '2020-06-05' from dual union all
  select 7, 111, 1, 2, date '2020-09-14' from dual union all
  select 8, 111, 2, 1, date '2020-09-17' from dual union all
  select 9, 222, 1, 2, date '2019-12-31' from dual union all
  select 10, 222, 2, 2, date '2020-12-01' from dual union all
  select 11, 222, 2, 2, date '2021-01-02' from dual
),
year_span (n) as (
  select 1 from dual union all
  select 2 from dual
),
H1 AS (
  SELECT ROW_NUMBER() OVER (PARTITION BY PersonID ORDER BY RecordID) PID, H.*
  FROM ScheduleHistory H
),
H2 as (
  SELECT H1.*, H2.DateChanged DateChanged2,
         EXTRACT(YEAR FROM H2.DateChanged) - EXTRACT(YEAR FROM H1.DateChanged) + 1 Y,
         trunc(H2.DateChanged, 'YEAR') Y2
  FROM H1 H1
  LEFT JOIN H1 H2 ON H1.PID = H2.PID - 1 AND H1.PersonID = H2.PersonID
),
H3 AS (
  SELECT Y, N, H2.PID, H2.RecordID, H2.PersonID, H2.NextStatusID,
         CASE WHEN Y = 1 THEN H2.DateChanged ELSE CASE WHEN N = 1 THEN H2.DateChanged ELSE Y2 END END D1,
         CASE WHEN Y = 1 THEN H2.DateChanged2 ELSE CASE WHEN N = 1 THEN Y2 ELSE H2.DateChanged2 END END D2
  FROM H2
  JOIN year_span N ON N.N <= Y
),
H AS (
  SELECT PersonID, NextStatusID, EXTRACT(year FROM d1) Y, d2 - d1 D
  FROM H3
)
select PersonID, sdescr Status, Y, sum(d) d
from H
join statuses s on NextStatusID = s.sid
group by PersonID, sdescr, Y
order by PersonID, sdescr, Y
output
PersonID  Status             Y     d
111       Active             2020  177
111       Other              2020  6
111       Out of the Office  2020  7
222       Out of the Office  2019  1
222       Out of the Office  2020  366
222       Out of the Office  2021  1
check the fiddle here
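To illustrate the EDIT's point, here is a minimal sketch (hypothetical dates, not from the question) comparing the naive total_days * 5/7 approximation with the TRUNC(date, 'IW') weekday formula used in the earlier answer. A Friday-to-Monday range contains exactly one weekday (the Friday), but the approximation reports about 2.14:
WITH rng (start_date, end_date) AS (
  SELECT DATE '2020-12-04', DATE '2020-12-07' FROM DUAL  -- Friday to Monday
)
SELECT (end_date - start_date) * 5 / 7 AS naive_approx,  -- ~2.14
       ( TRUNC(end_date, 'IW') - TRUNC(start_date, 'IW') ) * 5 / 7
         + LEAST(end_date - TRUNC(end_date, 'IW'), 5)
         - LEAST(start_date - TRUNC(start_date, 'IW'), 5) AS exact_weekdays  -- 1
FROM rng;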

SQL - Selecting the rows with max timestamp by group

I have this table
SITE  S_ID  BAN  COUNT  P     V   TIMESTAMP
23    1     4    1500   0,05  50  10/05/17 09:58:22,609000000
23    3     3    800    0,05  50  10/05/17 09:58:22,736000000
23    2     3    3000   0,05  50  10/05/17 09:58:22,674000000
23    1     4    1500   0,05  50  10/05/17 15:57:04,079000000
23    1     4    1499   0,05  50  10/05/17 15:53:38,851000000
20    1     3    2000   0,1   50  10/05/17 10:57:07,172000000
20    2     3    2000   0,1   50  10/05/17 10:59:50,127000000
20    3     2    3000   0,1   50  10/05/17 11:00:39,051000000
20    4     2    3000   0,1   50  10/05/17 11:01:15,533000000
and I'm trying to obtain something like this:
SITE  S_ID  BAN  COUNT  P     V   TIMESTAMP
23    3     3    800    0,05  50  10/05/17 09:58:22,736000000
23    2     3    3000   0,05  50  10/05/17 09:58:22,674000000
23    1     4    1500   0,05  50  10/05/17 15:57:04,079000000
20    1     3    2000   0,1   50  10/05/17 10:57:07,172000000
20    2     3    2000   0,1   50  10/05/17 10:59:50,127000000
20    3     2    3000   0,1   50  10/05/17 11:00:39,051000000
20    4     2    3000   0,1   50  10/05/17 11:01:15,533000000
i.e. for every SITE and S_ID, the BAN, COUNT, P, V and TIMESTAMP of the row with the MAX(TIMESTAMP).
Here you go:
SELECT
    t.SITE
    , t.S_ID
    , t.BAN
    , t.COUNT
    , t.P
    , t.V
    , t.TimeStamp
FROM [Your Table Name] t
INNER JOIN
(
    -- latest timestamp per SITE/S_ID group
    SELECT
        SITE
        , S_ID
        , MAX(TIMESTAMP) AS MaxTimeStamp
    FROM [Your Table Name]
    GROUP BY
        SITE
        , S_ID
) AS MaxData ON
    MaxData.SITE = t.SITE
    AND MaxData.S_ID = t.S_ID
    AND MaxData.MaxTimeStamp = t.TimeStamp
I would make use of an analytic function to rank the timestamps within a group (partition), then only return the top-ranked items. I'm not sure of your partitioning, so I used the fields site, s_id, ban.
select * from
(with test_data (SITE, S_ID, BAN, s_COUNT, P, V, s_TIMESTAMP) as (
select 23, 1, 4, 1500, 0.05, 50, to_timestamp('10/05/17 09:58:22,609','MM/DD/YY HH24:MI:SS,FF') from dual union all
select 23, 3, 3, 800 , 0.05, 50, to_timestamp('10/05/17 09:58:22,736','MM/DD/YY HH24:MI:SS,FF') from dual union all
select 23, 2, 3, 3000, 0.05, 50, to_timestamp('10/05/17 09:58:22,674','MM/DD/YY HH24:MI:SS,FF') from dual union all
select 23, 1, 4, 1500, 0.05, 50, to_timestamp('10/05/17 15:57:04,079','MM/DD/YY HH24:MI:SS,FF') from dual union all
select 23, 1, 4, 1499, 0.05, 50, to_timestamp('10/05/17 15:53:38,851','MM/DD/YY HH24:MI:SS,FF') from dual union all
select 20, 1, 3, 2000, 0.1 , 50, to_timestamp('10/05/17 10:57:07,172','MM/DD/YY HH24:MI:SS,FF') from dual union all
select 20, 2, 3, 2000, 0.1 , 50, to_timestamp('10/05/17 10:59:50,127','MM/DD/YY HH24:MI:SS,FF') from dual union all
select 20, 3, 2, 3000, 0.1 , 50, to_timestamp('10/05/17 11:00:39,051','MM/DD/YY HH24:MI:SS,FF') from dual union all
select 20, 4, 2, 3000, 0.1 , 50, to_timestamp('10/05/17 11:01:15,533','MM/DD/YY HH24:MI:SS,FF') from dual
)
SELECT site, s_id, ban, s_count, p, v, s_timestamp,
rank() over (partition by site, s_id, ban order by s_timestamp desc) as r
FROM test_data
)
where r = 1
order by site, s_id
SITE|S_ID|BAN|S_COUNT|P|V|S_TIMESTAMP|R
20|1|3|2000|0.1|50|05-OCT-17 10.57.07.172000000 AM|1
20|2|3|2000|0.1|50|05-OCT-17 10.59.50.127000000 AM|1
20|3|2|3000|0.1|50|05-OCT-17 11.00.39.051000000 AM|1
20|4|2|3000|0.1|50|05-OCT-17 11.01.15.533000000 AM|1
23|1|4|1500|0.05|50|05-OCT-17 03.57.04.079000000 PM|1
23|2|3|3000|0.05|50|05-OCT-17 09.58.22.674000000 AM|1
23|3|3|800|0.05|50|05-OCT-17 09.58.22.736000000 AM|1
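Note that rank() returns more than one row per group when the top timestamps tie exactly; if exactly one row per group is required, row_number() is a drop-in replacement in the same over() clause:
row_number() over (partition by site, s_id, ban order by s_timestamp desc) as r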

How to get sum values weekly between two dates?

My table values are like...
Date    Amt   Cash  Money  Name
15-Jun  100   10    20     GUL
16-Jun  200   20    40     ABC
20-Jun  300   30    60     GUL
25-Jun  400   40    80     BCA
28-Jun  500   50    10     GUL
3-Jul   600   60    120    ABC
19-Jun  700   70    140    BCA
26-Jun  800   80    160    ABC
7-Jul   900   90    180    GUL
9-Jul   1000  100   200    ABC
I need to return the weekly sum of values between two dates in Oracle. My expected output:
Date               Amt   Cash  Money
13 to 19 June      1000  100   200
20 to 26 June      1500  150   300
27 June to 3 July  1100  110   130
4 to 10 July       1900  190   380
You can achieve this with a CASE statement, e.g.:
-- test data
with data (dat, val1, val2) as (
  select sysdate - 7, 12, 13 from dual union all
  select sysdate - 6, 32, 1 from dual union all
  select sysdate - 5, 52, 53 from dual union all
  select sysdate - 4, 2, 16 from dual union all
  select sysdate - 3, 72, 154 from dual
)
select -- build up your groups
       case
         when d.dat < to_date('28.09.2016', 'DD.MM.YYYY') then '<28.09.'
         when d.dat > to_date('30.09.2016', 'DD.MM.YYYY') then '>30.09.'
         else '28.-30.'
       end as grp,
       sum(val1),
       sum(val2)
from data d
group by case
           when d.dat < to_date('28.09.2016', 'DD.MM.YYYY') then '<28.09.'
           when d.dat > to_date('30.09.2016', 'DD.MM.YYYY') then '>30.09.'
           else '28.-30.'
         end;
-- output
grp      sum(val1)  sum(val2)
28.-30.  84         54
<28.09.  12         13
>30.09.  74         170
To group by calendar week use:
-- test data
with data (dat, val1, val2) as (
  select sysdate - 9, 12, 13 from dual union all
  select sysdate - 6, 32, 1 from dual union all
  select sysdate - 5, 52, 53 from dual union all
  select sysdate - 4, 2, 16 from dual union all
  select sysdate + 3, 72, 154 from dual
)
select TRUNC(dat, 'iw') || '-' || TRUNC(dat + 7, 'iw'),
       sum(val1),
       sum(val2)
from data
group by TRUNC(dat, 'iw') || '-' || TRUNC(dat + 7, 'iw');
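TRUNC(dat, 'iw') truncates a date to the start of its ISO week (Monday), so every date in the same Monday-to-Sunday week produces the same group key.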
The query below has the input dates (from and to) in the first factored subquery. Those can be made into bind variables, or whatever mechanism you want to use to pass these inputs to the query. Then I have the test data in the second factored subquery; you don't need that in your final solution.

I create all the needed weeks in the "weeks" factored subquery, and I use a left outer join so that weeks with no transactions will show 0 sums. Note that in the main query, where I do a join, the "date" column from the base table is not enclosed within any kind of function; this allows the use of an index on that column, which you should have if the table is very large, or if performance may be a concern for any other reason.

Note that the output is different from yours (missing the last row) because I input a to-date before the last row in the table. That is intentional; I wanted to make sure the query works correctly.

Also: I didn't use "date" or "week" as column names; that is a very poor practice. Reserved Oracle keywords should not be used as column names. I used "dt" and "wk" instead.
with
user_inputs ( from_dt, to_dt ) as (
select to_date('4-Jun-2016', 'dd-Mon-yyyy'), to_date('3-Jul-2016', 'dd-Mon-yyyy') from dual
),
test_data ( dt, amt, cash, money, name ) as (
select to_date('15-Jun-2016', 'dd-Mon-yyyy'), 100, 10, 20, 'GUL' from dual union all
select to_date('16-Jun-2016', 'dd-Mon-yyyy'), 200, 20, 40, 'ABC' from dual union all
select to_date('20-Jun-2016', 'dd-Mon-yyyy'), 300, 30, 60, 'GUL' from dual union all
select to_date('25-Jun-2016', 'dd-Mon-yyyy'), 400, 40, 80, 'BCA' from dual union all
select to_date('28-Jun-2016', 'dd-Mon-yyyy'), 500, 50, 10, 'GUL' from dual union all
select to_date( '3-Jul-2016', 'dd-Mon-yyyy'), 600, 60, 120, 'ABC' from dual union all
select to_date('19-Jun-2016', 'dd-Mon-yyyy'), 700, 70, 140, 'BCA' from dual union all
select to_date('26-Jun-2016', 'dd-Mon-yyyy'), 800, 80, 160, 'ABC' from dual union all
select to_date( '7-Jul-2016', 'dd-Mon-yyyy'), 900, 90, 180, 'GUL' from dual union all
select to_date( '9-Jul-2016', 'dd-Mon-yyyy'), 1000, 100, 200, 'ABC' from dual
),
weeks ( start_dt ) as (
select trunc(from_dt, 'iw') + 7 * (level - 1)
from user_inputs
connect by level <= 1 + (to_dt - trunc(from_dt, 'iw')) / 7
)
select to_char(w.start_dt, 'dd-Mon-yyyy') || ' - ' ||
to_char(w.start_dt + 6, 'dd-Mon-yyyy') as wk,
nvl(sum(t.amt), 0) as tot_amt, nvl(sum(t.cash), 0) as tot_cash,
nvl(sum(t.money), 0) as tot_money
from weeks w left outer join test_data t
on t.dt >= w.start_dt and t.dt < w.start_dt + 7
group by start_dt
order by start_dt
;
Output:
WK TOT_AMT TOT_CASH TOT_MONEY
-------------------------------------------- ---------- ---------- ----------
30-May-2016 - 05-Jun-2016 0 0 0
06-Jun-2016 - 12-Jun-2016 0 0 0
13-Jun-2016 - 19-Jun-2016 1000 100 200
20-Jun-2016 - 26-Jun-2016 1500 150 300
27-Jun-2016 - 03-Jul-2016 1100 110 130
You can try the query below. I chose 13-Jun-2016 as the starting date; you can choose it as per your requirement, up to any range of dates.
with t as (
  select dt,
         min(dt) over (partition by week) || ' to ' || max(dt) over (partition by week) week
  from (
    select to_date('13-Jun-2016', 'dd-Mon-yyyy') + (level - 1) dt,
           ceil(level / 7) week
    from dual
    connect by level <= 52
  )
)
select week,
       sum(amt),
       sum(cash),
       sum(money)
from (
  select your_table.*,
         t.week
  from your_table, t
  where trunc(to_date(your_table.dt, 'dd-Mon-yyyy')) = trunc(t.dt)
)
group by week;

SQL Query to designate value based upon finding a value within the previous n-rows

I have a process that records when an event occurs, and it is assumed that this event has an influence over the next six hours.
A table has data consisting of Date, Period, BooleanValue, where the Date is dd/mm/yyyy, the Period is a value 1-48 designating the half-hour period (there are 48 during a day), and the flag records whether an event has occurred (Impact or NotImpact). As an example, a single event occurred on 5th Jan 2011 (Period 3), and thus the table looks like:
Date        Period  Event
05/01/2011  1       NotImpact
05/01/2011  2       NotImpact
05/01/2011  3       IMPACT
05/01/2011  4       NotImpact
05/01/2011  5       NotImpact
In Excel, I created a fourth column and wrote a formula that looks for "IMPACT" in the 'Event' column and, if found, flags the next 12 half-hour periods as "IMPACT". If it doesn't find "IMPACT", the default value of "NotImpact" is used.
=IF(IF(ISERROR(COUNTIF(E2:E13,"IMPACT")),"NotImpact",COUNTIF(E2:E13,"DIS"))>0,"IMPACT","NotImpact")
Applying this formula to the fourth column results in:
Date        Period  Event      ImpactYesNo
05/01/2011  1       NotImpact  NotImpact
05/01/2011  2       NotImpact  NotImpact
05/01/2011  3       IMPACT     IMPACT
05/01/2011  4       NotImpact  IMPACT
05/01/2011  5       NotImpact  IMPACT
05/01/2011  6       NotImpact  IMPACT
05/01/2011  7       NotImpact  IMPACT
05/01/2011  8       NotImpact  IMPACT
05/01/2011  9       NotImpact  IMPACT
05/01/2011  10      NotImpact  IMPACT
05/01/2011  11      NotImpact  IMPACT
05/01/2011  12      NotImpact  IMPACT
05/01/2011  13      NotImpact  IMPACT
05/01/2011  14      NotImpact  IMPACT
05/01/2011  15      NotImpact  NotImpact
05/01/2011  16      NotImpact  NotImpact
05/01/2011  17      NotImpact  NotImpact
I would much prefer to have this table generated in SQL (the table resides on a SQL Server 2005 box). I have attempted to replicate the Excel approach there (and to write a function in Python), but with no success. I would be really grateful if someone could help me out or point me in the right direction.
If I can answer any other questions about what I am asking, please do not hesitate to pop them in the comments
DECLARE
    @Intervals INT,  -- How many periods per day
    @Lasts INT       -- Number of intervals affected
SELECT
    @Intervals = 48,
    @Lasts = 12      -- six hours = 12 half-hour periods
-- Use CTE to calculate a continuous sequence #
;WITH SeqImpact ([Date], Period, [Event], Seq)
AS (SELECT *, CONVERT(int, [Date]) * @Intervals + Period AS Seq FROM IMPACT)
SELECT
    [Date],
    Period,
    [Event],
    CASE (
        SELECT COUNT(*)
        FROM SeqImpact History
        WHERE History.Seq <= SeqImpact.Seq
          AND History.Seq > SeqImpact.Seq - @Lasts
          AND [Event] = 'Impact'
    )
    WHEN 0 THEN 'NotImpact'
    ELSE 'Impact'
    END AS ImpactYesNo
FROM SeqImpact
You can try this. It converts your date and period to datetime and then does some calculations on that. You have to test with your data to see if the performance is good enough or not.
set dateformat dmy
;with C as
(
select dateadd(minute, (Period - 1) * 30, cast([Date] as datetime)) as StartTime,
[Date],
Period,
[Event]
from YourTable
)
select C1.[Date],
C1.Period,
C1.[Event],
coalesce(C2.[Event], C1.[Event]) as ImpactYesNo
from C as C1
left outer join (select StartTime,
dateadd(hour, 6, StartTime) as EndTime,
[Event]
from C
where [Event] = 'IMPACT') as C2
on C1.StartTime >= C2.StartTime and
C1.StartTime < C2.EndTime
order by C1.StartTime
https://data.stackexchange.com/stackoverflow/q/122443/
declare @T table
(
  [Date] varchar(10),
  Period int,
  [Event] varchar(9)
);
-- shortened to 1 hour periods
insert into @T
select '20110501', 1, 'NotImpact' union all
select '20110501', 2, 'NotImpact' union all
select '20110501', 3, 'NotImpact' union all
select '20110501', 4, 'NotImpact' union all
select '20110501', 5, 'NotImpact' union all
select '20110501', 6, 'NotImpact' union all
select '20110501', 7, 'NotImpact' union all
select '20110501', 8, 'NotImpact' union all
select '20110501', 9, 'NotImpact' union all
select '20110501', 10, 'NotImpact' union all
select '20110501', 11, 'NotImpact' union all
select '20110501', 12, 'NotImpact' union all
select '20110501', 13, 'NotImpact' union all
select '20110501', 14, 'NotImpact' union all
select '20110501', 15, 'IMPACT' union all
select '20110501', 16, 'NotImpact' union all
select '20110501', 17, 'NotImpact' union all
select '20110501', 18, 'NotImpact' union all
select '20110501', 19, 'NotImpact' union all
select '20110501', 20, 'NotImpact' union all
select '20110501', 21, 'NotImpact' union all
select '20110501', 22, 'NotImpact' union all
select '20110501', 23, 'NotImpact' union all
select '20110501', 24, 'NotImpact' union all
select '20110601', 1, 'NotImpact' union all
select '20110601', 2, 'NotImpact' union all
select '20110601', 3, 'NotImpact' union all
select '20110601', 4, 'NotImpact' union all
select '20110601', 5, 'NotImpact' union all
select '20110601', 6, 'NotImpact' union all
select '20110601', 7, 'NotImpact' union all
select '20110601', 8, 'NotImpact' union all
select '20110601', 9, 'NotImpact' union all
select '20110601', 10, 'NotImpact' union all
select '20110601', 11, 'NotImpact' union all
select '20110601', 12, 'NotImpact' union all
select '20110601', 13, 'NotImpact' union all
select '20110601', 14, 'NotImpact' union all
select '20110601', 15, 'NotImpact' union all
select '20110601', 16, 'NotImpact' union all
select '20110601', 17, 'NotImpact' union all
select '20110601', 18, 'NotImpact' union all
select '20110601', 19, 'NotImpact' union all
select '20110601', 20, 'NotImpact' union all
select '20110601', 21, 'NotImpact' union all
select '20110601', 22, 'NotImpact' union all
select '20110601', 23, 'NotImpact' union all
select '20110601', 24, 'NotImpact';
with cte as
(
select [Date], Period, [Event],
ROW_NUMBER() OVER (ORDER BY [Date], Period) AS rn
from @T
)
SELECT
T1.[Date], T1.Period,
ISNULL(T2.[Event], T1.[Event]),
T1.rn , T2.rn
FROM
cte T1
LEFT JOIN
cte T2 ON T1.rn BETWEEN T2.rn AND T2.rn + 12 AND T2.[Event] = 'Impact'
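The join condition T1.rn BETWEEN T2.rn AND T2.rn + 12 marks each IMPACT row itself plus the 12 rows that follow it; adjust the constant 12 to however many periods your six-hour window spans (12 half-hour periods in the original table).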