SQL min() max() function with exception - sql

Here is my simplified code:
-- Per user and code: first and last day seen within the last 15 days.
-- NOTE: this is a simplified example; the join conditions between a, b and c
-- are not shown. Without them the comma-separated FROM list is a cross join.
SELECT
    a.user_id    AS User_ID,
    min(b.a_day) AS Date_from,
    max(b.a_day) AS Date_to,
    c.code       AS ID
FROM a, b, c
WHERE
    -- The DATEADD function name was missing: "(day, -15, getdate())" on its
    -- own is not a valid expression.
    b.a_day > dateadd(day, -15, getdate())
GROUP BY
    a.user_id,
    c.code
Query gives the following output:
User ID date_from date_to id
1234567 2016-06-13 2016-06-13 B
1234567 2016-06-17 2016-06-17 A
1234567 2016-06-18 2016-06-18 A
1234567 2016-06-19 2016-06-19 A
1234567 2016-06-20 2016-06-20 A
1234567 2016-06-21 2016-06-21 B
I need something like this:
User ID date_from date_to id
1234567 2016-06-13 2016-06-13 B
1234567 2016-06-17 2016-06-20 A
1234567 2016-06-21 2016-06-21 B
When I use min() and max() function with group by, it aggregates fine for all records with ID=A but there should be exception for ID=B. I have to aggregate only dates with the same ID day after day.
Any ideas?
Thanks in advance.

You can combine these rows using the following strategy:
Determine where a new grouping begins.
Do a cumulative sum of the flag from (1) to identify each grouping.
Then do the aggregation.
This looks like:
-- Collapse rows of the same id whose date ranges overlap or follow each other
-- day after day into a single (date_from, date_to) range.
-- If t holds more than one user, add the user column to both PARTITION BY
-- clauses and to the GROUP BY.
select min(date_from) as date_from, max(date_to) as date_to, id
from (select t.*,
             -- running count of "group starts" = group number within each id
             sum(isNewGroup) over (partition by id order by date_from) as grp
      from (select t.*,
                   -- A row continues the previous group when it starts no later
                   -- than the day AFTER the previous row ended. Comparing with
                   -- the bare previous date_to would never match consecutive
                   -- single-day rows (06-17 >= 06-18 is false).
                   -- For the first row LAG is NULL, so the ELSE branch opens a group.
                   (case when dateadd(day, 1, lag(date_to) over (partition by id order by date_from)) >= date_from
                         then 0 else 1
                    end) as isNewGroup
            from t
           ) t
     ) t
group by id, grp;

This is my solution for getting the min/max of continuous dates.
Try running the SQL in Oracle.
Is it helpful for you?
-- Inline sample data: one row per (DATE_FROM, DATE_TO, ID).
WITH TEST_DATA AS (
SELECT TO_DATE('20160613', 'YYYYMMDD') AS DATE_FROM, TO_DATE('20160613', 'YYYYMMDD') AS DATE_TO, 'B' AS ID FROM DUAL
UNION ALL
SELECT TO_DATE('20160617', 'YYYYMMDD') AS DATE_FROM, TO_DATE('20160617', 'YYYYMMDD') AS DATE_TO, 'A' AS ID FROM DUAL
UNION ALL
SELECT TO_DATE('20160618', 'YYYYMMDD') AS DATE_FROM, TO_DATE('20160618', 'YYYYMMDD') AS DATE_TO, 'A' AS ID FROM DUAL
UNION ALL
SELECT TO_DATE('20160619', 'YYYYMMDD') AS DATE_FROM, TO_DATE('20160619', 'YYYYMMDD') AS DATE_TO, 'A' AS ID FROM DUAL
UNION ALL
SELECT TO_DATE('20160620', 'YYYYMMDD') AS DATE_FROM, TO_DATE('20160620', 'YYYYMMDD') AS DATE_TO, 'A' AS ID FROM DUAL
UNION ALL
SELECT TO_DATE('20160621', 'YYYYMMDD') AS DATE_FROM, TO_DATE('20160621', 'YYYYMMDD') AS DATE_TO, 'B' AS ID FROM DUAL
)
-- One output row per GROUP_KEY: earliest start and latest end of the rows in that chain.
SELECT
MIN(ID) AS ID,
MIN(DATE_FROM) AS DATE_FROM,
MAX(DATE_TO) AS DATE_TO
FROM (
SELECT
-- GROUP_KEY = start date and ID of the chain's root row, concatenated as text
-- (the date is converted to a string implicitly).
CONNECT_BY_ROOT(DATE_FROM) || CONNECT_BY_ROOT(ID) AS GROUP_KEY,
-- A row can be reached from several roots; number its occurrences so that the
-- one with the highest LEVEL (the longest chain leading to it) gets 1.
ROW_NUMBER() OVER(PARTITION BY ID, DATE_FROM, DATE_TO ORDER BY ID, LEVEL DESC) AS DISTINCT_FLG,
DATE_FROM,
DATE_TO,
ID
FROM
TEST_DATA
-- Keep only rows whose ID equals the ID of the chain's root row.
WHERE ID = CONNECT_BY_ROOT(ID)
-- No START WITH clause, so every row acts as a root; a child row is one that
-- starts the day after its parent row ends.
CONNECT BY DATE_FROM = PRIOR DATE_TO + 1
ORDER BY DATE_FROM
)
WHERE
-- Keep a single occurrence of each row: the one numbered first above.
DISTINCT_FLG = 1
GROUP BY
GROUP_KEY

Here is mysql solution:
-- MySQL: number runs of consecutive days per id with user variables, then
-- aggregate each run to its first and last day.
-- User variables are written @name; "#" starts a comment in MySQL, so the
-- "#name" spelling cannot work.
-- NOTE: this relies on the select-list expressions being evaluated left to
-- right for each row in the inner ORDER BY order, which MySQL does not
-- guarantee; on MySQL 8+ prefer window functions (LAG / SUM OVER).
select grp, min(f) f, max(t) t, i
from
(
    select x.*
        -- same id as the previous row and exactly one day later: keep the
        -- current group number, otherwise start a new group
        ,case when @lastu = i and datediff(f, @lastf) = 1 then @gr := @gr else @gr := @gr + 1 end grp
        -- remember this row's id and start date for the next row
        ,@lastu := i
        ,@lastf := f
    from
    (
        select '2016-06-13' f,'2016-06-13' t ,'B' i union all
        select '2016-06-17','2016-06-17','A' union all
        select '2016-06-18','2016-06-18','A' union all
        select '2016-06-19','2016-06-19','A' union all
        select '2016-06-20','2016-06-20','A' union all
        select '2016-06-21','2016-06-21','B'
        order by i, f, t
    ) x
    -- initialise the variables once
    , (select @gr := 0, @lastu := '', @lastf := '' ) b
) xx
group by grp, i

Related

create time range with 2 columns date_time

The problem I am facing is how to find distinct time periods from multiple time periods with overlap in Teradata ANSI SQL.
For example, the attached tables contain multiple overlapping time periods, how can I combine those time periods into 3 unique time periods in Teradata SQL???
I think I can do it in python with the loop function, but not sure how to do it in SQL
ID
Start Date
End Date
001
2005-01-01
2006-01-01
001
2005-01-01
2007-01-01
001
2008-01-01
2008-06-01
001
2008-04-01
2008-12-01
001
2010-01-01
2010-05-01
001
2010-04-01
2010-12-01
001
2010-11-01
2012-01-01
My expected result is:
ID
start_Date
end_date
001
2005-01-01
2007-01-01
001
2008-01-01
2008-12-01
001
2010-01-01
2012-01-01
From Oracle 12, you can use MATCH_RECOGNIZE to perform a row-by-row comparison:
-- Merge overlapping ranges per id by matching runs of rows with MATCH_RECOGNIZE.
SELECT *
FROM table_name
MATCH_RECOGNIZE(
PARTITION BY id
ORDER BY start_date
MEASURES
-- start of the first row in the match and the latest end over all rows in the match
FIRST(start_date) AS start_date,
MAX(end_date) AS end_date
ONE ROW PER MATCH
-- a match is zero or more "overlapping_ranges" rows followed by one closing row;
-- last_range has no DEFINE entry, so any row can fill it
PATTERN (overlapping_ranges* last_range)
-- a row is "overlapping" when the next row starts on or before the latest
-- end_date seen so far in the match
DEFINE overlapping_ranges AS NEXT(start_date) <= MAX(end_date)
)
Which, for the sample data:
-- Sample data from the question: seven ranges for id '001' that overlap into three periods.
CREATE TABLE table_name (ID, Start_Date, End_Date) AS
SELECT '001', DATE '2005-01-01', DATE '2006-01-01' FROM DUAL UNION ALL
SELECT '001', DATE '2005-01-01', DATE '2007-01-01' FROM DUAL UNION ALL
SELECT '001', DATE '2008-01-01', DATE '2008-06-01' FROM DUAL UNION ALL
SELECT '001', DATE '2008-04-01', DATE '2008-12-01' FROM DUAL UNION ALL
SELECT '001', DATE '2010-01-01', DATE '2010-05-01' FROM DUAL UNION ALL
SELECT '001', DATE '2010-04-01', DATE '2010-12-01' FROM DUAL UNION ALL
SELECT '001', DATE '2010-11-01', DATE '2012-01-01' FROM DUAL;
Outputs:
ID
START_DATE
END_DATE
001
2005-01-01 00:00:00
2007-01-01 00:00:00
001
2008-01-01 00:00:00
2008-12-01 00:00:00
001
2010-01-01 00:00:00
2012-01-01 00:00:00
db<>fiddle here
Update: Alternative query
-- Merge overlapping ranges by counting how many ranges are open at each boundary date.
SELECT id,
start_date,
end_date
FROM (
SELECT id,
dt,
-- Running sum over the kept boundary rows: each kept start adds 1 and each
-- kept end adds 0, so a start and its matching end share the same grp value.
SUM(cnt) OVER (PARTITION BY id ORDER BY dt) AS grp,
cnt
FROM (
SELECT ID,
dt,
-- Running SUM(type) is the number of ranges open after this row.
-- Multiplied by type: on a start row it is that count (1 = this start opened
-- a new period); on an end row it is the negated count (0 = nothing is left open).
SUM(type) OVER (PARTITION BY id ORDER BY dt, ROWNUM) * type AS cnt
FROM table_name
-- Split every range into two rows: its start (type 1) and its end (type -1).
UNPIVOT (dt FOR type IN (start_date AS 1, end_date AS -1))
)
-- Keep only the boundaries that open (1) or close (0) a merged period.
WHERE cnt IN (1,0)
)
-- Fold the two boundary rows of each (id, grp) back into one row.
PIVOT (MAX(dt) FOR cnt IN (1 AS start_date, 0 AS end_date))
Or, an equivalent that does not use UNPIVOT, PIVOT or ROWNUM and works in both Oracle and PostgreSQL:
-- Same boundary-counting approach as a plain UNION ALL / GROUP BY query.
SELECT id,
-- The row with cnt = 1 carries the period start, the row with cnt = 0 its end.
MAX(CASE cnt WHEN 1 THEN dt END) AS start_date,
MAX(CASE cnt WHEN 0 THEN dt END) AS end_date
FROM (
SELECT id,
dt,
-- Running sum over the kept boundary rows: a period's start (1) and end (0)
-- end up with the same grp value.
SUM(cnt) OVER (PARTITION BY id ORDER BY dt) AS grp,
cnt
FROM (
SELECT ID,
dt,
-- Running SUM(type) is the number of ranges open after this row; multiplied by
-- type it is 1 on a start that opens a period and 0 on an end that closes one.
SUM(type) OVER (PARTITION BY id ORDER BY dt, rn) * type AS cnt
FROM (
SELECT r.*,
-- Deterministic order of boundaries; on equal dates starts (type 1) sort
-- before ends (type -1).
ROW_NUMBER() OVER (PARTITION BY id ORDER BY dt ASC, type DESC) AS rn
FROM (
-- One row per range start (type 1) and one per range end (type -1).
SELECT id, 1 AS type, start_date AS dt FROM table_name
UNION ALL
SELECT id, -1 AS type, end_date AS dt FROM table_name
) r
) p
) s
-- Keep only the boundaries that open (1) or close (0) a merged period.
WHERE cnt IN (1,0)
) t
GROUP BY id, grp
Update 2: Another Alternative
-- Merge overlapping ranges per id in three steps.
WITH with_prev_max AS (
    -- latest end_date among all earlier rows of the same id
    SELECT src.*,
           MAX(end_date) OVER (
               PARTITION BY id
               ORDER BY start_date
               ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
           ) AS prev_max
    FROM table_name src
),
numbered AS (
    -- a row opens a new period unless it starts on or before prev_max;
    -- the running count of openings is the period number
    SELECT w.*,
           SUM(CASE WHEN start_date <= prev_max THEN 0 ELSE 1 END)
               OVER (PARTITION BY id ORDER BY start_date) AS grp
    FROM with_prev_max w
)
-- one row per period: earliest start and latest end
SELECT id,
       MIN(start_date) AS start_date,
       MAX(end_date)   AS end_date
FROM numbered
GROUP BY id, grp
db<>fiddle Oracle PostgreSQL
This is a gaps and islands problem. Try this:
-- Gaps and islands: flag the rows that open a new island, number the islands
-- with a running sum, then aggregate each island.
with u as
(select ID, start_date, end_date,
        case
          -- Compare against the latest end_date of ALL earlier rows, not only
          -- the immediately preceding one: a short range nested inside an
          -- earlier, longer range would otherwise hide that the longer range
          -- is still open and split the island.
          -- For the first row the frame is empty (NULL), so the ELSE branch applies.
          when start_date <= max(end_date) over(partition by ID
                                                order by start_date, end_date
                                                rows between unbounded preceding and 1 preceding) then 0
          else 1 end as grp
 from table_name),
v as
(select ID, start_date, end_date,
        -- running count of island openings = island number
        sum(grp) over(partition by ID order by start_date, end_date) as island
 from u)
select ID, min(start_date) as start_Date, max(end_date) as end_date
from v
group by ID, island;
Fiddle
Basically you can identify "islands" by comparing start_date of current row to end_date of previous row (ordered by start_date, end_date), if it precedes it then it's the same island. Then you can do a rolling sum() to get the island numbers. Finally select min(start_date) and max(end_date) from each island to get the desired output.
This may work with a little bit of change in the function; I tried it in DBeaver:
-- For each calendar year of Start_Date, keep the row(s) with the latest End_Date.
-- NOTE(review): the row's own Start_Date is returned, not the earliest
-- Start_Date of the year, and rows are not partitioned by ID — confirm this
-- matches the expected output before relying on it.
select ID,Start_Date,End_Date
from
(
select t.*,
-- rank 1 = latest End_Date among rows whose Start_Date falls in the same year (ties share the rank)
dense_rank () over(partition by extract (year from Start_Date) order BY End_Date desc) drnk
from testing_123 t
) temp
where temp.drnk = 1
ORDER BY Start_Date;
Try this
-- Bucket ranges by ID and the year of Start_Date: take the earliest start in
-- each bucket and the latest end among that ID's rows starting in the same year.
WITH a as (
    -- earliest Start_Date per ID and start year
    SELECT
        ID,
        LEFT(Start_Date, 4) as Year,
        MIN(Start_Date) as New_Start_Date
    FROM
        TAB1
    GROUP BY
        ID,
        LEFT(Start_Date, 4)
), b as (
    -- attach every End_Date of the same ID whose range starts in that year
    SELECT
        a.ID,
        Year,
        New_Start_Date,
        End_Date
    FROM
        a
    LEFT JOIN
        TAB1
        -- The ID must be part of the join; matching on the year alone mixes
        -- in end dates that belong to other IDs.
        ON a.ID = TAB1.ID
        AND LEFT(a.New_Start_Date, 4) = LEFT(TAB1.Start_Date, 4)
)
select
    ID,
    New_Start_Date as Start_Date,
    MAX(End_Date)
from
    b
GROUP BY
    ID,
    New_Start_Date;
Example: https://dbfiddle.uk/?rdbms=mysql_8.0&fiddle=97f91b68c635aebfb752538cdd752ace

sql oracle group by on dates with possibilities of null values

I have a table with emplid and end_date columns. I want from all emplids the max end_dates. If at least one end_date is null, I want to have the null value as max. So in this example:
emplid end_date
1 05/04/2019
1 05/10/2019
1 null
2 05/04/2019
2 05/10/2019
I want as result:
emplid end_date
1 null
2 05/10/2019
I tried something like
-- Asker's attempt: map NULL end dates to a far-future placeholder, take the
-- max, and turn the placeholder back into NULL.
select emplid,
CASE
WHEN MAX(NVL(end_Date,'01/01/3000'))='01/01/3000' THEN null
-- end_date here is neither aggregated nor listed in GROUP BY
ELSE end_date
END as end_dt
from people
group by emplid
then I get a group-by error.
Maybe it is very easy, but I don't figure out how to get properly what I want.
-- Latest date per id, or NULL as soon as any of the id's dates is NULL.
with s(id, dt) as (
    select 1, to_date('05/04/2019', 'dd/mm/yyyy') from dual union all
    select 1, to_date('05/10/2019', 'dd/mm/yyyy') from dual union all
    select 1, null from dual union all
    select 2, to_date('05/04/2019', 'dd/mm/yyyy') from dual union all
    select 2, to_date('05/10/2019', 'dd/mm/yyyy') from dual
)
select id,
       -- count(dt) skips NULLs, count(*) does not: they agree only when no dt is NULL
       case when count(dt) = count(*) then max(dt) end as max_dt
from s
group by id;
ID MAX_DT
---------- -----------------------------
1
2 2019-10-05 00:00:00
I would simply do:
-- Latest end_date per employee; NULL when any of the employee's end dates is NULL.
select emplid,
       case
           -- the non-NULL count equals the row count only if no end_date is NULL
           when count(end_date) = count(*) then max(end_date)
           else null
       end as max_end_date
from t
group by emplid;
There is no reason to introduce a "magic" maximum value (even if it is correct).
The first expression in the case is simply asking "do the number of non-NULL end-date values match the number of rows".
Try this
-- Latest END_DATE per employee, with NULL treated as "later than everything".
-- NULLs are replaced by a far-future placeholder before MAX and the
-- placeholder is turned back into NULL afterwards.
-- Assumes END_DATE is a DATE column. The placeholder is written as an ANSI
-- DATE literal so the comparison does not depend on the session's
-- NLS_DATE_FORMAT, unlike the string '01/01/3000'.
SELECT
    EMPLID,
    CASE WHEN END_DATE = DATE '3000-01-01' THEN NULL ELSE END_DATE END AS END_DT
FROM
(
    SELECT EMPLID, MAX(END_DATE) AS END_DATE FROM
    (
        SELECT EMPLID, NVL(END_DATE, DATE '3000-01-01') AS END_DATE FROM PEOPLE
    )
    GROUP BY EMPLID
);
CASE does not go with GROUP BY; you have to get the max value using GROUP BY first and then evaluate the null values. Try the query below.
-- Latest edate per empid, with NULL treated as "later than everything".
-- The placeholder is an ANSI DATE literal: the string '01-DEC-3000' is
-- converted implicitly and only parses under a matching NLS_DATE_FORMAT and
-- NLS_DATE_LANGUAGE.
select empid,
       -- the inner MAX(NVL(...)) never returns NULL, so no NVL is needed here
       case when edate = date '3000-12-01' then null else edate end end_dt
from (
    select empid, max(nvl(edate, date '3000-12-01')) edate
    from
        (select 1 empid, sysdate-100 edate from dual union all
         select 1 empid, sysdate-10 edate from dual union all
         select 1 empid, null edate from dual union all
         select 2 empid, sysdate-105 edate from dual union all
         select 2 empid, sysdate-1 edate from dual ) datad
    group by empid);

Query to find changes in a row wrt previous row in SQL query

I have a table per_all_Assignments_f with date_from and date_to and following column structure :
PERSON_ID DATE_FROM DATE_TO GRADE
--------- ------------ ----------- -----
12 01-Jan-2018 28-Feb-2018 c
12 01-Mar-2018 29-Mar-2018 a
12 30-Mar-2018 31-dec-4712 b
13 01-jan-2018 31-dec-4712 c
In the above table, I have to retrieve the latest grade change i.e. for person_id '12', I have to retrieve both record rows : 30-mar-2018 to 31 dec 4712 being the latest and one prior row. What function can i use for this ?
solved by :
-- For person 12: each assignment row together with the previous row's grade
-- and start date, plus a row number.
SELECT person_id,
asg.grade_id,
-- grade of the previous row (by start_date) for the same person
lag(asg.grade_id) Over (Partition By person_ID Order By start_date) as prev_ppg_line1,
-- start date of that previous row
lag(start_date) Over (Partition By person_ID Order By start_date)
as prev_ppg_effective_start_date,
start_date,
-- NOTE(review): ordered by effective_start_date while the LAG calls above use
-- start_date — confirm both columns exist and that the difference is intended
row_Number() Over (Partition By person_ID Order By effective_start_date) as rn
FROM asg_table asg
WHERE person_id = 12;
This query will fetch 3 rows with all the previous changes. I want to fetch the latest change only without using max on effective start date
You can use row_number and lead analytic functions together inside the subquery as :
-- Latest grade change per person: the two most recent rows, each kept only
-- when its grade differs from the row that precedes it in time.
select person_id, date_From, date_to, grade
from
(
    -- inline sample data standing in for the real table
    with per_all_Assignments_f(person_id, date_From, date_to, grade) as
    (
        select 12,date'2018-01-01',date'2018-02-28','c' from dual union all
        select 12,date'2018-03-01',date'2018-03-29','a' from dual union all
        select 12,date'2018-03-30',date'4172-12-31','b' from dual union all
        select 13,date'2018-01-01',date'4172-12-31','c' from dual
    )
    select t.*,
           -- Partition by person_id so each row is compared with, and numbered
           -- among, rows of the same person only. Without it the windows run
           -- across all persons.
           -- In descending date order LEAD returns the chronologically previous grade.
           lead(grade) over (partition by person_id order by date_From desc) as ld,
           row_number() over (partition by person_id order by date_From desc) as rn
    from per_all_Assignments_f t
)
where rn <= 2
-- a person with a single row has ld = NULL and is filtered out here
and grade != ld
order by person_id, rn desc;
PERSON_ID DATE_FROM DATE_TO GRADE
---------- ----------- ---------- -------
12 01.03.2018 29.03.2018 a
12 30.03.2018 31.12.4172 b
Rextester Demo
Seems like you just want all with a row_number() of 1 or 2 partitioned by the person and ordered by the beginning descending.
-- The two most recent assignment rows of every person.
WITH ranked AS (
    -- number each person's rows from newest to oldest
    SELECT paf.person_id,
           paf.date_from,
           paf.date_to,
           paf.grade,
           row_number() OVER (PARTITION BY paf.person_id
                              ORDER BY paf.date_from DESC) AS rn
    FROM per_all_assignments_f paf
)
SELECT person_id,
       date_from,
       date_to,
       grade
FROM ranked
WHERE rn <= 2
ORDER BY person_id,
         date_from DESC;

select periods from date

I have a problem with choosing from the list of absences, those that follow one another and grouping them into periods.
date_from (data_od) date_to(data_do)
--------------------------
18/08/01 - 18/08/15
18/08/16 - 18/08/20
18/08/21 - 18/08/31
18/09/01 - 18/09/08
18/05/01 - 18/05/31
18/06/01 - 18/06/30
18/03/01 - 18/03/18
18/02/14 - 18/02/28
above is a list of absences, and the result of which should be a table:
date_from (data_od) date_to(data_do)
--------------------------
18/08/01 18/09/08
18/05/01 18/06/30
18/02/14 18/03/18
For now, I did something like this, but I only research in twos :(
-- Asker's attempt: pair each absence of employee 1067 with the absence that
-- starts the day after it ends. Only two rows are ever joined, so a chain of
-- three or more absences yields overlapping pairs instead of one period.
SELECT u1.data_od,u2.data_do
FROM l_absencje u1 CROSS APPLY
(SELECT * FROM l_absencje labs
WHERE labs.prac_id=u1.prac_id AND
-- the follower starts on the day after u1 ends (time of day ignored)
TRUNC(labs.data_od) = TRUNC(u1.data_do)+1
-- if several followers qualify, take the one with the highest id
ORDER BY id DESC FETCH FIRST 1 ROWS ONLY
) u2 where u1.prac_id=1067 ;
And give me that:
18/08/01 18/08/20 bad
18/08/16 18/08/31 bad
18/08/21 18/09/08 bad
18/05/01 18/06/30 good
18/02/14 18/03/18 good
You can use a combination of the LAG(), LEAD() and LAST_VALUE() analytic functions:
SQL Fiddle
Oracle 11g R2 Schema Setup:
-- Sample data from the question: eight absences forming three back-to-back periods.
CREATE TABLE absences ( date_from, date_to ) AS
SELECT DATE '2018-08-01', DATE '2018-08-15' FROM DUAL UNION ALL
SELECT DATE '2018-08-16', DATE '2018-08-20' FROM DUAL UNION ALL
SELECT DATE '2018-08-21', DATE '2018-08-31' FROM DUAL UNION ALL
SELECT DATE '2018-09-01', DATE '2018-09-08' FROM DUAL UNION ALL
SELECT DATE '2018-05-01', DATE '2018-05-31' FROM DUAL UNION ALL
SELECT DATE '2018-06-01', DATE '2018-06-30' FROM DUAL UNION ALL
SELECT DATE '2018-03-01', DATE '2018-03-18' FROM DUAL UNION ALL
SELECT DATE '2018-02-14', DATE '2018-02-28' FROM DUAL;
Query 1:
-- Merge absences that follow one another day after day into single periods.
SELECT *
FROM (
    SELECT CASE
               WHEN date_to IS NOT NULL
               -- On a row that closes a period, pick up the most recent
               -- period start. The window is ordered by the original start
               -- date: ordering by ROWNUM would depend on the unspecified row
               -- order of the unordered subquery below.
               THEN LAST_VALUE( date_from ) IGNORE NULLS
                        OVER( ORDER BY sort_key )
           END AS date_from,
           date_to
    FROM (
        SELECT -- blank out the start when the row begins the day after the
               -- previous absence ended (it continues a period)
               CASE date_from
                   WHEN LAG( date_to ) OVER ( ORDER BY date_to )
                            + INTERVAL '1' DAY
                   THEN NULL
                   ELSE date_from
               END AS date_from,
               -- blank out the end when the next absence begins the day after
               -- this one ends (the period goes on)
               CASE date_to
                   WHEN LEAD( date_from ) OVER ( ORDER BY date_from )
                            - INTERVAL '1' DAY
                   THEN NULL
                   ELSE date_to
               END AS date_to,
               -- original start date, kept to order the outer window
               date_from AS sort_key
        FROM absences
    )
)
WHERE date_from IS NOT NULL
AND date_to IS NOT NULL
ORDER BY date_from
Results:
| DATE_FROM | DATE_TO |
|----------------------|----------------------|
| 2018-02-14T00:00:00Z | 2018-03-18T00:00:00Z |
| 2018-05-01T00:00:00Z | 2018-06-30T00:00:00Z |
| 2018-08-01T00:00:00Z | 2018-09-08T00:00:00Z |

Finding a 'run' of rows from an ordered result set

I'm trying to figure out a way of identifying a "run" of results (successive rows, in order) that meet some condition. Currently, I'm ordering a result set, and scanning by eye for particular patterns. Here's an example:
-- Orders within the date window, oldest first.
-- NOTE(review): ".." is a placeholder left by the author, presumably for the
-- TO_DATE format mask; the query is not runnable as written.
SELECT the_date, name
FROM orders
WHERE
the_date BETWEEN
to_date('2013-09-18',..) AND
to_date('2013-09-22', ..)
ORDER BY the_date
--------------------------------------
the_date | name
--------------------------------------
2013-09-18 00:00:01 | John
--------------------------------------
2013-09-19 00:00:01 | James
--------------------------------------
2013-09-20 00:00:01 | John
--------------------------------------
2013-09-20 00:00:02 | John
--------------------------------------
2013-09-20 00:00:03 | John
--------------------------------------
2013-09-20 00:00:04 | John
--------------------------------------
2013-09-21 16:00:01 | Jennifer
--------------------------------------
What I want to extract from this result set is all the rows attributed to John on 2013-09-20. Generally what I'm looking for is a run of results from the same name, in a row, >= 3. I'm using Oracle 11, but I'm interested to know if this can be achieved with strict SQL, or if some kind of analytical function must be used.
You need multiple nested window functions:
-- Return the rows that belong to a run of at least 3 consecutive rows
-- (ordered by the_date) with the same name.
SELECT *
FROM
(
SELECT the_date, name, grp,
-- size of the run this row belongs to
COUNT(*) OVER (PARTITION BY grp) AS cnt
FROM
(
SELECT the_date, name,
-- running count of run starts = run number
SUM(flag) OVER (ORDER BY the_date) AS grp
FROM
(
SELECT the_date, name,
-- 1 when the name differs from the previous row's name (or there is no previous row), else 0
CASE WHEN LAG(name) OVER (ORDER BY the_date) = name THEN 0 ELSE 1 END AS flag
FROM orders
WHERE
-- ".." is the author's placeholder for the omitted TO_DATE argument
the_date BETWEEN
TO_DATE('2013-09-18',..) AND
TO_DATE('2013-09-22', ..)
) dt
) dt
) dt
WHERE cnt >= 3
ORDER BY the_date
Try this
-- Return the orders of every (name, calendar day) pair that has at least 3
-- orders inside the date window.
-- NOTE(review): rows are grouped by name and day, not by consecutive position;
-- three same-day rows of one name count even when other names fall between them.
-- The ORDERS CTE below is inline sample data from the question.
WITH ORDERS
AS (SELECT
TO_DATE ( '2013-09-18 00:00:01',
'YYYY-MM-DD HH24:MI:SS' )
AS THE_DATE,
'John' AS NAME
FROM
DUAL
UNION ALL
SELECT
TO_DATE ( '2013-09-19 00:00:01',
'YYYY-MM-DD HH24:MI:SS' )
AS THE_DATE,
'James' AS NAME
FROM
DUAL
UNION ALL
SELECT
TO_DATE ( '2013-09-20 00:00:01',
'YYYY-MM-DD HH24:MI:SS' )
AS THE_DATE,
'John' AS NAME
FROM
DUAL
UNION ALL
SELECT
TO_DATE ( '2013-09-20 00:00:02',
'YYYY-MM-DD HH24:MI:SS' )
AS THE_DATE,
'John' AS NAME
FROM
DUAL
UNION ALL
SELECT
TO_DATE ( '2013-09-20 00:00:03',
'YYYY-MM-DD HH24:MI:SS' )
AS THE_DATE,
'John' AS NAME
FROM
DUAL
UNION ALL
SELECT
TO_DATE ( '2013-09-20 00:00:04',
'YYYY-MM-DD HH24:MI:SS' )
AS THE_DATE,
'John' AS NAME
FROM
DUAL
UNION ALL
SELECT
TO_DATE ( '2013-09-21 16:00:01',
'YYYY-MM-DD HH24:MI:SS' )
AS THE_DATE,
'Jennifer' AS NAME
FROM
DUAL)
SELECT
B.*
FROM
-- A: the (day, name) pairs with 3 or more orders in the window
(SELECT
TRUNC ( THE_DATE ) THE_DATE,
NAME,
COUNT ( * )
FROM
ORDERS
WHERE
THE_DATE BETWEEN TRUNC ( TO_DATE ( '2013-09-18',
'YYYY-MM-DD' ) )
AND TRUNC ( TO_DATE ( '2013-09-22',
'YYYY-MM-DD' ) )
GROUP BY
TRUNC ( THE_DATE ),
NAME
HAVING
COUNT ( * ) >= 3) A,
-- B: the original rows, joined back on name and calendar day
ORDERS B
WHERE
A.NAME = B.NAME
AND TRUNC ( A.THE_DATE ) = TRUNC ( B.THE_DATE );
OUTPUT
9/20/2013 12:00:01 AM John
9/20/2013 12:00:02 AM John
9/20/2013 12:00:03 AM John
9/20/2013 12:00:04 AM John