Oracle recursive CTE to check consecutive date rows

Oracle recursive CTE to check consecutive date rows - sql

I am trying to use a recursive common table expression together with the LEAD analytic function to find runs of more than one ('N>1')
consecutive absent_dates, but I am struggling to get it to work.
Note I know the employees table isn't included in the query yet to obtain first_name and last_name as I am trying to keep the test case as simple as possible.
Below is my test CASE.
The desired output should be as follows:
EMPLOYEE_ID ABSENT_DATE
1 14-JUL-21 Jane Doe
1 15-JUL-21 Jane Doe
1 30-JUL-21 Jane Doe
1 31-JUL-21 Jane Doe
4 22-JUL-21 Mike Jones
4 23-JUL-21 Mike Jones
-- Employee master table for the absence test case.
CREATE TABLE employees (
    employee_id NUMBER(6),
    first_name  VARCHAR2(20),
    last_name   VARCHAR2(20),
    card_num    VARCHAR2(10),
    work_days   VARCHAR2(7)   -- seven Y/N flags; presumably one per weekday (TODO confirm)
);

-- The primary key is added separately, after the table exists.
ALTER TABLE employees
    ADD CONSTRAINT employees_pk PRIMARY KEY (employee_id);

-- Seed the four sample employees in a single statement.
INSERT INTO employees (employee_id, first_name, last_name, card_num, work_days)
SELECT 1, 'Jane',    'Doe',   'F123456', 'NYYYYYN' FROM dual
UNION ALL
SELECT 2, 'Madison', 'Smith', 'R33432',  'NYYYYYN' FROM dual
UNION ALL
SELECT 3, 'Justin',  'Case',  'C765341', 'NYYYYYN' FROM dual
UNION ALL
SELECT 4, 'Mike',    'Jones', 'D564311', 'NYYYYYN' FROM dual;
-- One row per employee per absent day.
CREATE TABLE absences (
    seq_num     INTEGER GENERATED BY DEFAULT AS IDENTITY (START WITH 1) NOT NULL,
    employee_id NUMBER(6),
    absent_date DATE,
    -- Reject any value carrying a time-of-day part, so dates compare day to day.
    CONSTRAINT absence_chk CHECK (absent_date = TRUNC(absent_date, 'dd')),
    -- At most one absence row per employee per day.
    CONSTRAINT absence_pk PRIMARY KEY (employee_id, absent_date)
);
-- Load the sample absences inside one PL/SQL block and commit them together.
-- Columns are listed explicitly so the inserts do not depend on column order.
begin
  insert into absences (seq_num, employee_id, absent_date) values (1, 1, date'2021-07-21');
  insert into absences (seq_num, employee_id, absent_date) values (2, 4, date'2021-07-22');
  insert into absences (seq_num, employee_id, absent_date) values (3, 4, date'2021-07-23');
  insert into absences (seq_num, employee_id, absent_date) values (4, 4, date'2021-07-26');
  insert into absences (seq_num, employee_id, absent_date) values (5, 1, date'2021-07-30');
  insert into absences (seq_num, employee_id, absent_date) values (6, 1, date'2021-07-31');
  insert into absences (seq_num, employee_id, absent_date) values (7, 4, date'2021-07-13');
  insert into absences (seq_num, employee_id, absent_date) values (8, 1, date'2021-07-14');
  insert into absences (seq_num, employee_id, absent_date) values (9, 1, date'2021-07-15');
  commit;
end;
/
-- The slash above terminates and runs the PL/SQL block in SQL*Plus/SQLcl;
-- without it the statements that follow would be read as part of the block.
-- Different solutions to answer my question
-- Summary: one row per run of consecutive absent days (runs of 2+ days),
-- with the employee's name, the first and last day of the run and its length.
with multi_day as (
    -- Tabibitosan method: within one employee, absent_date minus its row
    -- number stays constant while the dates are consecutive, so grp labels
    -- each run of sequential dates.
    select employee_id
          ,absent_date
          ,absent_date - row_number() over (partition by employee_id order by absent_date) as grp
    from absences
)
select m.employee_id
      ,e.first_name
      ,e.last_name
      ,min(m.absent_date) as start_of_absence
      ,max(m.absent_date) as end_of_absensce  -- alias spelling kept as-is for existing consumers
      ,count(*)           as days_absent
from multi_day m
join employees e on (e.employee_id = m.employee_id)
group by m.employee_id
        ,m.grp
        ,e.first_name
        ,e.last_name
having count(*) > 1                            -- keep only runs longer than one day
order by m.employee_id, min(m.absent_date);    -- runs in date order per employee
-- Detail: every absent day that belongs to a run of 2+ consecutive days.
with tab as (
    -- Tabibitosan method: absent_date minus its per-employee row number is
    -- constant across consecutive dates, so grp identifies each run.
    select employee_id
          ,absent_date
          ,absent_date - row_number() over (partition by employee_id order by absent_date) as grp
    from absences
)
,multi_day as (
    -- Attach the size of the run to each day in it.
    select employee_id
          ,absent_date
          ,count(*) over (partition by employee_id, grp) as grp_cnt
    from tab
)
select m.employee_id
      ,e.first_name
      ,e.last_name
      ,m.absent_date
from multi_day m
join employees e on (e.employee_id = m.employee_id)
where m.grp_cnt > 1
-- Sort by date within employee (the positional "order by 1,2" sorted by
-- first_name, leaving the dates of one employee in arbitrary order).
order by m.employee_id, m.absent_date;
-- Absent days that are adjacent to another absent day of the same employee.
WITH consecutive_absences AS
(
    -- For every absence, look up the employee's next and previous absence date.
    SELECT a.absent_date,
           a.employee_id,
           e.first_name,
           e.last_name,
           LEAD (a.absent_date) OVER ( PARTITION BY a.employee_id
                                       ORDER BY a.absent_date
                                     ) AS next_date,
           LAG (a.absent_date)  OVER ( PARTITION BY a.employee_id
                                       ORDER BY a.absent_date
                                     ) AS prev_date
    FROM absences a
    JOIN employees e ON (e.employee_id = a.employee_id)
)
SELECT employee_id,
       first_name,
       last_name,
       absent_date
FROM consecutive_absences
-- Keep a day only when the day before or the day after is also an absence.
-- A NULL neighbour (first/last row of an employee) makes its comparison
-- UNKNOWN, so isolated days drop out.
WHERE absent_date = prev_date + 1
   OR absent_date = next_date - 1
ORDER BY employee_id, absent_date;
-- Same result via row pattern matching: every row of each run of
-- consecutive absent days, joined to the employee's name.
SELECT
    a.employee_id,
    a.absent_date,
    e.first_name,
    e.last_name
FROM absences
     MATCH_RECOGNIZE
     (
         PARTITION BY employee_id
         ORDER BY     absent_date
         ALL ROWS PER MATCH
         -- frst has no DEFINE entry, so it matches any row; a match is that
         -- row followed by one or more nxt rows.
         PATTERN (frst nxt+)
         -- nxt: a row at most one day after the row before it.
         DEFINE nxt AS absent_date <= PREV (absent_date) + 1
     ) a
JOIN employees e
    ON e.employee_id = a.employee_id
ORDER BY
    a.employee_id,
    a.absent_date
;

You don't need recursion. Query absences using LEAD/LAG:
-- Absences that have another absence of the same employee on the day before
-- or the day after.
with neighbours as (
    select a.*,
           lead(absent_date) over (partition by employee_id order by absent_date) nxt,   -- next absence
           lag(absent_date)  over (partition by employee_id order by absent_date) prev   -- previous absence
    from absences a
)
select *
from neighbours
where absent_date = nxt - 1
   or absent_date = prev + 1;
Then join this to employees.

Related

Get all rows from table and sort them by timestamp

I have these test tables which I would like to select and combine the result by timestamp:
-- Test tables for the "combine rows and sort by timestamp" question.

-- Employees; account_id refers to accounts.id (no foreign key is declared).
CREATE TABLE employees (
    id         BIGINT PRIMARY KEY,
    account_id INTEGER,
    first_name VARCHAR(150),
    last_name  VARCHAR(150),
    timestamp  TIMESTAMP
);

-- Accounts, each with its own timestamp.
CREATE TABLE accounts (
    id           BIGINT PRIMARY KEY,
    account_name VARCHAR(150) NOT NULL,
    timestamp    TIMESTAMP
);

-- Extra name per account, keyed by account id.
CREATE TABLE short_name (
    account_id BIGINT PRIMARY KEY,
    full_name  VARCHAR(150) NOT NULL
);
-- Sample rows, one multi-row INSERT per table.
INSERT INTO short_name (account_id, full_name)
VALUES
    (1, 'city 1'),
    (2, 'city 2');

-- NOTE: the timestamp strings are kept exactly as given; how '10-10-10' is
-- read depends on the session's date-style setting.
INSERT INTO employees (id, account_id, first_name, last_name, timestamp)
VALUES
    (1,    1, 'Donkey', 'Kong',      '10-10-10'),
    (2,    2, 'Ray',    'Kurzweil',  '11-10-10'),
    (32,   2, 'Ray2',   'Kurzweil2', '1-10-10'),
    (33,   2, 'Ray3',   'Kurzweil3', '2-10-10'),
    (3432, 3, 'Percy',  'Fawcett',   '6-10-10');

INSERT INTO accounts (id, account_name, timestamp)
VALUES
    (1, 'DK Banana Account',                 '5-10-10'),
    (2, 'Kurzweil''s invetions moneyz baby!', '10-10-10'),
    (3, 'Amazonian Emergency Fund',          '10-10-10');
-- Asker's original attempt, kept as posted; it illustrates the problem.
-- The two UNION ALL branches return different numbers of columns:
-- the first expands * over employees (5) and short_name (2) and then adds
-- 2 more columns; the second expands * over accounts (3) and adds 1.
-- Note the join matches short_name.account_id to employees.id, not to
-- employees.account_id.
select *, e.timestamp, sn.full_name from employees e
INNER JOIN short_name as sn on sn.account_id = e.id
union all
select *, a.timestamp from accounts a
-- This filter belongs to the accounts branch only; none of the sample
-- account timestamps fall in this 2022 window.
where timestamp >= '2022-03-25T13:00:00'
and timestamp < '2022-04-04T13:00:00'
AND timestamp IS NOT NULL
order by timestamp;
https://www.db-fiddle.com/f/pwzwQTsHuP27UDF17eAQy4/36
How can I select from both tables and display the combined rows ordered by timestamp?
The problem is that the tables have a different number of columns; I would like to display those columns as well, and sort all rows globally by timestamp.
Is it also possible to display the name of the source table as the first column of the result?
Example result for result with table name:
table_name
timestamp
employees
2010-10-10T00:00:00.000Z
accounts
2010-11-10T00:00:00.000Z

As others have mentioned, you haven't given a clear example of what you want the output to be; however, here's my attempt assuming you want one record per employee and one additional record per account.
Each row of the result set contains every possible column. These can be removed/reordered in the final select.
Query
-- One row per account plus one row per employee (with its account), all
-- shaped to the same column list so they can be sorted together.
with accounts_and_employees as (
    -- Account rows: the employee-only columns are padded with NULL.
    select
        'accounts' as table_name,
        accounts.id,
        accounts.id as account_id,
        accounts.timestamp,
        account_name,
        null as first_name,
        null as last_name
    from accounts
    -- UNION ALL: the branches can never produce equal rows (table_name
    -- differs), so the duplicate-removal pass of plain UNION is wasted work.
    union all
    -- Employee rows: account_name comes from the joined account.
    select
        'employees' as table_name,
        employees.id,
        account_id,
        employees.timestamp,
        account_name,
        first_name,
        last_name
    from employees
    join accounts
        on employees.account_id = accounts.id
)
select accounts_and_employees.*, full_name
from accounts_and_employees
-- LEFT JOIN keeps rows whose account has no short_name entry.
left join short_name
    on short_name.account_id = accounts_and_employees.account_id
-- BETWEEN is inclusive on both ends.
where accounts_and_employees.timestamp between '2010-01-10' and '2010-10-30'
order by accounts_and_employees.timestamp;
table_name
id
account_id
timestamp
account_name
first_name
last_name
full_name
employees
32
2
2010-01-10T00:00:00.000Z
Kurzweil's invetions moneyz baby!
Ray2
Kurzweil2
city 2
employees
33
2
2010-02-10T00:00:00.000Z
Kurzweil's invetions moneyz baby!
Ray3
Kurzweil3
city 2
accounts
1
1
2010-05-10T00:00:00.000Z
DK Banana Account
city 1
employees
3432
3
2010-06-10T00:00:00.000Z
Amazonian Emergency Fund
Percy
Fawcett
accounts
3
3
2010-10-09T00:00:00.000Z
Amazonian Emergency Fund
employees
1
1
2010-10-10T00:00:00.000Z
DK Banana Account
Donkey
Kong
city 1
accounts
2
2
2010-10-10T00:00:00.000Z
Kurzweil's invetions moneyz baby!
city 2
View on DB Fiddle

If your output is just table name and timestamp, then you don't need any JOIN.
Just UNION employees and accounts.
-- Source table name and timestamp for every row of both tables.
-- UNION ALL is required: plain UNION removes duplicates, so two rows of the
-- same table with the same timestamp would collapse into one.
select tablename, timestamp from
(select 'accounts' as tablename, timestamp from accounts
union all
select 'employees' as tablename, timestamp from employees) a
order by timestamp;
Otherwise, since the tables don't have same columns names, you'll need to make them having same names using column aliases.
-- Source table name, a display name and the timestamp for every row.
-- Both branches expose the same column names through aliases.
select tablename, name, timestamp from
-- Aliases are written as identifiers: a single-quoted alias is a string
-- literal in standard SQL and is only accepted by MySQL.
(select 'accounts' as tablename, account_name as name, timestamp from accounts
-- UNION ALL keeps every row; plain UNION would drop rows that happen to be equal.
union all
-- A space separates first and last name in the combined value.
select 'employees' as tablename, concat(first_name, ' ', last_name) as name, timestamp from employees
) a
order by timestamp;

I am unsure what you are trying to achieve, but you have to "pad" the missing columns so that both branches have the same number of columns. Note that the second query doesn't return any rows, so you don't see it in your fiddle.
-- First branch: * expands to the 5 employees columns plus the 2 short_name
-- columns, followed by e.timestamp and sn.full_name, 9 columns in total.
select *, e.timestamp, sn.full_name from employees e
INNER JOIN short_name as sn on sn.account_id = e.id
Union all
-- Second branch: 7 NULL placeholders stand in for the * columns, then the
-- account timestamp and an empty string, so it also has 9 columns.
select null,null,null,null,null,NULL,NULL, a.timestamp,''
from accounts a
-- The filter applies to the accounts branch only.
where timestamp >= '2022-03-25T13:00:00'
and timestamp < '2022-04-04T13:00:00'
AND timestamp IS NOT NULL
-- Column 8 is the explicitly selected timestamp of each branch.
order by 8;

-- Employees and accounts as one list, tagged with the kind of row.
-- Output column names come from the first branch.
SELECT
    'employee' AS type,
    e.id,
    e.timestamp,
    sn.full_name
FROM employees e
INNER JOIN short_name AS sn
    ON sn.account_id = e.id
UNION ALL
-- Accounts have no full_name, so an empty string fills that column.
SELECT
    'account' AS type,
    a.id,
    a.timestamp,
    '' AS full_name
FROM accounts a
WHERE a.timestamp IS NOT NULL
ORDER BY timestamp;
You can use dummy-columns to make the column-count fitting.
To show the concept, I dropped the timestamp-filter. You can of course re-add it and also select additional columns (but of course then you also have to use some more dummy-columns)

SQL query to compare multiple columns in same table in oracle

I have a requirement to find every emplid whose data differs between rows of the same table. The table consists of 50-60 columns. I need to check whether any column has changed compared with the previous row; if so, the emplid should be picked up. A newly added employee should be picked up as well.
I have created a basic query and it is working but need some way to achieve same purpose as I do not want to write every column name.
My query:
-- Employees whose most recent row differs from the row just before it.
select
    t.emplid                    -- qualified: both aliases expose emplid
from
    ps_custom_tbl t
    inner join ps_custom_tbl prev_t
        on prev_t.emplid = t.emplid
where
    -- t is the employee's latest row ...
    t.effdt = (select max(t2.effdt) from ps_custom_tbl t2
               where t2.emplid = t.emplid)
    -- ... and prev_t is the latest row dated before it.
and prev_t.effdt = (select max(prev_t2.effdt) from ps_custom_tbl prev_t2
                    where prev_t2.emplid = prev_t.emplid and prev_t2.effdt < t.effdt)
    -- NOTE: <> is never true when either side is NULL, so a change to or
    -- from NULL is not detected by these comparisons.
and (t.first_name <> prev_t.first_name
     or t.last_name <> prev_t.last_name
     /* ... repeat for the remaining ~50 columns ... */);
Can you please suggest another way to achieve same thing?

You can use MINUS.
If the query returns no rows, both sets are the same; if it returns any rows, there is a difference between them.
-- Work on a copy of HR.EMPLOYEES.
CREATE TABLE emp AS
SELECT * FROM hr.employees;

-- Add a second copy of every employee with ids shifted by 1000 and
-- department 30 changed to 70, so the two copies differ for that department.
INSERT INTO emp
SELECT
    employee_id + 1000,
    first_name,
    last_name,
    email,
    phone_number,
    hire_date,
    job_id,
    salary,
    commission_pct,
    manager_id,
    DECODE(department_id, 30, 70, department_id)
FROM hr.employees;

-- Rows of the original copy that have no identical row (ignoring the id)
-- in the shifted copy.
SELECT
    first_name, last_name, email, phone_number, hire_date,
    job_id, salary, commission_pct, manager_id, department_id
FROM emp
WHERE employee_id <= 1000
MINUS
SELECT
    first_name, last_name, email, phone_number, hire_date,
    job_id, salary, commission_pct, manager_id, department_id
FROM emp
WHERE employee_id > 1000;
You do have to list the columns, because if you include columns that legitimately differ (e.g. dates or ids), they will be compared too. Still, it is easier to list the columns in the SELECT clause than to write a WHERE condition for every one of them.
Maybe it will help.
-- Alternative: when the data lives in two tables with identical columns,
-- compare every column at once with SELECT *.
DROP TABLE emp;

CREATE TABLE emp AS
SELECT * FROM hr.employees;

-- Second table: same rows, but department 30 is changed to 70.
CREATE TABLE emp2 AS
SELECT
    employee_id,
    first_name,
    last_name,
    email,
    phone_number,
    hire_date,
    job_id,
    salary,
    commission_pct,
    manager_id,
    DECODE(department_id, 30, 70, department_id) department_id
FROM hr.employees;

-- Rows of emp with no identical row in emp2.
SELECT * FROM emp
MINUS
SELECT * FROM emp2;
---- ADD DATE CRITERIA
-- Yes, you can add date criteria and use analytic functions to tell which
-- row is newer and which is older, then compare one to the other, like below:
drop table emp;
create table emp as select * from hr.employees;
-- Add a second version of every employee, dated one day later and with
-- department 30 changed to 70.
insert into emp
select
    employee_id,
    first_name,
    last_name,
    email,
    phone_number,
    hire_date+1,
    job_id,
    salary,
    commission_pct,
    manager_id,
    decode(department_id ,30,70, department_id)
from hr.employees;
-- Thanks to WITH, the data is described only once and reused by both branches.
with data as
(select employee_id, first_name, last_name, email, phone_number,
        hire_date,
        -- rn = 1 is the newer row of an employee, rn = 2 the older one.
        row_number() over(partition by employee_id order by hire_date desc) rn,
        job_id, salary, commission_pct, manager_id, department_id
 from emp)
select employee_id, first_name, last_name, email, phone_number, department_id from data where rn = 1
minus -- find the differences: newer rows that have no identical older row
select employee_id, first_name, last_name, email, phone_number, department_id from data where rn = 2;

You will have to write all columns in some sense no matter what you do.
In terms of comparing current and previous, you might find this easier
select
col1,
col2,
...
lag(col1) over ( partition by empid order by effdt ) as prev_col1,
lag(col2) over ( partition by empid order by effdt ) as prev_col2
...
and then your comparison will be along the lines of
-- Template: keep rows where any column differs from its previous value.
select *
from ( <query above > )   -- inline view closed here; the original omitted the ")"
where
-- DECODE treats two NULLs as equal, so each test yields 1 only on a real change.
decode(col1,prev_col1,0,1) = 1 or
decode(col2,prev_col2,0,1) = 1 or
...
The use of DECODE in this way handles the issues of nulls.

My requirement is to send out data to managers, they change any/all/none of the data in the columns, and send back to me. I then have to identify each column that has a difference from what I sent, and mark those columns as changed for a central office reviewer to visually scan and approve/deny the changes for integration back into the central data set.
This solution may not fit your needs of course, but a template structure is offered here that you can augment to meet your needs no matter the number of columns. In the case of your question, 50-60 columns will make this SQL query huge, but I've written heinously long queries in the past with great success. Add columns a few at a time rather than all wholesale according to this template and see if they work along the way.
You could easily write pl/sql to write this query for you for the tables in question.
This would get very cumbersome if you had to compare columns from 3 or more tables or bi-directional changes. I only care about single direction changes. Did the person change my original row columns or not. If so, what columns did they change, and what was my before value and what is their after value, and show me nothing else please.
In other words, only show me rows with columns that have changes with their before values and nothing else.
-- thing1 holds the rows as sent out, thing2 the rows as they came back.
CREATE TABLE thing1 (
    id        NUMBER,
    firstname VARCHAR2(10),
    lastname  VARCHAR2(10)
);

CREATE TABLE thing2 (
    id        NUMBER,
    firstname VARCHAR2(10),
    lastname  VARCHAR2(10)
);

-- id 1 and id 4: unchanged; id 2: lastname changed; id 3: firstname changed.
INSERT INTO thing1 (id, firstname, lastname) VALUES (1, 'Buddy', 'Slacker');
INSERT INTO thing2 (id, firstname, lastname) VALUES (1, 'Buddy', 'Slacker');

INSERT INTO thing1 (id, firstname, lastname) VALUES (2, 'Mary', 'Slacker');
INSERT INTO thing2 (id, firstname, lastname) VALUES (2, 'Mary', 'Slacke');

INSERT INTO thing1 (id, firstname, lastname) VALUES (3, 'Timmy', 'Slacker');
INSERT INTO thing2 (id, firstname, lastname) VALUES (3, 'Timm', 'Slacker');

INSERT INTO thing1 (id, firstname, lastname) VALUES (4, 'Missy', 'Slacker');
INSERT INTO thing2 (id, firstname, lastname) VALUES (4, 'Missy', 'Slacker');

COMMIT;
Un-comment commented select * queries one at a time after each data set to understand what is in each data set at each stage of the refinement process.
-- Reports, per id, which columns of thing2 differ from thing1, with the
-- before (thing1) and after (thing2) value of each changed column.
-- Step 1: rows of thing2 that have no identical row in thing1.
with rowdifferences as
(
select
id
,firstname
,lastname
from thing2
minus
select
id
,firstname
,lastname
from thing1
)
--select * from rowdifferences
-- Step 2: the thing1 rows that share an id with a differing thing2 row.
,thing1matches as
(
select
t1.id
,t1.firstname
,t1.lastname
from thing1 t1
join rowdifferences rd on t1.id = rd.id
)
--select * from thing1matches
-- Step 3: (id, firstname) pairs of differing rows not found among the
-- matching thing1 rows, i.e. ids whose firstname differs.
, col1differences as
(
select
id
,firstname
from rowdifferences
minus
select
id
,firstname
from thing1matches
)
--select * from col1differences
-- Step 4: the same test for lastname.
, col2differences as
(
select
id
,lastname
from rowdifferences
minus
select
id
,lastname
from thing1matches
)
--select * from col2differences
-- Step 5: combine the per-column differences into one row per id; the
-- full join keeps ids that appear in only one of the two sets.
,truedifferences as
(
select
case when c1.id is not null then c1.id
when c2.id is not null then c2.id
end id
,c1.firstname
,c2.lastname
from col1differences c1
full join col2differences c2 on c1.id = c2.id
)
--select * from truedifferences
-- Final result: a before value is shown only when the matching after
-- value from truedifferences is not null.
select
t1m.id
,case when td.firstname is not null then t1m.firstname end beforefirstname
,td.firstname afterfirstname
,case when td.lastname is not null then t1m.lastname end beforelastname
,td.lastname afterlastname
from thing1matches t1m
join truedifferences td on t1m.id = td.id
;

Extract Department each Employee worked for 6 months or more in a year continoulsy

Please below data
Emp. Date. Dept
1. 01/21. Abc
1. 02/15. Xyz
1. 10/19. Cba
2. 01/21. Abc
2. 02/15. Xyz
2. 04. Uvw
Using Oracle SQL, I need to extract one department per employee for the year: if the employee worked in a department for six months or more, return that department; otherwise return the employee's current department.
Expected result for above data
Emp. Dept
1. Xyz. (Emp workes more than 6 months)
2. Uvw. (No department with 6 months tenure so current department)

You can use the following:
I have created the table and data according to the example.
-- Preparing the data
-- Department history: one row per employee per department start date.
create table employees (
    emp   number,
    date1 date,
    dept  varchar2(100)
);

-- Sample data from the question. The format masks carry no year (and the
-- last one no day), so TO_DATE fills in the missing parts with its defaults.
insert into employees (emp, date1, dept) values (1, to_date('01/21', 'MM/DD'), 'Abc');
insert into employees (emp, date1, dept) values (1, to_date('02/15', 'MM/DD'), 'Xyz');
insert into employees (emp, date1, dept) values (1, to_date('10/19', 'MM/DD'), 'Cba');
insert into employees (emp, date1, dept) values (2, to_date('01/21', 'MM/DD'), 'Abc');
insert into employees (emp, date1, dept) values (2, to_date('02/15', 'MM/DD'), 'Xyz');
insert into employees (emp, date1, dept) values (2, to_date('04', 'MM'), 'Uvw');

-- Show what was loaded.
select *
from employees;
-- Your Query
-- Per employee: the department held for 6+ months if there is one,
-- otherwise the current (latest) department.
with dept_tenure as (
    -- Months spent in each department = months until the employee's next
    -- row; the latest row has no next row, so its duration is NULL.
    select
        emp,
        dept,
        months_between(
            lead(date1) over (partition by emp order by date1),
            date1
        ) as duration
    from employees
),
ranked as (
    -- Keep long stints plus the current department, longest stint first;
    -- the NULL-duration current department sorts last as the fallback.
    select
        emp,
        dept,
        row_number() over (partition by emp order by duration desc nulls last) as rn
    from dept_tenure
    where duration is null
       or duration >= 6
)
select
    emp,
    dept
from ranked
where rn = 1;
Output:
Hope, This will be useful to you.
Cheers!!

SQL queries related to join

There are three tables -
employee (emp_id, emp_name)
department (emp_id, emp_dept)
salary (emp_id, emp_sal)
I have to find out maximum salary from each department.
Need following columns in output -
emp_id, emp_name, emp_sal, emp_dept
Note -
emp_id is primary key.
In output, emp_sal should be the maximum salary of the department.

I have put sample data with the query below. I have tested this in SQL Server
-- Session-local temp tables mirroring the three tables in the question.
CREATE TABLE #dept   (emp_id INT, emp_dept VARCHAR(30));
CREATE TABLE #emp    (emp_id INT, emp_name VARCHAR(30));
CREATE TABLE #salary (emp_id INT, salary   INT);

-- Sample data: two HR employees and one Finance employee.
INSERT INTO #dept (emp_id, emp_dept)
VALUES (1, 'HR'), (2, 'HR'), (3, 'Finance');

INSERT INTO #emp (emp_id, emp_name)
VALUES (1, 'Venkat'), (2, 'raman'), (3, 'Murugan');

INSERT INTO #salary (emp_id, salary)
VALUES (1, 5000), (2, 10000), (3, 20000);
-- Highest-paid employee of each department, including the salary itself
-- (the question asks for emp_sal in the output).
select emp_dept, emp_id, emp_name, emp_sal
from
(
    -- Rank employees inside each department by salary, highest first.
    -- ROW_NUMBER returns exactly one row per department even on ties.
    select d.emp_dept,
           d.emp_id,
           e.emp_name,
           s.salary as emp_sal,
           Row_Number() over (partition by d.emp_dept order by s.salary desc) as rnk
    from #Dept AS d
    LEFT JOIN #Salary AS s
        ON d.emp_id = s.emp_id
    LEFT JOIN #emp AS e
        ON d.emp_id = e.emp_id
) as t
where rnk = 1

Always try to fetch some data first using simple query and then just make it filter one by one as you want data from that query, so you get a clear idea. Also try below query.
-- Employee(s) earning the highest salary in each department.
-- Every column is qualified (emp_id exists in all three tables), and the
-- per-department maximum is found with a correlated subquery instead of
-- selecting non-aggregated columns alongside GROUP BY emp_dept.
SELECT e.emp_id, e.emp_name, d.emp_dept, s.emp_sal
FROM salary s
INNER JOIN department d ON s.emp_id = d.emp_id
INNER JOIN employee e ON s.emp_id = e.emp_id
WHERE s.emp_sal = (SELECT MAX(s2.emp_sal)
                   FROM salary s2
                   INNER JOIN department d2 ON s2.emp_id = d2.emp_id
                   WHERE d2.emp_dept = d.emp_dept)
Also see this link.
SELECT rows with MAX(Column value), DISTINCT by another column

Try the below query:
-- Maximum salary per department. The salary column is emp_sal, as listed
-- in the question's salary (emp_id, emp_sal) table.
select max(s.emp_sal) as emp_sal, d.emp_dept from department d join salary s on d.emp_id = s.emp_id group by d.emp_dept

SQL: How to get the row when max is on more than 1 column

I am working with Microsoft SQL Server 2008 R2.
I have a table named employee:
-- Employee master table; middle_name is the only optional column.
CREATE TABLE employee
(
    employee_id BIGINT      NOT NULL PRIMARY KEY,
    first_name  VARCHAR(50) NOT NULL,
    middle_name VARCHAR(50) NULL,
    last_name   VARCHAR(50) NOT NULL
)
I have a table named eligibility. It has a FK to employee table. It has a unique key comprise of 3 columns: employee_id + effective_date + sequence_number.
-- Eligibility rows per employee; several rows may share an effective_date
-- and are then told apart by sequence_number.
CREATE TABLE eligibility
(
    eligibility_id  BIGINT      NOT NULL PRIMARY KEY,
    employee_id     BIGINT      NOT NULL FOREIGN KEY REFERENCES employee (employee_id),
    effective_date  DATE        NOT NULL,
    sequence_number INT         NOT NULL,
    value           VARCHAR(20) NOT NULL,
    -- Business key: one row per employee, date and sequence number.
    CONSTRAINT UK_eligibility UNIQUE (employee_id, effective_date, sequence_number)
)
I have 1 row in employee table with employee_id = 1001:
-- The single sample employee.
INSERT INTO employee
    (employee_id, first_name, middle_name, last_name)
VALUES
    (1001, 'A', 'B', 'C')
I have 4 rows in eligibility table for the same employee_id:
-- Four eligibility rows for employee 1001; 2016-05-25 has two sequence numbers.
INSERT INTO eligibility
    (eligibility_id, employee_id, effective_date, sequence_number, value)
VALUES
    (1, 1001, '2016-04-13', 1, 'NS'),
    (2, 1001, '2016-05-25', 1, 'EX'),
    (3, 1001, '2016-05-25', 2, 'VR'),
    (4, 1001, '2016-06-05', 1, 'LS')
From the eligibility table, for a given date I want to get the row with the max (effective_date + sequence_number) combination which is less than or equal to that given date.
Examples:
For 2016-04-30 date I would want the row with eligibility_id = 1.
For 2016-05-30 date I would want the row with eligibility_id = 3.
For 2016-06-30 date I would want the row with eligibility_id = 4.
I have written a query that returns the desired results. This is the query for the date 2016-05-30:
-- Per employee: the eligibility row in force on 2016-05-30, i.e. the
-- latest effective_date on or before that day and, within that date, the
-- highest sequence_number.
SELECT *
FROM eligibility e
WHERE e.effective_date =
      (
          -- latest effective date not after the given day
          SELECT MAX(by_date.effective_date)
          FROM eligibility by_date
          WHERE by_date.effective_date <= '2016-05-30'
            AND by_date.employee_id = e.employee_id
      )
  AND e.sequence_number =
      (
          -- highest sequence number on that effective date
          SELECT MAX(by_seq.sequence_number)
          FROM eligibility by_seq
          WHERE by_seq.effective_date = e.effective_date
            AND by_seq.employee_id = e.employee_id
      )
The query is ok but I want to try write it in some different way to get the same results. What other way you would recommend?

Hmmm, I would use row_number():
-- Per employee: the eligibility row in force on the given date.
select e.*
from (select e.*,
             -- 1 = latest effective_date, then highest sequence_number
             row_number() over (partition by employee_id order by effective_date desc, sequence_number desc
                               ) as seqnum
      from eligibility e
      -- Only rows effective on or before the given date may compete;
      -- without this filter the newest row is returned for every date.
      where e.effective_date <= '2016-05-30'
     ) e
where seqnum = 1;

This looks to me like TOP-1 with ties:
-- Per employee: the eligibility row in force on the given date.
-- Ordering by a per-employee ROW_NUMBER makes every employee's best row
-- tie at 1, so TOP 1 WITH TIES returns one row for each employee instead
-- of only the single newest row in the whole table.
SELECT TOP 1 WITH TIES e.*
FROM eligibility e
WHERE e.effective_date <= '2016-05-30'
ORDER BY ROW_NUMBER() OVER (PARTITION BY e.employee_id
                            ORDER BY e.effective_date DESC, e.sequence_number DESC)

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Oracle recursive CTE to check consecutive date rows - sql

Related

Get all rows from table and sort them by timestamp

SQL query to compare multiple columns in same table in oracle

Extract Department each Employee worked for 6 months or more in a year continoulsy

SQL queries related to join

SQL: How to get the row when max is on more than 1 column

Categories

Resources