Oracle SQL Uniquely Update Duplicate Records - sql

I have a STUDENT table and need to update the STUDENT_ID values by prefixing each one with the letters SS, followed by the original STUDENT_ID value. For duplicate STUDENT_ID records, the first occurrence keeps the plain SS prefix and each further duplicate is prefixed with SS1, SS2, and so on. Below is an example:
Before Update:
NUM
STUDENT_ID
1
9234
2
9234
3
9234
4
3456
5
3456
6
789
7
956
After Update:
NUM
STUDENT_ID
1
SS9234
2
SS19234
3
SS29234
4
SS3456
5
SS13456
6
SS789
7
SS956
Below is the query for updating the STUDENT_ID for unique records.
-- Prefixes STUDENT_ID with 'SS' on every row (no WHERE clause, so the whole table is updated).
-- Rows that shared a STUDENT_ID before the update still share one afterwards.
update student set student_id = 'SS'||student_id ;
commit;
I need suggestions for updating the STUDENT_ID of the duplicate records. There are around 1 million duplicate records in the table, and the total volume is around 40 million rows. I would appreciate any input on improving performance.

You can use a MERGE statement correlated on the ROWID pseudo-column and using the ROW_NUMBER() analytic function:
-- Rewrite every STUDENT_ID as 'SS' || <duplicate number> || <original id>.
-- Rows are matched back to themselves by ROWID, so no key column is required.
MERGE INTO table_name tgt
USING (
  -- dup_no is 0 for the first row of each STUDENT_ID (lowest NUM), then 1, 2, ...
  SELECT t.ROWID AS row_id,
         ROW_NUMBER() OVER (PARTITION BY t.student_id ORDER BY t.num) - 1 AS dup_no
  FROM   table_name t
) ranked
ON (tgt.ROWID = ranked.row_id)
WHEN MATCHED THEN
  UPDATE
  -- NULLIF turns dup_no 0 into NULL, which concatenates as an empty string
  SET tgt.student_id = 'SS' || NULLIF(ranked.dup_no, 0) || tgt.student_id;
Which, for the sample data:
-- Sample data: builds TABLE_NAME with the seven rows from the question.
-- The CAST on the first branch types STUDENT_ID as VARCHAR2(20)
-- (presumably so the longer prefixed values still fit -- confirm against the real column).
CREATE TABLE table_name (NUM, STUDENT_ID) AS
SELECT 1, CAST('9234' AS VARCHAR2(20)) FROM DUAL UNION ALL
SELECT 2, '9234' FROM DUAL UNION ALL
SELECT 3, '9234' FROM DUAL UNION ALL
SELECT 4, '3456' FROM DUAL UNION ALL
SELECT 5, '3456' FROM DUAL UNION ALL
SELECT 6, '789' FROM DUAL UNION ALL
SELECT 7, '956' FROM DUAL;
Then after the MERGE the table contains:
NUM
STUDENT_ID
1
SS9234
2
SS19234
3
SS29234
4
SS3456
5
SS13456
6
SS789
7
SS956
fiddle

I'm sure there must be a better way, but this query can get the job done:
-- Rewrite every STUDENT_ID as 'SS' || <duplicate number> || <original id>.
-- The subquery is correlated on NUM, so this assumes NUM identifies exactly one row of t.
update t
set student_id = (
  select new_student_id
  from (
    select x.num,
           -- first row of each student_id gets no number; later rows get rn - 1 (SS1, SS2, ...)
           'SS' || case when rn > 1 then rn - 1 end || student_id as new_student_id
    from (
      select t.num,
             t.student_id,
             row_number() over(partition by student_id order by num) as rn
      from t
    ) x
  ) y
  where t.num = y.num
)
Result:
NUM STUDENT_ID
---- ----------
1 SS9234
2 SS19234
3 SS29234
4 SS3456
5 SS13456
6 SS789
7 SS956
See running example at db<>fiddle.

Maybe you could do it without updating!?
I would probably try to :
-- CTAS: build the replacement table in one pass instead of updating rows in place.
CREATE TABLE NEW_TABLE AS
SELECT [do the "update" here] FROM OLD_TABLE;
- add indexes on new table
- add constraints on new table
- add anything else you need on new table (foreign keys, grants...)
and then
-- Swap the rebuilt table in under the original name: drop the old table, then rename the new one.
DROP TABLE OLD_TABLE;
-- and
RENAME NEW_TABLE To OLD_TABLE;
SELECT with your sample data:
-- Preview of the new STUDENT_ID values for the sample data, without updating anything.
WITH
tbl as
(
Select 1 "NUM", 9234 "STUDENT_ID" From Dual Union All
Select 2 "NUM", 9234 "STUDENT_ID" From Dual Union All
Select 3 "NUM", 9234 "STUDENT_ID" From Dual Union All
Select 4 "NUM", 3456 "STUDENT_ID" From Dual Union All
Select 5 "NUM", 3456 "STUDENT_ID" From Dual Union All
Select 6 "NUM", 789 "STUDENT_ID" From Dual Union All
Select 7 "NUM", 956 "STUDENT_ID" From Dual
),
-- Number the rows of each STUDENT_ID group, lowest NUM first.
numbered as
(
Select
  NUM,
  STUDENT_ID,
  Row_Number() Over(Partition By STUDENT_ID Order By NUM) "RN"
From
  tbl
)
Select
  NUM,
  -- RN = 1 gives 'SS' || id; RN = k gives 'SS' || (k - 1) || id.
  -- A CASE is used instead of stripping zeros from the counter, which would
  -- turn 10 into 1 and collide with the second row of the group.
  'SS' || CASE WHEN RN > 1 THEN RN - 1 END || STUDENT_ID "STUDENT_ID"
From
  numbered
Order By NUM
Result:
NUM
STUDENT_ID
1
SS9234
2
SS19234
3
SS29234
4
SS3456
5
SS13456
6
SS789
7
SS956

Related

Find rows with the same values in a column

I've a table LESSON like this
| Student_id | Lesson_id |
| ---------- | --------- |
| 352-03-3624| 10359427 |
| 352-03-3624| 10359449 |
| 805-17-4144| 58149917 |
| 805-17-4144| 58149968 |
I have to look for students who have taken the same lessons, ie who have the same lesson_id.
I used this query:
-- Rows of LESSON whose lesson_id is also taken by at least one other student.
select * from lesson e
where exists
(select null from lesson i
where e.lesson_id = i.lesson_id and e.student_id <> i.student_id)
order by lesson_id
but it doesn't work very well.
Is there someone who can help me?
thanks
I want to find, for every student, whether there is another student who follows the same lessons.
Just use the HAVING clause:
-- List every (lesson, student) row whose lesson appears on more than one row.
WITH lessons (student_id, lesson_id) AS
(
  SELECT '352-03-3624', '10359427' FROM dual UNION ALL
  SELECT '352-03-3624', '10359449' FROM dual UNION ALL
  SELECT '805-17-4144', '58149917' FROM dual UNION ALL
  SELECT '805-17-4144', '58149968' FROM dual UNION ALL
  SELECT '805-17-4144', '10359427' FROM dual UNION ALL
  SELECT '805-17-4143', '10359427' FROM dual UNION ALL
  SELECT '805-17-4144', '10359449' FROM dual
),
-- lessons that occur on at least two rows
shared_lessons AS
(
  SELECT lesson_id
  FROM lessons
  GROUP BY lesson_id
  HAVING COUNT(*) > 1
)
SELECT l.lesson_id, l.student_id
FROM lessons l
INNER JOIN shared_lessons s
  ON s.lesson_id = l.lesson_id;
PS. I added more data in order to have a result set. It contains the course and the student that are matching the criteria.
Another option might be to use count in its analytic form and then fetch rows whose count is larger than 1 (which means that there are two or more students who took the same lesson).
Sample data:
-- SQL*Plus session: inline sample data (lines 1-10) followed by the actual query (lines 11-18).
SQL> WITH lessons (student_id, lesson_id) AS
2 (
3 SELECT '352-03-3624', '10359427' FROM dual UNION ALL
4 SELECT '352-03-3624', '10359449' FROM dual UNION ALL
5 SELECT '805-17-4144', '58149917' FROM dual UNION ALL
6 SELECT '805-17-4144', '58149968' FROM dual UNION ALL
7 SELECT '805-17-4144', '10359427' FROM dual UNION ALL
8 SELECT '805-17-4143', '10359427' FROM dual UNION ALL
9 SELECT '805-17-4144', '10359449' FROM dual
10 )
Query begins here:
11 select lesson_id, student_id
12 from (select lesson_id,
13 student_id,
14 count(*) over (partition by lesson_id) cnt -- rows sharing this lesson_id, repeated on each of them
15 from lessons
16 )
17 where cnt > 1 -- keep lessons that appear on two or more rows
18 order by lesson_id, student_id;
LESSON_I STUDENT_ID
-------- -----------
10359427 352-03-3624
10359427 805-17-4143
10359427 805-17-4144
10359449 352-03-3624
10359449 805-17-4144
SQL>

How to get distinct employees that do not have a particular skillset

I have a table that has two columns. Employee_id (which is unique per employee) and next column for employee skillset. One employee can have multiple skillset. How do I retrieve the list of distinct employees who don't have skillset 'c' if A,B,C,D,E are the five types of skillset that employees can have.
employee_id skillset
1 A
1 C
2 E
3 A
3 B
3 C
4 D
4 C
5 B
I have tried self join and other methods but it is not working.
-- Filters individual rows, not employees: an employee with at least one non-'C' row is still
-- returned even when another of their rows has skillset 'C'.
select distinct employee_id from employee_skillset where skillset not like 'C'
When I run my query, it is still giving me employee_ids that have skillset of "c"
You can group by employee_id and set a condition in the HAVING clause:
-- Keep only the employees that have no row with skillset 'C'.
select employee_id
from employee_skillset
group by employee_id
-- COUNT ignores the NULLs produced for non-'C' rows, so it counts 'C' rows only
having count(case when skillset = 'C' then 1 end) = 0
Or with NOT EXISTS:
-- Employees for whom no 'C' row exists.
select distinct emp.employee_id
from employee_skillset emp
where not exists (
  select null
  from employee_skillset c_skill
  where c_skill.skillset = 'C'
    and c_skill.employee_id = emp.employee_id
)
What are your expected results from your data set? 2 and 5?
Why not something like below
-- Employees that have no 'C' row at all.
-- A plain row filter (skillset <> 'C') is not enough: it still returns employees
-- that have 'C' alongside other skills, so the test is made per employee instead.
SELECT employee_id
FROM Table1
GROUP BY employee_id
HAVING COUNT(CASE WHEN skillset = 'C' THEN 1 END) = 0;
MINUS set operator is one option:
-- SQL*Plus session: all employees MINUS the employees that have skillset 'c'.
-- Note the sample data here uses lower-case skill letters.
SQL> with employee_skillset (employee_id, skillset) as
2 (select 1, 'a' from dual union all
3 select 1, 'c' from dual union all
4 select 2, 'e' from dual union all
5 select 3, 'a' from dual union all
6 select 3, 'b' from dual union all
7 select 3, 'c' from dual union all
8 select 4, 'd' from dual union all
9 select 4, 'c' from dual union all
10 select 5, 'b' from dual
11 )
12 select employee_id from employee_skillset -- every employee_id in the table
13 minus -- set difference; the result contains distinct rows only
14 select employee_id from employee_skillset where skillset = 'c';
EMPLOYEE_ID
-----------
2
5
SQL>
Yet another option:
<snip>
12 select employee_id
13 from (select employee_id,
14 case when skillset = 'c' then 1 else 0 end flag -- 1 on 'c' rows, 0 elsewhere
15 from employee_skillset
16 )
17 group by employee_id -- a flag sum of 0 means the employee has no 'c' row
18 having sum(flag) = 0;
EMPLOYEE_ID
-----------
2
5
SQL>
Or:
<snip>
12 select employee_id
13 from (select employee_id,
14 listagg(skillset, ',') within group (order by null) lagg -- all skills of the employee as one comma-separated string
15 from employee_skillset
16 group by employee_id
17 ) -- wrap the list in commas and search for ',c,' so only a whole item matches, not a substring of a longer skill name
18 where instr(',' || lagg || ',', ',c,') = 0;
EMPLOYEE_ID
-----------
2
5
SQL>

select rows between two character values of a column

I have a table which shows as below:
S.No | Action
1 | New
2 | Dependent
3 | Dependent
4 | Dependent
5 | New
6 | Dependent
7 | Dependent
8 | New
9 | Dependent
10 | Dependent
I here want to select the rows between the first two 'New' values in the Action column, including the first row with the 'New' action. Like [New,New)
For example:
In this case, I want to select rows 1,2,3,4.
Please let me know how to do this.
Hmmm. Let's count up the cumulative number of times that New appears as a value and use that:
-- Running count of 'New' rows in s_no order: the rows seen while that count is 1
-- are the first 'New' row and everything up to (not including) the second one.
with flagged as (
  select t.*,
         count(case when action = 'New' then 1 end) over (order by s_no) as cume_new
  from t
)
select flagged.*
from flagged
where cume_new = 1;
you can do some magic with analytic functions
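1. Select the rows with action 'New' and number them in sno order.
2. Keep the first two of them and use LEAD to pair the first with the second.
3. Select the rows whose sno lies between those two values (first included, second excluded).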
-- Returns the rows from the first 'New' row up to, but not including, the second 'New' row.
-- The output also carries the range columns (a.sno, a.a) because of the final SELECT *.
with t as (
select 1 sno, 'New' action from dual union
select 2,'Dependent' from dual union
select 3,'Dependent' from dual union
select 4,'Dependent' from dual union
select 5,'New' from dual union
select 6,'Dependent' from dual union
select 7,'Dependent' from dual union
select 8,'New' from dual union
select 9,'Dependent' from dual union
select 10,'Dependent' from dual
)
select *
from (select *
from (select sno, lead(sno) over (order by sno) a -- a = sno of the following 'New' row, NULL on the last one kept
from ( select row_number() over (partition by action order by Sno) t, -- t = ordinal of each 'New' row
t.sno
from t
where t.action = 'New'
) a
where t <=2 ) -- keep only the first two 'New' rows
where a is not null) a, t -- drops the second 'New' row, leaving a single (start, end) pair
where t.sno >= a.sno and t.sno < a.a -- half-open range: start included, end excluded

How to do select count(*) group by and select * at same time?

For example, I have table:
ID | Value
1 hi
1 yo
2 foo
2 bar
2 hehe
3 ha
6 gaga
I want my query to get ID, Value; meanwhile the returned set should be in the order of frequency count of each ID.
I tried the query below but don't know how to get the ID and Value column at the same time:
-- Returns one count per ID, largest first; neither ID nor Value is in the select list.
SELECT COUNT(*) FROM TABLE group by ID order by COUNT(*) desc;
The count number doesn't matter to me, I just need the data to be in such order.
Desire Result:
ID | Value
2 foo
2 bar
2 hehe
1 hi
1 yo
3 ha
6 gaga
As you can see because ID:2 appears most times(3 times), it's first on the list,
then ID:1(2 times) etc.
you can try this -
-- List every row, rows of the most frequent id first.
-- freq_count repeats, on each row, the number of rows that share its id.
select id, value, count(*) over (partition by id) freq_count
from
(
select 2 as ID, 'foo' as value
from dual
union all
select 2, 'bar'
from dual
union all
select 2, 'hehe'
from dual
union all
select 1 , 'hi'
from dual
union all
select 1 , 'yo'
from dual
union all
select 3 , 'ha'
from dual
union all
select 6 , 'gaga'
from dual
)
-- Sort by the named column rather than by position; id breaks ties so the rows
-- of two equally frequent ids stay grouped instead of interleaving.
order by freq_count desc, id;
-- Every row of TABLE, rows of the most frequent id first.
select t.id, t.value
from TABLE t
inner join
(
  -- one row per id with the number of rows it has
  SELECT id, count(*) as cnt
  FROM TABLE
  group by ID
)
x on x.id = t.id
-- t.id breaks ties so the rows of two equally frequent ids do not interleave
order by x.cnt desc, t.id
How about something like
-- Every row of TABLE together with the row count of its ID, most frequent ID first.
SELECT t.ID,
       t.Value,
       c.Cnt
FROM TABLE t INNER JOIN
(
  -- one row per ID with the number of rows it has
  SELECT ID,
         COUNT(*) Cnt
  FROM TABLE
  GROUP BY ID
) c ON t.ID = c.ID
-- t.ID breaks ties so the rows of two equally frequent IDs do not interleave
ORDER BY c.Cnt DESC,
         t.ID
SQL Fiddle DEMO
I see the question is already answered, but since the most obvious and most simple solution is missing, I'm posting it anyway. It doesn't use self joins nor subqueries:
-- SQL*Plus session: create the sample table, then sort its rows by how often each id occurs.
SQL> create table t (id,value)
2 as -- the seven sample rows from the question
3 select 1, 'hi' from dual union all
4 select 1, 'yo' from dual union all
5 select 2, 'foo' from dual union all
6 select 2, 'bar' from dual union all
7 select 2, 'hehe' from dual union all
8 select 3, 'ha' from dual union all
9 select 6, 'gaga' from dual
10 /
Table created.
SQL> select id
2 , value
3 from t
4 order by count(*) over (partition by id) desc -- analytic count used directly as the sort key; NOTE(review): no tie-breaker, so rows of ids with equal counts may come back in any order
5 /
ID VALU
---------- ----
2 bar
2 hehe
2 foo
1 yo
1 hi
6 gaga
3 ha
7 rows selected.

Get distinct rows based on priority?

I have a table as shown below. I am using Oracle 10g.
TableA
------
id status
---------------
1 R
1 S
1 W
2 R
I need to get the distinct ids along with their status. If I query for distinct ids and their status, I get all 4 rows,
but I should get only 2, one per id.
Here id 1 has 3 distinct statuses, so I should get only one row for it, chosen by priority.
First priority goes to 'S', second priority to 'W', and third priority to 'R'.
In my case I should get the two records shown below.
id status
--------------
1 S
2 R
How can i do that? Please help me.
Thanks!
-- One row per id, carrying that id's highest-priority status (S first, then W, then R).
select
  id,
  max(status) keep (
    dense_rank first
    -- explicit ranks; any other status ranks last instead of ahead of 'S'
    order by case status when 'S' then 1 when 'W' then 2 when 'R' then 3 else 4 end
  ) as status
from TableA
group by id
order by id
fiddle
-- Rank the rows of each id by status priority (S, W, R, anything else) and keep the top one.
select id, status
from (
  select a.*,
         row_number() over (
           partition by a.id
           order by case a.status
                      when 'S' then 1
                      when 'W' then 2
                      when 'R' then 3
                      else 4
                    end
         ) as row_no
  from TableA a
)
where row_no = 1
This is first thing i would do, but there may be a better way.
-- One row per id with its highest-priority status.
-- The inner query encodes each status as a number (S=3 > W=2 > R=1) and keeps the
-- maximum per id; the outer query decodes it with the SAME mapping.
Select id, case when prio=3 then 'S'
                when prio=2 then 'W'
                when prio=1 then 'R' end as status
from(
  select id, max(case when status='S' then 3
                      when status='W' then 2
                      when status='R' then 1
                 end) prio
  from tableA
  group by id
);
To get it done you can write a similar query:
-- sample of data from your question
SQL> with t1(id , status) as (
2 select 1, 'R' from dual union all
3 select 1, 'S' from dual union all
4 select 1, 'W' from dual union all
5 select 2, 'R' from dual
6 )
7 select id -- actual query
8 , status
9 from ( select id
10 , status
11 , row_number() over(partition by id -- numbers the rows of each id by priority
12 order by case
13 when upper(status) = 'S' -- upper() makes the comparison case-insensitive
14 then 1
15 when upper(status) = 'W'
16 then 2
17 when upper(status) = 'R'
18 then 3
19 end -- no ELSE: any other status yields NULL
20 ) as rn
21 from t1
22 ) q
23 where q.rn = 1 -- keep the best-ranked row of each id
24 ;
ID STATUS
---------- ------
1 S
2 R
-- One row per id with its highest-priority status: st encodes the priority (S=1, W=2, R=3)
-- and the IN list holds each id's smallest st.
-- GROUP BY id is required: selecting id next to MIN(st) without it raises ORA-00937.
select id,status from
(select id,status,decode(status,'S',1,'W',2,'R',3) st from table) where (id,st) in
(select id,min(st) from (select id,status,decode(status,'S',1,'W',2,'R',3) st from table) group by id)
Something like this???
SQL> with xx as(
2 select 1 id, 'R' status from dual UNION ALL
3 select 1, 'S' from dual UNION ALL
4 select 1, 'W' from dual UNION ALL
5 select 2, 'R' from dual
6 )
7 select
8 id,
9 DECODE(
10 MIN( -- smallest number = highest priority within the id
11 DECODE(status,'S',1,'W',2,'R',3) -- encode the status as its priority number
12 ),
13 1,'S',2,'W',3,'R') "status" -- decode the winning number back to its letter
14 from xx
15 group by id;
ID s
---------- -
1 S
2 R
Here, logic is quite simple.
Do a DECODE for setting the 'Priority', then find the MIN (i.e. one with Higher Priority) value and again DECODE it back to get its 'Status'
Using MOD() example with added values:
-- Numbers the de-duplicated sample rows by id and blanks out val wherever
-- id equals (row number mod 2); rows whose distinct_val is NULL are then dropped.
-- NOTE(review): ROW_NUMBER() OVER (ORDER BY id) does not fix the order of rows that share
-- an id, so which val survives for id 1 depends on the order the rows happen to arrive in.
-- NOTE(review): the S/W/R priority from the question is not referenced anywhere in this query.
SELECT id, val, distinct_val
FROM
(
SELECT id, val
, ROW_NUMBER() OVER (ORDER BY id) row_seq
-- 1 on odd row numbers, 0 on even ones (despite the alias)
, MOD(ROW_NUMBER() OVER (ORDER BY id), 2) even_row
-- NULL only where id equals that 0/1 value, i.e. for id 1 on odd-numbered rows in this data
, (CASE WHEN id = MOD(ROW_NUMBER() OVER (ORDER BY id), 2) THEN NULL ELSE val END) distinct_val
FROM
(
-- UNION (not UNION ALL) removes the repeated (1, 'W') row
SELECT 1 id, 'R' val FROM dual
UNION
SELECT 1 id, 'S' val FROM dual
UNION
SELECT 1 id, 'W' val FROM dual
UNION
SELECT 2 id, 'R' val FROM dual
UNION -- comment below for orig data
SELECT 3 id, 'K' val FROM dual
UNION
SELECT 4 id, 'G' val FROM dual
UNION
SELECT 1 id, 'W' val FROM dual
))
WHERE distinct_val IS NOT NULL
/
ID VAL DISTINCT_VAL
--------------------------
1 S S
2 R R
3 K K
4 G G