How to update sql records by comparing duplicate strings?

How to update sql records by comparing duplicate strings? - sql

I have a table below:
id | echantillon_dta | Est_en_double
1 | Bonjour | null
2 | Bonjour | null
3 | Bonjour | null
4 | Joke | null
5 | Joke | null
6 | | null
And after process query will show below:
id | echantillon_dta | Est_en_double
1 | Bonjour | 1
2 | Bonjour | 1
3 | Bonjour | 1
4 | Joke | 4
5 | Joke | 4
6 | | null
How to compare string vesus string? And how to update column like so?

you can use update by min(id) when Record_Details is same.
and there is a misdescription :
6 | Nope | 6 //No duplicates found, stay null
id 6 is no duplicate but isDuplicate column value is 6,shouldn't it be null?
so i use having count(1) > 1 to slove it.
CREATE TABLE Table1
("id" int, "Record_Details" varchar2(11), "isDuplicate" varchar2(4))
;
INSERT ALL
INTO Table1 ("id", "Record_Details", "isDuplicate")
VALUES (1, 'Hello World', NULL)
INTO Table1 ("id", "Record_Details", "isDuplicate")
VALUES (2, 'Hello World', NULL)
INTO Table1 ("id", "Record_Details", "isDuplicate")
VALUES (3, 'Hello World', NULL)
INTO Table1 ("id", "Record_Details", "isDuplicate")
VALUES (4, 'Joke', NULL)
INTO Table1 ("id", "Record_Details", "isDuplicate")
VALUES (5, 'Joke', NULL)
INTO Table1 ("id", "Record_Details", "isDuplicate")
VALUES (6, 'Nope', NULL)
SELECT * FROM dual
;
update (
select T.*
, (select min("id")
from Table1 Tmp
where Tmp."Record_Details" = T."Record_Details"
group by Tmp."Record_Details" having count(1) > 1 --No duplicates found, stay null
) as "new_isDuplicate"
from Table1 T
)
set "isDuplicate" = "new_isDuplicate"
6 rows affected
select * from Table1
id | Record_Details | isDuplicate
-: | :------------- | :----------
1 | Hello World | 1
2 | Hello World | 1
3 | Hello World | 1
4 | Joke | 4
5 | Joke | 4
6 | Nope | null
db<>fiddle here

You seem to want the minimum id with the same record_details.
This should work:
select t.*,
min(id) over (partition by record_details) as isDuplicate
from t;
If you want this as an update, a correlated subquery is a simple approach:
update t
set isduplicate = (select min(t2.id)
from t t2
where t2.record_details = t.record_details
);

You can use a MERGE statement and analytic function to find the duplicates:
Oracle Setup:
CREATE TABLE Table_name ( id, Record_Details, isDuplicate ) AS
SELECT 1, 'Hello World', CAST( NULL AS NUMBER ) FROM DUAL UNION ALL
SELECT 2, 'Hello World', NULL FROM DUAL UNION ALL
SELECT 3, 'Hello World', NULL FROM DUAL UNION ALL
SELECT 4, 'Joke', NULL FROM DUAL UNION ALL
SELECT 5, 'Joke', NULL FROM DUAL UNION ALL
SELECT 6, 'Nope', NULL FROM DUAL;
Merge:
MERGE INTO table_name dst
USING (
SELECT ROWID rid,
MIN( id ) OVER ( PARTITION BY Record_details ) AS dupe_id
FROM table_name
) src
ON (
dst.ROWID = src.RID
AND dst.id <> src.dupe_id -- remove this line if you want to update all rows
)
WHEN MATCHED THEN
UPDATE SET isDuplicate = dupe_id;
Output:
ID | RECORD_DETAILS | ISDUPLICATE
-: | :------------- | ----------:
1 | Hello World | null
2 | Hello World | 1
3 | Hello World | 1
4 | Joke | null
5 | Joke | 4
6 | Nope | null
db<>fiddle here

Related

Condense or merge rows with null values not using group by

Let's say I have a select which returns the following Data:
select nr, name, val_1, val_2, val_3
from table
Nr. | Name | Value 1 | Value 2 | Value 3
-----+------------+---------+---------+---------
1 | Max | 123 | NULL | NULL
1 | Max | NULL | 456 | NULL
1 | Max | NULL | NULL | 789
9 | Lisa | 1 | NULL | NULL
9 | Lisa | 3 | NULL | NULL
9 | Lisa | NULL | NULL | Hello
9 | Lisa | 9 | NULL | NULL
I'd like to condense the rows down to the bare minimum with.
I want the following result:
Nr. | Name | Value 1 | Value 2 | Value 3
-----+------------+---------+---------+---------
1 | Max | 123 | 456 | 789
9 | Lisa | 1 | NULL | Hello
9 | Lisa | 3 | NULL | NULL
9 | Lisa | 9 | NULL | NULL
For condensing the rows with Max (Nr. 1) a group by of the max values would help.
select nr, name, max(val_1), max(val_2), max(val_3)
from table
group by nr, name
But I am unsure how to get the desired results for Lisa (Nr. 9). The row for Lisa contains a value in the Value 3 column, in this example it's condensed with the first row that matches Nr and Name and has a Null value in Value 3.
I'm thankful for every input!

Basic principle is same as Vladimir's solution. This uses UNPIVOT and PIVOT
with cte as
(
select nr, name, col, val,
rn = row_number() over(partition by nr, name, col order by val)
from [table]
unpivot
(
val
for col in (val_1, val_2, val_3)
) u
)
select *
from (
select nr, name, rn, col, val
from cte
) d
pivot
(
max (val)
for col in ([val_1], [val_2], [val_3])
) p

Here is one way to do it. Assign a unique row number for each column by sorting them in such a way that NULLs come last and then join them back together using these row numbers and remove rows with all NULLs.
Run just the CTE first and examine the intermediate result to understand how it works.
Sample data
DECLARE #T TABLE (Nr varchar(10), Name varchar(10), V1 varchar(10), V2 varchar(10), V3 varchar(10));
INSERT INTO #T VALUES
('1', 'Max ', '123' , NULL , NULL ),
('1', 'Max ', NULL , '456', NULL ),
('1', 'Max ', NULL , NULL , '789'),
('9', 'Lisa', '1' , NULL , NULL ),
('9', 'Lisa', '3' , NULL , NULL ),
('9', 'Lisa', NULL , NULL , 'Hello'),
('9', 'Lisa', '9' , NULL , NULL );
Query
WITH CTE
AS
(
SELECT
Nr
,Name
,V1
,V2
,V3
-- here we use CASE WHEN V1 IS NULL THEN 1 ELSE 0 END to put NULLs last
,ROW_NUMBER() OVER (PARTITION BY Nr ORDER BY CASE WHEN V1 IS NULL THEN 1 ELSE 0 END, V1) AS rn1
,ROW_NUMBER() OVER (PARTITION BY Nr ORDER BY CASE WHEN V2 IS NULL THEN 1 ELSE 0 END, V2) AS rn2
,ROW_NUMBER() OVER (PARTITION BY Nr ORDER BY CASE WHEN V3 IS NULL THEN 1 ELSE 0 END, V3) AS rn3
FROM #T AS T
)
SELECT
T1.Nr
,T1.Name
,T1.V1
,T2.V2
,T3.V3
FROM
CTE AS T1
INNER JOIN CTE AS T2 ON T2.Nr = T1.Nr AND T2.rn2 = T1.rn1
INNER JOIN CTE AS T3 ON T3.Nr = T1.Nr AND T3.rn3 = T1.rn1
WHERE
T1.V1 IS NOT NULL
OR T2.V2 IS NOT NULL
OR T3.V3 IS NOT NULL
ORDER BY
T1.Nr, T1.rn1
;
Result
+----+------+-----+------+-------+
| Nr | Name | V1 | V2 | V3 |
+----+------+-----+------+-------+
| 1 | Max | 123 | 456 | 789 |
| 9 | Lisa | 1 | NULL | Hello |
| 9 | Lisa | 3 | NULL | NULL |
| 9 | Lisa | 9 | NULL | NULL |
+----+------+-----+------+-------+

Conditional operations without using SWITCH CASE

I have a couple of complex tables. But their mapping is something like:
TABLE_A:
_________________________________
| LINK_ID | TYPE_ID |
_________________________________
| adfasdnf23454 | TYPE 1 |
| 43fsdfsdcxsh7 | TYPE 1 |
| dfkng037sdfbd | TYPE 1 |
| sd09734fdfhsf | TYPE 2 |
| khsadf94u5dfc | TYPE 2 |
| piukvih66wfa8 | TYPE 3 |
_________________________________
TABLE_B:
_____________________________________________
| LINK_ID | CODE_ID | CODE_VALUE |
_____________________________________________
| adfasdnf23454 | 7 | test 1 |
| fgubk65esdfj7 | 6 | test 2 |
| ooogfsg354fds | 7 | test 3 |
| sd09734fdfhsf | 5 | test 4 |
_____________________________________________
The LINK_ID column links these two tables.
My requirement is to have all the records from TABLE_A checked whether they have a specific CODE_ID or not.
If the record has CODE_ID as 7 - populate CODE_VALUE in a column.
If the record has CODE_ID as 6 - populate CODE_VALUE in another column.
If the record doesn't have a CODE_ID show CODE_VALUE as null.
The catch is, TABLE_B may have records that TABLE_A don't. But the final result should contain the records of TABLE_A alone.
PS: SWITCH CASE not suggested since I would require the fields in the same row. (Please see the multiple rows for same LINK_ID in OBTAINED RESULT on using SWITCH CASE)
OBTAINED RESULT:
_______________________________________________
| LINK_ID | CODE_VALUE_1 | CODE_VALUE_1 |
_______________________________________________
| adfasdnf23454 | test 1 | null |
| adfasdnf23454 | null | test 4 |
| sd09734fdfhsf | test 6 | null |
_______________________________________________
EXPECTED RESULT:
__________________________________________________
| LINK_ID | CODE_VALUE_1 | CODE_VALUE_2 |
__________________________________________________
| adfasdnf23454 | test 1 | test 4 |
| 43fsdfsdcxsh7 | null | null |
| dfkng037sdfbd | null | null |
| sd09734fdfhsf | test 6 | null |
| khsadf94u5dfc | null | null |
| piukvih66wfa8 | null | null |
__________________________________________________
Can someone help on this?

One option uses two correlated subqueries:
select
a.link_id,
(select code_value from table_b b where b.link_id = a.link_id and b.code_id = 7) code_value_1,
(select code_value from table_b b where b.link_id = a.link_id and b.code_id = 6) code_value_2
from table_a a
Note that this assumes no duplicate (link_id, code_id) in table_b. You could also write this with two left joins - which is quite the same logic.
Another solution is a single left join, then conditional aggregation:
select
a.link_id,
max(case when b.code_id = 7 then b.code_value end) code_value_1,
max(case when b.code_id = 6 then b.code_value end) code_value_2
from table_a a
left join table_b b on b.link_id = a.link_id and b.code_id in (6, 7)
group by a.link_id

Problematic part of your question is what to do if two entries in B have same link_id and type_id. You can use min, max, last entry (but for that you need ordering column in B). Or you can list them all:
select *
from a left join b using (link_id)
pivot (listagg(code_value, ', ') within group (order by code_value)
for code_id in (6 as code6, 7 as code7))
Data:
create table a (link_id, type_id) as (
select 'a', 'TYPE 1' from dual union all
select 'b', 'TYPE 1' from dual union all
select 'c', 'TYPE 1' from dual union all
select 'd', 'TYPE 2' from dual );
create table b(LINK_ID, CODE_ID, CODE_VALUE) as (
select 'a', 6, 'test 1' from dual union all
select 'a', 7, 'test 2' from dual union all
select 'a', 7, 'test 3' from dual union all
select 'b', 7, 'test 4' from dual union all
select 'd', 6, 'test 5' from dual );
Result:
LINK_ID TYPE_ID CODE6 CODE7
a TYPE 1 test 1 test 2, test 3
b TYPE 1 test 4
c TYPE 1
d TYPE 2 test 5
dbfiddle

Add randomly selected column to result

I'd like to know if there is a more optimal query to get what I want from the database.
My database schema is as follows :
Table1:
(NUM_T1C1;ID_T1C2)
Table2:
(ID_T2C1;RES_T2C2)
First, I want to get a random row defined as:
SELECT NUM_T1C1, ID_T2C1
FROM (
SELECT T1.NUM_T1C1, T2.ID_T2C1, DBMS_RANDOM.VALUE
FROM TABLE1 T1, TABLE2 T2
WHERE T1.NUM_T1C1 IS NOT NULL
AND T2.RES_T2C2 IS NOT NULL
AND T1.ID_T1C2 = T2.ID_T2C1
ORDER BY 3
) WHERE ROWNUM = 1
I will call this query: QUERY1.
My question is as follows, I want to get a row as (random NUM_T1C1 != QUERY1.NUM_T1C1; ID_T2C1 == QUERY1.ID_T2C1), so I have tried:
SELECT NUM_T1C1, ID_T2C1
FROM (
SELECT R2.NUM_T1C1, R1.ID_T2C1, DBMS_RANDOM.VALUE
FROM (
SELECT NUM_T1C1, ID_T2C1
FROM (
SELECT T1.NUM_T1C1, T2.ID_T2C1, DBMS_RANDOM.VALUE
FROM TABLE1 T1, TABLE2 T2
WHERE T1.NUM_T1C1 IS NOT NULL
AND T2.RES_T2C2 IS NOT NULL
AND T1.ID_T1C2 = T2.ID_T2C1
ORDER BY 3
) WHERE ROWNUM = 1
) R1, TABLE1 R2
WHERE R2.NUM_T1C1 <> R1.NUM_T1C1
ORDER BY 3
) WHERE ROWNUM = 1
It's working, but I think this is not the optimal way to do so.
Is there a better way to get the expected result?
EDIT:
I found another way to get those random rows but i still don't know if it's optimal:
SELECT NUM_T1C1, ID_T2C1
FROM (
SELECT R1.NUM_T1C1, R2.ID_T2C1
FROM
(
SELECT T1.NUM_T1C1, T2.ID_T2C1, DBMS_RANDOM.VALUE
FROM TABLE1 T1, TABLE2 T2
WHERE T1.NUM_T1C1 IS NOT NULL
AND T2.T2C2 IS NULL
AND T1.ID_T1C2 = T2.ID_T2C1
ORDER BY 3
) R1,
(
SELECT T2.ID_T2C1, DBMS_RANDOM.VALUE
FROM TABLE1 T1, TABLE2 T2
WHERE T1.NUM_T1C1 IS NOT NULL
AND T2.T2C2 IS NOT NULL
AND T1.ID_T1C2 = T2.ID_T2C1
ORDER BY 2
) R2
) WHERE ROWNUM = 1
Here is an example :
Table1 Table2
+----------+---------+ +---------+----------+
| NUM_T1C1 | ID_T1C2 | | ID_T2C1 | RES_T2C2 |
+----------+---------+ +---------+----------+
| 23 | 5 | | 9 | NULL |
| 521 | 4 | | 4 | DG_513 |
| 71 | 7 | | 7 | FN_731 |
| 97 | 9 | | 5 | NULL |
+----------+---------+ +---------+----------+
Result would be one of those (select one randomly) :
+----------+---------+
| NUM_T1C1 | ID_T2C1 |
+----------+---------+
| 23 | 4 |
| 23 | 7 |
| 97 | 4 |
| 97 | 7 |
+----------+---------+

Here is one way with only a single (cross) join:
SQL Fiddle
Oracle 11g R2 Schema Setup:
CREATE TABLE Table1 ( NUM_T1C1, ID_T1C2 ) AS
SELECT 23, 5 FROM DUAL UNION ALL
SELECT 521, 4 FROM DUAL UNION ALL
SELECT 71, 7 FROM DUAL UNION ALL
SELECT 97, 9 FROM DUAL;
CREATE TABLE Table2 ( ID_T2C1, RES_T2C2 ) As
SELECT 9, NULL FROM DUAL UNION ALL
SELECT 4, 'DG_513' FROM DUAL UNION ALL
SELECT 7, 'FN_731' FROM DUAL UNION ALL
SELECT 5, NULL FROM DUAL;
Query 1:
SELECT NUM_T1C1, ID_T2C1
FROM (
SELECT NUM_T1C1,
ID_T2C1,
COUNT( CASE WHEN T1.ID_T1C2 = T2.ID_T2C1 THEN 1 END )
OVER ( PARTITION BY T1.NUM_T1C1 ) AS num_matches
FROM Table1 T1
CROSS JOIN
(
SELECT *
FROM Table2
WHERE RES_T2C2 IS NOT NULL
) T2
ORDER BY DBMS_RANDOM.VALUE
)
WHERE num_matches = 0
--AND ROWNUM = 1
Results:
| NUM_T1C1 | ID_T2C1 |
|----------|---------|
| 23 | 7 |
| 97 | 4 |
| 97 | 7 |
| 23 | 4 |

Select distinct one field other first non empty or null

I have table
| Id | val |
| --- | ---- |
| 1 | null |
| 1 | qwe1 |
| 1 | qwe2 |
| 2 | null |
| 2 | qwe4 |
| 3 | qwe5 |
| 4 | qew6 |
| 4 | qwe7 |
| 5 | null |
| 5 | null |
is there any easy way to select distinct 'id' values with first non null 'val' values. if not exist then null. for example
result should be
| Id | val |
| --- | ---- |
| 1 | qwe1 |
| 2 | qwe4 |
| 3 | qwe5 |
| 4 | qew6 |
| 5 | null |

In your case a simple GROUP BY should be the solution:
SELECT Id
,MIN(val)
FROM dbo.mytable
GROUP BY Id
Whenever using a GROUP BY, you have to use an aggregate function on all columns, which are not listed in the GROUP BY.
If an Id has a value (val) other than NULL, this value will be returned.
If there are just NULLs for the Id, NULL will be returned.
As far as i unterstood (regarding your comment), this is exactly what you're going to approach.
If you always want to have "the first" value <> NULL, you'll need another sort criteria (like a timestamp column) and might be able to solve it with a WINDOW-function.

If you want the first non-NULL value (where "first" is based on id), then MIN() doesn't quite do it. Window functions do:
select t.*
from (select t.*,
row_number() over (partition by id
order by (case when val is not null then 1 else 2 end),
id
) as seqnum
from t
) t
where seqnum = 1;

SQL Fiddle:
Create Table from SQL Fiddle:
CREATE TABLE tab1(pid integer, id integer, val varchar(25))
Insert dummy records :
insert into tab1
values (1, 1 , null),
(2, 1 , 'qwe1' ),
(3, 1 , 'qwe2'),
(4, 2 , null ),
(5, 2 , 'qwe4' ),
(6, 3 , 'qwe5' ),
(7, 4 , 'qew6' ),
(8, 4 , 'qwe7' ),
(9, 5 , null ),
(10, 5 , null );
fire below query:
SELECT Id ,MIN(val) as val FROM tab1 GROUP BY Id;

SELECT inherit values from parent in a hierarchy

I'm trying to acheive through T-SQL (in a stored procedure) a way to copy a value from a parent into the child when retrieving rows. Here is some example data:
DROP TABLE TEST_LEVELS
CREATE TABLE TEST_LEVELS(
ID INT NOT NULL
,VALUE INT NULL
,PARENT_ID INT NULL
,LEVEL_NO INT NOT NULL
)
INSERT INTO TEST_LEVELS (ID, VALUE, PARENT_ID, LEVEL_NO) VALUES (1, 10000, NULL, 1)
INSERT INTO TEST_LEVELS (ID, VALUE, PARENT_ID, LEVEL_NO) VALUES (2, NULL, 1, 2)
INSERT INTO TEST_LEVELS (ID, VALUE, PARENT_ID, LEVEL_NO) VALUES (3, NULL, 2, 3)
INSERT INTO TEST_LEVELS (ID, VALUE, PARENT_ID, LEVEL_NO) VALUES (4, 20000, NULL, 1)
INSERT INTO TEST_LEVELS (ID, VALUE, PARENT_ID, LEVEL_NO) VALUES (5, NULL, 4, 2)
INSERT INTO TEST_LEVELS (ID, VALUE, PARENT_ID, LEVEL_NO) VALUES (6, 25000, 5, 3)
INSERT INTO TEST_LEVELS (ID, VALUE, PARENT_ID, LEVEL_NO) VALUES (7, NULL, 6, 4)
Selecting the data as follows:
SELECT ID, VALUE, LEVEL_NO
FROM TEST_LEVELS
results in:
+----+-------+----------+
| ID | VALUE | LEVEL_NO |
+----+-------+----------+
| 1 | 10000 | 1 |
| 2 | NULL | 2 |
| 3 | NULL | 3 |
| 4 | 20000 | 1 |
| 5 | NULL | 2 |
| 6 | 25000 | 3 |
| 7 | NULL | 4 |
+----+-------+----------+
But I need something like this (values are inherited by the parent):
+----+-------+----------+
| ID | VALUE | LEVEL_NO |
+----+-------+----------+
| 1 | 10000 | 1 |
| 2 | 10000 | 2 |
| 3 | 10000 | 3 |
| 4 | 20000 | 1 |
| 5 | 20000 | 2 |
| 6 | 25000 | 3 |
| 7 | 25000 | 4 |
+----+-------+----------+
Can this be achieved without using cursors (it must also run on SQL Server 2005)?

Use:
;with cte
as
(
select t.ID, t.VALUE, t.PARENT_ID, t.LEVEL_NO
from #t t
where t.Value is not null
union all
select t.ID, c.Value, t.PARENT_ID, t.LEVEL_NO
from cte c
join #t t on t.PARENT_ID = c.ID
where t.Value is null
)
select c.ID, c.Value, c.LEVEL_NO
from cte c
order by c.ID
Output:
ID Value LEVEL_NO
----------- ----------- -----------
1 10000 1
2 10000 2
3 10000 3
4 20000 1
5 20000 2
6 25000 3
7 25000 4

Maybe something like this:
;WITH cte_name(ID,VALUE,PARENT_ID,LEVEL_NO)
AS
(
SELECT
tbl.ID,
tbl.VALUE,
tbl.PARENT_ID,
tbl.LEVEL_NO
FROM
TEST_LEVELS AS tbl
WHERE
tbl.PARENT_ID IS NULL
UNION ALL
SELECT
tbl.ID,
ISNULL(tbl.VALUE,cte_name.VALUE),
tbl.PARENT_ID,
tbl.LEVEL_NO
FROM
cte_name
JOIN TEST_LEVELS AS tbl
ON cte_name.ID=tbl.PARENT_ID
)
SELECT
*
FROM
cte_name
ORDER BY
ID

One way to do it:
SELECT T.ID,
case when T.VALUE IS NULL
THEN (SELECT A.VALUE FROM TEST_LEVELS A WHERE A.ID = T.PARENT_ID)
ELSE T.VALUE
END,
T.LEVEL_NO
FROM TEST_LEVELS T

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

How to update sql records by comparing duplicate strings? - sql

Related

Condense or merge rows with null values not using group by

Conditional operations without using SWITCH CASE

Add randomly selected column to result

Select distinct one field other first non empty or null

SELECT inherit values from parent in a hierarchy

Categories

Resources