SQL Self Join Update - sql

I need to procedurally update a table by self-joining itself. Using SQL Server 2019.
-- Sample table: a row may point at another row through Section_Id_1.
CREATE TABLE Sect
(
    Section_Id   INT,
    Locale       VARCHAR(10),
    Record_Id    INT,
    Section_Id_1 INT
);

-- One row that has a Record_Id and two rows that reference it but have none yet.
INSERT INTO Sect (Section_Id, Locale, Record_Id, Section_Id_1)
VALUES
    (100, 'US', 1,    NULL),
    (101, 'CA', NULL, 100),
    (101, 'MD', NULL, 100);
The goal is to update the null values of Record_Id where Section_Id_1 equals Section_ID with the matching Record_ID.
This is the intended result:
100|US|1|Null
101|CA|1|100
101|MD|1|100
I think I am close with:
-- For rows without a Record_Id, take the Record_Id of the row whose
-- Section_Id equals this row's Section_Id_1.
UPDATE child
SET Record_Id = parent.Record_Id
FROM Sect AS child
INNER JOIN Sect AS parent
    ON parent.Section_Id = child.Section_Id_1
WHERE child.Record_Id IS NULL
I appreciate all your help.

I'm not familiar with SQL Server 2019. But I think you don't need to use INNER JOIN, try this.
-- Fill each missing Record_Id with a non-null Record_Id taken from the row
-- that Section_Id_1 points to. Written for SQL Server: the target alias is
-- declared in the FROM clause and TOP (1) is used because T-SQL has no LIMIT.
UPDATE t1
SET Record_Id = (
        SELECT TOP (1) t2.Record_Id
        FROM Sect AS t2
        WHERE t2.Section_Id = t1.Section_Id_1
          AND t2.Record_Id IS NOT NULL
    )
FROM Sect AS t1
WHERE t1.Record_Id IS NULL
  AND t1.Section_Id_1 IS NOT NULL;

Related

SQL inner join condition by actual date

I need to join actual document to people. Documents has date issued (passport, for example).
SQL Fiddle: http://sqlfiddle.com/#!9/3a8118/2/0
Table structure:
-- People who own documents.
CREATE TABLE people
(
    p_id   INT         NOT NULL AUTO_INCREMENT,
    p_name VARCHAR(50) NOT NULL,
    PRIMARY KEY (p_id)
);

INSERT INTO people (p_id, p_name)
VALUES
    (1, 'Name_1'),
    (2, 'Name_2');

-- Documents issued to people. d_people is matched against people.p_id in the
-- queries below; d_date is stored as 'DD.MM.YYYY' text.
CREATE TABLE documents
(
    d_id     INT         NOT NULL AUTO_INCREMENT,
    d_people INT(10)     NOT NULL,
    d_date   VARCHAR(10) NOT NULL,
    PRIMARY KEY (d_id)
);

INSERT INTO documents (d_id, d_people, d_date)
VALUES
    (1, 1, '01.01.2022'),
    (2, 2, '01.12.2021'),
    (3, 1, '05.02.2022'),
    (4, 1, '10.02.2022'),
    (5, 2, '04.01.2022'),
    (6, 1, '20.01.2022');
Query: the condition is to select the document that was current on 21.01.2022 (the latest one issued on or before that date); for p_id = 1 it must return d_id = 6:
SELECT *
FROM people
INNER JOIN documents ON d_people = p_id
WHERE p_id = 1 AND ??? d_date 21.01.2022 ???
;
I need to do an inner join to return only this row:
use this query:
Fiddle
-- Return the document that was current on 21.01.2022 for person 1: the most
-- recent document issued on or before that day. d_date holds 'DD.MM.YYYY'
-- text, so it is converted with STR_TO_DATE before comparing and sorting;
-- comparing the raw strings would order by day-of-month first.
SELECT
    p.p_id,
    p.p_name,
    d.d_id,
    d.d_people,
    d.d_date
FROM people AS p
INNER JOIN documents AS d
    ON d.d_people = p.p_id
WHERE p.p_id = 1
  AND STR_TO_DATE(d.d_date, '%d.%m.%Y') <= STR_TO_DATE('21.01.2022', '%d.%m.%Y')
ORDER BY STR_TO_DATE(d.d_date, '%d.%m.%Y') DESC, d.d_id DESC
LIMIT 1;

SQL query takes more than an hour to execute for 200k rows

I have two tables each with around 200,000 rows. I have run the query below and it still hasn't completed after running for more than an hour. What could be the explanation for this?
-- Pair rows of [new] and [old] on all four key columns and keep the pairs
-- whose Value differs.
SELECT
    n.[colom1],
    n.[colom2],
    n.[colom3],
    n.[colom4],
    n.[Value] AS 'nieuwe Value',
    o.[Value] AS 'oude Value'
FROM dbo.[new] AS n
INNER JOIN dbo.[old] AS o
    ON  n.[colom1] = o.[colom1]
    AND n.[colom2] = o.[colom2]
    AND n.[colom3] = o.[colom3]
    AND n.[colom4] = o.[colom4]
WHERE n.[Value] <> o.[Value]
From a comment:
It seems that for an equality join on a single column, the rows with NULL value in the join key are being filtered out, but this is not the case for joins on multiple columns.
As a result, the hash join complexity is changed from O(N) to O(N^2).
======================================================================
In that context I would like to recommend a great article written by Paul White on similar issues -
Hash Joins on Nullable Columns
======================================================================
I have generated a small simulation of this use-case and I encourage you to test your solutions.
-- Two tables whose join keys are both nullable.
CREATE TABLE mytab1 (c1 INT NULL, c2 INT NULL);
CREATE TABLE mytab2 (c1 INT NULL, c2 INT NULL);

-- digits yields 1..10; five cross-joined copies produce 100,000 all-NULL rows.
WITH digits (n) AS (
    SELECT 1
    UNION ALL
    SELECT n + 1 FROM digits WHERE n < 10
)
INSERT INTO mytab1
SELECT NULL, NULL
FROM digits AS d0
CROSS JOIN digits AS d1
CROSS JOIN digits AS d2
CROSS JOIN digits AS d3
CROSS JOIN digits AS d4;

-- Mirror the all-NULL rows into the second table.
INSERT INTO mytab2
SELECT NULL, NULL
FROM mytab1;

-- A single pair of rows that actually matches on both keys.
INSERT INTO mytab1 VALUES (111, 222);
INSERT INTO mytab2 VALUES (111, 222);

-- The two-column equality join being demonstrated.
SELECT *
FROM mytab1 AS t1
INNER JOIN mytab2 AS t2
    ON  t1.c1 = t2.c1
    AND t1.c2 = t2.c2
For the OP query we should remove rows with NULL values in any of the join key columns.
-- Same comparison of [new] against [old], with rows that have a NULL in any
-- join key column filtered out on both sides.
SELECT
    n.[colom1],
    n.[colom2],
    n.[colom3],
    n.[colom4],
    n.[Value] AS 'nieuwe Value',
    o.[Value] AS 'oude Value'
FROM dbo.[new] AS n
INNER JOIN dbo.[old] AS o
    ON  n.[colom1] = o.[colom1]
    AND n.[colom2] = o.[colom2]
    AND n.[colom3] = o.[colom3]
    AND n.[colom4] = o.[colom4]
WHERE n.[Value] <> o.[Value]
  AND n.[colom1] IS NOT NULL
  AND n.[colom2] IS NOT NULL
  AND n.[colom3] IS NOT NULL
  AND n.[colom4] IS NOT NULL
  AND o.[colom1] IS NOT NULL
  AND o.[colom2] IS NOT NULL
  AND o.[colom3] IS NOT NULL
  AND o.[colom4] IS NOT NULL
Using EXCEPT join, you only have to make the larger HASH join on those values that have changed, so much faster:
/*
create table [new] ( colom1 int, colom2 int, colom3 int, colom4 int, [value] int)
create table [old] ( colom1 int, colom2 int, colom3 int, colom4 int, [value] int)
insert old values (1,2,3,4,10)
insert old values (1,2,3,5,10)
insert old values (1,2,3,6,10)
insert old values (1,2,3,7,10)
insert old values (1,2,3,8,10)
insert old values (1,2,3,9,10)
insert new values (1,2,3,4,11)
insert new values (1,2,3,5,10)
insert new values (1,2,3,6,11)
insert new values (1,2,3,7,10)
insert new values (1,2,3,8,10)
insert new values (1,2,3,9,11)
*/
-- Join [new] to [old] on the four key columns, restricted to the keys of rows
-- that exist in [new] but not (with the same value) in [old].
SELECT
    n.colom1,
    n.colom2,
    n.colom3,
    n.colom4,
    n.[value] AS newvalue,
    o.[value] AS oldvalue
FROM [new] AS n
INNER JOIN [old] AS o
    ON  n.colom1 = o.colom1
    AND n.colom2 = o.colom2
    AND n.colom3 = o.colom3
    AND n.colom4 = o.colom4
INNER JOIN
(
    -- key/value combinations present in [new] and absent from [old]
    SELECT colom1, colom2, colom3, colom4, [value] FROM [new]
    EXCEPT
    SELECT colom1, colom2, colom3, colom4, [value] FROM [old]
) AS changed
    ON  n.colom1 = changed.colom1
    AND n.colom2 = changed.colom2
    AND n.colom3 = changed.colom3
    AND n.colom4 = changed.colom4

copy a column from one table to another where table1.col = table2.col

Suppose there are two tables which have the data mentioned in the insert query. There is no foreign key references between the two table.
-- Target table: DESC is inserted as NULL for both rows and is the column to be filled.
create table uref.slave (
SLAVE_ID SMALLINT NOT NULL PRIMARY KEY,
DESC VARCHAR(20)
);
INSERT INTO uref.SLAVE values (1, null)
INSERT INTO uref.SLAVE values (2, null)
-- Source table: SLAVE_ID carries the matching uref.slave key by value only
-- (no foreign key is declared); DESC holds the text to copy.
create table uref.master (
MASTER_ID SMALLINT NOT NULL PRIMARY KEY,
SLAVE_ID SMALLINT,
DESC VARCHAR(20)
);
INSERT INTO uref.MASTER values (1,1,'value1')
INSERT INTO uref.MASTER values (2,2,'value2')
Now I need a query which will copy uref.master.DESC into uref.slave.DESC based on uref.master.SLAVE_ID = uref.slave.SLAVE_ID.
The simplest solution may be to use MERGE.
-- Copy each master description onto the slave row with the same SLAVE_ID.
MERGE INTO uref.SLAVE tgt
USING uref.MASTER src
    ON (tgt.SLAVE_ID = src.SLAVE_ID)
WHEN MATCHED THEN
    UPDATE SET Desc = src.Desc
It could be refined to update only when there is a change to be made
-- Copy the master description onto the matching slave row, but only when the
-- two values actually differ. "<>" is UNKNOWN when either side is NULL, so
-- both NULL/non-NULL combinations are listed explicitly.
MERGE INTO uref.SLAVE s
USING uref.MASTER m
    ON (s.SLAVE_ID = m.SLAVE_ID)
WHEN MATCHED
    AND (   s.Desc <> m.Desc
         OR (s.Desc IS NULL AND m.Desc IS NOT NULL)
         OR (s.Desc IS NOT NULL AND m.Desc IS NULL)
        )
THEN UPDATE SET Desc = m.Desc
-- Copy the master description onto every slave row whose description differs
-- from it. The difference test is spelled out for NULLs: a predicate such as
-- "NOT t1.Desc = t2.Desc" is UNKNOWN whenever t1.Desc is NULL, so pairing it
-- with "t1.Desc IS NULL" can never select a row.
UPDATE uref.SLAVE t1
SET Desc =
(
    SELECT t2.Desc
    FROM uref.MASTER t2
    WHERE t1.SLAVE_ID = t2.SLAVE_ID
)
WHERE EXISTS
(
    SELECT *
    FROM uref.MASTER t2
    WHERE t1.SLAVE_ID = t2.SLAVE_ID
      AND (   t1.Desc <> t2.Desc
           OR (t1.Desc IS NULL AND t2.Desc IS NOT NULL)
           OR (t1.Desc IS NOT NULL AND t2.Desc IS NULL)
          )
)
If you are using SQL Server, try the SQL below (double-check the table and field names):
-- Table variables standing in for the slave/master tables. T-SQL table
-- variables are named with "@"; "#" names are temp tables and cannot be
-- created with DECLARE.
DECLARE @urefSlave TABLE (
    SLAVE_ID SMALLINT,
    [DESC]   VARCHAR(20)
);
INSERT INTO @urefSlave (SLAVE_ID, [DESC]) VALUES (1, NULL);
INSERT INTO @urefSlave (SLAVE_ID, [DESC]) VALUES (2, NULL);

DECLARE @urefMaster TABLE (
    MASTER_ID SMALLINT,
    SLAVE_ID  SMALLINT,
    [DESC]    VARCHAR(20)
);
INSERT INTO @urefMaster (MASTER_ID, SLAVE_ID, [DESC]) VALUES (1, 1, 'value1');
INSERT INTO @urefMaster (MASTER_ID, SLAVE_ID, [DESC]) VALUES (2, 2, 'value2');

-- State before the update.
SELECT * FROM @urefMaster;
SELECT * FROM @urefSlave;

-- Copy the description across; the target is named by its FROM-clause alias.
UPDATE a
SET [DESC] = b.[DESC]
FROM @urefSlave AS a
INNER JOIN @urefMaster AS b
    ON a.SLAVE_ID = b.SLAVE_ID;

-- State after the update.
SELECT * FROM @urefSlave;
Result:
MASTER_ID SLAVE_ID DESC
--------- -------- --------------------
1 1 value1
2 2 value2
SLAVE_ID DESC
-------- --------------------
1 value1
2 value2
Updated
I can't help much with DB2 because I don't have the tools to run that syntax,
but this link (db2 update help) has
an example that you can modify to meet your requirement:
-- Set SALARY and COMM of employee '000120' to the averages over all
-- employees that share that employee's WORKDEPT.
UPDATE EMPLOYEE target
SET (target.SALARY, target.COMM) = (
        SELECT AVG(peer.SALARY), AVG(peer.COMM)
        FROM EMPLOYEE peer
        WHERE peer.WORKDEPT = target.WORKDEPT
    )
WHERE target.EMPNO = '000120'
Hope this helps.

SQL Command to get all rows from a specific set of groups

Let's say I have the following table (the ID is auto-incrementing):
ID Name Serial Status
0 Pie A Fail
1 Pie A Fail
2 Pie A Pass
3 Pie B Fail
4 Pie B Pass
5 Pie C Pass
6 Pie C Fail
How can I get all the rows where the last row of each Group By (Name, Serial) is Pass?
This is the result I should get from the query. The serial C is removed since the last entry of the group by (Name, Serial) is 'Fail'
ID Name Serial Status
0 Pie A Fail
1 Pie A Fail
2 Pie A Pass
3 Pie B Fail
4 Pie B Pass
Thanks!
I would try something like this (assuming SQL Server):
-- Sample data in a table variable. T-SQL table variables are named with "@";
-- a "#" name denotes a temp table and cannot be created with DECLARE.
DECLARE @myTable AS TABLE (
    ID       INT,
    Name     VARCHAR(10),
    Serial   VARCHAR(1),
    [Status] VARCHAR(10)
);

INSERT INTO @myTable (ID, Name, Serial, [Status])
VALUES
    (0, 'Pie', 'A', 'Fail'),
    (1, 'Pie', 'A', 'Fail'),
    (2, 'Pie', 'A', 'Pass'),
    (3, 'Pie', 'B', 'Fail'),
    (4, 'Pie', 'B', 'Pass'),
    (5, 'Pie', 'C', 'Pass'),
    (6, 'Pie', 'C', 'Fail');

-- Variant 1 (NOT IN): keep every row whose Serial is not among the serials
-- whose highest-ID row has Status 'Fail'.
SELECT *
FROM @myTable
WHERE Serial NOT IN
(
    -- Get all Serial that end with a 'Fail'
    SELECT T1.Serial
    FROM @myTable AS T1
    INNER JOIN (
        -- Get Max ID for a serial
        SELECT MAX(ID) AS [ID] FROM @myTable GROUP BY Serial
    ) AS T2 ON T1.[ID] = T2.[ID]
    WHERE T1.[Status] = 'Fail'
)
ORDER BY [ID];

-- or if you prefer NOT EXISTS (which is usually faster than NOT IN):
SELECT *
FROM @myTable AS T
WHERE NOT EXISTS
(
    SELECT T1.Serial
    FROM @myTable AS T1
    INNER JOIN (
        -- Get Max ID for a serial
        SELECT MAX(ID) AS [ID] FROM @myTable GROUP BY Serial
    ) AS T2 ON T1.[ID] = T2.[ID]
    WHERE T1.[Status] = 'Fail'
      AND T1.[Serial] = T.[Serial]
)
ORDER BY [ID];
We can use CTE to improve readability by implementing as a series of sequential steps:
Get max ids
Get serials for max ids that have status 'Fail'
Remove those rows that match the serials
It would look like this:
-- Rows of myTable whose Serial does not end (highest ID) on a 'Fail' row.
-- NOT EXISTS is used for the final filter so that a NULL Serial among the
-- failed serials cannot make the whole result empty, as NOT IN would.
with maxIds as ( -- Get max Ids
    SELECT MAX(ID) AS [ID]
    FROM myTable
    GROUP BY Serial
),
serials as ( -- Get serials for max ids that have status 'Fail'
    SELECT T1.Serial
    FROM myTable AS T1
    INNER JOIN maxIds ON T1.[ID] = maxIds.[ID]
    WHERE T1.[Status] = 'Fail'
)
SELECT T.*
FROM myTable AS T
WHERE NOT EXISTS ( -- Remove serials that match
    SELECT 1
    FROM serials AS s
    WHERE s.Serial = T.Serial
)

How can I write a better multiple join that matches multiple values across rows?

I'm trying to write a SQL statement that will allow me to select a series of articles from a table based on their keywords. What I've got so far is a token table, an article table, and a many-to-many table for tokens & articles:
tokens
rowid
token
token_article
token_rowid
article_rowid
articles
rowid
What I'm doing is taking a search query, splitting it up by spaces, then select all articles that contains those keywords. So far I've come up with this:
-- Articles linked to both the 'ABC' and the 'DEF' token. Each derived table
-- lists the article links for one token; articles is joined explicitly
-- because the final condition compares against articles.rowid.
select *
from
    (select * from tokens
        inner join token_article on
            tokens.rowid = token_article.token_rowid and
            token = 'ABC'
    ) as t1
inner join
    (select * from tokens
        inner join token_article on
            tokens.rowid = token_article.token_rowid and
            token = 'DEF'
    ) as t2
    on t1.article_rowid = t2.article_rowid
inner join articles
    on t2.article_rowid = articles.rowid
Which works, but of course it's doing a select on all articles that match ABC and all articles that match DEF, then selecting them.
Now I'm trying to figure out a better way. What I imagine in my mind that would work would be to select all the articles that match ABC and from those match any with DEF. This is what I imagine it to look like but does not work (receive error message "no such columns: tokens.rowid")
-- Attempt from the question that is reported to fail with
-- "no such columns: tokens.rowid": the outer join's ON clause refers to
-- tokens.rowid from outside the innermost derived table, which has no alias.
select * from
(select * from
(select * from tokens
inner join token_article on
tokens.rowid = token_article.token_rowid and
token = 'ABC'
)
inner join token_article on
tokens.rowid = token_article.token_rowid and
token = 'DEF'
)
Because there is more than one way to do this...this method uses GROUP BY and HAVING clauses. The query is looking for all articles that have either the ABC or DEF token, but then grouping by the article ID where the count of tokens for the article is equal to the number of tokens being queried.
Note that I've used MSSQL syntax here, but the concept should work in most SQL implementations.
Edit: I should point out that this has a fairly clean syntax as you add more tokens to the query. If you add more tokens, then you just need to modify the t.token IN (...) criteria and adjust the HAVING COUNT(*) = x clause accordingly.
-- Sample data in table variables. T-SQL table variables are named with "@";
-- a "#" name denotes a temp table and cannot be created with DECLARE.
DECLARE @tokens TABLE
(
    rowid INT NOT NULL,
    token VARCHAR(255) NOT NULL
);
DECLARE @articles TABLE
(
    rowid INT NOT NULL,
    title VARCHAR(255) NOT NULL
);
DECLARE @token_article TABLE
(
    token_rowid INT NOT NULL,
    article_rowid INT NOT NULL
);

INSERT INTO @tokens (rowid, token)
VALUES (1, 'ABC'), (2, 'DEF');
INSERT INTO @articles (rowid, title)
VALUES (1, 'This is article 1.'), (2, 'This is article 2.'), (3, 'This is article 3.'),
       (4, 'This is article 4.'), (5, 'This is article 5.'), (6, 'This is article 6.');
INSERT INTO @token_article (token_rowid, article_rowid)
VALUES (1, 1), (2, 1), (1, 2), (2, 3), (1, 4), (2, 4), (1, 5), (1, 6);

-- Get the article IDs that have all of the tokens
-- Use this if you just want the IDs
-- COUNT(DISTINCT ...) counts each token once, so a duplicated link row
-- cannot make an article with only one of the tokens qualify.
SELECT a.rowid
FROM @articles AS a
INNER JOIN @token_article AS ta ON a.rowid = ta.article_rowid
INNER JOIN @tokens AS t ON ta.token_rowid = t.rowid
WHERE t.token IN ('ABC', 'DEF')
GROUP BY a.rowid
HAVING COUNT(DISTINCT t.token) = 2; -- This should match the number of tokens
-- Result:
-- rowid
-- -----------
-- 1
-- 4

-- Get the articles themselves
-- Use this if you want the articles
SELECT rowid, title
FROM @articles
WHERE rowid IN (
    SELECT a.rowid
    FROM @articles AS a
    INNER JOIN @token_article AS ta ON a.rowid = ta.article_rowid
    INNER JOIN @tokens AS t ON ta.token_rowid = t.rowid
    WHERE t.token IN ('ABC', 'DEF')
    GROUP BY a.rowid
    HAVING COUNT(DISTINCT t.token) = 2 -- This should match the number of tokens
);
rowid title
----------- ------------------
1 This is article 1.
4 This is article 4.
Here is one way to do it. The script was tested in SQL Server 2012 database.
Script:
-- Tokens; rowid is generated by IDENTITY.
CREATE TABLE dbo.tokens
(
    rowid INT         NOT NULL IDENTITY(1, 1),
    token VARCHAR(10) NOT NULL
);

-- Articles; rowid is generated by IDENTITY.
CREATE TABLE dbo.articles
(
    rowid INT         NOT NULL IDENTITY(1, 1),
    name  VARCHAR(10) NOT NULL
);

-- Link rows pairing a token rowid with an article rowid.
CREATE TABLE dbo.token_article
(
    token_rowid   INT NOT NULL,
    article_rowid INT NOT NULL
);

INSERT INTO dbo.tokens (token)
VALUES
    ('ABC'),
    ('DEF');

INSERT INTO dbo.articles (name)
VALUES
    ('Article 1'),
    ('Article 2'),
    ('Article 3');

INSERT INTO dbo.token_article (token_rowid, article_rowid)
VALUES
    (1, 2),
    (2, 3),
    (1, 3),
    (1, 1),
    (2, 2);
-- For every article linked to the 'ABC' token, list its links to the 'DEF'
-- token: start from the ABC token, follow its article links, then follow
-- those articles' links back to a token that must be 'DEF'.
SELECT
    abc_token.rowid,
    abc_token.token,
    abc_link.token_rowid,
    abc_link.article_rowid,
    def_link.token_rowid,
    def_link.article_rowid,
    def_token.rowid,
    def_token.token
FROM dbo.tokens AS abc_token
INNER JOIN dbo.token_article AS abc_link
    ON abc_link.token_rowid = abc_token.rowid
INNER JOIN dbo.token_article AS def_link
    ON def_link.article_rowid = abc_link.article_rowid
INNER JOIN dbo.tokens AS def_token
    ON def_token.rowid = def_link.token_rowid
WHERE abc_token.token = 'ABC'
  AND def_token.token = 'DEF';
Output:
rowid token token_rowid article_rowid token_rowid article_rowid rowid token
----- ----- ----------- ------------- ----------- ------------- ----- -----
1 ABC 1 2 2 2 2 DEF
1 ABC 1 3 2 3 2 DEF