Delete row if column2 value exists anywhere in column1 - sql

A program starts with a table like the one below:
ID data1 data2 ... copyID
15 a b NULL
16 c d 11
You instruct the program to create new rows based off of information in existing rows. The copyID field holds the ID of the row in which data was copied from. After copying rows 1 and 2, the new table looks like the one below:
ID data1 data2 ... copyID
15 a b NULL
16 c d 11
17 a b 15
18 c d 16
Now, I would like to only select rows that aren't "duplicates" of rows we are already capturing. Since ID 15 is "original," it should be maintained. Since there is no ID = 11, we want to maintain ID 16. Since we already have ID = 15 and ID = 16, we do not need ID = 17 or ID = 18. What is the process to handle this decision making in SQL?

I think that the code below will resolve your issue:
-- Anti-join: keep only the rows whose copyID does not point at an existing ID.
-- Rows with a NULL copyID never satisfy the join condition, so they are kept too.
SELECT a.*
FROM yourtable AS a
LEFT JOIN yourtable AS b
    ON a.copyID = b.ID
WHERE b.ID IS NULL;

You can use EXISTS
For example:
-- Demo: build the sample table, copy its rows, then keep only the rows whose
-- parent (the row referenced by copyID) is not present in the table.
-- T-SQL table variables must be named with a leading @.
DECLARE @T TABLE (ID int, data1 varchar(30), data2 varchar(30), copyID int);

-- adding the parent records
INSERT INTO @T (ID, data1, data2, copyID) VALUES
    (15, 'a', 'b', NULL),
    (16, 'c', 'd', 11);

-- adding the duplicates: each copy gets the next free ID and points back at its source row
INSERT INTO @T (ID, copyID, data1, data2)
SELECT
    m.maxID + ROW_NUMBER() OVER (ORDER BY t.ID) AS newID,
    t.ID AS copyID,
    t.data1,
    t.data2
FROM @T AS t
CROSS APPLY (SELECT MAX(ID) AS maxID FROM @T) AS m;

-- Only selecting the rows whose parent record does not exist
SELECT *
FROM @T AS t1
WHERE NOT EXISTS (
    SELECT 1
    FROM @T AS t2
    WHERE t1.copyID = t2.ID
      AND t1.data1 = t2.data1
      AND t1.data2 = t2.data2
);

-- Or via a LEFT JOIN, keeping the rows that have no match on the right side
SELECT t1.*
FROM @T AS t1
LEFT JOIN @T AS t2
    ON t1.copyID = t2.ID
    AND t1.data1 = t2.data1
    AND t1.data2 = t2.data2
WHERE t2.ID IS NULL;
Returns:
ID data1 data2 copyID
15 a b NULL
16 c d 11

This is a simple exists:
-- Keep originals (copyid IS NULL) and copies whose source row is missing.
SELECT t.*
FROM t
WHERE t.copyid IS NULL
   OR NOT EXISTS (
        SELECT 1
        FROM t AS parent
        WHERE parent.id = t.copyid
   );
This handles both the NULL case and the NOT EXISTS case.
This can be simplified to:
-- A NULL copyid matches no parent row, so NOT EXISTS alone also keeps the originals.
SELECT t.*
FROM t
WHERE NOT EXISTS (
    SELECT 1
    FROM t AS parent
    WHERE parent.id = t.copyid
);

Related

SQL: Select rows in a table by filtering multiple columns from the same table by a 3 column select result

I have a table where I want to filter all rows that have a Code,Life and TC equal to the results of a select query on the same table filtered by ID
ID Code|Life|TC|PORT
62 XX101 1 1 1
63 XX101 1 1 2
64 AB123 1 1 1
65 AB123 1 1 2
66 AB123 1 1 3
67 CD321 1 1 1
68 CD321 1 1 2
This is the best I have come up with but it doesn't seem to be very efficient.
-- Returns the IDs of every row sharing Code, Life and TC with the row whose ID is @Port1.
-- Each scalar subquery must yield at most one row, so ID has to be unique in #table.
SELECT ID
FROM #table
WHERE Code = (SELECT Code FROM #table WHERE ID = @Port1)
  AND Life = (SELECT Life FROM #table WHERE ID = @Port1)
  AND TC   = (SELECT TC   FROM #table WHERE ID = @Port1);
Here is the query you need:
-- t1 is the reference row (ID = @Port1); t2 is every row sharing its Code, Life and TC.
-- PORT is intentionally not part of the match: the requirement is to match on
-- Code, Life and TC only, and also comparing PORT would drop the sibling ports.
SELECT t2.*
FROM #table AS t1
INNER JOIN #table AS t2
    ON t1.Code = t2.Code
    AND t1.Life = t2.Life
    AND t1.TC = t2.TC
WHERE t1.ID = @Port1;
With cross apply:
-- For the reference row (ID = @Port1), CROSS APPLY fetches every row with the
-- same Code, Life and TC. PORT is not compared, so all ports of the group are returned.
SELECT ca.*
FROM #table AS t1
CROSS APPLY (
    SELECT *
    FROM #table AS t2
    WHERE t1.Code = t2.Code
      AND t1.Life = t2.Life
      AND t1.TC = t2.TC
) AS ca
WHERE t1.ID = @Port1;
With cte:
-- cte holds the reference row; the join returns every row with the same
-- Code, Life and TC (PORT is not compared, so all ports of the group match).
WITH cte AS (
    SELECT *
    FROM #table
    WHERE ID = @Port1
)
SELECT t.*
FROM #table AS t
INNER JOIN cte AS c
    ON t.Code = c.Code
    AND t.Life = c.Life
    AND t.TC = c.TC;
You could use an EXIST statement for this scenario
-- Keep the IDs of rows for which the reference row (ID = @Port1) has the same
-- Code, Life and TC.
SELECT
    t1.ID
FROM
    #table AS t1
WHERE
    EXISTS (
        SELECT *
        FROM #table AS t2
        WHERE t2.ID = @Port1
          AND t2.Code = t1.Code
          AND t2.Life = t1.Life
          AND t2.TC = t1.TC
    );
Your code looks to provide the same result of
-- Join form of the three scalar-subquery comparisons: tbl2 is the reference row
-- (ID = @Port1) and tbl1 is every row matching it on Code, Life and TC.
-- ID is qualified because both sides of the self-join expose that column.
SELECT tbl1.ID
FROM #table AS tbl1
INNER JOIN #table AS tbl2
    ON tbl2.ID = @Port1
    AND tbl1.Code = tbl2.Code
    AND tbl1.Life = tbl2.Life
    AND tbl1.TC = tbl2.TC;
but it's more expensive
You are asking always for the same record in the selects under the where clause.
Then each time you pick a different field to match.
But pay attention because if there is more than one record with that ID your query gives error because, since you used the = operator it expects only one instance of the field you are checking.
Using window functions:
-- RANK() gives every row with the same (Code, Life, TC) the same number, so grp
-- identifies the group; CTE2 picks the group of the reference row (ID = @Port1).
;WITH CTE AS (
    SELECT *, RANK() OVER (ORDER BY [Code], [Life], [TC]) AS grp
    FROM mytable
), CTE2 AS (
    SELECT grp
    FROM CTE
    WHERE ID = @Port1
)
SELECT *
FROM CTE
WHERE grp = (SELECT grp FROM CTE2);
The above query finds the [Code], [Life], [TC] partition to which row with ID = #Port1 belongs and then selects all rows of this partition.

Query for earliest datetime and corresponding number field

I'm attempting to update a table with a dollar amount based on the earliest datetime field from another table. For example:
Table 1
ID|INITIAL_ANNUAL_RATE_AMT|
1 | NULL (I want to update this to 25.02)
Table 2
ID|ANNUAL_RATE_AMT|STARTING_DATE|
1 |25.01 |1/1/2014
1 |25.02 |1/1/2013
I've got a query like this that retreives the earliest date from table 2 and the corresponding objects ID:
-- One row per ID carrying that ID's earliest STARTING_DATE.
SELECT
    ID,
    MIN(t2.STARTING_DATE) AS EARLIEST_START_DATE
FROM t2
GROUP BY t2.ID
But how can I leverage this into an update statement that sets the INITIAL_ANNUAL_RATE_AMT in table 1 to the earliest corresponding value in table 2?
Something like this (which currently fails):
-- Asker's attempt, kept as posted; it does not run. Visible problems:
--   * the derived table is missing a comma between t2.ID and MIN(...);
--   * it selects t2.ID but groups by t2.DEAL_ID, and the ON clause uses DEAL_ID,
--     a column the derived table never exposes;
--   * SET reads t3.ANNUAL_RATE_AMT, which t3 does not select;
--   * t1 and t2 are comma-joined with no condition relating them.
update t1
-- NOTE(review): the target column is described as INITIAL_ANNUAL_RATE_AMT in the sample table; confirm the real name
set t1.Initial_Annual_Rate__c = t3.ANNUAL_RATE_AMT
from t1, t2
left join
(select t2.ID
MIN(t2.STARTING_DATE) as EARLIEST_START_DATE
from t2
group by t2.DEAL_ID)
as t3 ON (t3.DEAL_ID = t1.DEAL_ID)
One way is to use a CTE
-- C pairs each ID with the rate of the row holding its earliest STARTING_DATE;
-- the UPDATE then copies that rate into #Table1.
;WITH C AS (
    SELECT
        first_dates.ID,
        EARLIEST_START_DATE,
        ANNUAL_RATE_AMT
    FROM (
        SELECT
            ID,
            MIN(src.STARTING_DATE) AS EARLIEST_START_DATE
        FROM #Table2 AS src
        GROUP BY src.ID
    ) AS first_dates
    INNER JOIN #Table2 AS rates
        ON rates.ID = first_dates.ID
        AND first_dates.EARLIEST_START_DATE = rates.STARTING_DATE
)
UPDATE t1
SET INITIAL_ANNUAL_RATE_AMT = C.ANNUAL_RATE_AMT
FROM #Table1 AS t1
INNER JOIN C
    ON C.ID = t1.ID
SQLFIDDLE
Another method, using a window function to get the first row in each ID partitioned set:
-- Setup test data
-- Setup test data (table variables need the @ prefix in T-SQL)
DECLARE @table1 TABLE (ID int, INITIAL_ANNUAL_RATE_AMT decimal(9,2));
DECLARE @table2 TABLE (ID int, ANNUAL_RATE_AMT decimal(9,2), STARTING_DATE date);

INSERT INTO @table1 (ID, INITIAL_ANNUAL_RATE_AMT)
SELECT 1, NULL;

-- UNION ALL: the two rows are distinct, so no de-duplication pass is needed
INSERT INTO @table2 (ID, ANNUAL_RATE_AMT, STARTING_DATE)
SELECT 1, 25.01, '1/1/2014'
UNION ALL
SELECT 1, 25.02, '1/1/2013';

-- Do the update: rowNumber = 1 marks the earliest STARTING_DATE within each ID
;WITH table2WithIDRowNumbers AS (
    SELECT
        ID,
        ANNUAL_RATE_AMT,
        STARTING_DATE,
        ROW_NUMBER() OVER (PARTITION BY ID ORDER BY STARTING_DATE) AS rowNumber
    FROM @table2
)
UPDATE t1
SET INITIAL_ANNUAL_RATE_AMT = t2.ANNUAL_RATE_AMT
FROM table2WithIDRowNumbers AS t2
INNER JOIN @table1 AS t1
    ON t1.ID = t2.ID
WHERE t2.rowNumber = 1;

-- Show the result
SELECT * FROM @table1;

MS SQL Server : update with less amount of rows in source table

What is the best way to update Table1 with all values from Table2 if Table2 has less rows than Table1? This considering that Table2 has no key that can be joined to Table1 for update.
TABLE1 TABLE2 RESULT TABLE1
id value value id value
----------------------------------------------------
1 NULL 4 1 4
2 NULL 6 2 6
3 NULL 8 3 8
4 NULL 4 4
5 NULL 5 6
6 NULL 6 8
7 NULL 7 4
Hope I make sense.
Thanks in advance.
EDIT: Pardon, did not specify its Microsoft SQL Server 2012. :/
EXAMPLE for SOLUTION:
-- Table variables need the @ prefix in T-SQL.
DECLARE @t1 TABLE (id int, avalue int);
DECLARE @t2 TABLE (id int, avalue int);

-- Generate 20 rows in the @t1 table
-- (relies on an existing dbo.Numbers tally table)
INSERT INTO @t1 (id)
SELECT Number
FROM dbo.Numbers
WHERE Number BETWEEN 1 AND 20;

-- Generate 5 rows with values in the @t2 table
INSERT INTO @t2 (id, avalue)
SELECT Number, Number
FROM dbo.Numbers
WHERE Number BETWEEN 1 AND 5;

-- The goal is to take all rows from @t2
-- and repeatedly assign them, in order, to the rows of @t1.
-- cnt is the number of rows in @t2; (t1.id - 1) % cnt cycles through 0..cnt-1,
-- which is matched against the zero-based position t2.id - 1.
UPDATE t1
SET t1.avalue = t2.avalue
FROM @t1 AS t1
INNER JOIN (
    SELECT t2.*, COUNT(*) OVER () AS cnt
    FROM @t2 AS t2
) AS t2
    ON (t1.id - 1) % t2.cnt = t2.id - 1;

SELECT *
FROM @t1;
Interesting problem. This first solution is for MySQL (I originally read the question as being about that database). After this solution is the one for SQL Server.
You need to generate a join key. Let me assume that id is really sequential. Then you can use modulo arithmetic to do the match:
-- MySQL: number the rows of table2 from 0 with a user variable (@rn starts at -1),
-- then match table1 rows to them by (id - 1) modulo the table2 row count.
update table1 t1 join
       (select (@rn := @rn + 1) as seqnum, value
        from table2 cross join
             (select @rn := -1) vars
       ) t2 cross join
       (select count(*) as cnt from table2) cnt
       on mod((t1.id - 1), cnt.cnt) = t2.seqnum
    set t1.value = t2.value;
If the id in table1 is not sequential, you can use a variable for that as well. It just further complicates the query:
-- MySQL: same idea, but table1 rows are first numbered 1..n by id order (t1s.seqnum)
-- so that gaps in table1.id do not matter. @rn1 numbers table1, @rn numbers table2.
update table1 t1 join
       (select (@rn1 := @rn1 + 1) as seqnum, id
        from table1 t1 cross join
             (select @rn1 := 0) vars
        order by id
       ) t1s
       on t1.id = t1s.id join
       (select (@rn := @rn + 1) as seqnum, value
        from table2 cross join
             (select @rn := -1) vars
       ) t2 cross join
       (select count(*) as cnt from table2) cnt
       on mod((t1s.seqnum - 1), cnt.cnt) = t2.seqnum
    set t1.value = t2.value;
EDIT:
You can readily do the same thing in SQL Server. It is actually easier:
-- SQL Server: UPDATE names the alias and the FROM clause defines it. cnt is the
-- number of rows in table2, used to cycle table1 ids over the table2 ids.
UPDATE t1
SET t1.value = t2.value
FROM table1 AS t1
INNER JOIN (
    SELECT t2.*, COUNT(*) OVER () AS cnt
    FROM table2 AS t2
) AS t2
    ON (t1.id - 1) % t2.cnt = (t2.id - 1);
This formulation depends on the ids being sequential with no gaps. It is easy enough to loosen this restriction, but the query gets a wee bit more complicated.
Try this query.
-- Table variables and scalar variables need the @ prefix in T-SQL.
DECLARE @Table1 AS TABLE
(
    ID INT,
    Value INT
);
DECLARE @Table2 AS TABLE
(
    Value INT
);

INSERT INTO @Table1 (ID, Value)
VALUES (1, NULL), (2, NULL), (3, NULL), (4, NULL), (5, NULL), (6, NULL), (7, NULL);

INSERT INTO @Table2 (Value)
VALUES (4), (6), (8);

-- Number of source rows to cycle through
DECLARE @nCOUNT AS INT;
SET @nCOUNT = (SELECT COUNT(*) FROM @Table2);

-- ROID maps each @Table1 row (numbered by ID) onto 1..@nCOUNT, wrapping around;
-- @Table2 rows are numbered 1..@nCOUNT by Value and matched on that position.
UPDATE TB1
SET TB1.Value = TB2.Value
FROM @Table1 AS TB1
INNER JOIN (
    SELECT T1.ID, T2.Value
    FROM (
        SELECT
            *,
            CASE
                WHEN (ROW_NUMBER() OVER (ORDER BY ID) % @nCOUNT) = 0 THEN @nCOUNT
                ELSE (ROW_NUMBER() OVER (ORDER BY ID) % @nCOUNT)
            END AS ROID
        FROM @Table1
    ) AS T1
    LEFT JOIN (
        SELECT Value, ROW_NUMBER() OVER (ORDER BY Value) AS ID
        FROM @Table2
    ) AS T2
        ON T2.ID = T1.ROID
) AS TB2
    ON TB1.ID = TB2.ID;

SELECT * FROM @Table1;

alternative solution to too many JOINs

There is a table containing all names:
-- Master list of names; every name is looked up in the description tables.
CREATE TABLE Names
(
    Name VARCHAR(20)
)
And there are multiple tables with similar schema.
Let's say:
-- Two description sources with identical columns; Version distinguishes
-- several descriptions of the same Name within one source.
CREATE TABLE T1 (
    Name        VARCHAR(20),
    Description VARCHAR(30),
    Version     INT
)
CREATE TABLE T2 (
    Name        VARCHAR(20),
    Description VARCHAR(30),
    Version     INT
)
I need to query description for each name, by following priority:
any records in T1 with matching name and version = 1
any records in T1 with matching name and version = 2
any records in T2 with matching name and version = 1
any records in T2 with matching name and version = 2
I want result from lower priority source only if there are no result from higher priority source.
So far that's I've got:
-- One LEFT JOIN per (source, version) pair; COALESCE returns the first
-- non-NULL description in priority order T1 v1, T1 v2, T2 v1, T2 v2.
SELECT
    N.Name AS Name,
    COALESCE(
        T11.Description,
        T12.Description,
        T21.Description,
        T22.Description
    ) AS Description
FROM Names AS N
LEFT JOIN T1 AS T11
    ON T11.Name = N.Name AND T11.Version = 1
LEFT JOIN T1 AS T12
    ON T12.Name = N.Name AND T12.Version = 2
LEFT JOIN T2 AS T21
    ON T21.Name = N.Name AND T21.Version = 1
LEFT JOIN T2 AS T22
    ON T22.Name = N.Name AND T22.Version = 2
It's working, but are there too much JOIN here? Is there any better approach?
sqlfiddle
Sample Input:
-- Sample names: name6 has no description in either source.
INSERT INTO Names (Name)
VALUES ('name1'), ('name2'), ('name3'), ('name4'), ('name5'), ('name6')

-- T1 descriptions (name3 has both versions; name5 has version 2 only).
INSERT INTO T1 (Name, Description, Version)
VALUES
    ('name1', 'name1_T1_1', 1),
    ('name2', 'name2_T1_1', 1),
    ('name3', 'name3_T1_1', 1),
    ('name3', 'name3_T1_2', 2),
    ('name5', 'name5_T1_2', 2)

-- T2 descriptions (name4 appears only here).
INSERT INTO T2 (Name, Description, Version)
VALUES
    ('name1', 'name1_T2_1', 1),
    ('name4', 'name4_T2_1', 1)
Excepted result:
--
-- Excepted result:
-- Name Description
-- name1 name1_T1_1
-- name2 name2_T1_1
-- name3 name3_T1_1
-- name4 name4_T2_1
-- name5 name5_T1_2
-- name6 NULL
Well, this is a solution to eliminate the case statement and minimize the repetitive part of the query, it requires some joins of it's own of course, so you'd need quite some tables and/or versions to get any real benefit out of it:
;WITH
-- Every description from every source, tagged with the source's priority.
AllDescriptions AS
(
    SELECT 1 AS Rank, * FROM T1
    UNION ALL
    SELECT 2 AS Rank, * FROM T2
    -- UNION ALL SELECT 3 AS Rank, * FROM T3
    -- UNION ALL SELECT 4 AS Rank, * FROM T4
    -- etc
),
-- Best (lowest) source priority available for each name.
Ranks AS
(
    SELECT
        ad.Name,
        MIN(ad.Rank) AS Rank
    FROM AllDescriptions AS ad
    GROUP BY Name
),
-- Lowest version available within that best source.
Versions AS
(
    SELECT
        ad.Name,
        ad.Rank,
        MIN(ad.Version) AS Version
    FROM AllDescriptions AS ad
    INNER JOIN Ranks AS r
        ON r.Name = ad.Name
        AND r.Rank = ad.Rank
    GROUP BY
        ad.Name,
        ad.Rank
),
-- The description belonging to the chosen (source, version) pair.
Descriptions AS
(
    SELECT
        ad.Name,
        ad.Description
    FROM AllDescriptions AS ad
    INNER JOIN Versions AS v
        ON v.Name = ad.Name
        AND v.Rank = ad.Rank
        AND v.Version = ad.Version
)
SELECT
    Names.*,
    Descriptions.Description
FROM Names
LEFT OUTER JOIN Descriptions
    ON Descriptions.Name = Names.Name
Try this query and it will also give you the expected result.
-- Two joins instead of four: each source is first reduced to at most one row per
-- name (its lowest version among 1 and 2). Joining the raw tables with
-- version IN (1, 2) would return one row per matching version and would not
-- prefer version 1 over version 2.
SELECT
    N.name AS Name,
    COALESCE(t1.description, t2.description) AS Description
FROM names AS N
LEFT JOIN (
    SELECT
        name,
        description,
        ROW_NUMBER() OVER (PARTITION BY name ORDER BY version) AS rn
    FROM t1
    WHERE version IN (1, 2)
) AS t1
    ON t1.name = N.name
    AND t1.rn = 1
LEFT JOIN (
    SELECT
        name,
        description,
        ROW_NUMBER() OVER (PARTITION BY name ORDER BY version) AS rn
    FROM t2
    WHERE version IN (1, 2)
) AS t2
    ON t2.name = N.name
    AND t2.rn = 1
-- For each name, take the lowest-version row from T1 and from T2 (if any),
-- then prefer the T1 description over the T2 one.
SELECT
    n.name,
    ISNULL(from_t1.description, from_t2.Description) AS description
FROM Names AS n
OUTER APPLY (
    SELECT TOP 1 t1.Name, t1.Description
    FROM T1
    WHERE t1.Name = n.name
    ORDER BY Version ASC
) AS from_t1
OUTER APPLY (
    SELECT TOP 1 t2.Name, t2.Description
    FROM T2
    WHERE t2.Name = n.name
    ORDER BY Version ASC
) AS from_t2

Select query in MySQL

Two tables
Table1
ID FileName
1 abc
2 abc
3 abc
4 xyz
Table2
ID Table1_ID isDeleted
1 1 1
2 2 1
3 3 0
4 4 0
I need to get the count of filename for the isDeleted=1 by passing any ID of table1, i.e for all the values(1,2,3) of ID, i need the count as 2
I tried with the following query
-- Asker's attempt, kept as posted (X is a placeholder for the ID passed in).
-- The self-join on FileName yields one row per file sharing t1's name, but the
-- isDeleted filter is joined to t1.ID rather than t11.ID, so it only checks the
-- row that was passed in and never the sibling rows being counted.
SELECT COUNT(t1.FileName) FROM Table1 t1
LEFT OUTER JOIN Table1 t11 ON t1.FileName=t11.FileName
INNER JOIN table2 t2 ON t2.Table1_ID =t1.ID AND t2.isDeleted=1
WHERE t1.ID=X;
X-1,2,3
This always returns 3.
Edit: I need to get the count of the filename from the first table by passing the ID from the first table. The count should be based on the isdeleted column in second table. The tables are related by the column ID (table1) and Table1_ID (table2)
Give this a shot:
-- Innermost query: the FileName of the given ID (1 here).
-- Middle query: every Table1 ID carrying that FileName.
-- Outer query: add up isDeleted over the matching Table2 rows.
SELECT SUM(isDeleted)
FROM Table2
WHERE Table1_ID IN (
    SELECT ID
    FROM Table1
    WHERE FileName = (
        SELECT FileName
        FROM Table1
        WHERE ID = 1
    )
)
Edit: to get file count:
-- Count the deleted rows among all files sharing the FileName of the given ID (1 here).
SELECT COUNT(*)
FROM Table1 AS a
JOIN Table2 AS b
    ON a.ID = b.Table1_ID
    AND b.isDeleted = 1
WHERE a.FileName = (
    SELECT FileName
    FROM Table1
    WHERE ID = 1
)
This works for me:
-- Same query as the SUM(isDeleted) answer, with the ID held in a variable.
-- Variables are @-prefixed; DECLARE ... int is T-SQL syntax.
DECLARE @id int
SET @id = 1 /*Or 2 or 3 or 4, etc.*/

SELECT SUM(isdeleted)
FROM table2
WHERE table1_id IN (
    SELECT id
    FROM table1
    WHERE filename = (
        SELECT filename
        FROM table1
        WHERE id = @id
    )
)
Edit: I can't see how this is different from Fosco's answer.
-- Counts the deleted Table2 rows attached to the single Table1 row with ID = X
-- (X is a placeholder for the ID passed in).
SELECT COUNT(t1.FileName)
FROM Table1 t1
INNER JOIN table2 t2
    ON t2.Table1_ID = t1.ID
    AND t2.isDeleted = 1
WHERE t1.ID = X;