delete statement that contains join - sql

I found duplicate records in my database from a year ago, and I'm trying to clean them up. I found all the IDs I need to remove using:
select id
from impWthrDta as a
right join (
SELECT dte, rltn
FROM impWthrDta
GROUP BY dte, rltn
HAVING count(*) > 1
) as b
on a.dte = b.dte
and a.rltn = b.rltn
where id >= '0'
order by a.rltn, a.dte
why can i not simply replace
select *
with
delete

You can. Just add the DELETE statement and use your query as a filter for the IDs:
-- Delete the rows whose ID appears in a (dte, rltn) group that occurs more than
-- once in impWthrDta. WhateverTable is a placeholder for the table to delete from.
-- Note: the subquery returns the IDs of ALL rows in each duplicated group, so
-- every copy is removed, not only the surplus ones.
DELETE FROM WhateverTable
WHERE ID IN (
    SELECT a.id
    FROM impWthrDta AS a
    -- The filter on a.id below rejects NULL-extended rows, so an inner join
    -- yields the same IDs as the outer join it replaces.
    INNER JOIN (
        SELECT dte, rltn
        FROM impWthrDta
        GROUP BY dte, rltn
        HAVING COUNT(*) > 1
    ) AS b
        ON a.dte = b.dte
        AND a.rltn = b.rltn
    WHERE a.id >= '0'
)

Related

How can I efficiently extract a sub-table which only contains rows that have duplicated elements in SQL?

The main task is obtaining a sub-table (apologies if this is not quite the correct term) from an existing table, where only a few rows of interest are kept. Essentially, the rows of interest are those containing an element whose value also appears in some other row.
Any explanation or help for which is the best way to go around this would be very helpful.
I have considered performing queries to check for each element in each row, and then simply making a union out of all the query results.
This is the basic of what I tried, although it is probably inefficient. Note that there are 3 columns and I am actually only checking for duplicated values within 2 columns (PARTICIPANT_1, PARTICIPANT_2).
SELECT * FROM
(
team_table
)
WHERE PARTICIPANT_2 in (SELECT PARTICIPANT_2
FROM
(
select startdate, PARTICIPANT_1, PARTICIPANT_2
from team_table
)
GROUP BY PARTICIPANT_2
HAVING COUNT(distinct PARTICIPANT_1) > 1
)
UNION
SELECT * FROM
(
team_table
)
WHERE PARTICIPANT_1 in (SELECT PARTICIPANT_1
FROM
(
select startdate, PARTICIPANT_1, PARTICIPANT_2
from team_table
)
GROUP BY PARTICIPANT_1
HAVING COUNT(distinct PARTICIPANT_2) > 1
)
For an example table:
startdate PARTICIPANT_1 PARTICIPANT_2
1-1-19 A B
1-1-19 A C
1-1-19 C D
1-1-19 Q R
1-1-19 S T
1-1-19 U V
should yield the following since A and C are the repeated elements
startdate PARTICIPANT_1 PARTICIPANT_2
1-1-19 A B
1-1-19 A C
1-1-19 C D
I think this is what you need:
SELECT * FROM team_table t1
WHERE exists (SELECT 1 from team_table t2
WHERE t1.startdate = t2.startdate -- don't know if you need this
-- Get all rows with duplicate values:
AND (t2.PARTICIPANT_1 IN (t1.PARTICIPANT_1, t1.PARTICIPANT_2)
OR t2.PARTICIPANT_2 IN (t1.PARTICIPANT_1, t1.PARTICIPANT_2))
-- Exclude the record itself:
AND (t1.PARTICIPANT_1 != t2.PARTICIPANT_1
OR t1.PARTICIPANT_2 != t2.PARTICIPANT_2))
If you have a unique id column, you can use:
select tt.*
from team_table tt
where exists (select 1
from team_table tt2
where (tt.participant_1 in (tt2.participant_1, tt2.participant_2) or
tt.participant_2 in (tt2.participant_1, tt2.participant_2)
) and
tt2.id <> tt.id
);
If you don't have one, you can actually generate one:
-- Generate a surrogate row number so each row can be told apart from every
-- other row, then keep the rows that share a participant with a different row.
with numbered as (
      select t.*,
             -- No PARTITION BY: the number must be unique across the whole table.
             -- Partitioning by all three columns would give every distinct row
             -- seqnum = 1 and the "different row" test below would never match.
             row_number() over (order by startdate, participant_1, participant_2) as seqnum
      from team_table t
     )
select n.startdate, n.participant_1, n.participant_2
from numbered n
where exists (select 1
              from numbered n2
              where (n.participant_1 in (n2.participant_1, n2.participant_2) or
                     n.participant_2 in (n2.participant_1, n2.participant_2)
                    ) and
                    -- Exclude the row itself.
                    n2.seqnum <> n.seqnum
             );

Group by and Count to select repeated rows

I wrote this query but it does not work as I expected.
1st Goal: select the rows whose values in certain columns are repeated, returning all columns.
2nd Goal: update a flag (a column) to mark which records are repeated.
Could you please help me?
SELECT
*
FROM AvvalV2NS AS M
WHERE EXISTS
(SELECT
M.Astate,
M.Acity,
M.Azone,
M.Abvillage,
M.Avillage,
COUNT(*)
FROM AvvalV2NS AS M
GROUP BY M.Astate,
M.Acity,
M.Azone,
M.Abvillage,
M.Avillage
HAVING COUNT(*) > 1)
If you want to get the rows that are duplicated, window functions are probably the easiest way:
-- Return every row of AvvalV2NS whose (Astate, Acity, Azone, Abvillage, Avillage)
-- combination occurs more than once. The result also carries the group size
-- in the extra column cnt.
select d.*
from (select a.*,
             -- The partition columns must be qualified with the alias that is in
             -- scope here (a); there is no alias M inside this query.
             count(*) over (partition by a.Astate, a.Acity, a.Azone, a.Abvillage, a.Avillage) as cnt
      from AvvalV2NS a
     ) d
where d.cnt > 1;
You can update a flag by doing something like this:
-- Set isduplicate to 1 on rows whose (Astate, Acity, Azone, Abvillage, Avillage)
-- combination occurs more than once, and to 0 on all other rows.
-- NOTE(review): updating through a CTE is SQL Server syntax; confirm the target engine.
with toupdate as (
      select a.*,
             -- Qualify with the alias in scope (a); alias M is not defined here.
             count(*) over (partition by a.Astate, a.Acity, a.Azone, a.Abvillage, a.Avillage) as cnt
      from AvvalV2NS a
     )
-- Intentionally no WHERE clause: every row gets an explicit 1 or 0.
update toupdate
    set isduplicate = (case when cnt > 1 then 1 else 0 end);
Suppose your table have an id column:
-- REPEATING_COLUMNS is a placeholder for the column(s) that define a duplicate.
-- With several columns, list each one in the GROUP BY and add one equality per
-- column to the ON clause.
--
-- The duplicated groups are found first and joined back to the table to collect
-- the ID of every row in those groups. Selecting ID directly from the grouped
-- query is invalid (ID is neither grouped nor aggregated) or, where the engine
-- tolerates it, yields only one arbitrary ID per group.
SELECT *
FROM THE_TABLE
WHERE ID IN (
    SELECT dup_ids.ID
    FROM (
        SELECT t.ID
        FROM THE_TABLE AS t
        INNER JOIN (
            SELECT REPEATING_COLUMNS
            FROM THE_TABLE
            GROUP BY REPEATING_COLUMNS
            HAVING COUNT(*) > 1
        ) AS dup
            ON dup.REPEATING_COLUMNS = t.REPEATING_COLUMNS
    ) AS dup_ids
);

-- Same ID list, used to set the flag. The extra derived table (dup_ids) is kept
-- because some engines (e.g. MySQL) refuse a subquery that reads the UPDATE
-- target directly; derived tables are also given aliases, which MySQL requires.
UPDATE THE_TABLE
SET THE_FLAG = 'HERE WE GO'  -- single quotes: standard string literal
WHERE ID IN (
    SELECT dup_ids.ID
    FROM (
        SELECT t.ID
        FROM THE_TABLE AS t
        INNER JOIN (
            SELECT REPEATING_COLUMNS
            FROM THE_TABLE
            GROUP BY REPEATING_COLUMNS
            HAVING COUNT(*) > 1
        ) AS dup
            ON dup.REPEATING_COLUMNS = t.REPEATING_COLUMNS
    ) AS dup_ids
);
Hope this helps.

Joining two different queries under one answer

I have two different queries that have produced the correct result, but I would like to have them produce the answer out in one table. How do I do that?
Here is my code:
SELECT count(distinct ID) as NoOfEmployees
FROM Table_Name
WHERE date<= '2012-05-31';
select count(subA.ID) as EmployeesChanged from (
SELECT A.ID
FROM Table_Name A
WHERE A.date < '2012-06-01'
GROUP BY 1
HAVING COUNT(A.Service_type) > 1 ) subA
Currently I have the following output:
Number of Employees
x
Employees Changed
x
How do I make it
Number of Employees | Employees Changed | (Number of employees - number changed)
x | x | x
I don't know which database you use, but on some databases you can try:
select q1.Value, q2.Value, q1.Value - q2.Value from
(SELECT count(distinct ID) as Value FROM Table_Name
WHERE date<= '2012-05-31') q1,
(select count(subA.ID) as Value from
( SELECT A.ID FROM Table_Name A
WHERE A.date < '2012-06-01' GROUP BY 1
HAVING COUNT(A.Service_type) > 1 ) subA) q2
If date<= '2012-05-31' is the same as A.date < '2012-06-01' ?
-- One pass over Table_Name: count the employees seen up to the cutoff date and
-- split them by whether they have more than one Service_type row.
SELECT COUNT(*) AS NoOfEmployees,
       -- "Changed" follows the question's rule: more than one Service_type row.
       -- COALESCE covers the empty-input case, where SUM returns NULL.
       COALESCE(SUM(CASE WHEN STCount > 1 THEN 1 ELSE 0 END), 0) AS HasChange,
       COALESCE(SUM(CASE WHEN STCount <= 1 THEN 1 ELSE 0 END), 0) AS NoChange
FROM (
    -- One row per employee with the number of non-NULL Service_type values.
    SELECT A.ID,
           COUNT(A.Service_type) AS STCount
    FROM Table_Name AS A
    WHERE A.date <= '2012-05-31'
    GROUP BY A.ID
) AS Data
You can use CROSS JOIN:
SELECT a.*, b.*, a.NoOfEmployees - b.EmployeesChanged
FROM
(
SELECT count(distinct ID) as NoOfEmployees
FROM Table_Name
WHERE date<= '2012-05-31'
) a
CROSS JOIN
(
SELECT count(subA.ID) as EmployeesChanged
FROM
(
SELECT A.ID
FROM Table_Name A
WHERE A.date < '2012-06-01'
GROUP BY 1
HAVING COUNT(A.Service_type) > 1
) subA
) b
Edit:
You might be able to greatly optimize your query by using conditional aggregation instead of executing two separate queries:
-- Single scan of Table_Name producing one summary row:
--   NoOfEmployees      - distinct employees with a row before the cutoff
--   EmployeesChanged   - employees with more than one Service_type row
--   EmployeesUnchanged - the difference between the two
-- NOTE(review): a single cutoff (date < '2012-06-01') is used for both counts.
-- It matches date <= '2012-05-31' only if date carries no time-of-day part.
SELECT a.NoOfEmployees,
       a.EmployeesChanged,
       a.NoOfEmployees - a.EmployeesChanged AS EmployeesUnchanged
FROM (
    SELECT COUNT(*) AS NoOfEmployees,
           -- Aggregates cannot be nested, so the per-employee count is computed
           -- in the inner query and only tested here.
           COALESCE(SUM(CASE WHEN per_id.ServiceCount > 1 THEN 1 ELSE 0 END), 0) AS EmployeesChanged
    FROM (
        SELECT ID,
               COUNT(Service_type) AS ServiceCount
        FROM Table_Name
        WHERE date < '2012-06-01'
          AND ID IS NOT NULL  -- COUNT(DISTINCT ID) in the two-query form ignores NULL IDs
        GROUP BY ID
    ) per_id
) a

How to filter out records grouped by date with a large date difference

I have some records, grouped by name and date.
I would like to find any records in a table that have a date difference between them larger than a week, from the most recent record.
Would this be possible to do with a cte?
I am thinking something along these lines (it is difficult to explain)
; with mycte as (
select *
from #GroupedRecords)
select *
from mycte a
join (select *
from #GroupedRecords) b on a.Name = b.Name
where datediff(day, a.DateCreated, b.DateCreated) > 7
For example:
Id Name Date
1 Foo 02/03/2010
2 Bar 23/02/2010
3 Ram 21/01/2010
4 Foo 29/02/2010
5 Foo 22/02/2010
6 Foo 05/12/2009
The results should be:
Id Name Date
1 Foo 02/03/2010
5 Foo 22/02/2010
6 Foo 05/12/2009
You can try:
SELECT id,
name,
DATE
FROM groupedrecords AS gr1
WHERE ( (SELECT MAX(DATE) AS md
FROM groupedrecords gr2
WHERE gr1.name = gr2.name) - gr1.DATE ) > 7;
Or probably better yet:
-- Return the records dated more than 7 days before the most recent record
-- that has the same name.
-- NOTE(review): assumes date - date yields a number of days on the target
-- engine; where it does not, use that engine's date-difference function.
SELECT gr1.id,
       gr1.name,  -- qualified: both gr1 and q1 expose a column called name
       gr1.DATE
FROM   groupedrecords AS gr1
       INNER JOIN (SELECT gr2.name,
                          MAX(gr2.DATE) AS md  -- most recent date per name
                   FROM   groupedrecords AS gr2
                   GROUP  BY gr2.name) AS q1
         ON gr1.name = q1.name
WHERE  ( q1.md - gr1.DATE ) > 7;
UPDATE: As suggested in the comments, here is a version that uses union to get the id with the max date per group AND the ids of those that are 7 days or older than the max date. I used a CTE for fun, it was not necessary. Note that if there is more than 1 ID that shares the max date in a group, this query will need to be modified-
WITH CTE
AS (SELECT name,
Max(date) AS MD
FROM Records
GROUP BY name)
SELECT R.ID,
R.name,
R.date
FROM CTE
INNER JOIN Records AS R
ON CTE.Name = R.Name
AND CTE.MD = R.date
UNION ALL
SELECT r1.id,
r1.name,
r1.DATE
FROM Records AS R1
INNER JOIN CTE
ON CTE.name = R1.name
WHERE ( CTE.md - R1.DATE ) > 7
ORDER BY name ASC,
date DESC
I wonder if this gets close to a solution:
; with tableWithRow as (
select *, row_number() over (order by name, date) as rowNum
from t
)
select t1.*, t2.id t2id, t2.name t2name, t2.date t2date, t2.rowNum t2rowNum
from tableWithRow t1
join tableWithRow t2
on t1.rowNum = t2.rowNum + 1 and t1.name = t2.name

How to get Original Rows filtered by a HAVING Condition?

What is the method in T-SQL to select the original values limited by a HAVING condition? For example, if I have
A|B
10|1
11|2
10|3
How would I get all the values of B (not an average or some other summary statistic), grouped by A, having a count (occurrences of A) greater than or equal to 2?
Actually, you have several options to choose from
1. You could make a subquery out of your original having statement and join it back to your table
SELECT *
FROM YourTable yt
INNER JOIN (
SELECT A
FROM YourTable
GROUP BY
A
HAVING COUNT(*) >= 2
) cnt ON cnt.A = yt.A
2. another equivalent solution would be to use a WITH clause
;WITH cnt AS (
SELECT A
FROM YourTable
GROUP BY
A
HAVING COUNT(*) >= 2
)
SELECT *
FROM YourTable yt
INNER JOIN cnt ON cnt.A = yt.A
3. or you could use an IN statement
SELECT *
FROM YourTable yt
WHERE A IN (SELECT A FROM YourTable GROUP BY A HAVING COUNT(*) >= 2)
A self join will work:
select B
from table
join(
select A
from table
group by 1
having count(1)>1
)s
using(A);
You can use window function (no joins, only one table scan):
-- Keep every row whose value of A occurs at least twice. The per-A count is
-- attached to each row by a window function, so no join is needed.
-- "table" is a placeholder for the real table name.
select * from (
    -- cnt = number of rows sharing this row's A (T-SQL "alias = expression" form)
    select *, cnt=count(*) over(partition by A) from table
) as a
where cnt >= 2