Group by and Count to select repeated rows - sql

I wrote this query but it does not work as I expected.
1st Goal: select the rows that are repeated in certain columns and return all of their columns.
2nd Goal: update a flag (a column) to identify which records are repeated.
Could you please help me?
-- Asker's original attempt, kept verbatim. Intent: return every row whose
-- (Astate, Acity, Azone, Abvillage, Avillage) combination occurs more than once.
SELECT
*
FROM AvvalV2NS AS M
WHERE EXISTS
-- NOTE: the subquery below declares the alias M again, which shadows the outer M.
-- Every M.* reference inside therefore points at the inner table, so the EXISTS
-- is not correlated with the outer row: it is true for all outer rows as soon as
-- any duplicated group exists anywhere in the table.
(SELECT
M.Astate,
M.Acity,
M.Azone,
M.Abvillage,
M.Avillage,
COUNT(*)
FROM AvvalV2NS AS M
GROUP BY M.Astate,
M.Acity,
M.Azone,
M.Abvillage,
M.Avillage
HAVING COUNT(*) > 1)

If you want to get the rows that are duplicated, window functions are probably the easiest way:
-- Return every row whose (Astate, Acity, Azone, Abvillage, Avillage) combination
-- appears more than once. COUNT(*) OVER (PARTITION BY ...) attaches the size of
-- the row's group to each row, so no self-join or GROUP BY is needed.
-- The partition columns are qualified with the alias actually declared in FROM (a).
select d.*
from (select a.*,
             count(*) over (partition by a.Astate, a.Acity, a.Azone, a.Abvillage, a.Avillage) as cnt
      from AvvalV2NS a
     ) d
where d.cnt > 1;
You can update a flag by doing something like this:
-- Set isduplicate to 1 on rows whose (Astate, Acity, Azone, Abvillage, Avillage)
-- combination occurs more than once, and to 0 on all other rows.
-- The UPDATE goes through the CTE so the windowed count is available to SET.
-- There is deliberately no WHERE clause: every row's flag is recomputed.
with toupdate as (
    select a.isduplicate,
           count(*) over (partition by a.Astate, a.Acity, a.Azone, a.Abvillage, a.Avillage) as cnt
    from AvvalV2NS a
)
update toupdate
set isduplicate = (case when cnt > 1 then 1 else 0 end);

Suppose your table has an id column:
-- Select all rows whose REPEATING_COLUMNS value occurs more than once.
-- REPEATING_COLUMNS is a placeholder for the column list that defines a duplicate.
-- The count is computed per row with a window function, because ID cannot be
-- selected from a query grouped only by REPEATING_COLUMNS. The derived table is
-- aliased, as most engines require.
SELECT *
FROM THE_TABLE
WHERE ID IN (
    SELECT counted.ID
    FROM (
        SELECT ID,
               COUNT(*) OVER (PARTITION BY REPEATING_COLUMNS) AS CNT
        FROM THE_TABLE
    ) AS counted
    WHERE counted.CNT > 1
)
-- Flag all rows whose REPEATING_COLUMNS value occurs more than once.
-- REPEATING_COLUMNS is a placeholder for the column list that defines a duplicate.
-- The string value uses single quotes; double quotes denote identifiers in
-- standard SQL. The count is windowed per row so that ID is available without
-- being part of a GROUP BY.
UPDATE THE_TABLE
SET THE_FLAG = 'HERE WE GO'
WHERE ID IN (
    SELECT counted.ID
    FROM (
        SELECT ID,
               COUNT(*) OVER (PARTITION BY REPEATING_COLUMNS) AS CNT
        FROM THE_TABLE
    ) AS counted
    WHERE counted.CNT > 1
)
Hope this helps.

Related

Can't find largest duplicate value in SQL Server

I have duplicate data in my table, and I want to fetch only the rows with the largest values from among the duplicates.
I added an image as an example: from it I want to get only the last two rows, because the first row's first-column value is lower than the others and the service IDs are the same. I am trying to do this by counting the data, but I can't get the final result.
Currently I am using this query to count data
-- Number of rows per ServiceId, restricted to ServiceIds that occur more than
-- once, with the most frequently duplicated ServiceIds first.
SELECT ServiceId,
       COUNT(*) AS Count_Duplicate
FROM TestDeleteTable
GROUP BY ServiceId
HAVING COUNT(*) > 1
ORDER BY Count_Duplicate DESC
Thanks for any help
Following query should work for you.
-- For each ServiceId, return every row except the one with the lowest ROWID.
-- RN numbers the rows of a ServiceId by ROWID; CT is the count of non-null
-- ServiceId values accumulated in that same ROWID order.
SELECT Ranked.ServiceId,
       Ranked.RowId
FROM
(
    SELECT *,
           COUNT(ServiceId) OVER (PARTITION BY ServiceId ORDER BY ROWID) AS CT,
           ROW_NUMBER() OVER (PARTITION BY ServiceId ORDER BY ROWID) AS RN
    FROM TestDeleteTable
) AS Ranked
WHERE Ranked.CT > 1
  AND Ranked.RN > 1
DEMO
Another approach can be
-- FirstRowPerService: the lowest ROWID of each ServiceId that occurs more than once.
;WITH FirstRowPerService AS
(
    SELECT ServiceId,
           MIN(ROWID) AS MinRowId
    FROM TestDeleteTable
    GROUP BY ServiceId
    HAVING COUNT(*) > 1
)
-- Keep the rows of a duplicated ServiceId that come after its first row.
SELECT *
FROM TestDeleteTable AS T
WHERE EXISTS
(
    SELECT 1
    FROM FirstRowPerService AS F
    WHERE F.ServiceId = T.ServiceId
      AND F.MinRowId < T.ROWID
)
Or simply with an INNER JOIN to the CTE, like the following.
-- DuplicatedService: per ServiceId occurring more than once, its lowest ROWID
-- and its count of non-null ServiceId values.
;WITH DuplicatedService AS
(
    SELECT ServiceId,
           MIN(ROWID) AS MinValue,
           COUNT(ServiceId) AS CountService
    FROM #t
    GROUP BY ServiceId
    HAVING COUNT(*) > 1
)
-- Rows of a duplicated ServiceId that come after that ServiceId's first row.
SELECT T.*
FROM #T AS T
INNER JOIN DuplicatedService AS D
    ON D.ServiceId = T.ServiceId
WHERE D.MinValue < T.ROWID
  AND D.CountService > 1

Get MAX ID from multiple records in table where ID2 is the same and where Value 3<>0

In the above screenshot I need to get the subplanid where MAX(ID) in that group of subplanid does not have a formularymixtype of 0
I think this does what you want:
-- Number the rows of each subplanid from highest id to lowest, then keep the
-- top row of each group only when its formularymixtype is not 0.
select ranked.*
from (
    select t.*,
           row_number() over (partition by subplanid order by id desc) as seqnum
    from t
) ranked
where ranked.formularymixtype <> 0
  and ranked.seqnum = 1;
This query selects subplanid from the table, taking the last (highest) id where formularymixtype is not equal to 0:
-- Return each subplanid whose highest-id row has a formularymixtype other than 0.
-- The subquery is correlated on subplanid so that MAX(id) is taken per group,
-- and the formularymixtype test is applied to that max-id row itself; filtering
-- inside the subquery would instead pick the highest non-zero row.
-- t is the placeholder table name (TABLE is a reserved word).
SELECT t.subplanid
FROM t
WHERE t.id = (
        SELECT MAX(t2.id)
        FROM t AS t2
        WHERE t2.subplanid = t.subplanid
      )
  AND t.formularymixtype <> 0

SQL query: how to distinct count of a column group by another column

In my table I need to know if each ID has one and only one ID_name. How can I write such query?
I tried:
-- Asker's attempt: number of distinct ID_name values per ID, keeping the IDs
-- with more than one. The HAVING clause refers to the select-list alias, as in
-- the asker's original.
select
    ID,
    count(distinct ID_name) as count_name
from table
group by
    ID
having
    count_name > 1
But it takes forever to run.
Any thoughts?
-- IDs associated with more than one distinct ID_name.
select ID
from YourTable
group by ID
having count(distinct ID_name) > 1
or
-- Every row for which another row exists with the same ID but a different ID_Name.
select *
from YourTable yt1
where exists
(
    select *
    from YourTable yt2
    where yt2.ID = yt1.ID
      and yt2.ID_Name <> yt1.ID_Name
)
Now, most ID columns are defined as primary key and are unique. So in a regular database you'd expect both queries to return an empty set.
-- For every ID, the number of different ID_name values associated with it.
-- DENSE_RANK() numbers the distinct ID_name values within each ID (equal names
-- share a rank), so the highest rank per ID equals its count of distinct names.
-- Partitioning by (ID, ID_name) with ROW_NUMBER() would instead count repeated
-- rows of one name.
select tt.ID,
       max(tt.myRank) as name_count
from
(
    select
        ip.ID,
        ip.ID_name,
        dense_rank() over (partition by ip.ID order by ip.ID_name) as myRank
    from YourTable ip
) tt
group by tt.ID
This gives you every ID with its total number of ID_Name values.
If you want only those IDs which have more than one name associated, just add a WHERE clause,
e.g.
-- Only the IDs associated with more than one ID_name, with their name count.
-- DENSE_RANK() numbers the distinct ID_name values within each ID, so a rank
-- above 1 exists only for IDs that have at least two different names, and the
-- highest rank per ID is its count of distinct names.
select tt.ID,
       max(tt.myRank) as name_count
from
(
    select
        ip.ID,
        ip.ID_name,
        dense_rank() over (partition by ip.ID order by ip.ID_name) as myRank
    from YourTable ip
) tt
where tt.myRank > 1
group by tt.ID

How do I get records before and after given one?

I have the following table structure:
Id, Message
1, John Doe
2, Jane Smith
3, Error
4, Jane Smith
Is there a way to get the error record and the surrounding records? i.e. find all Errors and the record before and after them.
-- numberedlogtable: every log row with its position (RN) in Id order.
;WITH numberedlogtable AS
(
    SELECT Id,
           Message,
           ROW_NUMBER() OVER (ORDER BY ID) AS RN
    FROM logtable
)
-- Keep each 'Error' row plus the rows one position before and after it:
-- every error position is expanded by the offsets -1, 0 and +1.
SELECT Id,
       Message
FROM numberedlogtable
WHERE RN IN
(
    SELECT RN + i
    FROM numberedlogtable
    CROSS JOIN
    (
        SELECT -1 AS i
        UNION ALL
        SELECT 0
        UNION ALL
        SELECT 1
    ) n
    WHERE Message = 'Error'
)
-- Return the first 'Error' row (lowest id) together with the row immediately
-- before it and the row immediately after it, in id order.
-- Each neighbour is located separately, so the query still returns the error
-- row when it is the first or the last row of the table.
;WITH err AS
(
    -- the first error row
    SELECT TOP 1 l.id
    FROM log AS l
    WHERE l.message = 'Error'
    ORDER BY l.id
),
p AS
(
    -- the closest row before the error
    SELECT TOP 1 l.id
    FROM log AS l
    WHERE l.id < (SELECT id FROM err)
    ORDER BY l.id DESC
),
n AS
(
    -- the closest row after the error
    SELECT TOP 1 l.id
    FROM log AS l
    WHERE l.id > (SELECT id FROM err)
    ORDER BY l.id
)
SELECT l.*
FROM log AS l
WHERE l.id IN
(
    SELECT id FROM p
    UNION ALL
    SELECT id FROM err
    UNION ALL
    SELECT id FROM n
)
ORDER BY l.id
Adapt this routine to pick out your target.
-- Return the row with the given Id plus the nearest row before it and the
-- nearest row after it. T-SQL local variables are prefixed with @
-- (# denotes a temporary table).
DECLARE @TargetId int
SET @TargetId = 3
select *
from LogTable
where Id in (-- "before": highest Id below the target
             select max(Id)
             from LogTable
             where Id < @TargetId
             -- target
             union all select @TargetId
             -- "after": lowest Id above the target
             union all select min(Id)
             from LogTable
             where Id > @TargetId)
-- numbered: every #Temp row with its position (RNO) in ID order.
-- err_window: for each 'Error' row, the positions just before it, of it, and
-- just after it. A row is returned once for every window it falls in.
select id, messag
from (select row_number() over (order by ID) as RNO, *
      from #Temp) as numbered
inner join (select SubRNO - 1 as prev_rno,
                   SubRNO     as err_rno,
                   SubRNO + 1 as next_rno
            from (select row_number() over (order by ID) as SubRNO, *
                  from #Temp) as positioned
            where messag = 'Error') as err_window
    on numbered.RNO = err_window.prev_rno
    or numbered.RNO = err_window.err_rno
    or numbered.RNO = err_window.next_rno
-- Logs: every LogTable row with its position (rownum) in id order.
;WITH Logs AS
(
    SELECT ROW_NUMBER() OVER (ORDER BY lt.id) AS rownum,
           lt.id,
           lt.message
    FROM LogTable AS lt
)
-- One output row per 'Error' row: its own id plus the ids of the rows directly
-- before and after it. LEFT JOINs keep the error row when it has no
-- predecessor or successor; the missing id is then NULL.
SELECT curr.id AS id,
       prev.id AS prev_id,
       nxt.id  AS next_id
FROM Logs AS curr
LEFT JOIN Logs AS prev
    ON prev.rownum = curr.rownum - 1
LEFT JOIN Logs AS nxt
    ON nxt.rownum = curr.rownum + 1
WHERE curr.message = 'Error'
-- Return each 'Error' row and the rows whose id is exactly one lower or one
-- higher. This relies on ids being consecutive; a gap in the id sequence means
-- the corresponding neighbour is not found.
-- String literals use single quotes (double quotes denote identifiers in
-- standard SQL) and match the 'Error' spelling of the sample data.
select id, message
from tbl
where id in (
    select id from tbl where message = 'Error'
    union
    select id - 1 from tbl where message = 'Error'
    union
    select id + 1 from tbl where message = 'Error'
)
Get fixed number of rows before & after target
Using UNION for a simple, high performance query (I found selected answer WITH query above to be extremely slow)
Here is a high performance alternative to the WITH top selected answer, when you know an ID or specific identifier for a given record, and you want to select a fixed number of records BEFORE and AFTER that record. Requires a number field for ID, or something like date that can be sorted ascending / descending.
Example: You want to select the 10 records before and after a specific error was recorded, you know the error ID, and can sort by date or ID.
The following query gets (inclusive) the 1 result above, the identified record itself, and the 1 record below. After the UNION, the results are sorted again in descending order.
-- Return the identified record, the 1 record after it and the 1 record before
-- it, sorted by id descending. @TargetId is the id of the known record; supply
-- it as a variable or parameter. Adjust the TOP values to widen the window.
-- Each TOP ... ORDER BY lives in its own derived table because ORDER BY is not
-- allowed directly on a branch of a UNION. The branches cannot overlap
-- (id >= target vs. id < target), so UNION ALL is sufficient.
SELECT q.id,
       q.content
FROM (
    SELECT at_or_after.id,
           at_or_after.content
    FROM (
        SELECT TOP 2
            id, content
        FROM
            the_table
        WHERE
            id >= @TargetId
        ORDER BY id ASC
    ) AS at_or_after
    UNION ALL
    SELECT before.id,
           before.content
    FROM (
        SELECT TOP 1
            id, content
        FROM
            the_table
        WHERE
            id < @TargetId
        ORDER BY id DESC
    ) AS before
) q
ORDER BY q.id DESC

Filter on count(*) in oracle

I have a grouped query, and would like to filter it based on count(*)
Can I do this without a subquery?
This is what I have currently:
-- Asker's current approach: count the rows per ID in an inline view, then
-- filter the view on that count.
select grouped.*
from (
    select ID,
           count(*) cnt
    from name
    group by ID
) grouped
where grouped.cnt > 1;
What you are looking for is the HAVING clause:
-- Count the rows per ID and keep only IDs occurring more than once; HAVING
-- filters on the aggregate directly, so no wrapping subquery is needed.
select
    ID,
    count(*) cnt
from name
group by
    ID
having
    count(*) > 1;