Find and delete duplicate friend connections in SQL Server - sql

I need a script that finds members who have the same friend connection stored more than once, so that the friendship is not shown more than once on the website.
The MemberConnection table is as follows:
+----------------------+------------+----------------+
| MemberConnectionID | MemberID | ConnMemberID |
+----------------------+------------+----------------+
| 25 | 33 | 43 |
| 26 | 43 | 33 |
| 27 | 13 | 143 |
| 28 | 143 | 13 |
| 29 | 33 | 43 |
+----------------------+------------+----------------+
As you can see rows 25 and 29 are identical and I need to find these duplicates so they can be deleted.
How can I write a script for this?
Any help that anyone can provide would be greatly appreciated.
Many thanks in advance
neojakey

Try this if you want to keep one connection from each set of duplicates.
Use this to select the surplus connections:
-- List the surplus copies of every (MemberID, ConnMemberID) pair.
-- Rows of a pair are numbered by MemberConnectionID; number 1 (the lowest ID)
-- is the copy to keep, every higher number is reported as a duplicate.
;WITH NumberedConnections AS (
    SELECT
        mc.MemberConnectionID,
        ROW_NUMBER() OVER (
            PARTITION BY mc.MemberID, mc.ConnMemberID
            ORDER BY mc.MemberConnectionID
        ) AS CopyNo
    FROM MemberConnection AS mc
)
SELECT nc.MemberConnectionID
FROM NumberedConnections AS nc
WHERE nc.CopyNo > 1
or use this to delete from table:
-- Delete the surplus copies of every (MemberID, ConnMemberID) pair.
-- The delete goes through the CTE, so it removes the underlying
-- MemberConnection rows numbered 2 and above; copy number 1 of each pair stays.
;WITH NumberedConnections AS (
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY MemberID, ConnMemberID
            ORDER BY MemberConnectionID
        ) AS CopyNo
    FROM MemberConnection
)
DELETE FROM NumberedConnections
WHERE CopyNo > 1

You can do something like :
-- Show every (MemberID, ConnMemberID) pair that is stored more than once,
-- together with the number of copies (column c).
-- COUNT accepts a single argument, so the rows of each group are counted with
-- COUNT(*), and the "more than once" filter is applied per group with HAVING.
SELECT
    MemberID,
    ConnMemberID,
    COUNT(*) AS c
FROM MemberConnection
GROUP BY MemberID, ConnMemberID
HAVING COUNT(*) > 1
This will show you all repeated rows.

Using this as a test:
-- Test fixture: a temporary table holding three connections, two of which
-- (IDs 1 and 3) describe the same MemberID/ConMemberID pair.
-- A name starting with # is a temporary table and has to be created with
-- CREATE TABLE; DECLARE ... TABLE only accepts @-prefixed table variables.
IF OBJECT_ID('tempdb..#Table') IS NOT NULL
    DROP TABLE #Table;  -- allow the script to be re-run in the same session

CREATE TABLE #Table
(
    MemberConnectionID INT,
    MemberID           INT,
    ConMemberID        INT
);

INSERT INTO #Table (MemberConnectionID, MemberID, ConMemberID)
VALUES
    (1, 2, 3),
    (2, 3, 4),
    (3, 2, 3);  -- duplicate of connection 1
Select the occurrence count of each member connection:
-- One row per (MemberID, ConMemberID) pair with the number of connection
-- rows stored for it; COUNT skips rows whose MemberConnectionID is NULL.
SELECT
    COUNT(src.MemberConnectionID) AS Occurence,
    src.MemberID,
    src.ConMemberID
FROM #Table AS src
GROUP BY
    src.MemberID,
    src.ConMemberID
Delete Statement
-- Remove duplicate connections from #Table while keeping one row per pair.
-- For every (MemberID, ConMemberID) pair stored more than once, the row with
-- the lowest MemberConnectionID is kept and all later copies are deleted.
-- (Joining on the pair alone would delete the kept copy as well.)
DELETE t
FROM #Table AS t
INNER JOIN (
    SELECT
        MemberID,
        ConMemberID,
        MIN(MemberConnectionID) AS KeepID   -- the surviving row of the pair
    FROM #Table
    GROUP BY
        MemberID,
        ConMemberID
    HAVING COUNT(MemberConnectionID) > 1    -- only pairs that are duplicated
) AS dup
    ON dup.MemberID = t.MemberID
   AND dup.ConMemberID = t.ConMemberID
WHERE t.MemberConnectionID > dup.KeepID

I always use this little query (changed to meet your table).
-- Delete duplicate connections, keeping the most recent row of each pair.
-- The derived table finds, for every (MemberId, ConnMemberId) pair stored more
-- than once, the highest MemberConnectionId; every row of that pair with a
-- lower id is deleted.
-- The derived table reads from MemberConnection itself, so the delete target
-- and the duplicate lookup refer to the same table.
delete mc
from MemberConnection as mc
join
(select max(MemberConnectionId) as MaxConnectionId, MemberId, ConnMemberId
from MemberConnection
group by MemberId, ConnMemberId
having count(*) > 1
) as derived
on mc.MemberId = derived.MemberId
and mc.ConnMemberId = derived.ConnMemberId
and derived.MaxConnectionId > mc.MemberConnectionId

Related

How to update one row that has max value in column (SQL Server)

I am trying to update the Rows that have the Max score with the value of 'Yes' in the Text1 column.
This is straightforward except when there are multiple rows with the max score.
If they have the same score, I just want to select the top row to have the value 'Yes'. Only one row with identical Vendor IDs should have the 'Yes' value.
-- Flag every Suppliers row whose VCScore equals the highest VCScore of its
-- Vendor. When several rows of a vendor tie on that score, all of them match
-- the join and all of them are set to 'Yes'.
UPDATE s
SET Text1 = 'Yes'
FROM Suppliers AS s
INNER JOIN (
    SELECT
        Vendor,
        MAX(VCScore) AS MaxVCScore
    FROM Suppliers
    GROUP BY Vendor
) AS best
    ON best.Vendor = s.Vendor
   AND best.MaxVCScore = s.VCScore
I do not want to use TOP 1 because that will only update one row in the whole table. I instead want only one row for each Vendor to be updated. (Vendor can be identical which is what I am trying to fix.) I cannot add a Group By clause to the Update statement as I would like to group by Vendor but that is incorrect syntax.
-- Flag exactly one row per Vendor: the row ranked first by VCScore, highest first.
-- The update target is Suppliers, the same table the CTE ranks.
-- NOTE(review): pkey stands for the primary-key column of Suppliers; replace
-- it with the real column name. It is also used as the tie-breaker so the
-- chosen row is deterministic when several rows share the top score.
with t as (
    select
        pkey,
        row_number() over (partition by Vendor order by VCScore desc, pkey) as rn
    from Suppliers
)
update s
set Text1 = 'Yes'
from Suppliers s
join t on s.pkey = t.pkey and t.rn = 1
Here's one way you may go about this using a Common Table Expression (CTE). The following may be run in SSMS:
-- Self-contained demo: among the supplier rows whose VCScore equals the top
-- score recorded for their vendor, flag exactly one row per vendor.
-- Table variables are declared with the @ prefix (DECLARE does not accept #).
DECLARE @Suppliers table ( Vendor varchar(20), VCScore int, Text1 varchar(3), SupplierPK int IDENTITY (1,1) );
INSERT INTO @Suppliers ( Vendor, VCScore ) VALUES
( 'Vendor1', 85 ), ( 'Vendor1', 85 ), ( 'Vendor1', 85 ), ( 'Vendor2', 65 ), ( 'Vendor2', 65 );

DECLARE @Vendor table ( Vendor varchar(20), VCScore int );
INSERT INTO @Vendor ( Vendor, VCScore ) VALUES
( 'Vendor1', 85 ), ( 'Vendor1', 25 ), ( 'Vendor1', 45 ), ( 'Vendor2', 45 ), ( 'Vendor2', 65 );

-- Keep only supplier rows matching the vendor's maximum score, then number
-- them per vendor by SupplierPK so that the lowest key gets RowNo = 1.
WITH cte AS (
    SELECT
        s.Vendor,
        s.Text1,
        s.SupplierPK,
        ROW_NUMBER() OVER ( PARTITION BY s.Vendor ORDER BY s.SupplierPK ) AS RowNo
    FROM @Suppliers AS s
    OUTER APPLY (
        SELECT MAX ( v.VCScore ) AS MaxVCScore FROM @Vendor AS v WHERE v.Vendor = s.Vendor
    ) AS x
    WHERE
        s.VCScore = x.MaxVCScore
)
-- Updating through the CTE writes to the underlying @Suppliers rows.
UPDATE cte
SET
    Text1 = 'Yes'
WHERE
    cte.RowNo = 1;

SELECT Vendor, VCScore, Text1, SupplierPK FROM @Suppliers ORDER BY Vendor, SupplierPK;
Returns
+---------+---------+-------+------------+
| Vendor | VCScore | Text1 | SupplierPK |
+---------+---------+-------+------------+
| Vendor1 | 85 | Yes | 1 |
| Vendor1 | 85 | NULL | 2 |
| Vendor1 | 85 | NULL | 3 |
| Vendor2 | 65 | Yes | 4 |
| Vendor2 | 65 | NULL | 5 |
+---------+---------+-------+------------+
I am making the assumption that you have a primary key value that can be sorted in your Suppliers table.
I recommend using an updatable CTE:
-- Number the rows of each Vendor by VCScore, highest first, and set Text1 to
-- 'Yes' on the row numbered 1. The update runs through the CTE, so it changes
-- the underlying Suppliers row.
with ranked_suppliers as (
    select
        src.*,
        row_number() over (
            partition by Vendor
            order by VCScore desc
        ) as score_position
    from Suppliers src
)
update ranked_suppliers
set Text1 = 'Yes'
where score_position = 1;
Note that no JOIN is needed.

Select non existing Numbers from Table each ID

I'm new to T-SQL and I'm struggling to get the numbers that don't exist in my table for each ID.
Example:
CustomerID Group
1 1
3 1
6 1
4 2
7 2
I want to get the IDs that do not exist and select them like this:
CustomerID Group
2 1
4 1
5 1
5 2
6 2
....
..
The solutions I tried (using a CTE, or inserting the data first and then filtering with a NOT EXISTS clause) don't work well.
Any Ideas?
If you can live with ranges rather than a list with each one, then an efficient method uses lead():
-- Report each gap in the customer_id sequence of a group as a range.
-- Every row is paired with the next customer_id of the same group; when that
-- next id is not the direct successor, the ids in between are missing.
select
    with_next.group_id,
    (with_next.customer_id + 1) as first_missing_customer_id,
    (with_next.next_ci - 1) as last_missing_customer_id
from (
    select
        src.*,
        lead(src.customer_id) over (
            partition by src.group_id
            order by src.customer_id
        ) as next_ci
    from t src
) with_next
where with_next.next_ci <> with_next.customer_id + 1
Cross join 2 recursive CTEs to get all the possible combinations of [CustomerID] and [Group] and then LEFT join to the table:
-- List every CustomerID that is missing inside the id range of each Group.
-- @c and @g are the largest CustomerID and Group in the table; they bound the
-- two number series below. (Local variables take the @ prefix.)
declare @c int = (select max([CustomerID]) from tablename);
declare @g int = (select max([Group]) from tablename);
with
-- 1 .. @c
customers as (
    select 1 as cust
    union all
    select cust + 1
    from customers where cust < @c
),
-- 1 .. @g
groups as (
    select 1 as gr
    union all
    select gr + 1
    from groups where gr < @g
),
-- every possible (CustomerID, Group) combination
cte as (
    select cust, gr
    from customers cross join groups
)
select c.cust as [CustomerID], c.gr as [Group]
from cte c left join tablename t
    on t.[CustomerID] = c.cust and t.[Group] = c.gr
where t.[CustomerID] is null   -- combination not present in the table
    -- only report gaps strictly between the group's smallest and largest id
    and c.cust > (select min([CustomerID]) from tablename where [Group] = c.gr)
    and c.cust < (select max([CustomerID]) from tablename where [Group] = c.gr)
-- Recursive CTEs stop with an error after 100 levels by default; lift the cap
-- so that id ranges above 100 work.
option (maxrecursion 0);
See the demo.
Results:
> CustomerID | Group
> ---------: | ----:
> 2 | 1
> 4 | 1
> 5 | 1
> 5 | 2
> 6 | 2

How to find max value from each group and display their information when using "group by"

For example, I created a table of people who contribute to 2 campaigns:
+-------------------------------------+
| ID Name Campaign Amount (USD) |
+-------------------------------------+
| 1 A 1 10 |
| 2 B 1 5 |
| 3 C 2 7 |
| 4 D 2 9 |
+-------------------------------------+
Task: For each campaign, find the person (Name, ID) who contributed the most to it.
Expected result is
+-----------------------------------------+
| Campaign Name ID |
+-----------------------------------------+
| 1 A 1 |
| 2 D 4 |
+-----------------------------------------+
I used "group by Campaign", but the result has only 2 columns, "Campaign" and "max value", when I need "Name" and "ID".
Thanks for your help.
Edited: I fix some values, really sorry
You can use analytic functions for this:
-- Attach the highest amount of the campaign to every row, then keep the rows
-- whose own amount equals it (ties return several rows per campaign).
select
    ranked.name,
    ranked.id,
    ranked.amount
from (
    select
        src.*,
        max(src.amount) over (partition by src.campaign) as max_amount
    from t src
) ranked
where ranked.amount = ranked.max_amount;
You can also do it by assigning a rank/row_number partitioned by campaign and ordered by amount in descending order.
Query
-- Rank the rows of each campaign by amount, highest first, and return the
-- rows ranked 1. DENSE_RANK gives tied amounts the same rank, so every
-- top contributor of a campaign is returned.
;with ranked_contributions as (
    select
        dense_rank() over (
            partition by [Campaign]
            order by [Amount] desc
        ) as [num],
        *
    from [your_table_name]
)
select
    [Campaign],
    [Name],
    [ID]
from ranked_contributions
where [num] = 1;
Try the next query:-
-- For each campaign return the contributor(s) whose Amount equals the
-- campaign's highest Amount.
-- The maximum is computed per Campaign only (grouping by Name and ID as well
-- would make every row its own group), and the aggregate gets an alias
-- because every column of a derived table must be named.
SELECT t.Campaign , t.Name , t.ID
FROM MyTable AS t
INNER JOIN (
    SELECT Campaign , MAX (Amount) AS MaxAmount
    FROM MyTable
    GROUP BY Campaign
) AS top_amount
    ON top_amount.Campaign = t.Campaign
   AND top_amount.MaxAmount = t.Amount;
Simply use a WHERE clause that compares the amount with the maximum amount of its campaign,
as in the following generic code:
-- Generic pattern: rows whose d equals the highest d of their own group a.
-- The subquery is correlated on a; comparing against the list of all group
-- maxima would also return a row whose d merely equals another group's maximum.
select t.a, t.b, t.c
from tablename t
where t.d =
(
    select max(m.d)
    from tablename m
    where m.a = t.a
)
Demo:-
-- Demo: top contributor of each campaign on the sample data.
Create table #MyTable (ID int , Name char(1), Campaign int , Amount int)
go
insert into #MyTable (ID, Name, Campaign, Amount) values (1,'A',1,10)
insert into #MyTable (ID, Name, Campaign, Amount) values (2,'B',1,5)
insert into #MyTable (ID, Name, Campaign, Amount) values (3,'C',2,7)
insert into #MyTable (ID, Name, Campaign, Amount) values (4,'D',2,9)
go
-- The subquery is correlated on Campaign, so an amount is only compared with
-- the maximum of its own campaign, never with another campaign's maximum.
select t.Campaign, t.Name , t.ID
from #MyTable t
where t.Amount =
(
    select max(m.Amount)
    from #MyTable m
    where m.Campaign = t.Campaign
)
drop table #MyTable
Result:-
Please find the below code for the same
-- For every row, count the rows of the same campaign with a larger amount;
-- a count of 0 means no one in the campaign contributed more, so the row is
-- a top contributor. The count is returned as the extra column "record".
SELECT *
FROM #MyTable AS T
OUTER APPLY (
    SELECT COUNT(1) AS record
    FROM #MyTable AS T1
    WHERE T1.Campaign = T.Campaign
      AND T1.amount > T.amount
) AS E
WHERE E.record = 0

Get latest rows by date from aggregate

Hey, I'm kind of stuck with this query. I'm using SQL Server.
The table has a UNIQUE(date, medId, userId) constraint.
I have this table
date | medId | userId | Quantity
2016-06-10 | 2 | 1 | 28
2016-06-07 | 1 | 1 | 19
2016-06-06 | 1 | 1 | 10
i want to get the row with the max date, per group of medId,userId, in this case
i would get
2016-06-10 | 2 | 1 | 28
2016-06-07 | 1 | 1 | 19
thanks in advance!
i've tried this
-- Latest stock row per (medId, userId).
-- The inner query collapses rows sharing user, quantity and med to their
-- latest date, then numbers the groups of each med/user by that date,
-- newest first; the outer query keeps the group numbered 1.
SELECT
    a.userMedStockDate,
    a.userMedStockMedId,
    a.userMedStockUserId,
    a.userMedStockQuantity
FROM (
    SELECT
        MAX(s.userMedStockDate) AS userMedStockDate,
        s.userMedStockQuantity,
        s.userMedStockUserId,
        s.userMedStockMedId,
        ROW_NUMBER() OVER (
            PARTITION BY s.userMedStockMedId, s.userMedStockUserId
            ORDER BY MAX(s.userMedStockDate) DESC
        ) AS rnk
    FROM UserMedStock AS s
    GROUP BY
        s.userMedStockUserId,
        s.userMedStockQuantity,
        s.userMedStockMedId
) AS a
WHERE a.rnk = 1
[SOLVED]
this should work
-- Number the rows of each (medId, userId) by date, newest first, and keep
-- the newest one. The numbering column rowid is part of the output.
select *
from (
    select
        src.[date],
        src.medId,
        src.userId,
        src.Quantity,
        row_number() over (
            partition by src.medId, src.userId
            order by src.[date] desc
        ) as rowid
    from yourtable src
) as x
where x.rowid = 1
Could also try this:
-- Find the latest date of each (medId, userId) and join back to the table
-- to fetch the full rows carrying that date.
select y.*
from table1 y
inner join (
    select
        max(src.[Date]) as [Date],
        src.medId,
        src.userId
    from table1 src
    group by src.medId, src.userId
) x
    on x.[Date] = y.[Date]
   and x.medId = y.medId
   and x.userId = y.userId
i changed the fields to my actual table but here
-- Return, for every med/user combination, the stock row with the newest date.
-- Groups of (user, quantity, med) are reduced to their latest date and ranked
-- per med/user by that date in descending order; rank 1 is returned.
SELECT
    a.userMedStockDate,
    a.userMedStockMedId,
    a.userMedStockUserId,
    a.userMedStockQuantity
FROM (
    SELECT
        MAX(userMedStockDate) AS userMedStockDate,
        userMedStockQuantity,
        userMedStockUserId,
        userMedStockMedId,
        ROW_NUMBER() OVER (
            PARTITION BY userMedStockMedId, userMedStockUserId
            ORDER BY MAX(userMedStockDate) DESC
        ) AS rnk
    FROM UserMedStock
    GROUP BY
        userMedStockUserId,
        userMedStockQuantity,
        userMedStockMedId
) a
WHERE a.rnk = 1

T-sql rank for max and min value

I need help with a t-sql query.
I have a table with this structure:
id | OverallRank | FirstRank | SecondRank | Nrank..
1 | 10 | 20 | 30 | 5
2 | 15 | 24 | 12 | 80
3 | 10 | 40 | 37 | 12
I need a query that produces this kind of result:
When id: 1
id | OverallRank | BestRankLabel | BestRankValue | WorstRankLabel | WorstRankValue
1 | 10 | SecondRank | 30 | Nrank | 5
When id: 2
id | OverallRank | BestRankLabel | BestRankValue | WorstRankLabel | WorstRankValue
2 | 15 | FirstRank | 24 | SecondRank | 12
How can I do it?
Thanks in advance
-- Best (highest) and worst (lowest) rank value per id, with the name of the
-- column that holds it.
-- Step 1: UNPIVOT turns the rank columns of ##input into one row per
-- (id, RankValue, RankName).
-- NOTE(review): OverallRank is in the UNPIVOT list, so it takes part in the
-- max/min below as well; remove it from the list if it should not compete.
with cte(id, RankValue,RankName) as (
SELECT id, RankValue,RankName
FROM
(SELECT id, OverallRank, FirstRank, SecondRank, Nrank
FROM ##input) p
UNPIVOT
(RankValue FOR RankName IN
(OverallRank, FirstRank, SecondRank, Nrank)
)AS unpvt)
-- Step 2: join each unpivoted row to its id's max and min value (t2, t3) and
-- fold the rows back to one row per id. The CASE expressions keep RankName
-- only on rows whose value equals the max/min; when several columns tie,
-- max() returns the greatest of those names.
select t1.id, max(case when RankName = 'OverallRank' then RankValue else null end) as OverallRank,
max(case when t1.RankValue = t2.MaxRankValue then RankName else null end) as BestRankName,
MAX(t2.MaxRankValue) as BestRankValue,
max(case when t1.RankValue = t3.MinRankValue then RankName else null end) as WorstRankName,
MAX(t3.MinRankValue) as WorstRankValue
from cte as t1
left join (select id, MAX(RankValue) as MaxRankValue from cte group by id) as t2 on t1.id = t2.id
left join (select id, min(RankValue) as MinRankValue from cte group by id) as t3 on t1.id = t3.id
group by t1.id
This works with your test data. You only need to edit the list RankName IN (OverallRank, FirstRank, SecondRank, Nrank) so that it names the right columns.
-- Expression returning the name of the column that holds the highest value.
-- Each branch checks one column against all the others; >= is used so that a
-- tie is resolved in favour of the first matching branch instead of matching
-- none. NULL values make the comparisons unknown, so rows containing NULLs
-- fall through to the ELSE branch.
CASE
WHEN OverallRank >= FirstRank AND OverallRank >= SecondRank AND OverallRank >= Nrank THEN 'OverallRank'
WHEN FirstRank >= OverallRank AND FirstRank >= SecondRank AND FirstRank >= Nrank THEN 'FirstRank'
WHEN SecondRank >= OverallRank AND SecondRank >= FirstRank AND SecondRank >= Nrank THEN 'SecondRank'
ELSE 'Nrank'
END
This kind of query is why you should normalise your data.
-- Best and worst rank column for a single id.
-- @id selects the row; @numranks must equal the number of columns listed in
-- the UNPIVOT so that rn = @numranks identifies the lowest value.
-- (Local variables take the @ prefix.)
declare @id int, @numranks int
select @id = 1, @numranks = 3 -- number of Rank columns
;with cte as
(
    select *
    from
    (
        -- number the unpivoted values of each id, highest value first
        select *,
            ROW_NUMBER() over (partition by id order by Rank desc) rn
        from
        (
            select * from YourBadlyDesignedTable
            unpivot (Rank for RankNo in (FirstRank, SecondRank, ThirdRank))u -- etc
        ) v2
    ) v1
    -- keep only the first (highest) and last (lowest) value of the id
    where id=@id and rn in (1, @numranks)
)
select
    best.id,
    best.OverallRank,
    best.RankNo as BestRankLabel,
    best.Rank as BestRankValue,
    worst.RankNo as WorstRankLabel,
    worst.Rank as WorstRankValue
from (select * from cte where rn=1) best
inner join (select * from cte where rn>1) worst
    on best.id = worst.id
You can remove the id filter from the CTE's WHERE clause if you want all rows.