Delete older from a duplicate select - sql

I have been working on a query to find and delete rows with duplicate column values. Currently I have this query (it returns the duplicates):
-- List every row whose NUIP occurs on more than one row of the table.
SELECT r.NUIP, r.FECHA_REGISTRO
FROM registros_civiles_nacimiento r
INNER JOIN (
    -- one row per NUIP that is present at least twice
    SELECT NUIP
    FROM registros_civiles_nacimiento
    GROUP BY NUIP
    HAVING COUNT(NUIP) > 1
) dup
    ON dup.NUIP = r.NUIP
ORDER BY r.NUIP
This works, returning a table like this:
NUIP FECHA_REGISTRO
38120100138 1975-05-30
38120100138 1977-08-31
40051800275 1980-09-24
40051800275 1999-11-29
42110700118 1972-10-26
42110700118 1982-04-22
44030700535 1982-10-19
44030700535 1993-05-05
46072300777 1991-01-17
46072300777 1979-03-30
The thing is that I need to delete the rows with duplicate column values, but only the row with the oldest date. For example, for the given result, once the needed query has run, this is the list of results that must be kept:
NUIP FECHA_REGISTRO
38120100138 1977-08-31
40051800275 1999-11-29
42110700118 1982-04-22
44030700535 1993-05-05
46072300777 1991-01-17
How can I do this using plain SQL?

--PULL YOUR SELECT OF RECS WITH DUPES INTO A TEMP TABLE
--(OR CREATE A NEW TABLE SO THAT YOU CAN KEEP THEM AROUND FOR LATER IN CASE)
--NOTE(REVIEW): THE DUPES ARE READ FROM SO_NUIP BUT DELETED FROM so_registros_civiles_nacimiento;
--PRESUMABLY BOTH NAMES STAND FOR THE SAME SOURCE TABLE - CONFIRM BEFORE RUNNING
SELECT NUIP,FECHA_REGISTRO
INTO #NUIP
FROM SO_NUIP
WHERE NUIP IN (
SELECT NUIP
FROM SO_NUIP
GROUP BY NUIP
HAVING (COUNT(NUIP) > 1)
)
--CREATE FLAG FOR DETERMINIG DUPES
ALTER TABLE #NUIP ADD DUPLICATETOREMOVE bit
--USE `RANK()` TO SET FLAG
--RANK 1 IS THE EARLIEST FECHA_REGISTRO WITHIN EACH NUIP, SO ONLY THE OLDEST ROW(S) GET FLAGGED
UPDATE #NUIP
SET DUPLICATETOREMOVE = CASE X.RANK
WHEN 1 THEN 1
ELSE 0
END
--SELECT *
FROM #NUIP A
INNER JOIN (SELECT NUIP,FECHA_REGISTRO,RANK() OVER (PARTITION BY [NUIP] ORDER BY FECHA_REGISTRO ASC) AS RANK
FROM #NUIP) X ON X.NUIP = A.NUIP AND X.FECHA_REGISTRO = A.FECHA_REGISTRO
--HERE IS YOUR DELETE LIST
SELECT *
FROM so_registros_civiles_nacimiento R
JOIN #NUIP N ON N.NUIP = R.NUIP AND N.FECHA_REGISTRO = R.FECHA_REGISTRO
WHERE N.DUPLICATETOREMOVE = 1
--HERE IS YOUR KEEP LIST
SELECT *
FROM so_registros_civiles_nacimiento R
JOIN #NUIP N ON N.NUIP = R.NUIP AND N.FECHA_REGISTRO = R.FECHA_REGISTRO
WHERE N.DUPLICATETOREMOVE = 0
--ZAP THEM AND COMMIT YOUR TRANSACTION, YOU'VE STILL GOT A REC OF THE DELETEDS FOR AS LONG AS THE SCOPE OF YOUR #NUIP
--THE DELETE NAMES ITS TARGET BY ALIAS (DELETE R FROM ... R JOIN ...) SO THE JOIN TO #NUIP IS VALID
BEGIN TRAN --COMMIT --ROLLBACK
DELETE R
FROM so_registros_civiles_nacimiento R
JOIN #NUIP N ON N.NUIP = R.NUIP AND N.FECHA_REGISTRO = R.FECHA_REGISTRO
WHERE N.DUPLICATETOREMOVE = 1

You can use analytical functions for this:
-- Number the rows of each NUIP from newest to oldest FECHA_REGISTRO, then
-- delete through the CTE every row except the first one of its NUIP.
;WITH ranked_registros AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY NUIP ORDER BY FECHA_REGISTRO DESC) AS fila
    FROM registros_civiles_nacimiento
)
DELETE FROM ranked_registros
WHERE fila >= 2;

Use RANK() to create the result set ordered by date
Use WHERE EXISTS to delete from the source.
(Note: if you run the rank function over your duplicates, you should get your results. I've just referred to the whole table below)
This statement works in Oracle (replace the SELECT * with DELETE if it works for you):
-- Returns, for every NUIP, the row(s) carrying its earliest FECHA_REGISTRO.
-- The ranking runs over the whole table, so a NUIP that has only one row
-- is returned as well.
SELECT *
FROM registros_civiles_nacimiento ALL_
WHERE EXISTS (
    SELECT 1
    FROM (
        SELECT NUIP,
               FECHA_REGISTRO,
               RANK() OVER (PARTITION BY NUIP ORDER BY FECHA_REGISTRO) AS ORDER_
        FROM registros_civiles_nacimiento
    ) OLDEST
    WHERE OLDEST.ORDER_ = 1
      AND OLDEST.NUIP = ALL_.NUIP
      AND OLDEST.FECHA_REGISTRO = ALL_.FECHA_REGISTRO
);

Related

Need to update alternate rows of the data returned by the query below; not able to use a window function in an UPDATE statement (MS SQL Server)

I need some help with the query below. I want to update every alternate row of a table, given some conditions that involve multiple tables. I am not able to use a window function inside the UPDATE; how can I modify this query to make it work?
-- Asker's attempt: give collection_officer_no the value 4 on even-numbered rows
-- and 7 on the others (rows numbered by acctrefno), for the accounts selected
-- by the subquery in the WHERE clause.
-- The asker reports that the window function cannot be used in this UPDATE.
UPDATE loanacct
SET
collection_officer_no =
(
CASE
WHEN
ROW_NUMBER()OVER (ORDER BY acctrefno) %2 = 0
THEN
4
ELSE
7
END
)
WHERE acctrefno in
(
-- accounts whose current collection officer is the user named 'house',
-- restricted by loan group, days past due and status code
SELECT
[acctrefno]
FROM
[NLS].[dbo].[loanacct] L
INNER JOIN nlsusers U ON U.userno = L.collection_officer_no
WHERE
U.username like 'house' AND
-- NOTE(review): the `enter code here` text at the end of the next line looks
-- like leftover editor placeholder text rather than SQL - confirm and remove
L.loan_group_no in ( '2', '4', '5') AND`enter code here`
L.days_past_due > 25 AND
status_code_no = 0)
You can use an updatable CTE. This is pseudo-SQL, but it should get you on the right path:
-- Pseudo-SQL template: number the candidate rows inside a CTE, then update
-- through the CTE so the row number can be used in the UPDATE's WHERE clause.
-- {YourColumns}, {Column}, the WHERE ... filter and the SET ... list are
-- placeholders to be filled in.
WITH CTE AS(
    SELECT {YourColumns},
           ROW_NUMBER() OVER (/* PARTITION BY ??? */ ORDER BY {Column}) AS RN
    FROM YourTable
    WHERE ...
)
UPDATE CTE
SET ...
WHERE RN % 2 = 0;

Update table with another column in the same table

I have a table like this
Test_order
Order Num Order ID Prev Order ID
987Y7OP89 919325 0
987Y7OP90 1006626 919325
987Y7OP91 1029350 1006626
987Y7OP92 1756689 0
987Y7OP93 1756690 0
987Y7OP94 1950100 1756690
987Y7OP95 1977570 1950100
987Y7OP96 2160462 1977570
987Y7OP97 2288982 2160462
Target table should be like below,
Order Num Order ID Prev Order ID
987Y7OP89 919325 0
987Y7OP90 1006626 919325
987Y7OP91 1029350 1006626
987Y7OP92 1756689 1029350
987Y7OP93 1756690 1756689
987Y7OP94 1950100 1756690
987Y7OP95 1977570 1950100
987Y7OP96 2160462 1977570
987Y7OP97 2288982 2160462
987Y7OP97 2288900 2288982
Prev Order ID should be updated with the Order ID from the previous record from the same table.
I'm trying to create a dummy data set and update..but it's not working..
-- Asker's attempt to pair each TEST_ORDER row with a neighbouring row:
-- A numbers the rows with ROWNUM, B numbers them with ROWNUM+1, and the final
-- SELECT joins the two where those numbers are equal.
-- The asker reports that this is not working.
-- NOTE(review): neither subquery has an ORDER BY, so the row numbering
-- presumably is not guaranteed to follow ORDER_NUM - confirm
WITH A AS
(SELECT ORDER_NUM, ORDER_ID, PRIOR_ORDER_ID,ROWNUM RID1 FROM TEST_ORDER),B AS (SELECT ORDER_NUM, ORDER_ID, PRIOR_ORDER_ID,ROWNUM+1 RID2 FROM TEST_ORDER)
SELECT A.ORDER_NUM,B.ORDER_ID,A.PRIOR_ORDER_ID,B.PRIOR_ORDER_ID FROM A,B WHERE RID1 = RID2
You could use Oracle's analytic functions (also called window functions) to pick up the value from the previous order:
-- Fill PrevOrderId with the ORDERID of the preceding row (ordered by ORDERNUM)
-- for every row whose PrevOrderId is still 0.
-- An analytic function cannot be used directly in UPDATE ... SET, so LAG() is
-- computed in the MERGE source and matched back to the target row by ROWID.
-- The value is written to PrevOrderId, not to ORDERID.
MERGE INTO Test_Order tgt
USING (
    SELECT ROWID AS row_id,
           LAG(ORDERID, 1, 0) OVER (ORDER BY ORDERNUM ASC) AS prev_order_id
    FROM Test_Order
) src
ON (tgt.ROWID = src.row_id)
WHEN MATCHED THEN
    UPDATE SET tgt.PrevOrderId = src.prev_order_id
    WHERE tgt.PrevOrderId = 0
See here for the documentation on LAG()
In SQL Server you cannot use a window function directly in an UPDATE statement; I'm not positive, but I don't think you can in Oracle either. To get around that, you can update a CTE as follows.
-- Expose, next to each row's PrevOrderId, the OrderId of the row that precedes
-- it by OrderNum (0 for the first row), then write that value back through the CTE.
WITH orders_with_previous AS (
    SELECT
        PrevOrderId,
        LAG(OrderId, 1, 0) OVER (ORDER BY OrderNum) AS PrecedingOrderId
    FROM TableName
)
UPDATE orders_with_previous
SET PrevOrderId = PrecedingOrderId
And if you want to stick with the ROW_NUMBER route you were going this would be the way of doing it.
-- Number the rows by OrderNum and join each row to the one numbered just
-- before it; rows without a predecessor are not matched and stay unchanged.
;WITH numbered AS (
    SELECT
        *,
        ROW_NUMBER() OVER (ORDER BY OrderNum) AS Seq
    FROM TableName
)
UPDATE this_row
SET PrevOrderId = prior_row.OrderId
FROM numbered this_row
INNER JOIN numbered prior_row
    ON prior_row.Seq = this_row.Seq - 1

Insert into using a default value if select returns null

I have a table with a non-null field I wish to populate from another table. Trouble is the query into the other table may return null. How do I get a value (0 will do) when the query returns null?
My query is:
-- Asker's original statement: set each package's PackageTypeId to the lowest
-- PackageTypeId among the PackageTypes rows whose Name equals the package's
-- PackageTypeName.
-- When no PackageTypes row matches, the scalar subquery yields NULL, which is
-- the case the asker needs to turn into 0.
update Packages
set PackageTypeId = (SELECT TOP 1 PackageTypeId
FROM PackageTypes
WHERE Packages.PackageTypeName = PackageTypes.Name
ORDER BY PackageTypeId ASC)
I tried using coalesce, but it still fails:
-- Asker's second attempt, reported as still failing: COALESCE is applied to the
-- column inside the subquery, so it only acts on rows the subquery returns.
-- When no PackageTypes row matches, the subquery as a whole is still NULL.
update Packages
set PackageTypeId = (SELECT TOP 1 coalesce(PackageTypeId, 0) as id
FROM PackageTypes
WHERE Packages.PackageTypeName = PackageTypes.Name
ORDER BY PackageTypeId ASC)
Any ideas?
-- COALESCE wraps the whole scalar subquery, so a package with no matching
-- PackageTypes.Name receives 0 instead of NULL.
UPDATE Packages
SET PackageTypeId = COALESCE(
        (
            SELECT TOP 1 pt.PackageTypeId
            FROM PackageTypes pt
            WHERE pt.Name = Packages.PackageTypeName
            ORDER BY pt.PackageTypeId ASC
        ),
        0
    )
I recommend UPDATE FROM statement combined with ISNULL function:
-- Set every package's PackageTypeId to the smallest PackageTypeId whose Name
-- equals the package's PackageTypeName, or to 0 when no package type matches.
UPDATE
    Packages
SET
    Packages.PackageTypeId = ISNULL(PackageTypes.PackageTypeId, 0)
FROM
    Packages
-- LEFT JOIN keeps packages without a matching name; with an INNER JOIN those
-- rows would be skipped and ISNULL could never supply the default 0.
LEFT JOIN
    (
        -- one row per Name: the one with the smallest PackageTypeId
        SELECT
            PackageTypeId,
            Name
        FROM
            (
                SELECT
                    PackageTypeId,
                    Name,
                    ROW_NUMBER() OVER (PARTITION BY Name ORDER BY PackageTypeId ASC) R
                FROM
                    PackageTypes
            ) X
        WHERE
            R = 1
    ) PackageTypes
ON
    Packages.PackageTypeName = PackageTypes.Name
Note: The subquery returns the smallest PackageTypeId for each Name in PackageTypes.

Need help creating SQL query from example of data

I have a database table below.
And I want to get list of all DBKey that have: at least one entry with Staled=1, and the last entry is Staled=0
The list should not contain DBKey that has only Staled=0 OR Staled=1.
In this example, the list would be: DBKey=2 and DBKey=3
I think this should do the trick:
-- DBKeys that have at least one Staled = 1 entry and whose entry with the
-- latest EntryDateTime has Staled = 0. TABLE stands for the real table name.
SELECT DISTINCT T.DBKey
FROM TABLE T
WHERE
    -- some entry of this DBKey is staled
    EXISTS (
        SELECT 1
        FROM TABLE stale
        WHERE stale.DBKey = T.DBKey
          AND stale.Staled = 1
    )
    -- the entry carrying the newest EntryDateTime of this DBKey is not staled
    AND EXISTS (
        SELECT 1
        FROM TABLE newest
        WHERE newest.DBKey = T.DBKey
          AND newest.Staled = 0
          AND newest.EntryDateTime = (
              SELECT MAX(every.EntryDateTime)
              FROM TABLE every
              WHERE every.DBKey = T.DBKey
          )
    )
Here is a working SQLFiddle of the query, using your sample data.
The idea is to use EXISTS to look for those individual conditions that you've described. I've added comments to my code to explain what each does.
Should be done with a simple JOIN... Starting FIRST with any 1 qualifiers, joined to itself by same key AND 0 staled qualifier AND the 0 record has a higher date. Ensure you have an index on ( DBKey, Staled, EntryDateTime )
-- Pair every Staled = 1 entry with the later Staled = 0 entries of the same
-- DBKey, then keep the keys whose newest paired Staled = 0 entry is later than
-- their newest paired Staled = 1 entry.
SELECT
    stale.DBKey,
    MAX(stale.EntryDateTime) AS MaxStaled1,
    MAX(fresh.EntryDateTime) AS MaxStaled0
FROM YourTable stale
INNER JOIN YourTable fresh
    ON fresh.DBKey = stale.DBKey
    AND fresh.Staled = 0
    AND fresh.EntryDateTime > stale.EntryDateTime
WHERE stale.Staled = 1
GROUP BY stale.DBKey
HAVING MAX(fresh.EntryDateTime) > MAX(stale.EntryDateTime)
Maybe this:
-- Rank each DBKey's entries from newest to oldest; keep the newest entry when
-- it has Staled = 0 and the same DBKey also has some entry with Staled = 1.
WITH entries_newest_first AS
(
    SELECT
        ROW_NUMBER() OVER (PARTITION BY DBKey ORDER BY EntryDateTime DESC) AS RN,
        DBKey,
        Staled
    FROM table
)
SELECT *
FROM entries_newest_first newest
WHERE newest.RN = 1
  AND newest.Staled = 0
  AND EXISTS (
      SELECT 1
      FROM entries_newest_first other
      WHERE other.DBKey = newest.DBKey
        AND other.Staled = 1
  )

How to create a User Defined Function for SQL server

I have a fairly large stored procedure that I'm trying to simplify for readability.
It contains many union clauses with statements like the one shown below:
Fig.1
-- Fig.1: counts the orders that satisfy all of the conditions below.
SELECT COUNT(1) AS Total
FROM Orders
-- Condition 1: none of the order's line items has a NormalizedLineItems row
-- with OutOfStock = 1 (this is the recurring pattern shown in Fig.2).
WHERE (NOT EXISTS (
SELECT 1
FROM (
SELECT Id
FROM OrderLineItems
WHERE Orders.Id = Order_Id) AS Sub
WHERE EXISTS (
SELECT 1
FROM NormalizedLineItems
WHERE (Sub.Id = OrderLineItem_Id)
AND (OutOfStock = 1))))
-- Condition 2: the order has at least one shipment with CarrierApproved = 0.
AND (EXISTS (
SELECT 1 AS Total
FROM OrderShipments
WHERE (Orders.Id = Order_Id)
AND (CarrierApproved = 0)))
-- Condition 3: each of the following flags is 0.
AND (IsQuote = 0)
AND (Cancelled = 0)
AND (Archived = 0)
AND (Completed = 0)
AND (Holding = 0)
However there are many reoccurring patterns in each statement.
The following pattern turns up several times Fig.2
Fig.2
-- Fig.2 (fragment of a larger statement that selects from Orders): true when
-- none of the order's line items has a NormalizedLineItems row with OutOfStock = 1.
WHERE (NOT EXISTS (
SELECT 1
FROM (
SELECT Id
FROM OrderLineItems
WHERE Orders.Id = Order_Id) AS Sub
WHERE EXISTS (
SELECT 1
FROM NormalizedLineItems
WHERE (Sub.Id = OrderLineItem_Id)
AND (OutOfStock = 1))))
I'm trying to (for readability purposes) reduce the code in the master stored procedure
So I thought I'd performance-test a UDF; I've come up with the following (Fig.3).
Fig.3
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- Inline table-valued function.
-- Parameter: @OrderId - the order whose line items are inspected.
-- Returns: one row (Total = 1) for each OrderLineItems row of that order that
-- has at least one NormalizedLineItems row with OutOfStock = 1; no rows otherwise.
-- The parameter uses the @ prefix that T-SQL requires for parameters.
CREATE FUNCTION TestFunction (@OrderId int)
RETURNS TABLE
AS
RETURN
(
    SELECT 1 AS Total
    FROM (
        SELECT OrderLineItems.Id AS Id
        FROM OrderLineItems
        WHERE @OrderId = Order_Id) AS Sub
    WHERE EXISTS (
        SELECT 1 AS Total
        FROM NormalizedLineItems
        WHERE (Sub.Id = OrderLineItem_Id)
        AND (OutOfStock = 1)))
GO
Although the above compiles, I'm not really sure I'm on the right track; I'm having all sorts of problems trying to apply the above UDF to the original query.
I am seeking a concrete example of how to abstract Fig.2 from Fig.1 into a UDF so I can at least performance-test the solution to see if it's worthwhile.
Note: I do know user defined functions can be a performance nightmare, however I'm not even at a stage where I can test.
Create an order_ids table with only one column named order_id
-- Fill order_ids with the Id of every order none of whose line items has a
-- NormalizedLineItems row with OutOfStock = 1.
-- Orders is keyed by Id (see Orders.Id in the correlated subquery), so that is
-- the column copied into order_ids.order_id.
INSERT INTO order_ids (order_id)
SELECT Orders.Id
FROM Orders
WHERE (NOT EXISTS (
    SELECT 1
    FROM (
        SELECT Id
        FROM OrderLineItems
        WHERE Orders.Id = Order_Id) AS Sub
    WHERE EXISTS (
        SELECT 1
        FROM NormalizedLineItems
        WHERE (Sub.Id = OrderLineItem_Id)
        AND (OutOfStock = 1))))
Then you can simplify your Sql like this:
-- Count only the orders listed in order_ids; the join matches the stored
-- order_id against the Orders key column Id.
SELECT COUNT(1) AS Total
FROM Orders
INNER JOIN order_ids
    ON order_ids.order_id = Orders.Id
...
If the recurring statement appears in only one query,
a Common Table Expression is the best choice:
-- CTE_order_ids: Id (exposed as order_id) of every order none of whose line
-- items has a NormalizedLineItems row with OutOfStock = 1.
WITH CTE_order_ids AS
(
    SELECT Orders.Id AS order_id
    FROM Orders
    WHERE (NOT EXISTS (
        SELECT 1
        FROM (
            SELECT Id
            FROM OrderLineItems
            WHERE Orders.Id = Order_Id) AS Sub
        WHERE EXISTS (
            SELECT 1
            FROM NormalizedLineItems
            WHERE (Sub.Id = OrderLineItem_Id)
            AND (OutOfStock = 1))))
)
-- Count only the orders present in the CTE; the join refers to the CTE by its
-- own name and matches against the Orders key column Id.
SELECT COUNT(1) AS Total
FROM Orders
INNER JOIN CTE_order_ids
    ON CTE_order_ids.order_id = Orders.Id
...