I have a SQL Server table:
-- Demo table for the question: ID is the clustered primary key; the other
-- three columns are nullable.
CREATE TABLE [dbo].[Stack_Example]
(
    [ID]         [bigint]         NOT NULL,
    [Product_ID] [bigint]         NULL,
    [Quantity]   [decimal](18, 2) NULL,
    [Price]      [decimal](18, 2) NULL,
    CONSTRAINT [PK_Stack_Example] PRIMARY KEY CLUSTERED ([ID] ASC)
        WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
              ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
With data like shown under:
-- Sample rows: IDs 1 and 2 share Product_ID 25 at the same price; ID 3 is a
-- different product.
INSERT INTO [dbo].[Stack_Example] ([ID], [Product_ID], [Quantity], [Price])
VALUES
    (1, 25, CAST(55.00 AS Decimal(18, 2)), CAST(1000.00 AS Decimal(18, 2))),
    (2, 25, CAST(1.00 AS Decimal(18, 2)), CAST(1000.00 AS Decimal(18, 2))),
    (3, 26, CAST(1.00 AS Decimal(18, 2)), CAST(500.00 AS Decimal(18, 2)))
GO
My problem is that I need to group these rows by Price and Product_ID and SUM(Quantity).
I need an UPDATE/DELETE query that leaves the table itself looking like the grouped output.
It's simple when you do a SELECT query:
-- Total quantity per (Product_ID, Price) pair.
SELECT
    Product_ID,
    SUM(Quantity) AS Quantity,
    Price
FROM Stack_Example
GROUP BY
    Product_ID,
    Price
So, to begin with, I need to delete the row with ID 2 and update the row with ID 1, setting Quantity = Quantity + 1 (the Quantity of the deleted row).
Then, when I run the SELECT query without grouping and summing, I should get the same output.
-- Step 1: give every row the total quantity of its (Product_ID, Price) group.
-- The correlation includes Price because the requirement is to group by both
-- columns. Rows with a NULL Product_ID or Price are left untouched: they can
-- never match another row with "=", and the subquery would otherwise
-- overwrite their Quantity with NULL.
UPDATE Stack_Example
SET Quantity = (SELECT SUM(child.Quantity)
                FROM Stack_Example AS child
                WHERE child.Product_ID = Stack_Example.Product_ID
                  AND child.Price = Stack_Example.Price)
WHERE Product_ID IS NOT NULL
  AND Price IS NOT NULL;

-- Step 2: within each group keep the row with the lowest ID and delete every
-- other row. A row is a duplicate when a row with the same Product_ID and
-- Price but a smaller ID exists.
DELETE FROM Stack_Example
WHERE EXISTS (SELECT 1
              FROM Stack_Example AS keeper
              WHERE keeper.Product_ID = Stack_Example.Product_ID
                AND keeper.Price = Stack_Example.Price
                AND keeper.ID < Stack_Example.ID);
First update your table with total amount of products with same price
-- Copy each (Product_ID, Price) group's total quantity onto every row of
-- that group.
WITH group_totals AS
(
    SELECT
        Product_ID,
        Price,
        SUM(Quantity) AS Quantity
    FROM Stack_Example
    GROUP BY Product_ID, Price
)
UPDATE target
SET target.Quantity = totals.Quantity
FROM Stack_Example AS target
INNER JOIN group_totals AS totals
    ON target.Product_ID = totals.Product_ID
   AND target.Price = totals.Price
Then run another script to delete duplicate rows
-- Number the rows of each (Product_ID, Price) group and delete all but the
-- first. Ordering by ID makes the survivor deterministic (the lowest ID, as
-- the question asks); ordering by Product_ID inside a partition on Product_ID
-- would leave the choice of surviving row up to the engine.
;WITH numbered AS (
    SELECT
        ID,
        ROW_NUMBER() OVER (PARTITION BY Product_ID, Price ORDER BY ID) AS rn
    FROM Stack_Example
)
DELETE FROM numbered
WHERE rn > 1;
I'm a bit unclear - but couldn't you just create a view with the aggregates?
Anyhow...
I'm not sure why you would do this, but you could insert the aggregated values and then delete the older ones, as shown below.
-- Replace all rows of one product with one aggregated row per price.
-- The insert and the delete run in one transaction so a failure cannot leave
-- both the old rows and the aggregated rows in the table.
SET XACT_ABORT ON;

DECLARE @MaxId BIGINT;          -- ID is bigint in Stack_Example
DECLARE @ProductId BIGINT = 25;

BEGIN TRANSACTION;

--GET THE MAX ID OF THE WHOLE TABLE
-- (the max ID of the product alone could collide with another product's ID
--  once 1 is added to it, violating the primary key)
SELECT @MaxId = MAX(ID) FROM Stack_Example;

--INSERT THE AGGREGATE VALUES INTO TABLE
-- One new row per distinct Price of the product; ROW_NUMBER() hands out
-- consecutive new IDs so several price groups do not share the same ID.
-- NOTE(review): MAX(ID) + n is not safe against concurrent inserts.
INSERT INTO Stack_Example (ID, Product_ID, Quantity, Price)
SELECT
    @MaxId + ROW_NUMBER() OVER (ORDER BY Price),
    @ProductId,
    SUM(Quantity),
    Price
FROM Stack_Example
WHERE
    Product_ID = @ProductId
GROUP BY
    Price;

--DELETE THE PRE-AGGREGATE VALUES
DELETE FROM Stack_Example WHERE Product_ID = @ProductId AND ID <= @MaxId;

COMMIT TRANSACTION;
Well, I used aggregates and additional temporary table to achieve that:
--group records and insert them into temporary table
-- Collapse every (product_id, price) group to a single row and stage the
-- result in a temporary table. The window functions compute the group's
-- lowest id and total quantity on every row; DISTINCT then removes the
-- repeated rows.
SELECT DISTINCT
    MIN(id) OVER (PARTITION BY product_id, price) AS [ID],
    Product_id,
    SUM(Quantity) OVER (PARTITION BY product_id, price) AS [Quantity],
    Price
INTO #new_stack_example
FROM #Stack_Example

-- Empty the original table ...
DELETE FROM #Stack_example

-- ... and reload it from the staged, grouped rows.
INSERT INTO #Stack_example
SELECT *
FROM #new_stack_example
First SELECT statement reflects your logic:
So my problem here is i need to group those items by Price, Product_Id and SUM(Quantity).
But instead of grouping, I use window functions and then select distinct records.
Check this:
-- Step 1: write each (Product_ID, Price) group's total quantity onto its
-- rows. Price is part of the grouping because the requirement is to group by
-- Price and Product_ID; grouping by Product_ID alone would merge quantities
-- sold at different prices.
UPDATE s
SET s.Quantity = a.Quantity
FROM Stack_Example AS s
INNER JOIN (SELECT Product_ID, Price, SUM(Quantity) AS Quantity
            FROM Stack_Example
            GROUP BY Product_ID, Price) AS a
    ON s.Product_ID = a.Product_ID
   AND s.Price = a.Price;

-- Step 2: keep the lowest ID of every group and delete the rest. Ordering by
-- ID makes the surviving row deterministic.
;WITH cte AS (
    SELECT
        ID,
        ROW_NUMBER() OVER (PARTITION BY Product_ID, Price ORDER BY ID) AS rn
    FROM Stack_Example
)
DELETE FROM cte
WHERE rn > 1;
Related
I have a table LedgerData and need to update its Balance column.
Table Structure
-- Ledger rows per customer. Balance is meant to hold the running total of
-- Credit - Debit for the customer, in Id order.
CREATE TABLE [dbo].[LedgerData]
(
    [Id] INT NOT NULL,
    [CustomerId] INT NOT NULL,
    [Credit] INT,
    [Debit] INT,
    [Balance] INT,  -- comma added so the primary key below is clearly a table-level constraint
    CONSTRAINT [PK_dbo_LedgerData]
        PRIMARY KEY CLUSTERED ([Id] ASC)
        WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF,
              IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON,
              ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY];
Sample Data
-- Sample ledger rows. The explicit column list keeps the insert correct if
-- the table's column order ever changes. Balance starts at 0 and is filled
-- in afterwards.
INSERT INTO [dbo].[LedgerData] ([Id], [CustomerId], [Credit], [Debit], [Balance])
VALUES
    (1, 1, 50, 0, 0),
    (2, 1, 0, 25, 0),
    (3, 2, 0, 75, 0),
    (4, 1, 0, 10, 0),
    (5, 2, 5, 0, 0),
    (6, 1, 10, 25, 0);
I tried to update the Balance column per customer, ordered by [Id] ASC, but it is not updating as expected. I also explored the "SQL query to calculate sum and add sum from previous rows" question.
Please help me calculate the Balance column, where Balance = (previous row's Balance + Credit - Debit).
Ideally this is something you should be doing as you INSERT the data, by getting the previous value (and locking the table so that other INSERT statements can't occur to avoid races) and then supplying a value for the Balance. You can, however, UPDATE all the rows with a cumulative SUM and an updatable CTE:
-- Recompute Balance for every row as the customer's running total of
-- Credit - Debit in ID order, through an updatable CTE.
--  * COALESCE treats a NULL Credit/Debit as 0 (both columns are nullable);
--    otherwise a customer whose rows so far have only NULL in one of the
--    columns would get a NULL balance.
--  * ROWS UNBOUNDED PRECEDING states the running-total frame explicitly
--    instead of relying on the default RANGE frame.
WITH CTE AS (
    SELECT ID,
           CustomerID,
           Balance,
           SUM(COALESCE(Credit, 0) - COALESCE(Debit, 0))
               OVER (PARTITION BY CustomerID
                     ORDER BY ID
                     ROWS UNBOUNDED PRECEDING) AS NewBalance
    FROM dbo.LedgerData)
UPDATE CTE
SET Balance = NewBalance;
GO

-- Show the result, grouped by customer in ledger order.
SELECT Id,
       CustomerId,
       Credit,
       Debit,
       Balance
FROM dbo.LedgerData
ORDER BY CustomerId, Id;
Alternatively, don't store the aggregate value at all, and use a VIEW so that the value can always be calculated (accurately) with the same expression I have used in the CTE. For example:
-- Exposes the ledger with Balance computed on the fly as the customer's
-- running total of Credit - Debit in ID order, so no stored aggregate can go
-- stale. A NULL Credit/Debit counts as 0 (both columns are nullable);
-- ROWS UNBOUNDED PRECEDING states the running-total frame explicitly.
CREATE VIEW dbo.LedgerDataCumulative
AS
    SELECT Id,
           CustomerId,
           Credit,
           Debit,
           SUM(COALESCE(Credit, 0) - COALESCE(Debit, 0))
               OVER (PARTITION BY CustomerID
                     ORDER BY ID
                     ROWS UNBOUNDED PRECEDING) AS Balance
    FROM dbo.LedgerData;
GO
The update can be performed with a single window function.
-- Running balance per customer in id order, computed with one window
-- function and written back through the updatable CTE.
WITH upd_cte AS
(
    SELECT
        Balance,
        SUM([Credit] - [Debit]) OVER (PARTITION BY customerId ORDER BY id) AS sum_over
    FROM #LedgerData
)
UPDATE upd_cte
SET Balance = sum_over;
I have two tables:
Sales table:
Returns table:
I have to loop through the Sales table and get sum of all the Qty based on Material+Batch+customer combination until it exceeds the value of Return_qty, and update the Summed value in the Returns table.
This is the desired output:
As you can see, from the Sales table until Sales_Invoice 4 only it considered as it exceeded the value of return_Qty.
What I have tried till now?
I have tried to use while loop to loop through and calculate running total. But its not working out. Maybe approach is wrong.
Any inputs will be highly appreciated.
Try this:
-- Self-contained demo: table variables (declared with @) hold the sample
-- sales and returns; the whole script must run as a single batch.
DECLARE @Sales TABLE
(
    [Sales_Invoice] SMALLINT
   ,[Invoice_Date] DATE
   ,[Material] VARCHAR(3)
   ,[Batch] VARCHAR(2)
   ,[Customer] VARCHAR(4)
   ,[Qty] SMALLINT
);

DECLARE @Returns TABLE
(
    [Return_Invoice] SMALLINT
   ,[Invoice_Date] DATE
   ,[Material] VARCHAR(3)
   ,[Batch] VARCHAR(2)
   ,[Customer] VARCHAR(4)
   ,[Return_Qty] SMALLINT
   ,[Sales_Qty] SMALLINT
);

INSERT INTO @Sales ([Sales_Invoice], [Invoice_Date], [Material], [Batch], [Customer], [Qty])
VALUES (1, '2019-06-07', 'AB1', 'B1', 'B001', 50)
      ,(2, '2019-06-07', 'AB1', 'B1', 'B001', 20)
      ,(3, '2019-06-06', 'AB1', 'B1', 'B001', 25)
      ,(4, '2019-06-06', 'AB1', 'B1', 'B001', 11)
      ,(5, '2019-06-06', 'AB1', 'B1', 'B001', 20)
      ,(6, '2019-06-01', 'BA2', 'C1', 'Y001', 100);

INSERT INTO @Returns ([Return_Invoice], [Invoice_Date], [Material], [Batch], [Customer], [Return_Qty])
VALUES (212, '2019-06-08', 'AB1', 'B1', 'B001', 100);

-- DataSource: running total of Qty per Material/Batch/Customer in
-- Sales_Invoice order. Its [Return_Qty] column holds that running total.
WITH DataSource AS
(
    SELECT [Material], [Batch], [Customer]
          ,SUM([Qty]) OVER (PARTITION BY [Material], [Batch], [Customer] ORDER BY [Sales_Invoice] ASC) AS [Return_Qty]
    FROM @Sales
)
-- Store, per return row, the smallest running total that reaches the
-- threshold. The threshold is hard-coded to 100 here (the sample return
-- quantity).
UPDATE R
SET [Sales_Qty] = DS.[Return_Qty]
FROM @Returns R
INNER JOIN
(
    SELECT [Material], [Batch], [Customer]
          ,MIN([Return_Qty]) AS [Return_Qty]
    FROM DataSource
    WHERE [Return_Qty] >= 100
    GROUP BY [Material], [Batch], [Customer]
) DS
    ON R.[Material] = DS.[Material]
    AND R.[Batch] = DS.[Batch]
    AND R.[Customer] = DS.[Customer];

SELECT *
FROM @Returns;
If you want to be more dynamical, you can use the following:
-- Same update, but the threshold comes from each return row's own
-- [Return_Qty] instead of a constant.
-- Expects the @Sales and @Returns table variables to be declared and
-- populated earlier in the same batch.
-- DataSource: running total of Qty per Material/Batch/Customer in
-- Sales_Invoice order. Its [Return_Qty] column holds that running total.
WITH DataSource AS
(
    SELECT [Material], [Batch], [Customer]
          ,SUM([Qty]) OVER (PARTITION BY [Material], [Batch], [Customer] ORDER BY [Sales_Invoice] ASC) AS [Return_Qty]
    FROM @Sales
)
UPDATE R
SET [Sales_Qty] = Reached.[Return_Qty]
FROM @Returns R
CROSS APPLY
(
    -- Smallest running total that reaches this return's quantity. The
    -- GROUP BY means no row is produced (and the return row is left
    -- unchanged) when the sales never reach it.
    SELECT DS.[Material], DS.[Batch], DS.[Customer]
          ,MIN(DS.[Return_Qty]) AS [Return_Qty]
    FROM DataSource DS
    WHERE DS.[Return_Qty] >= R.[Return_Qty]
        AND R.[Material] = DS.[Material]
        AND R.[Batch] = DS.[Batch]
        AND R.[Customer] = DS.[Customer]
    GROUP BY DS.[Material], DS.[Batch], DS.[Customer]
) Reached;  -- alias no longer reuses the CTE name
you should really show your while statement in your post - can you do that please?
I think a common table expression using recursion is a good solution for you. something along the lines of ...
-- Recursive running total: for every return row, walk the matching sales in
-- order, accumulate Qty, stop as soon as the total reaches Return_qty, and
-- store that total in Returns.Sales_qty.
;
WITH
ordered_sales AS
(
    -- Number each return's matching sales 1..n. ROW_NUMBER (not RANK) keeps
    -- the sequence gap-free and unique even when several sales share an
    -- invoice date; Sales_Invoice is the tiebreaker.
    SELECT
        ROW_NUMBER() OVER
        (
            PARTITION BY R.Return_Invoice, S.Material, S.Batch, S.Customer
            ORDER BY S.Invoice_Date, S.Sales_Invoice
        ) AS Seqn,
        R.Return_Invoice, S.Material, S.Batch, S.Customer, S.Qty, R.Return_qty
    FROM
        Sales S
    INNER JOIN
        Returns R
        ON S.Material = R.Material AND S.Batch = R.Batch AND S.Customer = R.Customer
),
running_total AS
(
    -- Anchor: the first sale of every return. The CAST fixes the column type
    -- so that it matches the sum produced by the recursive member.
    SELECT
        Return_Invoice, Seqn, Material, Batch, Customer,
        CAST(Qty AS INT) AS TriggeringQty, Return_qty
    FROM ordered_sales
    WHERE Seqn = 1
    UNION ALL
    -- Recursive step: add the next sale while the total is still below the
    -- returned quantity.
    SELECT
        nxt.Return_Invoice, nxt.Seqn, nxt.Material, nxt.Batch, nxt.Customer,
        prev.TriggeringQty + nxt.Qty, nxt.Return_qty
    FROM running_total prev
    INNER JOIN ordered_sales nxt
        ON nxt.Return_Invoice = prev.Return_Invoice
        AND nxt.Material = prev.Material
        AND nxt.Batch = prev.Batch
        AND nxt.Customer = prev.Customer
        AND nxt.Seqn = prev.Seqn + 1
    WHERE
        prev.TriggeringQty < prev.Return_qty
)
UPDATE R
SET R.Sales_qty = T.TriggeringQty
FROM Returns R
INNER JOIN running_total T
    ON T.Return_Invoice = R.Return_Invoice
    AND T.Material = R.Material AND T.Batch = R.Batch AND T.Customer = R.Customer
-- Only the row on which the total first reaches the returned quantity
-- qualifies; the recursion stops right after it.
WHERE T.TriggeringQty >= R.Return_qty
-- Lift the default limit of 100 recursion levels for long sales histories.
OPTION (MAXRECURSION 0);
Sorry I haven't tried the above so probably won't run, but hopefully you see what's happening.
I'm struggling to think of a way to do this with T-SQL.
I have a table which is populated every 5 seconds with the prices of three currencies (GBP, EUR & USD)
I've created a trigger (after insert), which selects the last 5 records entered for a given currency:
-- The five most recent GBP prices, newest first.
SELECT TOP 5
    Price
FROM dbo.prices
WHERE coin = 'GBP'
ORDER BY Date DESC
I want to determine if the last inserted currency price is greater than the selected 5 above, how do i do this?
Thanks
As I guess: there cant be two entries for the same currency at one time. Only one insert per currency per some time (5sec). So this should fit yours requirements:
-- Demo data in a table variable (declared with @). [Date] is an identity
-- column that stands in for the insertion time.
declare @prices table ([Date] int IDENTITY(1,1) primary key, Price float, coin varchar(3));
insert into @prices (coin, Price) values
('GBP', 3.20),('EUR', 3.14),('USD', 3.14),
('GBP', 3.17),('EUR', 3.16),('USD', 3.11),
('GBP', 3.14),('EUR', 3.13),('USD', 3.16),
('GBP', 3.15),('EUR', 3.12),('USD', 3.17),
('GBP', 3.16),('EUR', 3.17),('USD', 3.11),
('GBP', 3.15),('EUR', 3.14),('USD', 3.12),
('GBP', 3.19),('EUR', 3.14),('USD', 3.16);

-- 'yes' when the newest GBP price is strictly greater than the highest of
-- the five GBP prices before it, otherwise 'No' (also when there is no
-- earlier price, because the comparison with NULL is not true).
select
    case
        when NEW.Price > PREV.Price Then 'yes'
        else 'No'
    end as CURR_JUMP_UP
from
(
    -- newest GBP row
    select top 1 COALESCE(Price,0) Price, [Date]
    from @prices where coin='GBP' order by [Date] desc
) NEW
cross apply
(
    -- highest of the (up to) five GBP prices entered before the newest one
    select MAX(Price) Price from
    (
        select top 5 Price
        from @prices
        where coin='GBP' and [Date]<NEW.[Date]
        order by [Date] desc
    ) t
) PREV
Try this query:
-- Compare the newest GBP price with the five GBP prices entered before it.
-- NOTE(review): DECIMAL(18, 4) is an assumption about dbo.prices.Price.
-- A bare DECIMAL means DECIMAL(18, 0) and would drop the fraction. Match the
-- column's real type.
DECLARE @LatestPrice DECIMAL(18, 4);
DECLARE @MaxOfPreviousFive DECIMAL(18, 4);

-- Rank the GBP rows newest-first: rn = 1 is the row just inserted and
-- rn 2..6 are the five rows before it.
WITH recent AS
(
    SELECT
        Price,
        ROW_NUMBER() OVER (ORDER BY [Date] DESC) AS rn
    FROM dbo.prices
    WHERE coin = 'GBP'
)
SELECT
    @LatestPrice = MAX(CASE WHEN rn = 1 THEN Price END),
    @MaxOfPreviousFive = MAX(CASE WHEN rn BETWEEN 2 AND 6 THEN Price END)
FROM recent
WHERE rn <= 6;

-- "Greater than the previous five" means greater than the largest of them.
-- Comparing against their SUM would practically never be true. With no
-- earlier rows the comparison with NULL is not true and the branch is
-- skipped.
IF @LatestPrice > @MaxOfPreviousFive
BEGIN
    SELECT @MaxOfPreviousFive; --To do task
END
Trigger part is confusing
This will report if the latest price is higher (or equal) to the largest of the prior 5.
-- Demo data in a table variable (declared with @). iden is the insertion
-- order.
declare @currency table (iden int IDENTITY(1,1) primary key, exchange smallint, coin tinyint);
insert into @currency (coin, exchange) values
       (1, 1)
     , (1, 2)
     , (1, 3)
     , (1, 4)
     , (1, 5)
     , (1, 6)
     , (2, 1)
     , (2, 2)
     , (2, 3)
     , (2, 4)
     , (2, 5)
     , (2, 3);

-- For every coin: take its six newest rows (the latest plus the five before
-- it), rank them by exchange descending, and report 'yes' when the latest
-- row (rn = 1) is also ranked first (rne = 1). Ties on exchange are broken
-- by rn, so a latest value equal to the previous maximum still counts as
-- 'yes'.
select cccc.coin, cccc.exchange
     , case when cccc.rn = cccc.rne then 'yes'
            else 'no'
       end as [high]
from ( select ccc.iden, ccc.coin, ccc.exchange, ccc.rn
            , ROW_NUMBER() over (partition by ccc.coin order by ccc.exchange desc, ccc.rn) rne
       from ( select cc.iden, cc.coin, cc.exchange, cc.rn
              from ( select c.iden, c.coin, c.exchange
                          , ROW_NUMBER() over (partition by coin order by iden desc) as rn
                     from @currency c
                   ) cc
              where cc.rn <= 6
            ) ccc
     ) cccc
where cccc.rn = 1
order by cccc.coin
I have some subqueries that retrieve the same values for each PolicyNumber. How can I replace the repeated values with '-' and display each value only once, in the top row for each policy?
Right now I have this:
But I need something like this:
-- One output row per joined payables-working / invoice row, with two computed
-- columns. Every figure is a correlated subquery keyed on INV.InvoiceNum.
SELECT
-------------/* GrossPremium*/
-- Sum of AmtBilled over the invoice's detail rows with ChargeType = 'P';
-- ISNULL turns "no such rows" into 0.
(SELECT ISNULL(SUM(tblFin_InvoiceDetails.AmtBilled), 0)
FROM tblFin_InvoiceDetails WITH (NOLOCK)
WHERE (tblFin_InvoiceDetails.ChargeType = 'P')
AND (tblFin_InvoiceDetails.InvoiceNum = INV.InvoiceNum))
AS GrossPremium
--------------/*CompanyCommissionPercentage*/
-- (sum of MGAAmt + remitter/payee amount) * 100 / gross premium.
-- First term: sum of MGAAmt over the invoice's ChargeType = 'P' detail rows.
,((SELECT ISNULL(SUM(tblFin_InvoiceDetails.MGAAmt), 0)
FROM tblFin_InvoiceDetails
WHERE (tblFin_InvoiceDetails.ChargeType = 'P')
AND (tblFin_InvoiceDetails.InvoiceNum = INV.InvoiceNum))
+
-- Second term depends on INV.Remitter: for 'B' it is the sum of RemitterAmt
-- from the same detail rows ...
CASE WHEN INV.Remitter = 'B' then
(SELECT ISNULL(SUM(tblFin_InvoiceDetails.RemitterAmt), 0)
FROM tblFin_InvoiceDetails
WHERE (tblFin_InvoiceDetails.ChargeType = 'P')
AND (tblFin_InvoiceDetails.InvoiceNum = INV.InvoiceNum))----------------RemitterCommission
ELSE
-- ... otherwise the sum of PayeeAmt for the payee INV.ProducerLocationGuid on
-- this invoice, restricted to charge codes whose policy charge has
-- chargeType = 'P'.
(SELECT ISNULL(SUM(tblFin_InvoicedItemsPayees.PayeeAmt), 0)
FROM tblFin_InvoicedItemsPayees
INNER JOIN tblFin_PolicyCharges pc on pc.ChargeCode = tblFin_InvoicedItemsPayees.ChargeCode and pc.chargeType = 'P'
WHERE (tblFin_InvoicedItemsPayees.InvoiceNum = INV.InvoiceNum and tblFin_InvoicedItemsPayees.PayeeGuid = INV.ProducerLocationGuid))
END) * 100 /
-- Divisor: the same expression as GrossPremium. NULLIF(..., 0) makes the
-- percentage NULL instead of raising a divide-by-zero error when the gross
-- premium is 0.
NULLIF((SELECT ISNULL(SUM(tblFin_InvoiceDetails.AmtBilled), 0)
FROM tblFin_InvoiceDetails WITH (NOLOCK)
WHERE (tblFin_InvoiceDetails.ChargeType = 'P')
AND (tblFin_InvoiceDetails.InvoiceNum = INV.InvoiceNum)),0)
AS CompanyCommissionPercentage
-- NOTE(review): WITH (NOLOCK) is applied to only two of the four reads of
-- tblFin_InvoiceDetails - confirm whether that is intentional.
FROM [tblFin_PayablesWorking] PW
INNER JOIN tblFin_Invoices INV ON PW.InvoiceNumber=INV.InvoiceNum
Since you have not posted your full query and table schema, I will answer with two simple examples. If you want to replace repeated values with '-', follow the queries below (change the column names to suit your needs).
IF YOU WANT TO SELECT YOUR EXISTING TABLE:
-- Show each ProductID only on its first row and '-' on the repeats.
;WITH ts AS (
    SELECT S1.[ProductID],
           -- position of the row inside its ProductID group; SalesID makes
           -- the numbering deterministic
           ROW_NUMBER() OVER (PARTITION BY S1.[ProductID] ORDER BY S1.[SalesID]) AS seqnum
    FROM (SELECT [SalesID],[ProductID] FROM [Sales]) AS S1 --Replace 'SELECT [ProductID] FROM [Sales]' with your Subquery and change the column accordingly
)
SELECT
    -- ProductID is cast to text first: a CASE that mixes an int branch with
    -- '-' takes the int type, so the dash would be implicitly converted to a
    -- number instead of being displayed.
    CASE WHEN ts.seqnum = 1 THEN CAST(ts.[ProductID] AS varchar(11)) ELSE '-' END AS [ProductID]
FROM ts
-- Without an ORDER BY the rows of a group are not guaranteed to come out
-- together with the value on top.
ORDER BY ts.[ProductID], ts.seqnum
FOR USING SUBQUERY:
--CREATE TABLE [dbo].[Sales](
-- [SalesID] [uniqueidentifier] NOT NULL DEFAULT (newid()),
-- [ProductID] [int] NOT NULL,
-- [EmployeeID] [int] NOT NULL,
-- [Quantity] [smallint] NOT NULL,
-- [SaleDate] [datetime] NOT NULL CONSTRAINT [DF_SaleDate] DEFAULT (getdate()),
-- CONSTRAINT [PK_SalesID] PRIMARY KEY CLUSTERED
--(
-- [SalesID] ASC
--)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
--) ON [PRIMARY]
--GO
--INSERT [dbo].[Sales] ([SalesID], [ProductID], [EmployeeID], [Quantity], [SaleDate]) VALUES (N'9498d566-e31b-4ac8-ab54-1c898471fba8', 2, 1, 1, CAST(N'2012-03-01 00:00:00.000' AS DateTime))
--INSERT [dbo].[Sales] ([SalesID], [ProductID], [EmployeeID], [Quantity], [SaleDate]) VALUES (N'69c7dff4-fbac-48d3-ae0a-5027c816acd2', 2, 2, 2, CAST(N'2012-04-01 00:00:00.000' AS DateTime))
--INSERT [dbo].[Sales] ([SalesID], [ProductID], [EmployeeID], [Quantity], [SaleDate]) VALUES (N'a40b9505-4a2c-4186-a89b-88a401248a58', 1, 1, 4, CAST(N'2012-02-01 00:00:00.000' AS DateTime))
--INSERT [dbo].[Sales] ([SalesID], [ProductID], [EmployeeID], [Quantity], [SaleDate]) VALUES (N'04856027-d7ad-40fe-889b-8d933595ffde', 3, 1, 2, CAST(N'2012-02-01 00:00:00.000' AS DateTime))
--INSERT [dbo].[Sales] ([SalesID], [ProductID], [EmployeeID], [Quantity], [SaleDate]) VALUES (N'173be2de-3b80-4a3d-8bcc-a74d0d70b3a9', 3, 2, 1, CAST(N'2012-03-01 00:00:00.000' AS DateTime))
--GO
-- Two independent subqueries joined on their common key (SalesID). Each one
-- numbers the rows of its own group so that only the first occurrence of a
-- value is displayed.
;WITH ts AS (
    SELECT
          JOIN1.[SalesID] AS [SalesID]
        , JOIN1.[ProductID]
        , JOIN1.seqnum AS seqnum
        , JOIN2.[EmployeeID]
        , JOIN2.seqnum2 AS seqnum2
    FROM
    (
        SELECT S1.[SalesID] AS [SalesID]
             , S1.[ProductID] AS [ProductID]
             , ROW_NUMBER() OVER (PARTITION BY S1.[ProductID] ORDER BY S1.[SalesID]) AS seqnum
        FROM (SELECT [SalesID],[ProductID] FROM [Sales]) AS S1 --Replace 'SELECT [ProductID] FROM [Sales]' with your Subquery ( For Example GrossPremium) and change the column accordingly. Remember you need some thing common for Iner join, in this case [SalesID]
    ) AS JOIN1
    INNER JOIN
    (
        SELECT S2.[SalesID] AS [SalesID]
             , S2.[EmployeeID] AS [EmployeeID]
             , ROW_NUMBER() OVER (PARTITION BY S2.[EmployeeID] ORDER BY S2.[SalesID]) AS seqnum2
        FROM (SELECT [SalesID],[EmployeeID] FROM [Sales]) AS S2 --Replace 'SELECT [[SalesID]] FROM [Sales]' with your Subquery ( For Example CompanyCommissionPercentage) and change the column accordingly. Remember you need some thing common for Iner join, in this case [SalesID]
    ) AS JOIN2
        ON JOIN1.[SalesID] = JOIN2.[SalesID]
)
SELECT
      -- The int values are cast to text first: a CASE that mixes an int
      -- branch with '-' takes the int type, so the dash would be implicitly
      -- converted to a number instead of being displayed.
      CASE WHEN ts.seqnum = 1 THEN CAST(ts.[ProductID] AS varchar(11)) ELSE '-' END AS [ProductID]
    , CASE WHEN ts.seqnum2 = 1 THEN CAST(ts.[EmployeeID] AS varchar(11)) ELSE '-' END AS [EmployeeID]
FROM ts
-- The ORDER BY belongs on the outermost query: TOP ... ORDER BY inside a
-- derived table does not guarantee the order of the final result.
ORDER BY ts.[SalesID] ASC
I don't know why you are using '-' instead of NULL; '-' will take up space.
if you wanted to do it in SQL (you probably shouldn't it's pretty ugly) you could do something like this using LAG(). It relies on having a field that you can use to sort the records for each Policy Number, in my dummy data below I included a field called RecordID to do this.
-- Show each policy's figures only on its first row (in RecordID order) and
-- '-' on the following rows. LAG() returns NULL on the first row of a
-- partition, which is what each CASE tests for.
SELECT
    PolicyNumber
    ,CASE
        WHEN LAG(GrossPremium) OVER(PARTITION BY PolicyNumber ORDER BY RecordID) IS NULL
        THEN CAST(GrossPremium AS VARCHAR(MAX))
        ELSE '-'
    END GrossPremium
    ,CASE
        WHEN LAG(CompanyComissionPercentage) OVER(PARTITION BY PolicyNumber ORDER BY RecordID) IS NULL
        THEN CAST(CompanyComissionPercentage AS VARCHAR(MAX))
        ELSE '-'
    END CompanyComissionPercentage
    ,CASE
        WHEN LAG(RemitterCommissionPercentage) OVER(PARTITION BY PolicyNumber ORDER BY RecordID) IS NULL
        THEN CAST(RemitterCommissionPercentage AS VARCHAR(MAX))
        ELSE '-'
    END RemitterCommissionPercentage
    -- This column is built from RemitterCommission, so it carries that name
    -- instead of a second GrossCommission alias.
    ,CASE
        WHEN LAG(RemitterCommission) OVER(PARTITION BY PolicyNumber ORDER BY RecordID) IS NULL
        THEN CAST(RemitterCommission AS VARCHAR(MAX))
        ELSE '-'
    END RemitterCommission
    -- GrossCommission is built from the GrossCommission source column.
    ,CASE
        WHEN LAG(GrossCommission) OVER(PARTITION BY PolicyNumber ORDER BY RecordID) IS NULL
        THEN CAST(GrossCommission AS VARCHAR(MAX))
        ELSE '-'
    END GrossCommission
FROM
(
    -- Dummy data
    SELECT
        1234 PolicyNumber -- Partition the LAG() on the policy number.
        ,1 RecordID -- use this to order the LAG() function.
        ,8749.00 GrossPremium
        ,18 CompanyComissionPercentage
        ,10 RemitterCommissionPercentage
        ,874.90 RemitterCommission
        ,1574.82 GrossCommission
    UNION ALL
    SELECT
        1234
        ,2 RecordID
        ,8749.00
        ,18
        ,10
        ,874.90
        ,1574.82
    UNION ALL
    SELECT
        5678
        ,1 RecordID
        ,8749.00
        ,18
        ,10
        ,874.90
        ,1574.82
) x
-- Keep every policy's rows together, with the row that carries the values
-- first.
ORDER BY x.PolicyNumber, x.RecordID;
I've inherited a SQL Server database that has duplicate data in it. I need to find and remove the duplicate rows. But without an id field, I'm not sure how to find the rows.
Normally, I'd compare it with itself using a LEFT JOIN and check that all fields are the same except the ID field would be table1.id <> table2.id, but without that, I don't know how to find duplicates rows and not have it also match on itself.
TABLE:
productId int not null,
categoryId int not null,
state varchar(255) not null,
dateDone DATETIME not null
SAMPLE DATA
1, 3, "started", "2016-06-15 04:23:12.000"
2, 3, "started", "2016-06-15 04:21:12.000"
1, 3, "started", "2016-06-15 04:23:12.000"
1, 3, "done", "2016-06-15 04:23:12.000"
In that sample, only rows 1 and 3 are duplicates.
How do I find duplicates?
Use having (and group by)
-- One row per combination of all four columns that occurs more than once,
-- together with its number of occurrences.
SELECT productId, categoryId, state, dateDone, COUNT(*)
FROM your_table
GROUP BY productId, categoryId, state, dateDone
HAVING COUNT(*) > 1
You can do this with windowing functions. For instance
-- Minimal demo: a temp table with a duplicated value, de-duplicated through a
-- CTE.
create table #tmp
(
    Id INT
);

insert into #tmp
VALUES (1), (1), (2); --so now we have duplicated rows

-- The statement before a WITH must be terminated with a semicolon, otherwise
-- the batch fails to parse.
WITH CTE AS
(
    SELECT
        ROW_NUMBER() OVER(PARTITION BY Id ORDER BY Id) AS [DuplicateCounter],
        Id
    FROM #tmp
)
DELETE FROM CTE
WHERE DuplicateCounter > 1; --duplicated rows have DuplicateCounter > 1
For some reason I thought you wanted to delete them; I guess I read that wrong. Just switch DELETE in my statement to SELECT and you get all of the duplicates without the original. Using DELETE, however, will remove all the duplicates and still leave you one record, which I suspect is what you want.
-- Re-runnable demo: rebuild the temp table, show it, delete the duplicate
-- rows, show it again.
IF OBJECT_ID('tempdb..#TT') IS NOT NULL
    DROP TABLE #TT

CREATE TABLE #TT
(
    productId  int          NOT NULL,
    categoryId int          NOT NULL,
    state      varchar(255) NOT NULL,
    dateDone   DATETIME     NOT NULL
)

INSERT INTO #TT (productId, categoryId, state, dateDone)
VALUES
    (1, 3, 'started', '2016-06-15 04:23:12.000'),
    (2, 3, 'started', '2016-06-15 04:21:12.000'),
    (1, 3, 'started', '2016-06-15 04:23:12.000'),
    (1, 3, 'done', '2016-06-15 04:23:12.000')

-- before
SELECT * FROM #TT

-- Number the rows inside every group of fully identical rows. All rows of a
-- group are interchangeable, so the ORDER BY column does not matter.
;WITH numbered AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY productId, categoryId, state, dateDone
                           ORDER BY productId) AS RowNum
    FROM #TT
)
-- Delete every row after the first of each group; change DELETE to SELECT to
-- only list them.
DELETE FROM numbered
WHERE RowNum > 1

-- after
SELECT * FROM #TT
If you want to and can change schema you can always add an identity column after the fact too and it will populate the existing record
-- Add an identity column after the fact; the existing rows get values too.
ALTER TABLE #TT ADD Id INTEGER IDENTITY(1,1) NOT NULL
You can try CTE and then limit the actual selection from the CTE to where RN = 1. Here is the query:-
-- Return one row per distinct combination of the four columns: rows are
-- numbered inside each group of identical rows and only number 1 is kept.
;WITH ACTE AS
(
    SELECT
        ProductID,
        categoryID,
        State,
        DateDone,
        ROW_NUMBER() OVER
        (
            PARTITION BY ProductID, CategoryID, State, DateDone
            ORDER BY ProductID, CategoryID, State, DateDone
        ) AS RN
    FROM [Table]
)
SELECT *
FROM ACTE
WHERE RN = 1