Update specific columns in a table iteratively (Do a bulk update) - sql

My Table Schema is as follows:
Gender: char(1), not null
Last Name: varchar(25), null
First Name: varchar(35), not null
The data in the table looks like:
Gender | Last Name | First Name |
M Doe John
F Marie Jane
M Jones Jameson
F Simpson Alice
I now am trying to update all the names in the table from the names present in the txt file.
My Query is as follows:
-- Load forenames and surnames from text files, then assign names to TableName.
-- #Name is a one-column staging table: the author has not worked out how to
-- load specific columns with BULK INSERT without using a format file.
CREATE TABLE #Name (Name VARCHAR(50))
CREATE TABLE #ForeNames (FirstName VARCHAR(50), Gender VARCHAR(1))

-- Stage the girls' forenames, then copy them across tagged with gender 'F'.
BULK INSERT #Name FROM 'c:\girlsforenames.txt' WITH (ROWTERMINATOR = '\n')
INSERT INTO #ForeNames (FirstName, Gender)
SELECT [Name], 'F' FROM #Name

-- Empty the staging table before loading the next file.
TRUNCATE TABLE #Name

-- Same for the boys.
BULK INSERT #Name FROM 'c:\boysforenames.txt' WITH (ROWTERMINATOR = '\n')
INSERT INTO #ForeNames (FirstName, Gender)
SELECT [Name], 'M' FROM #Name

-- Now the surnames; these stay in #Name for the UPDATE below.
TRUNCATE TABLE #Name
BULK INSERT #Name FROM 'c:\surnames.txt' WITH (ROWTERMINATOR = '\n')

-- @Counter is a T-SQL variable (variables take the @ sigil; # names temp tables).
DECLARE @Counter BIGINT
SET @Counter = 4
WHILE (@Counter > 0)
BEGIN
    -- The UPDATE has no WHERE clause, so every pass rewrites every row of
    -- TableName rather than one row per iteration.
    -- NOTE(review): last_name is fed from #ForeNames and first_name from the
    -- surname list in #Name; this looks swapped. Confirm the intent.
    UPDATE TableName
    SET
        [last_name]  = (SELECT TOP 1 FirstName FROM #ForeNames),
        [first_name] = (SELECT TOP 1 Name FROM #Name ORDER BY NEWID()),
        [gender]     = (SELECT TOP 1 Gender FROM #ForeNames ORDER BY NEWID());
    SET @Counter = @Counter - 1
END

DROP TABLE #Name
DROP TABLE #ForeNames
SELECT * FROM TableName
What Happens is all the rows in the table are updated with the same values and each time i execute the query they are updated with the new set of values.
What I want is to loop through each row and update it, and then update the next row with a different random name. Instead, the same random name is applied to every row of the table.
Any help would be appreciated.

Each SELECT statement is only being executed once in your example (and thus returning 1 result), and since your UPDATE isn't being limited, you're applying the same value to every row.
If you want to update each row with different values, you can use a CTE and the ROW_NUMBER() function to update rows at a time.
There's no need to loop, you can do it in one fell swoop:
-- Pair each TableName row with one randomly ordered #name row by position,
-- then copy the paired name across in a single set-based UPDATE. Rows with no
-- partner (more table rows than names) are not matched by the join and keep
-- their current FirstName.
WITH target_rows AS (
    -- Arbitrary, gap-free numbering of the rows to update.
    SELECT
        FirstName,
        ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS n1
    FROM TableName
)
UPDATE target_rows
SET FirstName = shuffled.Name
FROM target_rows
INNER JOIN (
    -- Ordering by NEWID() gives the candidate names a random sequence.
    SELECT
        Name,
        ROW_NUMBER() OVER (ORDER BY NEWID()) AS n2
    FROM #name
) AS shuffled
    ON target_rows.n1 = shuffled.n2
Demo: SQL Fiddle
This example is just for the FirstName.

Related

SQL: efficiently append incremental number to string, avoiding duplicates

I have a set of records (table [#tmp_origin]) containing duplicate entries in a string field ([Names]). I would like to insert the whole content of [#tmp_origin] into the destination table [#tmp_destination], that does NOT allow duplicates and may already contain items.
If the string in the origin table does not exist in the destination table, then it is simply inserted into the destination table as is.
If an entry in the destination table already exists with the same value of the entry in the original table, a string-ified incremental number must be appended to the string, before it is inserted in the destination table.
The process of moving data in this way has been implemented with a cursor, in this sample script:
-- create initial situation (origin and destination table, both containing items)
-- Begin
CREATE TABLE [#tmp_origin] ([Names] VARCHAR(10))
CREATE TABLE [#tmp_destination] ([Names] VARCHAR(10))
CREATE UNIQUE INDEX [IX_UniqueName] ON [#tmp_destination]([Names] ASC)
INSERT INTO [#tmp_origin]([Names]) VALUES ('a')
INSERT INTO [#tmp_origin]([Names]) VALUES ('a')
INSERT INTO [#tmp_origin]([Names]) VALUES ('b')
INSERT INTO [#tmp_origin]([Names]) VALUES ('c')
INSERT INTO [#tmp_destination]([Names]) VALUES ('a')
INSERT INTO [#tmp_destination]([Names]) VALUES ('a_1')
INSERT INTO [#tmp_destination]([Names]) VALUES ('b')
-- create initial situation - End

-- Copy the origin rows to the destination one at a time. A name that is
-- already taken gets '_1', '_2', ... appended until a free value is found.
-- @Name, @finalName and @counter are T-SQL variables (@ sigil); the
-- #-prefixed names are temp tables.
DECLARE @Name VARCHAR(10)
DECLARE @finalName VARCHAR(10)
DECLARE @counter INT
DECLARE NamesCursor CURSOR LOCAL FORWARD_ONLY FAST_FORWARD READ_ONLY FOR
    SELECT [Names]
    FROM [#tmp_origin];
OPEN NamesCursor;
FETCH NEXT FROM NamesCursor INTO @Name;
WHILE @@FETCH_STATUS = 0
BEGIN
    -- Start from the unsuffixed name for every fetched row.
    SET @finalName = @Name
    SET @counter = 1
    -- Probe the destination until the candidate is unused.
    WHILE (1 = 1)
    BEGIN
        IF NOT EXISTS (SELECT * FROM [#tmp_destination] WHERE [Names] = @finalName)
            BREAK;
        -- Explicit length: an unsized VARCHAR in CAST silently defaults to 30.
        SET @finalName = @Name + '_' + CAST(@counter AS VARCHAR(10))
        SET @counter = @counter + 1
    END
    INSERT INTO [#tmp_destination] ([Names])
    VALUES (@finalName)
    FETCH NEXT FROM NamesCursor INTO @Name;
END
CLOSE NamesCursor;
DEALLOCATE NamesCursor;
SELECT *
FROM [#tmp_destination]
/*
Expected result:
a
a_1
a_2
a_3
b
b_1
c
*/
DROP TABLE [#tmp_origin]
DROP TABLE [#tmp_destination]
This works correctly, but its performance drastically slows down when the number of items to insert increases.
Any idea to speed it up?
thanks
Using a windowing function allows the duplicates to be numbered. You can also get the count from the destination table (will need where condition to strip off the suffix you've added):
-- Number each duplicate within the origin table and report how many rows the
-- destination already holds with exactly the same name.
select
    orig.names,
    row_number() over (partition by orig.names order by orig.names) as rowNo,
    dest.[count]
from #tmp_origin orig
cross apply (
    -- The aggregate needs an alias so the outer query can refer to it.
    select count(1) as [count]
    from #tmp_destination d
    where d.names = orig.names
) as dest
An insert can be built from the above (new suffix is rowNo + dest.count -1 if greater than zero).
Suggest you refactor the destination temporary table to include the name and suffix as separate columns – this might mean having a new intermediate stage – because this will make the matching logic much simpler.
Something like this:
-- Insert every origin row; the first occurrence of a name goes in unchanged,
-- later occurrences get '_<occurrence number>' appended (so '_2', '_3', ...).
insert [#tmp_destination]
select
    case
        when numbered.occurrence > 1
            then numbered.Names + '_' + convert(varchar(10), numbered.occurrence)
        else numbered.Names
    end
from (
    -- Occurrence number of each name inside the origin table.
    select
        Names,
        row_number() over (partition by Names order by Names) as occurrence
    from [#tmp_origin]
) as numbered
I wouldn't use a cursor in that case. Instead, I would build the query using ROW_NUMBER(). This way you add a counter in your original table, and then use this counter to append to your [Names]:
-- Step 1: tag each origin row with a zero-based occurrence counter per name.
SELECT
    [Names],
    ROW_NUMBER() OVER (PARTITION BY [Names] ORDER BY [Names]) - 1 AS [counter]
INTO #tmp_origin_with_counter
FROM #tmp_origin

-- Step 2: build the final names; counter 0 keeps the plain name, later
-- occurrences get '_<counter>' appended.
-- SELECT ... INTO creates #tmp_destination, so every output column must have
-- a name (hence the alias) and the table must not already exist.
SELECT
    CONCAT(
        [Names],
        CASE
            WHEN [counter] = 0 THEN ''
            -- Explicit length: an unsized type in CAST defaults to 30.
            ELSE '_' + CAST([counter] AS VARCHAR(20))
        END
    ) AS [Names]
INTO #tmp_destination
FROM #tmp_origin_with_counter

Conditional SQL Insert

I have to write an INSERT statement that looks at a table and inserts a record if the conditions are met. This is a one-time thing, so I am not overly concerned about it being efficient.
the table contains a work breakdown structure for a project ( each project having, a project level(wbs1), a phase level(wbs2) and a task level (wbs3)
that table looks like this
Wbs1 wbs2 wbs3 name
262 ProjectA
262 01 Data Analsys
262 01 01 Data cleansing
262 01 02 Data Transforming
I need to insert a phase(WBS2) to each project(WBS1) with an insert statement, for example adding a wbs2 "02" to each project(wbs1).
Writing the INSERT statement is no problem, and I select the data from the project-level row since most of it is redundant, so there is no issue there. I'm just not sure how to have it loop through and add the phase to each project, since there are multiple rows with the same project (wbs1) number.
insert statement sample
-- Add a '999' phase row for one project, copying WBS1/WBS3 from that
-- project's row whose WBS2 is blank.
-- @ProjectID is a T-SQL variable (variables take the @ sigil).
INSERT INTO dbo.pr ([WBS1], [WBS2], [WBS3], [Name])
SELECT
    [WBS1],
    '999',
    [WBS3],
    'In-House Expenses'
FROM dbo.pr
WHERE wbs1 = @ProjectID
    AND wbs2 = ''
How do I run this statement to insert a row for every project (wbs1)?
hopefully this makes sense.
You can use a temporary table with an added RowNumber field and then a WHILE loop to handle the looping over each row. You can then run an IF EXISTS as a criteria check before running the stored procedure. See below for example
-- Walk a numbered copy of pr row by row and insert a '999' phase row for each
-- row that belongs to @ProjectId and has a blank wbs2.
-- @ProjectId, @Counter and @MaxLoop are T-SQL variables (@ sigil); #Temp is a
-- temp table.
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
DECLARE @ProjectId NVARCHAR(50) = '262'

CREATE TABLE #Temp (RowNumber INT, wbs1 NVARCHAR(255), wbs2 NVARCHAR(255), wbs3 NVARCHAR(255), name NVARCHAR(255))

-- NOTE(review): pr.* only fits #Temp if pr has exactly the four columns
-- wbs1, wbs2, wbs3, name in that order. Confirm against the real table.
INSERT INTO #Temp
SELECT
    ROW_NUMBER() OVER (ORDER BY wbs1, wbs2, wbs3, name),
    pr.*
FROM pr

-- Show the numbered working copy.
SELECT *
FROM #Temp

-- Loop variables: current row number and the last row number to visit.
DECLARE @Counter INT = 1;
DECLARE @MaxLoop INT = (SELECT COUNT(wbs1) FROM #Temp)

WHILE @Counter <= @MaxLoop
BEGIN
    -- Only act when the current row meets the criteria.
    IF EXISTS (
        SELECT 'true'
        FROM #Temp
        WHERE RowNumber = @Counter
            AND wbs1 = @ProjectId
            AND wbs2 = ''
    )
    BEGIN
        INSERT INTO pr (wbs1, wbs2, wbs3, name)
        SELECT [WBS1], '999', [WBS3], 'In-House Expenses'
        FROM #Temp
        WHERE RowNumber = @Counter
    END
    -- Advance to the next numbered row.
    SET @Counter = @Counter + 1;
END

SELECT *
FROM pr

DROP TABLE #Temp

How to replace text in a string with values from a column in sql [duplicate]

This question already has an answer here:
How to replace a string with values from columns in a table in SQL
(1 answer)
Closed 9 years ago.
Exp Major Start
__________________________________________________________
| |
'My names are W.Major and W.Start' | Hal | Bark
___________________________________|________|_________________
'W.Major is a doctor' | Mark | Slope
___________________________________|________|_______________
Hi All suppose I have the table above in SQL server management studio
and for any text in the Exp column I want to replace W.Major with the value in the Major column and wherever there is a W.Start I want to replace it with the value in the Start column.
Do you know what type of SP I have to write to get this accomplished?
Well you can use a Dynamic SQL and UNPIVOT to get table with all unit replacements and then you run a while loop to replace expressions one by one till you get your result.
I am sure there can be better techniques but here is my solution.
Please mark it as answer for my effort :)
-- Replace every 'W.<column name>' placeholder in [Exp] with that row's value
-- from the column of the same name. The source table is unpivoted into one
-- row per (Exp, column) pair in #tempTable, and the replacements are then
-- applied pair by pair from the highest Id down, carrying the partly replaced
-- text to the previous row when both rows share the same Exp.
-- @query, @queryRWords and @ExpCount are T-SQL variables (@ sigil);
-- #tempTable is a temp table.
IF OBJECT_ID ('tempdb.dbo.#temptable') IS NOT NULL DROP TABLE #temptable
CREATE TABLE #tempTable ( ID INT IDENTITY(1, 1), [Exp] nvarchar(4000) not NULL, replacementWord nvarchar(50) not null, Wordvalue nvarchar(50) not null, flag bit null, ReplacedExp nvarchar(4000) null)

DECLARE @query VARCHAR(4000)
DECLARE @queryRWords VARCHAR(2000)

-- Build the bracketed, comma-separated list of every column of 'yourtable'
-- except Exp, e.g. [major],[start].
SELECT @queryRWords =
    STUFF((
        SELECT DISTINCT '],[' + LTRIM(C.name)
        FROM sys.columns C
        INNER JOIN sys.objects O ON O.object_id = C.object_id
        WHERE O.name = 'yourtable'
            AND O.type_desc = 'USER_TABLE'
            AND C.name NOT LIKE 'Exp'
        ORDER BY '],[' + LTRIM(C.name)
        FOR XML PATH('')
    ), 1, 2, '') + ']'

-- NOTE(review): the inner SELECT of the dynamic statement still names
-- [major],[start] explicitly, so only those columns can be unpivoted.
SET @query='INSERT INTO #tempTable(Exp, replacementWord, WordValue) select [Exp], [replacementWord],[WordValue] FROM (SELECT [Exp], [major],[start] FROM [yourtable]) p
UNPIVOT([Wordvalue] FOR [replacementWord] IN ('+@queryRWords+'))AS unpvt'
EXECUTE(@query)

-- Every row starts from the untouched expression.
UPDATE #tempTable SET ReplacedExp=[Exp]

DECLARE @ExpCount INT
SELECT @ExpCount= COUNT(*) FROM #tempTable
WHILE @ExpCount > 0
BEGIN
    IF ((SELECT [Exp] FROM #tempTable WHERE Id = @ExpCount) <> (SELECT [Exp] FROM #tempTable WHERE Id = (@ExpCount - 1)))
    BEGIN
        -- The previous row belongs to a different expression: finish this
        -- one and flag it as a final result.
        -- VARCHAR(52) = 'W.' + up to 50 characters; an unsized CAST would
        -- truncate the placeholder at 30 characters.
        UPDATE #tempTable SET FLAG=1, ReplacedExp= REPLACE(ReplacedExp, CAST('W.'+replacementWord AS VARCHAR(52)), WordValue) FROM #tempTable WHERE Id=@ExpCount
    END
    ELSE
    BEGIN
        -- Same expression as the previous row (or there is no previous row):
        -- apply this replacement, then hand the text to the previous row.
        UPDATE #tempTable SET ReplacedExp= REPLACE(ReplacedExp, CAST('W.'+replacementWord AS VARCHAR(52)), WordValue) FROM #tempTable WHERE Id=@ExpCount
        UPDATE #tempTable SET ReplacedExp= (SELECT ReplacedExp FROM #temptable WHERE Id=@ExpCount) WHERE Id=(@ExpCount-1)
    END
    SET @ExpCount=@ExpCount-1
END

-- Id 1 has no predecessor to compare against, so it is flagged explicitly.
UPDATE #tempTable
SET flag=1 WHERE Id=1

SELECT ReplacedExp FROM #tempTable WHERE flag=1
Here is the sample TSQL where you can have a good start
-- Substitute the W.Major placeholder first, then W.Start, using each row's
-- own Major and Start values; the original columns are returned alongside.
SELECT
    src.Exp,
    src.Major,
    src.Start,
    REPLACE(
        REPLACE(src.Exp, 'W.Major', src.Major),
        'W.Start',
        src.Start
    ) AS Result
FROM [Your Table Name] AS src

How to add a row number to new table in SQL?

I'm trying to create a new table using an existing table already using:
-- Template only: "(...,...)" stands for NewTable's column list.
INSERT INTO NewTable (...,...)
SELECT * from SampleTable
What I need to is add a record number at the beginning or the end, it really doesn't matter as long as it's there.
Sample Table
Elizabeth RI 02914
Emily MA 01834
Prospective New Table
1 Elizabeth RI 02914
2 Emily MA 01834
Is that at all possible?
This is what I ultimately I'm shooting for... except right now those tables aren't the same size because I need my ErrorTemporaryTable to have a column in which the first row has a number which increments by the previous one by one.
-- @counter, @ClientMessage and @TestingMessage are T-SQL variables (@ sigil).
declare @counter int
declare @ClientMessage varchar(255)
declare @TestingMessage varchar(255)

-- Start one past the number of rows already in ErrorValidationTesting.
select @counter = (select count(*) + 1 as counter from ErrorValidationTesting)

-- Loop until @counter passes the number of error rows that have a message.
while @counter <= (
    select count(*)
    from ErrorValidationTable ET
    inner join ErrorValidationMessage EM on ET.Error = EM.Error_ID
)
begin
    -- Each pass inserts every row of ErrorTemporaryTable; @counter is not
    -- written to any column here.
    insert into ErrorValidationTesting (Validation_Error_ID, Program_ID, Displayed_ID, Client_Message, Testing_Message, Create_Date)
    select * from ErrorTemporaryTable
    select @counter = @counter + 1
end
You can use into clause with IDENTITY column:
-- Create NewTable from OldTable in one statement; ID_Num is generated as an
-- identity column with seed 1 and increment 1.
SELECT
    IDENTITY(int, 1, 1) AS ID_Num,
    src.col0,
    src.col1
INTO NewTable
FROM OldTable AS src;
Here is more information
You can also create table with identity field:
-- Target table whose id is filled in automatically on insert.
CREATE TABLE NewTable (
    id   int IDENTITY(1, 1), -- seed 1, increment 1: what a bare IDENTITY defaults to
    col0 varchar(30),
    col1 varchar(30)
)
and insert:
-- Copy the data columns only; no value is supplied for NewTable's remaining column.
INSERT INTO NewTable (col0, col1)
SELECT
    src.col0,
    src.col1
FROM OldTable AS src;
or if you have NewTable and you want to add new column see this solution on SO.
-- Template only: "(...,...)" stands for NewTable's column list, whose first
-- column receives the generated row number; order_column is a placeholder for
-- the column that decides the numbering order.
INSERT INTO NewTable (...,...)
SELECT ROW_NUMBER() OVER (ORDER BY order_column), * from SampleTable
If you are in SQL Server
-- Template only: "c1,c2,...cn" stands for the real column list. idCol receives
-- a row number assigned in c1 order.
INSERT INTO newTable (idCol, c1,c2,...cn)
SELECT ROW_NUMBER() OVER(ORDER BY c1), c1,c2,...cn
FROM oldTable
Try this query to insert 1,2,3... Replace MyTable and ID with your column names.
-- Number the existing rows of MyTable 1, 2, 3, ... in place.
-- @myVar is a T-SQL variable (variables take the @ sigil).
DECLARE @myVar int
SET @myVar = 0
UPDATE MyTable
-- "@variable = column = expression" gives the variable and the column the
-- same incremented value for each row, so numbering starts at 1 and does not
-- depend on the order in which separate SET items are evaluated.
-- NOTE(review): the order in which rows are visited is not specified here.
SET @myVar = ID = @myVar + 1

Delete duplicate records in SQL Server?

Consider a column named EmployeeName table Employee. The goal is to delete repeated records, based on the EmployeeName field.
EmployeeName
------------
Anand
Anand
Anil
Dipak
Anil
Dipak
Dipak
Anil
Using one query, I want to delete the records which are repeated.
How can this be done with TSQL in SQL Server?
You can do this with window functions. It will order the dupes by empId, and delete all but the first one.
-- Rank the rows of each EmployeeName by empId and delete every row after the
-- first one in its group.
WITH ranked AS (
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY EmployeeName ORDER BY empId) AS rn
    FROM Employee
)
DELETE FROM ranked
WHERE rn > 1;
Run it as a select to see what would be deleted:
-- Dry run: list the rows ranked second or later within their EmployeeName
-- group (ordered by empId).
WITH ranked AS (
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY EmployeeName ORDER BY empId) AS rn
    FROM Employee
)
SELECT *
FROM ranked
WHERE rn > 1;
Assuming that your Employee table also has a unique column (ID in the example below), the following will work:
-- Keep, for each EmployeeName, only the row with the smallest ID.
DELETE FROM Employee
WHERE ID NOT IN (
    -- One surviving ID per name.
    SELECT MIN(e.ID)
    FROM Employee AS e
    GROUP BY e.EmployeeName
);
This will leave the version with the lowest ID in the table.
Edit
Re McGyver's comment - as of SQL 2012
MIN can be used with numeric, char, varchar, uniqueidentifier, or datetime columns, but not with bit columns
For 2008 R2 and earlier,
MIN can be used with numeric, char, varchar, or datetime columns, but not with bit columns (and it also doesn't work with GUID's)
For 2008R2 you'll need to cast the GUID to a type supported by MIN, e.g.
-- Same keep-the-minimum rule for a GUID key: both sides are cast to
-- binary(16) so that MIN can be applied and the values compared.
DELETE FROM GuidEmployees
WHERE CAST(ID AS binary(16)) NOT IN (
    SELECT MIN(CAST(g.ID AS binary(16)))
    FROM GuidEmployees AS g
    GROUP BY g.EmployeeName
);
SqlFiddle for various types in Sql 2008
SqlFiddle for various types in Sql 2012
You could try something like the following:
-- Self-join: delete a row whenever another row has the same dupField and a
-- smaller uniqueField, which leaves the smallest uniqueField of each group.
DELETE later_row
FROM MyTable AS later_row
INNER JOIN MyTable AS earlier_row
    ON later_row.dupField = earlier_row.dupField
    AND later_row.uniqueField > earlier_row.uniqueField
(this assumes that you have an integer based unique field)
Personally though I'd say you were better off trying to correct the fact that duplicate entries are being added to the database before it occurs rather than as a post fix-it operation.
-- Keep only the highest ID within each combination of the three duplicate
-- columns; delete everything else.
DELETE FROM MyTable
WHERE ID NOT IN (
    SELECT MAX(m.ID)
    FROM MyTable AS m
    GROUP BY
        m.DuplicateColumn1,
        m.DuplicateColumn2,
        m.DuplicateColumn3
)
-- Number the rows inside each (FirstName, LastName) group and delete every
-- row numbered 2 or higher, leaving one row per name pair.
WITH TempUsers (FirstName, LastName, duplicateRecordCount)
AS
(
    SELECT
        FirstName,
        LastName,
        -- PARTITION BY and ORDER BY are two-word keywords.
        ROW_NUMBER() OVER (PARTITION BY FirstName, LastName ORDER BY FirstName) AS duplicateRecordCount
    FROM dbo.Users
)
DELETE
FROM TempUsers
WHERE duplicateRecordCount > 1
-- Number the rows of each EmployeeName and delete all but the one numbered 1.
WITH numbered AS (
    SELECT
        EmployeeName,
        ROW_NUMBER() OVER (PARTITION BY EmployeeName ORDER BY EmployeeName) AS R
    FROM employee_table
)
DELETE FROM numbered
WHERE R > 1;
The magic of common table expressions.
Try
-- Keep, for each EmployeeName, the row with the highest rowid; delete the rest.
-- NOTE(review): this assumes "rowid" can be referenced on employee; confirm
-- that the target database or table actually provides it.
DELETE FROM employee
WHERE rowid NOT IN (
    SELECT MAX(e.rowid)
    FROM employee e
    GROUP BY e.EmployeeName
);
If you're looking for a way to remove duplicates, yet you have a foreign key pointing to the table with duplicates, you could take the following approach using a slow yet effective cursor.
It will relocate the duplicate keys on the foreign key table.
-- Remove duplicate names from Sales_OrderLineVersionChangeReasonCode while
-- keeping Sales_OrderLineVersion.ChangeReasonCodeId valid: the first id seen
-- for a name is remembered in #properOlvChangeCodes, and every later row with
-- that name has its references repointed to the remembered id before it is
-- deleted.
-- @name, @id and @newid are T-SQL variables (@ sigil); #properOlvChangeCodes
-- is a temp table.
create table #properOlvChangeCodes(
    id int not null,
    name nvarchar(max) not null
)

-- Same type as the temp table's name column, so no characters are lost.
DECLARE @name NVARCHAR(MAX);
DECLARE @id INT;
DECLARE @newid INT;

DECLARE OLVTRCCursor CURSOR FOR
    SELECT id, name FROM Sales_OrderLineVersionChangeReasonCode;
OPEN OLVTRCCursor;
FETCH NEXT FROM OLVTRCCursor INTO @id, @name;
WHILE @@FETCH_STATUS = 0
BEGIN
    -- A name already present in the temp table marks this row as a duplicate.
    if (exists (select * from #properOlvChangeCodes where name = @name)) begin
        -- Take the surviving id from the temp table. Looking it up in the
        -- source table with an unordered TOP 1 could return the very row
        -- that is about to be deleted.
        select top 1 @newid = id
        from #properOlvChangeCodes
        where name = @name
        -- Repoint the referencing rows to the surviving id.
        update Sales_OrderLineVersion set ChangeReasonCodeId = @newid where ChangeReasonCodeId = @id
        -- The duplicate is no longer referenced and can go.
        delete from Sales_OrderLineVersionChangeReasonCode where Id = @id
    end else begin
        -- First time this name is seen: remember it as the survivor.
        insert into #properOlvChangeCodes (id, name)
        values (@id, @name)
    end
    FETCH NEXT FROM OLVTRCCursor INTO @id, @name;
END;
CLOSE OLVTRCCursor;
DEALLOCATE OLVTRCCursor;
drop table #properOlvChangeCodes
-- Keep the lowest ID for each email and delete every other person row. The
-- grouped query is wrapped in a derived table rather than used directly in
-- the NOT IN list.
DELETE FROM person
WHERE ID NOT IN (
    SELECT keepers.id
    FROM (
        SELECT MIN(ID) AS id
        FROM person
        GROUP BY email
    ) AS keepers
);
Please see the below way of deletion too.
-- Sample data held in a table variable (table variables take the @ sigil).
DECLARE @Employee TABLE (EmployeeName varchar(10))

INSERT INTO @Employee VALUES
    ('Anand'), ('Anand'), ('Anil'), ('Dipak'),
    ('Anil'), ('Dipak'), ('Dipak'), ('Anil')

-- Show the data before de-duplication.
SELECT * FROM @Employee
This creates a sample table variable named @Employee and loads it with the given data.
-- Number the rows of each EmployeeName and delete all but the first.
-- @Employee is a table variable (@ sigil), declared earlier in the same batch.
DELETE aliasName
FROM (
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY EmployeeName ORDER BY EmployeeName) AS rowNumber
    FROM @Employee
) aliasName
WHERE rowNumber > 1

-- Show what is left.
SELECT * FROM @Employee
Result:
I know this was asked six years ago; I am posting this just in case it is helpful to anyone.
Here's a nice way of deduplicating records in a table that has an identity column based on a desired primary key that you can define at runtime. Before I start I'll populate a sample data set to work with using the following code:
-- Build the sample table _original: 50,000 rows of random month/year,
-- sales-rep id and sale amount, generated in temp table #original first.
-- The @-prefixed names are T-SQL variables; #original is a temp table.

-- Start from a clean slate if a previous run left _original behind.
if exists (select 1 from sys.all_objects where type='u' and name='_original')
    drop table _original

declare @startyear int = 2017
declare @endyear int = 2018
declare @iterator int = 1
declare @income money = cast((SELECT round(RAND()*(5000-4990)+4990 , 2)) as money)
declare @salesrepid int = cast(floor(rand()*(9100-9000)+9000) as varchar(4))

create table #original (rowid int identity, monthyear varchar(max), salesrepid int, sale money)

while @iterator<=50000 begin
    -- monthyear is '<year>-<month>' with an unpadded month at this point.
    insert #original
    select (Select cast(floor(rand()*(@endyear-@startyear)+@startyear) as varchar(4))+'-'+ cast(floor(rand()*(13-1)+1) as varchar(2)) ), @salesrepid , @income
    -- Draw fresh random values for the next row.
    set @salesrepid = cast(floor(rand()*(9100-9000)+9000) as varchar(4))
    set @income = cast((SELECT round(RAND()*(5000-4990)+4990 , 2)) as money)
    set @iterator=@iterator+1
end

-- Zero-pad single-digit months: a 6-character value such as 2017-3 becomes 2017-03.
update #original
set monthyear=replace(monthyear, '-', '-0') where len(monthyear)=6

-- Materialise the permanent sample table.
select * into _original from #original
Next I'll create a Type called ColumnNames:
-- Table type with a single text column, one row per column name.
CREATE TYPE ColumnNames AS TABLE (
    Columnnames varchar(max)
)
Finally I will create a stored proc with the following 3 caveats:
1. The proc takes a required parameter @tablename that names the table in your database to delete from.
2. The proc has an optional parameter @columns that you can use to define the fields that make up the desired primary key you are deleting against. If it is left empty, all the fields other than the identity column are assumed to make up the desired primary key.
3. When duplicate records are deleted, the record with the lowest value in its identity column is kept.
Here is my delete_dupes stored proc:
-- delete_dupes: remove duplicate rows from a table that has an identity
-- column, keeping the row with the lowest identity value in each group.
--   @tablename  unqualified name of the table to de-duplicate.
--   @columns    optional list of column names that define a duplicate; when
--               empty, all non-identity columns of the table are used.
-- A global temp table ##temp is created and dropped during the call.
create proc delete_dupes (@tablename varchar(max), @columns columnnames readonly)
as
begin
    declare @table table (iterator int, name varchar(max), is_identity int)
    declare @tablepartition table (idx int identity, type varchar(max), value varchar(max))
    declare @partitionby varchar(max)
    declare @iterator int = 1
    -- EXEC() only accepts literals and variables in its string expression, so
    -- the bracket-quoted table name is prepared in a variable first.
    declare @quotedtable nvarchar(258) = quotename(@tablename)

    -- Caller supplied key columns: collapse them into one comma-separated list.
    if exists (select 1 from @columns) begin
        declare @columns1 table (iterator int, columnnames varchar(max))
        insert @columns1
        select 1, columnnames from @columns
        set @partitionby = (select distinct
            substring((select ', ' + t1.columnnames
                from @columns1 t1
                where t1.iterator = t2.iterator
                order by t1.iterator
                for xml path ('')), 2, 1000) partition
            from @columns1 t2)
    end

    -- Catalogue the target table's columns, flagging the identity column.
    insert @table
    select 1, a.name, is_identity
    from sys.all_columns a
    join sys.all_objects b on a.object_id = b.object_id
    where b.name = @tablename

    declare @identity varchar(max) = (select name from @table where is_identity = 1)

    -- Without a resolvable table and identity column the dynamic SQL below
    -- would be NULL, so stop with a clear error instead.
    if @quotedtable is null or @identity is null begin
        raiserror('delete_dupes: table not found or it has no identity column.', 16, 1)
        return
    end

    -- Two passes: @iterator = 1 stores the identity column under 'order by'
    -- (idx 1); @iterator = 0 stores the remaining columns under
    -- 'over (partition by' (idx 2).
    while @iterator >= 0 begin
        insert @tablepartition
        select distinct
            case when @iterator = 1 then 'order by' else 'over (partition by' end,
            substring((select ', ' + t1.name
                from @table t1
                where t1.iterator = t2.iterator and t1.is_identity = @iterator
                order by t1.iterator
                for xml path ('')), 2, 5000) partition
        from @table t2
        set @iterator = @iterator - 1
    end

    declare @originalpartition varchar(max)
    if @partitionby is null begin
        -- No key supplied: partition by every non-identity column.
        select @originalpartition = replace(b.value+','+a.type+a.value ,'over (partition by','') from @tablepartition a cross join @tablepartition b where a.idx=2 and b.idx=1
        select @partitionby = a.type+a.value+' '+b.type+a.value+','+b.value+') rownum' from @tablepartition a cross join @tablepartition b where a.idx=2 and b.idx=1
    end
    else
    begin
        -- Key supplied: partition by the caller's columns only.
        select @originalpartition=b.value +','+ @partitionby from @tablepartition a cross join @tablepartition b where a.idx=2 and b.idx=1
        set @partitionby = (select 'OVER (partition by'+ @partitionby + ' ORDER BY'+ @partitionby + ','+b.value +') rownum'
            from @tablepartition a cross join @tablepartition b where a.idx=2 and b.idx=1)
    end

    -- Rank every row within its duplicate group, lowest identity value first.
    exec('select row_number() ' + @partitionby +', '+@originalpartition+' into ##temp from '+ @quotedtable)

    -- Delete every row that is not ranked first, from the table named by
    -- @tablename.
    exec(
    'delete a from ' + @quotedtable + ' a
    left join ##temp b on a.'+@identity+'=b.'+@identity+' and rownum=1
    where b.rownum is null')

    drop table ##temp
end
Once this is compiled, you can delete all your duplicate records by running the proc. To delete dupes without defining a desired primary key, use this call:
-- No key columns passed, so the proc receives an empty column list.
EXEC delete_dupes @tablename = '_original'
To delete dupes based on a defined desired primary key use this call:
-- Pass the key columns as a table-valued argument. @table1 is a table
-- variable of type columnnames (variables take the @ sigil).
DECLARE @table1 AS columnnames

INSERT @table1
VALUES ('salesrepid'), ('sale')

EXEC delete_dupes '_original', @table1