SQL - Need to duplicate rows but with a unique primary key - sql

I need to duplicate some rows in a table but the primary key needs to be unique. When I try to do it this way I get a "Violation of PRIMARY KEY constraint"
-- Copies the matching rows back into the same table, selecting their existing
-- CustomerId values unchanged alongside FirstName and LastName.
INSERT INTO Company.Customer
SELECT CustomerId, FirstName, LastName
FROM Company.Customer
WHERE LastName LIKE '%JONES%';
CustomerId in this example is the primary key. So the values in FirstName and LastName need to stay the same but the CustomerId needs to change
Thanks!

Based on your comments, it seems that CustomerId is not an IDENTITY column. This makes things trickier, because it is unclear how your primary keys are generated. For a simple case you can use an approach like this:
-- Retrieve the current maximum CustomerId (falls back to 0 if the table is empty)
DECLARE @maxid int = 0;
SELECT @maxid = COALESCE(MAX(CustomerId), 0) FROM Company.Customer;
-- Give each copied row the key "maximum id + row number": the row numbers are
-- 1, 2, 3, ... so the new keys clash neither with existing keys nor with each other.
-- NOTE(review): another session inserting between the two statements could still
-- take one of these keys; wrap both in a suitably isolated transaction if that matters.
INSERT INTO Company.Customer (CustomerId, FirstName, LastName)
SELECT ROW_NUMBER() OVER (ORDER BY CustomerId ASC) + @maxid, FirstName, LastName
FROM Company.Customer
WHERE LastName LIKE '%JONES%';
However, if you don't have a reason not to, I suggest using IDENTITY column - It will make things easier to handle.

Leave the primary key "CustomerId" out of select portion of your query... It should be auto generated for you on the inserted rows.
-- CustomerId is deliberately left out of both column lists so the server generates
-- a new key for every copied row (this requires CustomerId to be auto-generated).
INSERT INTO Company.Customer (FirstName, LastName)
SELECT FirstName, LastName
FROM Company.Customer
WHERE LastName LIKE '%JONES%';

Related

SQL Server HASHBYTES function returning unexpected results

I'm using the HASHBYTES function in T-SQL to generate an MD5 hash of some data, but I am getting some unexpected results, even though hashing the same data. What am I doing wrong here?
For demonstration purposes I'll create a table and insert a random guid as the 'CustomerId' and a random email address as the 'EmailAddress'. The 'ConcatHash' is a computed column which should create an MD5 hash of the two columns joined together by the pipe character. So it's easier to see whats going on I have also added a ConcatColumn so you can see what the CONCAT_WS is doing.
-- Demo table: ConcatColumn exposes the pipe-joined text of the two base columns,
-- and ConcatHash persists the MD5 hash of that same expression.
CREATE TABLE dbo.CustomerTest
(
    CustomerId UNIQUEIDENTIFIER NOT NULL
    , EmailAddress VARCHAR(255) NOT NULL
    , ConcatColumn AS (CONCAT_WS('|', CustomerId, EmailAddress))
    , ConcatHash AS (HASHBYTES('MD5', CONCAT_WS('|', CustomerId, EmailAddress))) PERSISTED
)
GO
-- Only the two base columns are supplied; the computed columns derive from them.
INSERT INTO dbo.CustomerTest (CustomerId, EmailAddress)
VALUES
('8E38101D-988E-4BF1-B8F1-E8E0B8DAA891', 'a1jfapedu@adhoc-orange.com')
GO
SELECT CustomerId, EmailAddress, ConcatColumn, ConcatHash FROM dbo.CustomerTest
Here is the result...
I'll now query the same data from a different table, using CONCAT_WS and HASHBYTES in exactly the same way as I did previously.
-- Rebuild the pipe-joined text and its MD5 hash on the fly for a single customer.
SELECT
    c.CustomerId,
    c.Email,
    CONCAT_WS('|', c.CustomerId, c.Email) AS ConcatColumn,
    HASHBYTES('MD5', CONCAT_WS('|', c.CustomerId, c.Email)) AS ConcatHash
FROM dbo.Customers AS c
WHERE c.CustomerId = '8E38101D-988E-4BF1-B8F1-E8E0B8DAA891'
Here is the result...
Here are the results side-by-side, and you can see the data is the same, the concatenated data is the same, yet the MD5 is different...
To save you the trouble of looking at the 'ConcatColumn' column letter by letter, I have already verified they are identical. So why is the MD5 hash different?
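varchar and nvarchar columns do not produce the same hash results: HASHBYTES hashes the raw bytes of its input, and the same text is stored as different bytes in varchar than in nvarchar (UTF-16). CONCAT_WS returns nvarchar as soon as any argument is nvarchar, so an nvarchar Email column changes the bytes being hashed even though the text looks identical...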
-- Setup demo data: two tables that differ only in the type of Email
-- (varchar in Customers1, nvarchar in Customers2) and hold the same row.
create table dbo.Customers1 (
    CustomerId varchar(255),
    Email varchar(255)
);
insert dbo.Customers1 (CustomerId, Email) values
('8E38101D-988E-4BF1-B8F1-E8E0B8DAA891', 'a1jfapedu@adhoc-orange.com');
create table dbo.Customers2 (
    CustomerId varchar(255),
    Email nvarchar(255)
);
insert dbo.Customers2 (CustomerId, Email) values
('8E38101D-988E-4BF1-B8F1-E8E0B8DAA891', 'a1jfapedu@adhoc-orange.com');
-- Query data: the same hash expression is run against each table so the two
-- ConcatHash values can be compared.
SELECT CustomerId
, Email
, HASHBYTES('MD5', CONCAT_WS('|', CustomerId, Email)) AS ConcatHash
FROM dbo.Customers1
WHERE CustomerId = '8E38101D-988E-4BF1-B8F1-E8E0B8DAA891';
SELECT CustomerId
, Email
, HASHBYTES('MD5', CONCAT_WS('|', CustomerId, Email)) AS ConcatHash
FROM dbo.Customers2
WHERE CustomerId = '8E38101D-988E-4BF1-B8F1-E8E0B8DAA891';
Which yields...
CustomerId
Email
ConcatHash
8E38101D-988E-4BF1-B8F1-E8E0B8DAA891
a1jfapedu@adhoc-orange.com
0xB3CF062CD2FAB8601A1B58E53D1F705B
and...
CustomerId
Email
ConcatHash
8E38101D-988E-4BF1-B8F1-E8E0B8DAA891
a1jfapedu@adhoc-orange.com
0xFACC935D24A15B73B4F6B864D3BA536

Delete duplicate rows with soundex?

I have two tables, one has foreign keys to the other. I want to delete duplicates from Table 1 at the same time updating the keys on Table 2. I.e count the duplicates on Table 1 keep 1 key from the duplicates and query the rest of the duplicate records on Table 2 replacing them with the key I'm keeping from Table 1. Soundex would be the best option because not all the names are spelled right in Table 1. I have the basic algorithm but not sure how to do it. Help?
So far this is what I have:
-- Working variables for the de-duplication pass (local variables take the @ sigil).
declare @Duplicate int
declare @OriginalKey int
-- Temp-table copy of Table1 (the course list that contains the duplicates).
create table #tempTable1
(
    CourseID int, -- the key to keep or delete
    SchoolID int,
    CourseName nvarchar(100),
    Category nvarchar(100),
    IsReqThisYear bit,
    yearrequired int
);
-- Temp-table copy of Table2 (rows that reference courses through CourseID).
create table #tempTable2
(
    CertID int,
    UserID int,
    CourseID int, -- must stay in sync with Table 1
    SchoolID int,
    StartDateOfCourse datetime,
    EndDateOfCourse datetime,
    Type nvarchar(100),
    HrsOfClass float,
    Category nvarchar(100),
    Cost money,
    PassFail varchar(20),
    Comments nvarchar(1024),
    ExpiryDate datetime,
    Instructor nvarchar(200),
    Level nchar(10)
)
-- Remove the Table1 courses that no Table2 row refers to.
delete from Table1
where CourseID not in (
    select t2.CourseID
    from Table2 as t2
    where t2.CourseID is not null
)
-- Snapshot what is left of Table1 into its temp copy.
insert into #tempTable1
    (CourseID, SchoolID, CourseName, Category, IsReqThisYear, yearrequired)
select
    t1.CourseID, t1.SchoolID, t1.CourseName, t1.Category, t1.IsReqThisYear, t1.yearrequired
from Table1 as t1
-- Snapshot all of Table2 into its temp copy.
insert into #tempTable2
    (CertID, UserID, CourseID, SchoolID, StartDateOfCourse, EndDateOfCourse, Type,
     HrsOfClass, Category, Cost, PassFail, Comments, ExpiryDate, Instructor, Level)
select
    t2.CertID, t2.UserID, t2.CourseID, t2.SchoolID, t2.StartDateOfCourse, t2.EndDateOfCourse, t2.Type,
    t2.HrsOfClass, t2.Category, t2.Cost, t2.PassFail, t2.Comments, t2.ExpiryDate, t2.Instructor, t2.Level
from Table2 as t2
-- Selects each course name with a count, joining Table1 to its temp copy on CourseID
-- and keeping pairs whose names have the same SOUNDEX code.
-- NOTE(review): CourseName is selected next to an aggregate with no GROUP BY, which
-- matches the error quoted after this snippet. The trailing "<---" looks like the
-- author's marker rather than SQL.
select cour.CourseName, Count(cour.CourseName) cnt from Table1 as cour
join #tempTable1 as temp on cour.CourseID = temp.CourseID
where SOUNDEX(temp.CourseName) = SOUNDEX(cour.CourseName) <---
The last part does not exactly work, gives me an error
Error: Column 'Table1.CourseName' is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause.
UPDATE: Some of the names in CourseName have numbers in them too; some are written as Roman numerals and some as digits. I need to match those as well, but SOUNDEX ignores numbers.

How do I populate the identity column when inserting into a table?

I have a table whose data is inserted by selecting from another table. For example,
-- Target table with a plain (non-identity) ID column.
CREATE TABLE TestTable
(
    ID int,
    FirstName VARCHAR(100),
    LastName VARCHAR(100)
)
-- Copy the names across; ID is not supplied here.
INSERT INTO TestTable
    (FirstName, LastName)
SELECT
    pc.FirstName,
    pc.LastName
FROM Person.Contact AS pc
But How do I populate the ID? I tried
-- Attempt to generate the ID inline: SCOPE_IDENTITY()+1 is written in the INSERT
-- column list, while the SELECT below supplies only FirstName and LastName.
INSERT INTO TestTable (SCOPE_IDENTITY()+1, FirstName, LastName)
SELECT FirstName, LastName
FROM Person.Contact
The reason I don't want to make ID an identity column is that I duplicated the table structure from an existing table in which ID is a regular column.
But it doesn't work. Any idea?
This depends on the exact SQL server you're using, but since it appears you're using Microsoft SQL Server:
Simply labeling your column as ID is not enough. You'll need to make sure that the ID column is marked in SQL Server as an Identity column. This is similar to marking a column SERIAL in PostgreSQL, or AUTOINCREMENT in MySQL. Make sure you've done this first.
Assuming you've done that, simply let the database itself add the identity value by explicitly not referencing that column in your INSERT statement. Thus, something like
-- ID is left out on purpose so the server's identity support fills it in.
INSERT INTO TestTable
    (FirstName, LastName)
SELECT
    pc.FirstName,
    pc.LastName
FROM Person.Contact AS pc
and relying on SQL Server's underlying identity support to fill it in for you will work fine. It looks from your example like the thing you're missing is marking the ID column as an identity column in the first place.
They are right in that you should specify a autonumber column. As below:
-- TestTable with an auto-numbered ID: seed 1, increment 1.
CREATE TABLE TestTable (
    ID        int IDENTITY(1, 1) NOT NULL,
    Firstname varchar(100)       NULL,
    Lastname  varchar(100)       NULL
)
Then when you insert, use the following:
-- Table variable that collects the identity value generated for each inserted row.
DECLARE @MyTableVar table (ID int);
-- ID is omitted from the column list so it is generated; OUTPUT ... INTO captures
-- the generated values as the rows are inserted.
INSERT INTO TestTable (FirstName, LastName)
OUTPUT INSERTED.ID
INTO @MyTableVar
SELECT FirstName, LastName
FROM Person.Contact;
Then select from @MyTableVar to get the identity values that were inserted.
Is below what you are looking for?
-- Create test_table from the query result. Writing id as the expression "id + 0"
-- keeps an identity property from being carried over to the new table, and the
-- alias is needed because SELECT ... INTO requires every column to have a name.
SELECT
id + 0 AS id
,FirstName
,LastName
INTO test_table
FROM Person.Contact

versioning of a table

anybody has seen any examples of a table with multiple versions for each record
something like if you would had the table
Person(Id, FirstName, LastName)
and you change a record's LastName, then you would have both versions of LastName (the first one, and the one after the change)
I've seen this done two ways. The first is in the table itself by adding an EffectiveDate and CancelDate (or somesuch). To get the current for a given record, you'd do something like: SELECT Id, FirstName, LastName FROM Table WHERE CancelDate IS NULL
The other is to have a global history table (which holds all of your historical data). The structure for such a table normally looks something like
-- Column list of a generic change-history table: one row per changed column value.
Id bigint not null,
TableName nvarchar(50),
ColumnName nvarchar(50),
PKColumnName nvarchar(50),
PKValue bigint, -- or whatever datatype
OriginalValue nvarchar(max),
NewValue nvarchar(max),
ChangeDate datetime
Then you set a trigger on your tables (or, alternatively, add a policy that all of your Updates/Inserts will also insert into your HX table) so that the correct data is logged.
The way we're doing it (might not be the best way) is to have an active bit field, and a foreign key back to the parent record. So for general queries you would filter on active employees, but you can get the history of a single employee with their Employee ID.
-- Table variable holding every version of an employee record. Active flags the
-- version used by general queries; FK_EmployeeID links a row back to its parent record.
declare @employees table
(
    PK_emID int identity(1,1),
    EmployeeID int,
    FirstName varchar(50),
    LastName varchar(50),
    Active bit,
    FK_EmployeeID int,
    primary key (PK_emID)
);
-- Sample data: employee 2 has an inactive old version ('Edge') and an active
-- current version ('Engle') that points back to employee 2.
insert into @employees
(
    EmployeeID,
    FirstName,
    LastName,
    Active,
    FK_EmployeeID
)
select 1, 'David', 'Engle', 1, null
union all
select 2, 'Amy', 'Edge', 0, null
union all
select 2, 'Amy', 'Engle', 1, 2

How to insert sequential numbers in primary key using select subquery?

I am reading Table A and inserting the data into Table B (both tables have the same structure except for the primary key data type). In Table B the primary key is int, whereas in Table A it is UniqueIdentifier.
-- Copies every TableA row into TableB, passing TableA's ID value straight through
-- as TableB's ID.
INSERT INTO TableB
(ID, Names, Address)
(select ID, Names, Address from TableA)
Now how can i insert int type incremental value (1,2,3,so on) in TableB instead of uniqueidentifier from TableA using above script.
Help?
Why not change Table B so that the primary key is an identity which auto-increments?
Go to the table properties, select the ID field, under "Identity specification", set "Identity Increment" = 1, "Identity Seed" = 1. By doing that, the ID becomes auto incremental...
Then your insert statement would be something like:
-- ID is omitted so the identity column numbers the rows automatically.
INSERT INTO TableB
    (Names, Address)
SELECT
    a.Names,
    a.Address
FROM TableA AS a
If changing the schema of your TableB is not an option then add a rank to your select statement like this:
-- Number the copied rows 1, 2, 3, ... in TableA.ID order. ROW_NUMBER() never repeats
-- a value, whereas RANK() gives tied rows the same number.
insert into TableB (ID, Names, Address)
select row_number() over (order by ID), Names, Address
from TableA
This will always start at 1. You could add + 10 if you wanted to start your numbering at a number other than 1. I'm sure you get the idea from there.
-- TableB with an auto-incrementing integer primary key (seed 1, increment 1).
CREATE TABLE TableB (
    ID      int IDENTITY(1,1) PRIMARY KEY,
    Name    nvarchar(200),
    Address nvarchar(200)
)
Then, in the query, don't specify the value of the identity column.
-- The identity column ID is not listed, so its value is generated per row.
INSERT INTO TableB
    (Name, Address)
SELECT
    a.Name,
    a.Address
FROM TableA AS a