SQL How to find if all values from one field exist in another field in any order - sql

I am trying to match data from an external source to an in house source. For example one table would have a field with a value of "black blue" and another table would have a field with a value of "blue black". I am trying to figure out how to check if all individual words in the first table are contained in a record the 2nd table in any order. It's not always two words that need to be compared it could be 3 or 4 as well. I know I could use a cursor and build dynamic sql substituting the space with the AND keywod and using the contains function but I'm hoping not to have to do that.
Any help would be much appreciated.

Try doing something like this: Split the data from the first table on the space into a temporary table variable. Then use CHARINDEX to determine if each word is contained in the second table's record. Then just do this for each word in the first record and if the count is the same as the successful checks then you know every word from the first record is used in the second.
Edit: Use a Split function such as:
CREATE FUNCTION dbo.Split (#sep char(1), #s varchar(512))
RETURNS table
AS
RETURN (
WITH Pieces(pn, start, stop) AS (
SELECT 1, 1, CHARINDEX(#sep, #s)
UNION ALL
SELECT pn + 1, stop + 1, CHARINDEX(#sep, #s, stop + 1)
FROM Pieces
WHERE stop > 0
)
SELECT pn,
SUBSTRING(#s, start, CASE WHEN stop > 0 THEN stop-start ELSE 512 END) AS s
FROM Pieces
)

Here's another method you could try, you could sample some simple attributes of your strings such as, length, number of spaces, etc.; then you could use a cross-join to create all of the possible string match combinations.
Then within your where-clause you can sort by matches, the final piece of which in this example is a check using the patindex() function to see if the sampled piece of the first string is in the second string.
-- begin sample table variable set up
declare #s table(
id int identity(1,1)
,string varchar(255)
,numSpace int
,numWord int
,lenString int
,firstPatt varchar(255)
);
declare #t table(
id int identity(1,1)
,string varchar(255)
,numSpace int
,numWord int
,lenString int
);
insert into #t(string)
values ('my name');
insert into #t(string)
values ('your name');
insert into #t(string)
values ('run and jump');
insert into #t(string)
values ('hello my name is');
insert into #s(string)
values ('name my');
insert into #s(string)
values ('name your');
insert into #s(string)
values ('jump and run');
insert into #s(string)
values ('my name is hello');
update #s
set numSpace = len(string)-len(replace(string,' ',''));
update #s
set numWord = len(string)-len(replace(string,' ',''))+1;
update #s
set lenString = len(string);
update #s
set firstPatt = rtrim(substring(string,1,charindex(' ',string,0)));
update #t
set numSpace = len(string)-len(replace(string,' ',''));
update #t
set numWord = len(string)-len(replace(string,' ',''))+1;
update #t
set lenString = len(string);
-- end sample table variable set up
-- select all combinations of strings using a cross join
-- and sort the entries in your where clause
-- the pattern index checks to see if the sampled string
-- from the first table variable is in the second table variable
select *
from
#s s cross join #t t
where
s.numSpace = t.numspace
and s.numWord = t.numWord
and s.lenString = t.lenString
and patindex('%'+s.firstPatt+'%',t.string)>0;

Related

Generating a unique batch id (SQL Server)

This is possible 2x questions in 1x. Sorry about that, but here goes:
PROBLEM
I am creating a unique batch id everytime a user uploads some data to SQL Server. Currently, I do this by looking at the last value of the 'Identity Specification' and add +1 to that.
Problem arises, as you might have guessed, if multiple users input data at the same, they both would get the same batch id...
Possible Solution
In order to mitigate this issue, I have come up with this method to generate 3 letter + random number; and the (last id value + 1):
DECLARE #tmp CHAR(3) = CHAR(CAST(RAND()*26 AS int)+65) + CHAR(CAST(RAND()*26 AS int)+65) + CHAR(CAST(RAND()*26 AS int)+65);
SELECT #tmp;
select cast(RAND()*9999 as int)
(1) I am not sure how to concatenate this into one line of string.
(2) The other question, is there a way to 100% guarantee every user is given a unique batch id every time they submit a request, regardless of how many are doing it simultaneously?
I would really appreciate your input in this.
1 - Concatenation part is very simple, you can do the following:
DECLARE #tmp VARCHAR(10);
SET #tmp = CHAR(CAST(RAND()*26 AS int)+65)
+ CHAR(CAST(RAND()*26 AS int)+65)
+ CHAR(CAST(RAND()*26 AS int)+65)
+ CAST(cast(RAND()*9999 as int) AS VARCHAR(4));
SELECT #tmp;
2 - I would suggest to populate a table with the Random values you would like to issue to users and then select from it, to avoid the race-condition.
Create a table called BatchNumbers with two Columns BatchNumber and Used.
Populate the batch number table and 0 as default value for Used Column.
Then everytime you need a batch number do the following.
CREATE PROC dbo.usp_Get_BatchNumber
#BatchNumber VARCHAR(10) OUTPUT
AS
BEGIN
SET NOCOUNT ON;
Declare #t TABLE (BN VARCHAR(10));
UPDATE TOP (1) BatchNumbers
SET Used = 1
OUTPUT inserted.BatchNumber INTO #t (BN )
WHERE Used = 0;
SELECT #BatchNumber = BN FROM #t;
END
You need an "Upload" table with a Bigint Identity column for the BatchID, then add a new row for every user upload.
The server will maintain the correct values and prevent collisions.
I would use the built in function for this:
select newid()
> 240CA878-135E-4176-AE57-0FA83FF74037
For the first problem, you can either create a variable for your random number as a char(4) and just simply concatenate the 2, or create it as an int and then CAST it as a VARCHAR while concatenating. Everything that is concatenated into a string must be a string.
DECLARE #tmp CHAR(3) = CHAR(CAST(RAND()*26 AS int)+65) + CHAR(CAST(RAND()*26 AS int)+65) + CHAR(CAST(RAND()*26 AS int)+65);
SELECT #tmp;
DECLARE #randNum VARCHAR(4) = CAST(RAND()*9999 AS INT)
-- OR DECLARE #randNum INT = CAST(Rand()*9999) AS INT)
SELECT #randNum
DECLARE #batchID VARCHAR(MAX) = #tmp + #randNum
-- OR DECLARE #batchID VARCHAR(MAX) = #tmp + CAST(#randNum AS VARCHAR)
SELECT #batchID
try the following:
1)
DECLARE #tmp CHAR(7) = CHAR(CAST(RAND()*26 AS int)+65) + CHAR(CAST(RAND()*26 AS int)+65) + CHAR(CAST(RAND()*26 AS int)+65) + cast(cast(RAND()*9999 as int) as varchar(4));
SELECT #tmp;
2) Yes, I think so.
I upvoted Terry Carmen's answer, but from his comments it sounds like he's suggesting something different from what I first thought, so here's a complete example. I think you want a table that has a key defined with the IDENTITY property, which will tell SQL Server that you want unique, sequential values in that column and you want the database to worry about the details of guaranteeing that this is so.
create table dbo.Import
(
-- identity(1, 1) means that SQL Server will automatically assign values for
-- this column when you insert a record, with 1 being the first value
-- assigned and each subsequent value incrementing by 1.
Identifier bigint not null identity(1, 1),
-- This column for illustration only; replace it with whatever data you need
-- to store.
YourStuffHere varchar(max)
);
-- Now simply use any INSERT or MERGE command against dbo.Import, and omit the
-- Identifier column from the list of columns whose values the command supplies.
-- Then you can use the SCOPE_IDENTITY() function or an OUTPUT clause to capture
-- the Identifier value that SQL Server has inserted.
-- Example 1: INSERT with explicit values and OUTPUT.
insert dbo.Import
(YourStuffHere)
output
inserted.Identifier
values
('Example 1');
-- Example 2: INSERT/SELECT with OUTPUT.
insert dbo.Import
(YourStuffHere)
output
inserted.Identifier
select
'Example 2';
-- Example 3: INSERT with SCOPE_IDENTITY().
insert dbo.Import
(YourStuffHere)
values
('Example 3');
select Identifier = convert(bigint, scope_identity());
-- Show table contents.
select * from dbo.Import;
The first INSERT statement above produces the following result:
Identifier
1
The second:
Identifier
2
The SELECT following the third INPUT gives:
Identifier
3
And the final SELECT shows you the contents of the table:
Identifier YourStuffHere
1 Example 1
2 Example 2
3 Example 3
This is the easiest way to go about this as it allows SQL Server to do all the real work for you. Please let me know if I've misunderstood your requirements.

How to store multiple values in a SQL Server variable

I want to store values from a SELECT statement into a variable which is capable of holding more than one value because my SELECT statement returns multiple values of type INT. This is how my SP looks like so far.
ALTER PROCEDURE "ESG"."SP_ADD"
AS
BEGIN
DECLARE #Id table(identifiers VARCHAR);
INSERT INTO #Id (identifiers) VALUES('axaa1aaa-aaaa-a5aa-aaaa-aa8aaaa9aaaa');
INSERT INTO #Id (identifiers) VALUES('bxbb1bbb-bbbb-b5bb-bbb4-bb8bbbb9bbbf');
DECLARE #tranID INT = (SELECT
DOCUMENT_SET_.DOCUMENT_SET_TRANSACTION_ID
FROM DOCUMENT_SET_TRANSACTION
WHERE DOCUMENT_SET_TRANSACTION.IDENTIFIER IN (SELECT identifiers FROM #Id));
END
Variable #tranID should be a list or an array to hold the ids. Is it possible to do it SQL Server?
You can declare a variable of type table
DECLARE #tblTrans TABLE (
tranID INT
);
INSERT INTO #tblTrans
SELECT DOCUMENT_SET_TRANSACTION.DOCUMENT_SET_TRANSACTION_ID
FROM ESG.DOCUMENT_SET_TRANSACTION
WHERE DOCUMENT_SET_TRANSACTION.IDENTIFIER
IN (SELECT identifiers FROM #envelopeId);
Depending on what you want to do with the values after this, you could declare a cursor to loop through them or select straight from the variable.
You could also look into using a temporary table depending on what scope you need.
Try this, only take the firs row of example. Do u try this?
select DOCUMENT_SET_TRANSACTION.DOCUMENT_SET_TRANSACTION_ID,
(STUFF((SELECT '-' + convert(varchar(max),DOCUMENT_SET_TRANSACTION.DOCUMENT_SET_TRANSACTION_ID)
FROM ESG.DOCUMENT_SET_TRANSACTION
FOR XML PATH ('')), 1, 2, '')) AS example
FROM ESG.DOCUMENT_SET_TRANSACTION

SQL Merge Statement - Output into a scalar variable (SQL Server)

I'm getting my head around the MERGE statement in SQL server. I generally use it to insert/update a single row, which I realise isn't the only use, but it's something I seem to do quite often.
But what happens if you want to insert a value of 1, or update to increment the value and output the incremented value eg:
CREATE TABLE [Counter] (
[Key] VARCHAR(255) NOT NULL PRIMARY KEY,
[Value] INT NOT NULL
);
DECLARE #paramKey VARCHAR(255);
SET #paramKey = 'String';
MERGE [Counter] AS targt
USING (Values(#paramKey)) AS source ([Key])
ON (targt.[Key] = source.[Key])
WHEN MATCHED THEN
UPDATE SET Value = Value +1
WHEN NOT MATCHED THEN
INSERT ([Key], Value)
VALUES (source.[Key], 1);
-- but now I want the new value!
Is there a way of doing this? I notice the output clause in https://msdn.microsoft.com/en-gb/library/bb510625.aspx but it doesn't seem to work with scalars (I could output to a single row-ed table variable but that seems wrong):
-- using table variables but seems
DECLARE #paramKey VARCHAR(255), #value int;
SET #paramKey = 'String'
DECLARE #Tab table (
[Value] INT
)
MERGE Counter AS targt
USING (Values(#paramKey)) AS source ([Key])
ON (targt.[Key] = source.[Key])
WHEN MATCHED THEN
UPDATE SET Value = Value +1
WHEN NOT MATCHED THEN
INSERT ([Key], Value)
VALUES (source.[Key], 1)
OUTPUT inserted.[Value] INTO #Tab;
-- can now use #Tab as a single rowed table variable
Is there a better option?

Extracting specific column values embedded within composite Strings of codes

I am trying to create a piece of code in sql server 2008 that will grab specific values from each distinct string within my dbo table. The ultimate goal is to make a drop down box within Visual Studio so that one can choose all lines from the database that contain a specific product code (see definition of product code below). Example strings:
in_0314_95pf_500_w_0315
in_0314_500_95pf_0315_w
The part of these strings I am wishing to identify is the 3 digit numeric code (in this case let us call it product code) that appears once within each string. There are roughly 300 different product codes.
The problem is that these product code values do not appear in the same position within each unique string. Hence, I am having a hard time determining the product code because I can't use substring, charindex, like, etc.
Any ideas? Any help is MUCH appreciated.
This can be done with PATINDEX:
DECLARE #s NVARCHAR(100) = 'in_0314_95pf_500_w_0315'
SELECT SUBSTRING(#s, PATINDEX('%[_][0-9][0-9][0-9][_]%', #s) + 1, 3)
Output:
500
If there are no underscores then:
SELECT SUBSTRING(#s, PATINDEX('%[^0-9][0-9][0-9][0-9][^0-9]%', #s) + 1, 3)
This means 3 digits between any symbols that are not digits.
EDIT:
Apply to table like:
SELECT SUBSTRING(ColumnName, PATINDEX('%[^0-9][0-9][0-9][0-9][^0-9]%', ColumnName) + 1, 3)
FROM TableName
One approach is to use a String splitting table function like this one which breaks the string up into its components. You can then filter the components based on your criteria:
SELECT Name
FROM dbo.splitstring('in_0314_95pf_500_w_0315', '_')
WHERE ISNUMERIC(Name) = 1 AND LEN(Name) = 3;
I've amended the function slightly to accept the delimiter as a parameter.
CREATE FUNCTION dbo.splitstring ( #stringToSplit VARCHAR(MAX), #delimiter VARCHAR(50))
RETURNS
#returnList TABLE ([Name] [nvarchar] (500))
AS
BEGIN
DECLARE #name NVARCHAR(255)
DECLARE #pos INT
WHILE CHARINDEX(#delimiter, #stringToSplit) > 0
BEGIN
SELECT #pos = CHARINDEX(#delimiter, #stringToSplit)
SELECT #name = SUBSTRING(#stringToSplit, len(#delimiter), #pos-len(#delimiter))
INSERT INTO #returnList
SELECT #name
SELECT #stringToSplit = SUBSTRING(#stringToSplit, #pos+LEN(#delimiter),
LEN(#stringToSplit)-#pos)
END
INSERT INTO #returnList
SELECT #stringToSplit
RETURN
END
To apply this to your table, use CROSS APPLY (Single Delimiter):
SELECT mt.Name, x.Name AS ProductCode
FROM MyTable mt
CROSS APPLY dbo.splitstring(mt.Name, '_') x
WHERE ISNUMERIC(x.Name) = 1 AND LEN(x.Name) = 3
Update, Multiple Delimiters
I guess the real underlying problem is that ultimately the product codes need to be normalized out of the composite key (e.g. add a distinct ProductId or ProductCode column to the same table), derived using a query like this, and then stored back in the table via an update. Reverse engineering the product codes out of the string appears to be a trial and error process.
Nonetheless, you can continue to keep passing the split strings through further splitting functions (one per each type of delimiter), before applying your final discriminating filter:
SELECT *
FROM MyTable mt
CROSS APPLY dbo.splitstring(mt.Name, 'test') y -- First alias
CROSS APPLY dbo.splitstring(y.Name, '_') x -- Reference the preceding alias
WHERE ISNUMERIC(x.Name) = 1 AND LEN(x.Name) = 3; -- Must reference the last alias (x)
Note that the stringsplit function has again been changed to accommodate multicharacter delimiters.
If you have a table (or can generate in inline view) of the product codes, you can join the list of long strings to the product codes with a like clause.
Create Table longcodes (
longcode varchar(20)
)
Create Table products (
prodCode char(3)
)
insert products values('100')
insert products values('111')
insert products values('123')
insert longcodes values ('abc_a_100_test')
insert longcodes values ('asdf_111_bob')
insert longcodes values ('in_0314_123_95pf')
insert longcodes values ('f_100_u')
insert longcodes values ('hihi_111_bye')
insert longcodes values ('in_123_0314_95pf')
insert longcodes values ('a_b__c_d_100_efg')
select *
from products p
join longcodes l on l.longcode like '%_' + p.prodCode + '_%'
And they get aligned with the product codes like this:
prodCode longcode
100 abc_a_100_test
100 f_100_u
100 a_b__c_d_100_efg
111 asdf_111_bob
111 hihi_111_bye
123 in_0314_123_95pf
123 in_123_0314_95pf
EDIT: Seeing the developments in the other answer, you can simplify the like clause to
like p.prodCode
and just deal with the fact that you have a much greater chance of a single composite string producing multiple matches.

How to run SQL Server stored procedure query for each value of a CSV

I am having following stored procedure in my SQL Server 2008 R2 database
ALTER PROCEDURE [dbo].[usp_send_email]
#pStatus Int Out,
#pEMailId Int Out,
#pSenderUserName varchar(MAX),
#pReceivers VarChar(50), **-- this column can have csv values**
#pSub NVarChar(100),
#pCon NVarchar(MAX),
#pHasAttachments Bit
AS
BEGIN
--SET NOCOUNT ON;
Insert Into MessagingMessage
(
CreatedBy,
[Subject],
Body,
HasAttachments
)
Values
(
#pSenderUserName,
#pSub,
#pCon,
#pHasAttachments
)
SET #pEMailId = SCOPE_IDENTITY()
Insert Into MessagingMessageReceipient
(
MessageId,
ReceipientId,
ReceipientType
)
Values
(
#pEMailId,
#pReceivers,
1
)
SET #pStatus = SCOPE_IDENTITY()
END
In above code I want to run the first statement only one time but second insert statements in loop for each comma separated username.CSV value coming as a parameter is already validated by C# code so no need to validate it.
Using this link: how to split and insert CSV data into a new table in single statement?, there is a function you can use to parse csv to to table and I have used it in the code below. Try the following
Insert Into MessagingMessageReceipient
(
MessageId,
ReceipientId,
ReceipientType
)
SELECT
#pEMailId,
csv.Part, -- each #pReceiver
1
FROM
dbo.inline_split_me(',',#pReceivers) csv
And the function is copied below
CREATE FUNCTION inline_split_me (#SplitOn char(1),#String varchar(7998))
RETURNS TABLE AS
RETURN (WITH SplitSting AS
(SELECT
LEFT(#String,CHARINDEX(#SplitOn,#String)-1) AS Part
,RIGHT(#String,LEN(#String)-CHARINDEX(#SplitOn,#String)) AS Remainder
WHERE #String IS NOT NULL AND CHARINDEX(#SplitOn,#String)>0
UNION ALL
SELECT
LEFT(Remainder,CHARINDEX(#SplitOn,Remainder)-1)
,RIGHT(Remainder,LEN(Remainder)-CHARINDEX(#SplitOn,Remainder))
FROM SplitSting
WHERE Remainder IS NOT NULL AND CHARINDEX(#SplitOn,Remainder)>0
UNION ALL
SELECT
Remainder,null
FROM SplitSting
WHERE Remainder IS NOT NULL AND CHARINDEX(#SplitOn,Remainder)=0
)
SELECT Part FROM SplitSting
)