I have a variable #a='1,2,3,4' and a table with a column B that contains comma-separated values.
How can I check whether the values in column B include any of the values in the #a variable?
You need to implement a function for splitting the values. There are a lot of variations, you can use this:
CREATE FUNCTION [dbo].[fn_Analysis_ConvertCsvListToNVarCharTableWithOrder](#List nvarchar(max), #Delimiter nvarchar(10) = ',')
RETURNS #result TABLE
(
    [Value] nvarchar(max),
    [SortOrder] bigint NOT NULL
)
AS
BEGIN
    -- Splits #List on #Delimiter and returns one row per item together with a
    -- running number (SortOrder). A NULL delimiter falls back to a comma.
    SET #Delimiter = ISNULL(#Delimiter, ',');

    -- Turn the list into <r><![CDATA[item]]></r> fragments; CDATA keeps markup
    -- characters inside the items from being parsed as XML.
    DECLARE #ItemsXml xml =
        N'<r><![CDATA[' + REPLACE(#List, #Delimiter, ']]></r><r><![CDATA[') + ']]></r>';

    -- Staging table whose IDENTITY column numbers the items as they are inserted.
    DECLARE #Numbered TABLE
    (
        Value nvarchar(max),
        SortOrder bigint NOT NULL IDENTITY(1, 1) PRIMARY KEY
    );

    INSERT INTO #Numbered (Value)
    SELECT Item.Node.value('.', 'nvarchar(max)')
    FROM #ItemsXml.nodes('//r') AS Item(Node)
    OPTION (OPTIMIZE FOR (#ItemsXml = NULL));

    INSERT INTO #result (Value, SortOrder)
    SELECT n.Value, n.SortOrder
    FROM #Numbered AS n;

    RETURN;
END
Having such a function, it's pretty easy:
-- Sample rows: every [column] value is itself a comma-separated list.
DECLARE #DataSource TABLE
(
[column] VARCHAR(1024)
);
-- The filter list whose values we are looking for.
DECLARE #column VARCHAR(1024) = '1,2,3,4';
INSERT INTO #DataSource ([column])
VALUES ('100,200,300')
,('100,1,500')
,('1,2,3,500')
,('200')
,('33,32,31,4,30');
-- DSV = the values split out of each row's list; FV = the values split out of the filter list.
-- The join yields a row once per shared value, so DISTINCT collapses rows that
-- match several filter values (e.g. '1,2,3,500').
SELECT DISTINCT [column]
FROM #DataSource
CROSS APPLY [dbo].[fn_Analysis_ConvertCsvListToNVarCharTableWithOrder] ([column], ',') DSV
INNER JOIN [dbo].[fn_Analysis_ConvertCsvListToNVarCharTableWithOrder] (#column, ',') FV
ON DSV.[Value] = FV.[Value];
Using CROSS APPLY, we split the values of each row's column. Then we split the filter values and perform an INNER JOIN so that only rows having at least one value contained in the filter are matched. After that we need a DISTINCT, because a single column value may contain several values from the filter and would otherwise be returned once per match.
A t-sql string "splitter" is what you need but I would NOT use the mTVF recommended above as it is extremely inefficient and will kill parallelism. An inline table valued function (iTVF) is what you want for splitting strings.
I would suggest using delimitedSplit8k or delimitedSplit8k_lead which will perform ~30-90 times faster; or STRING_SPLIT if you're on SQL 2016+ and only need the value which will be several hundred times faster. Note this performance test:
-- sample data: #rows short comma-delimited strings derived from NEWID()
declare #rows int = 10000;
if object_id('tempdb..#strings') is not null drop table #strings;
select top (#rows)
someid = identity(int,1,1),
somestring = replace(right(left(cast(newid() as varchar(36)), 27),21),'-',',')
into #strings
-- sys.all_columns joined to itself is used only as a row source for TOP (#rows)
from sys.all_columns a, sys.all_columns b;
-- Performance test
-- Each timed batch assigns the split value to a variable instead of returning
-- rows, then prints the elapsed milliseconds; "go 5" repeats the batch 5 times.
set nocount on;
print 'fn_Analysis_ConvertCsvListToNVarCharTableWithOrder'+char(10)+replicate('-',50);
go
declare #st datetime = getdate(), #item varchar(10);
select #item = [value]
from #strings t
cross apply dbo.fn_Analysis_ConvertCsvListToNVarCharTableWithOrder(t.somestring,',');
print datediff(ms,#st,getdate());
go 5
print 'delimitedSplit8K (serial)'+char(10)+replicate('-',50);
go
declare #st datetime = getdate(), #item varchar(10);
select #item = item
from #strings t
cross apply dbo.DelimitedSplit8K(t.somestring,',')
-- maxdop 1 restricts this run to a serial plan
option (maxdop 1);
print datediff(ms,#st,getdate());
go 5
print 'delimitedSplit8K (parallel)'+char(10)+replicate('-',50);
go
declare #st datetime = getdate(), #item varchar(10);
select #item = item
from #strings t
cross apply dbo.DelimitedSplit8K(t.somestring,',')
-- NOTE(review): trace flag 8649 is presumably what yields the parallel plan this test is labelled with - confirm
option (recompile, querytraceon 8649);
print datediff(ms,#st,getdate());
go 5
Results
fn_Analysis_ConvertCsvListToNVarCharTableWithOrder
--------------------------------------------------
Beginning execution loop
4183
4274
4536
4294
4406
Batch execution completed 5 times.
delimitedSplit8K (serial)
--------------------------------------------------
Beginning execution loop
50
50
50
54
53
Batch execution completed 5 times.
delimitedSplit8K (parallel)
--------------------------------------------------
Beginning execution loop
133
134
133
140
136
Batch execution completed 5 times.
How you could use it to solve your problem:
-- Sample table of single numbers and a comma-separated search list.
declare #sometable table(someid int identity, someNbr tinyint);
insert #sometable values (1),(3),(6),(12),(7),(15),(19);
declare #searchstring varchar(1000) = '1,2,3,4,19';
-- Split the search list once and keep the rows whose number appears in it.
-- NOTE(review): s.Item is compared with a tinyint column, so an implicit conversion is presumably involved - confirm Item's type.
select someid, someNbr
from #sometable t
cross apply dbo.DelimitedSplit8K(#searchstring,',') s
where t.someNbr = s.Item;
Results
someid someNbr
----------- -------
1 1
2 3
7 19
Related
How do I create a function in SQL Server 2017 that identifies when a string contains duplicate consecutive letters (a-z) and replaces those duplicate letters with a single instance of that letter?
Here are some examples of what should happen:
CompanyAAABCD -> CompanyABCD
CommpanyABYTTT -> CompanyABYT
Company11111 -> Company11111
alter function fn_RemoveDuplicateChar(#name varchar(200))
RETURNS VARCHAR(200)
as
begin
    /*
      Collapses runs of the same consecutive letter (a-z / A-Z) in #name into a
      single letter; every other character (digits, spaces, punctuation) is
      copied through unchanged.

      #name   : input text; NULL yields '' (loop is skipped).
      returns : the text with repeated letters removed.

      Letter equality uses the database's default collation, so under a
      case-insensitive collation 'Aa' collapses to 'A'.
    */
    declare #strPosition int = 1;
    -- DATALENGTH (unlike LEN) also counts trailing spaces, so they are not dropped.
    declare #strlen int = datalength(#name);
    declare #finalstr varchar(200) = '';
    -- Previous input character. It is tracked explicitly because reading it back
    -- with SUBSTRING(#finalstr, LEN(#finalstr), 1) skips trailing spaces and made
    -- 'ab b' lose its second 'b'.
    declare #prev varchar(1) = '';
    declare #cur varchar(1);

    while #strPosition <= #strlen
    begin
        set #cur = substring(#name, #strPosition, 1);

        -- Only a letter that repeats the previous character is dropped.
        if not (#cur like '[a-zA-Z]' and #cur = #prev)
            set #finalstr = #finalstr + #cur;

        set #prev = #cur;
        set #strPosition = #strPosition + 1;
    end

    return #finalstr;
end
go
-- Expected results, per the examples in the question:
-- CompanyABCD
select dbo.fn_RemoveDuplicateChar('CompanyAAABCD')
-- CompanyABYT
select dbo.fn_RemoveDuplicateChar('CommpanyABYTTT')
-- Company11111 (digits are left alone)
select dbo.fn_RemoveDuplicateChar('Company11111')
If you just wanted a single round of replacement (i.e. aaabbbb becomes aabb) then you could use this:
CREATE OR ALTER FUNCTION dbo.RemoveDuplicates (#value varchar(200))
RETURNS VARCHAR(200)
WITH SCHEMABINDING
AS
BEGIN
    /*
      Single-pass de-duplication: for each letter, every adjacent PAIR of that
      letter is replaced once by a single letter (so 'aaabbbb' becomes 'aabb').

      #value  : input text; NULL yields NULL.
      returns : the text after one round of pair replacement.
    */
    DECLARE #result varchar(200) = #value;
    DECLARE #i int = 65;

    -- A-Z is ASCII 65-90 (inclusive); the matching lower-case letter is 32 higher.
    WHILE #i <= 90
    BEGIN
        -- A binary collation makes REPLACE case-sensitive, so a lower-case pair
        -- is not rewritten as an upper-case letter; each case is handled separately.
        SET #result = REPLACE(#result COLLATE Latin1_General_BIN2, CHAR(#i) + CHAR(#i), CHAR(#i));
        SET #result = REPLACE(#result COLLATE Latin1_General_BIN2, CHAR(#i + 32) + CHAR(#i + 32), CHAR(#i + 32));
        SET #i += 1;
    END;

    RETURN #result;
END;
GO
But it seems you need a repeated replacement, so that every character that is the same as the one before it is removed.
So we can use this version, which is similar to the other answer.
CREATE OR ALTER FUNCTION dbo.RemoveDuplicates (#value varchar(200))
RETURNS varchar(200)
WITH SCHEMABINDING
AS
BEGIN
    /*
      Removes every character that equals the character immediately before it,
      so each run of identical characters collapses to one.

      #value  : input text; NULL yields NULL, '' yields ''.
      returns : the de-duplicated text. Equality follows the database collation;
                LEN is used, so trailing spaces of #value are not copied.
    */
    -- varchar(1), not char(1): char(1) would pad an empty input to a single space.
    DECLARE #c varchar(1);
    DECLARE #cLast varchar(1) = LEFT(#value, 1);
    DECLARE #result varchar(200) = #cLast;
    DECLARE #strlen int = LEN(#value);
    DECLARE #i int = 2;

    -- <= so that the final character is examined as well.
    WHILE (#i <= #strlen)
    BEGIN
        SET #c = SUBSTRING(#value, #i, 1);
        IF (#c <> #cLast)
            SET #result += #c;
        -- Advance the comparison character; without this every character was
        -- only ever compared with the first one.
        SET #cLast = #c;
        SET #i += 1;
    END;

    RETURN #result;
END;
GO
I rewrote this as an inline Table-Valued Function, and found it significantly faster. Here are two versions of that, depending whether you can use STRING_AGG
CREATE OR ALTER FUNCTION dbo.RemoveDuplicatesXML (#value varchar(200))
RETURNS TABLE
WITH SCHEMABINDING
AS RETURN
(
    -- Sixteen: 16 constant rows; Grid: 16 x 16 = 256 rows; Tally: those rows
    -- numbered from 1; Positions: one number per character of #value.
    WITH Sixteen AS (
        SELECT n
        FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS v(n)
    ),
    Grid AS (
        SELECT 1 AS n
        FROM Sixteen AS a
        CROSS JOIN Sixteen AS b
    ),
    Tally AS (
        SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS rn
        FROM Grid
    ),
    Positions AS (
        SELECT TOP (LEN(#value)) rn
        FROM Tally
    )
    -- Keep the first character and every character that differs from the one
    -- before it, then concatenate the survivors in position order.
    SELECT (
        SELECT SUBSTRING(#value, p.rn, 1)
        FROM Positions AS p
        WHERE SUBSTRING(#value, p.rn - 1, 1) <> SUBSTRING(#value, p.rn, 1)
           OR p.rn = 1
        ORDER BY p.rn
        FOR XML PATH(''), TYPE
    ).value('text()[1]','nvarchar(max)') AS Result
);
GO
CREATE OR ALTER FUNCTION dbo.RemoveDuplicatesAGG (#value varchar(200))
RETURNS TABLE
WITH SCHEMABINDING
AS RETURN
(
    -- Sixteen: 16 constant rows; Grid: 16 x 16 = 256 rows; Tally: those rows
    -- numbered from 1; Positions: one number per character of #value.
    WITH Sixteen AS (
        SELECT n
        FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS v(n)
    ),
    Grid AS (
        SELECT 1 AS n
        FROM Sixteen AS a
        CROSS JOIN Sixteen AS b
    ),
    Tally AS (
        SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS rn
        FROM Grid
    ),
    Positions AS (
        SELECT TOP (LEN(#value)) rn
        FROM Tally
    )
    -- Keep the first character and every character that differs from the one
    -- before it; STRING_AGG glues the survivors back together in position order.
    SELECT STRING_AGG(SUBSTRING(#value, p.rn, 1), '') WITHIN GROUP (ORDER BY p.rn) AS Result
    FROM Positions AS p
    WHERE SUBSTRING(#value, p.rn - 1, 1) <> SUBSTRING(#value, p.rn, 1)
       OR p.rn = 1
);
GO
This utilizes Itzik Ben-Gan's famous inline tally-table method to break the string out into single characters. As written it generates 16 x 16 = 256 numbers, so if your strings can be longer than 256 characters you will need another CROSS JOIN (or more (1) rows in the VALUES list).
You have two methods to use this, the performance should be identical
Either as a scalar subquery
-- Scalar-subquery form: the function returns exactly one row with one column (Result).
SELECT (SELECT d.Result FROM dbo.RemoveDuplicatesAGG(t.MyString) AS d) AS Result
FROM myTable t
Or as an APPLY
-- APPLY form: the function is evaluated once per row of myTable and exposes its Result column as d.Result.
SELECT d.Result
FROM myTable t
CROSS APPLY RemoveDuplicatesAGG(t.MyString) d
I know I'm a little late here, but if performance is important then you can use the fastest "de-duplicator" in the game (the function, removeDupesExcept8K, is at the end of this post). It takes an input string and a pattern describing the characters to preserve, i.e. to leave untouched; in the example below the pattern '[^A-Z]' preserves everything that is not A to Z, so only the letters A to Z are de-duplicated.
-- '[^A-Z]' is the pattern of characters to preserve, so only runs of A-Z are collapsed here.
DECLARE #string VARCHAR(8000) = 'AAABBBCCC999';
SELECT rd.NewString FROM samd.removeDupesExcept8K(#string, '[^A-Z]') AS rd;
Returns: ABC999
Let's compare fn_RemoveDuplicateChar from B.Muthamizhselvi above to the one at the end of the post.
Performance test:
--==== Test Data
-- 10,000 strings built from NEWID(): 'A' and 'B' become 0 and every '-' becomes 'AAA',
-- so each string contains runs of repeated letters to collapse.
SELECT TOP(10000)
ID = IDENTITY(INT,1,1),
String = REPLACE(REPLACE(REPLACE(NEWID(),'A',0),'B',0),'-','AAA')
INTO #strings
-- sys.all_columns joined to itself is used only as a row source for TOP(10000)
FROM sys.all_columns, sys.all_columns b;
GO
--==== Performance Test
-- Each timed batch assigns the result to a variable instead of returning rows,
-- then prints the elapsed milliseconds; "GO 3" repeats the batch 3 times.
PRINT CHAR(13)+'dbo.fn_RemoveDuplicateChar'+CHAR(13)+REPLICATE('-',90);
GO
DECLARE #st DATETIME = GETDATE(), #x VARCHAR(100);
SELECT #x = dbo.fn_RemoveDuplicateChar(s.String)
FROM #strings AS s
PRINT DATEDIFF(MS,#st,GETDATE());
GO 3
PRINT CHAR(13)+'samd.removeDupChar8K - Serial'+CHAR(13)+REPLICATE('-',90);
GO
DECLARE #st DATETIME = GETDATE(), #x VARCHAR(100);
SELECT #x = rd.NewString
FROM #strings AS s
CROSS APPLY samd.removeDupesExcept8K(s.String,'[^A-Z]') AS rd
-- MAXDOP 1 restricts this run to a serial plan
OPTION (MAXDOP 1);
PRINT DATEDIFF(MS,#st,GETDATE());
GO 3
PRINT CHAR(13)+'samd.removeDupChar8K - Parallel'+CHAR(13)+REPLICATE('-',90);
GO
DECLARE #st DATETIME = GETDATE(), #x VARCHAR(100);
SELECT #x = rd.NewString
FROM #strings AS s
CROSS APPLY samd.removeDupesExcept8K(s.String,'[^A-Z]') AS rd
-- NOTE(review): trace flag 8649 is presumably what yields the parallel plan this test is labelled with - confirm
OPTION (QUERYTRACEON 8649);
PRINT DATEDIFF(MS,#st,GETDATE());
GO 3
As you'll see below, removeDupesExcept8K is twice as fast with a serial execution plan (one CPU) and more than 10X faster with a parallel plan. No need to test fn_RemoveDuplicateChar with a parallel plan, scalar UDFs can't go parallel unless inlined.
Test Results:
dbo.fn_RemoveDuplicateChar
------------------------------------------------------------------------------------------
Beginning execution loop
1110
1106
1093
Batch execution completed 3 times.
samd.removeDupChar8K - Serial
------------------------------------------------------------------------------------------
Beginning execution loop
563
560
593
Batch execution completed 3 times.
samd.removeDupChar8K - Parallel
------------------------------------------------------------------------------------------
Beginning execution loop
91
91
93
Batch execution completed 3 times.
The Function
IF OBJECT_ID('samd.removeDupesExcept8K') IS NOT NULL DROP FUNCTION samd.removeDupesExcept8K;
GO
CREATE FUNCTION samd.removeDupesExcept8K(#string varchar(8000), #preserved varchar(50))
/*****************************************************************************************
[Purpose]:
A purely set-based inline table valued function (iTVF) that accepts an input string
(#string) and a pattern (#preserved) and removes all duplicate characters in #string that
do not match the #preserved pattern.
[Author]:
Alan Burstein
[Compatibility]:
SQL Server 2008+
[Syntax]:
--===== Autonomous use
SELECT rd.newString
FROM samd.removeDupesExcept8K(#string, #preserved) AS rd;
--===== Use against a table
SELECT st.SomeColumn1, rd.newString
FROM SomeTable AS st
CROSS
APPLY samd.removeDupesExcept8K(st.SomeColumn1, #preserved) AS rd;
Parameters:
#string = varchar(8000); Input string to be "cleaned"
#preserved = varchar(50); the pattern to preserve. For example, when #preserved='[0-9]'
only duplicate non-numeric characters will be removed
[Return Types]:
Inline Table Valued Function returns:
newString = varchar(8000); the string with duplicate characters removed
[Developer Notes]:
1. Requires NGrams8K. The code for NGrams8K can be found here:
http://www.sqlservercentral.com/articles/Tally+Table/142316/
2. This function is what is referred to as an "inline scalar UDF". Technically it's an
inline table valued function (iTVF) but performs the same task as a scalar valued user
defined function (UDF); the difference is that it requires the APPLY table operator
to accept column values as a parameter. For more about "inline" scalar UDFs see this
article by SQL MVP Jeff Moden: http://www.sqlservercentral.com/articles/T-SQL/91724/
and for more about how to use APPLY see this article by SQL MVP Paul White:
http://www.sqlservercentral.com/articles/APPLY/69953/.
Note the above syntax example and usage examples below to better understand how to
use the function. Although the function is slightly more complicated to use than a
scalar UDF it will yield notably better performance for many reasons. For example,
unlike scalar UDFs or multi-line table valued functions, the inline scalar UDF does
not restrict the query optimizer's ability to generate a parallel query execution plan.
3. removeDupesExcept8K is deterministic; for more about deterministic and nondeterministic
functions see https://msdn.microsoft.com/en-us/library/ms178091.aspx
[Examples]:
--===== 1. Examples...
DECLARE #string varchar(8000) = '!!!aa###bb!!!';
BEGIN
--===== 1.1. Remove all duplicate characters
SELECT f.newString
FROM samd.removeDupesExcept8K(#string,'') f; -- Returns: !a#b!
--===== 1.2. Remove all non-alphabetical duplicates
SELECT f.newString
FROM samd.removeDupesExcept8K(#string,'[a-z]') f; -- Returns: !aa#bb!
--===== 1.3. Remove only alphabetical duplicates
SELECT f.newString
FROM samd.removeDupesExcept8K(#string,'[^a-z]') f; -- Returns: !!!a###b!!!
END
---------------------------------------------------------------------------------------
[Revision History]:
Rev 00 - 20160720 - Initial Creation - Alan Burstein
****************************************************************************************/
RETURNS TABLE WITH SCHEMABINDING AS RETURN
SELECT newString =
(
SELECT ng.token+''
FROM samd.NGrams8K(#string,1) AS ng
WHERE ng.token <> SUBSTRING(#string, ng.position+1,1) -- exclude chars = the next char
OR ng.token LIKE #preserved -- preserve characters that match the #preserved pattern
ORDER BY ng.position
FOR XML PATH(''),TYPE
).value('(text())[1]','varchar(8000)'); -- using Wayne Sheffield's concatenation logic
I have a variable whose value I want to split on the comma (,) into rows, and then split each row on the tilde (~) into two different columns.
Like below:
declare #Remarks varchar(100) = 'Product1~2,Product2~1'
I have a split function. After using the split function:
select value from fn_split(#Remarks,',')
My result is
value
Product1~2
Product2~1
But I want result Like
value Qty
Product1 2
Product2 1
Disclaimer: You can use the fn_split function, I am just not using it because of my version of SQL.
I know of no way to split into separate columns other than manually, so you can use a couple of SUBSTRING calls to accomplish what you are trying to do.
-- Input list and the delimiter that separates its entries.
DECLARE #Remarks varchar(100) = 'Product1~2,Product2~1', #Delimiter VARCHAR(1) = ','
-- One row per 'name~quantity' entry.
DECLARE #Products TABLE(Product VARCHAR(MAX))
-- Recursive CTE that walks the delimiter positions:
--   startPosition = first character of the current entry (1-based)
--   endPosition   = position of the delimiter that ends it, 0 for the last entry
;WITH Split_CTE (startPosition, endPosition)
AS (
    SELECT CAST(1 AS INT) AS startPosition
          ,CHARINDEX(#Delimiter, #Remarks) AS endPosition
    UNION ALL
    SELECT endPosition + 1
          ,CHARINDEX(#Delimiter, #Remarks, endPosition + 1)
    FROM Split_CTE
    WHERE endPosition > 0
)
INSERT INTO #Products (Product)
-- For the last entry (endPosition = 0) read up to one position past the end of the string.
SELECT SUBSTRING(#Remarks, startPosition, COALESCE(NULLIF(endPosition, 0), LEN(#Remarks) + 1) - startPosition) AS [Data]
FROM Split_CTE
-- Split each entry on '~'. Appending '~' before searching means an entry
-- without a tilde yields the whole entry as value and an empty Qty.
SELECT LEFT([Product], CHARINDEX('~', [Product] + '~') - 1) AS value
      ,SUBSTRING([Product], CHARINDEX('~', [Product] + '~') + 1, LEN([Product])) AS Qty
FROM #Products
There's also a way to do this using XML that you might find interesting:
DECLARE #Remarks varchar(100) = 'Product1~2,Product2~1'
-- set up some variables for customizing the delimiters and parsing into XML
-- #str2 holds the generated markup, which is much longer than the input, so it
-- is varchar(max); a varchar(100) buffer would truncate it into invalid XML.
DECLARE #xml as xml
,#str as varchar(100)
,#str2 as varchar(max)
,#delimiter as varchar(10)
,#delimiter2 as varchar(10)
-- initialize using the values you provided
SET #delimiter ='~'
SET #delimiter2 =','
SET #str = #Remarks
-- convert your string to XML: <rec><val>name</val><qty>n</qty></rec> per entry
-- NOTE: the input is not XML-escaped, so values containing '<' or '&' will fail to parse.
SET #str2 = ('<val>'+replace(#str,#delimiter ,'</val><qty>')+'</qty>')
SET #xml = cast(('<rec>'+replace(#str2,#delimiter2 ,'</qty></rec><rec><val>')+'</rec>') as xml)
-- SQL using XQuery; varchar(100) matches the width of #Remarks so values are not cut off
SELECT
ref.value('val[1]', 'varchar(100)') AS value,
ref.value('qty[1]', 'varchar(100)') AS quantity
FROM #xml.nodes('/rec')
xmlData( ref )
And the result:
value quantity
---------- ----------
Product1 2
Product2 1
(2 row(s) affected)
I need multi-valued columns divided into single values
SOS_ID ALLOCATED_PART_NBR ALLOCATED_SALES_ITM ALLOCATED_QTY
523 500~5008~038~5008 2302~~007~5û005 1~1~~~1~2
Note: if no values between ~ delimiter it should insert empty string.
I want the output like this:
SOS_ID ALLOCATED_PART_NBR ALLOCATED_SALES_ITM ALLOCATED_QTY
523 500 2302 1
523 5008 '' 1
523 038 007 ''
523 5008 5û005 ''
523 ''/NULL ''/NULL 1
523 ''/NULL ''/NULL 2
So... here's a method I got to work for what you wanted. First, you need a table-valued function that will split a string into fields based on a delimiter, and which will pad out the number of rows returned to a specified length:
IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[SplitString]') AND type IN (N'FN', N'IF', N'TF', N'FS', N'FT'))
DROP FUNCTION [dbo].[SplitString]
GO
SET ANSI_NULLS ON
SET QUOTED_IDENTIFIER ON
GO
CREATE FUNCTION [dbo].[SplitString] (
#delimitedString nvarchar(4000),
#delimiter nvarchar(100),
#padRows int
)
/**************************************************************************
DESCRIPTION:
Accepts a delimited string and splits it at the specified
delimiter points. Returns the individual items as a table data
type with the ElementID field as the array index and the Element
field as the data
PARAMETERS:
#delimitedString - The string to be split
#delimiter - String containing the delimiter where
delimited string should be split
#padRows - If fewer rows than this were produced, empty-string
rows are appended until the result has this many
rows (NULL means no padding)
RETURNS:
Table data type containing array of strings that were split with
the delimiters removed from the source string
NOTES:
An empty element after a trailing delimiter is not emitted by the
split itself; with #padRows it is supplied by the padding.
USAGE:
SELECT ElementID, Element
FROM dbo.SplitString('11111,22222,3333', ',', NULL)
ORDER BY ElementID
***************************************************************************/
RETURNS #tblArray TABLE
(
    ElementID int IDENTITY(1,1),
    -- as wide as the input, so a single long element cannot overflow the column
    Element nvarchar(4000)
)
AS
BEGIN
    DECLARE #index int
    DECLARE #siStart int
    DECLARE #siDelSize int
    DECLARE #count int
    -- #count is the number of rows inserted so far; padding relies on it being exact.
    SET #count = 0;
    -- DATALENGTH / 2 = characters in an nvarchar value. LEN would report 0 for a
    -- delimiter made of spaces, and the loop below would then never advance.
    SET #siDelSize = DATALENGTH(#delimiter) / 2;
    --loop through source string and add elements to destination table array
    WHILE LEN(#delimitedString) > 0
    BEGIN
        SET #index = CHARINDEX(#delimiter, #delimitedString);
        IF #index = 0
        BEGIN
            -- no delimiter left: the remainder is the last element
            INSERT INTO #tblArray (Element) VALUES (#delimitedString);
            SET #count += 1;
            BREAK;
        END
        ELSE
        BEGIN
            INSERT INTO #tblArray (Element) VALUES (SUBSTRING(#delimitedString, 1, #index - 1));
            SET #count += 1;
            -- drop the element just read together with its delimiter
            SET #siStart = #index + #siDelSize;
            SET #delimitedString = SUBSTRING(#delimitedString, #siStart, LEN(#delimitedString) - #siStart + 1);
        END
    END
    -- pad with empty strings until exactly #padRows rows exist
    IF (#padRows IS NOT NULL)
        WHILE (#count < #padRows)
        BEGIN
            SET #count += 1;
            INSERT INTO #tblArray (Element) VALUES ('');
        END
    RETURN;
END
GO
Now you need a sample table with data to test this with (based on your question):
-- Test fixture: the single row from the question; each ALLOCATED_* column holds a '~'-delimited list.
CREATE TABLE TestTable (SOS_ID nvarchar(10),
ALLOCATED_PART_NBR nvarchar(400),
ALLOCATED_SALES_ITM nvarchar(400),
ALLOCATED_QTY nvarchar(400))
INSERT INTO TestTable (SOS_ID, ALLOCATED_PART_NBR, ALLOCATED_SALES_ITM, ALLOCATED_QTY)
VALUES ('523', '500~5008~038~5008', '2302~~007~5û005', '1~1~~~1~2')
Now, some code that will transform the data above into the result you wanted:
-- Step 1: find the largest number of fields in any of the three delimited columns.
-- (length) - (length with '~' removed) is the number of delimiters; + 1 gives the fields.
DECLARE #fieldCount int;
WITH TildeCounts AS (
SELECT LEN(ALLOCATED_PART_NBR) - LEN(REPLACE(ALLOCATED_PART_NBR, '~', '')) AS TildeCount
FROM TestTable t
UNION ALL
SELECT LEN( ALLOCATED_SALES_ITM) - LEN(REPLACE( ALLOCATED_SALES_ITM, '~', '')) AS TildeCount
FROM TestTable t
UNION ALL
SELECT LEN(ALLOCATED_QTY) - LEN(REPLACE(ALLOCATED_QTY, '~', '')) AS TildeCount
FROM TestTable t
) SELECT #fieldCount = MAX(TildeCount) + 1 FROM TildeCounts;
-- Step 2: split every column, padded to #fieldCount rows, and keep only the
-- combinations where all three element positions (ElementID) line up.
SELECT t.SOS_ID, a.Element AS [ALLOCATED_PART_NBR], b.Element AS [ALLOCATED_SALES_ITM], c.Element AS [ALLOCATED_QTY]
FROM TestTable t
CROSS APPLY dbo.SplitString(ALLOCATED_PART_NBR, '~', #fieldCount) a
CROSS APPLY dbo.SplitString(ALLOCATED_SALES_ITM, '~', #fieldCount) b
CROSS APPLY dbo.SplitString(ALLOCATED_QTY, '~', #fieldCount) c
WHERE a.ElementID = b.ElementID AND b.ElementID = c.ElementID
What this does: it first gets the maximum number of fields across all the strings (so it can pad out the ones that are shorter). It then selects from the table, CROSS APPLYing the function to each column and keeping only the rows where all the element IDs match (line up).
Convert the strings to xml, then select the nth node from each one.
SQL Fiddle Demo
-- Upper bound on the number of '~'-separated fields extracted per row.
DECLARE #max_field_count int = 6;
SELECT
SOS_ID
-- Each column: wrap every field in <a>...</a>, cast to XML, then read the i-th <a> node as text.
,ALLOCATED_PART_NBR = CAST(N'<a>'+REPLACE(ALLOCATED_PART_NBR ,'~','</a><a>')+'</a>' AS XML).query('(a)[sql:column("i")]').value('.','varchar(max)')
,ALLOCATED_SALES_ITM = CAST(N'<a>'+REPLACE(ALLOCATED_SALES_ITM,'~','</a><a>')+'</a>' AS XML).query('(a)[sql:column("i")]').value('.','varchar(max)')
,ALLOCATED_QTY = CAST(N'<a>'+REPLACE(ALLOCATED_QTY ,'~','</a><a>')+'</a>' AS XML).query('(a)[sql:column("i")]').value('.','varchar(max)')
FROM MyTable
-- n(i) supplies one row number per field position; spt_values is used only as a row source.
CROSS JOIN (SELECT TOP (#max_field_count) ROW_NUMBER() OVER(ORDER BY (SELECT 1)) FROM master.dbo.spt_values) n(i)
Have a Table with the CSV Values in the columns as below
ID Name text
1 SID,DOB 123,12/01/1990
2 City,State,Zip NewYork,NewYork,01234
3 SID,DOB 456,12/21/1990
What I need to get in this scenario is two tables as output, with the corresponding values:
ID SID DOB
1 123 12/01/1990
3 456 12/21/1990
ID City State Zip
2 NewYork NewYork 01234
Is there any way of achieving it using a Cursor or any other method in SQL server?
There are several ways that this can be done. One way that I would suggest would be to split the data from the comma separated list into multiple rows.
Since you are using SQL Server, you could implement a recursive CTE to split the data, then apply a PIVOT function to create the columns that you want.
-- Recursive CTE that walks the Name and text lists in step:
--   NameItem / textItem = the first item of each list
--   Name / text         = what remains after that item and its comma are removed
;with cte (id, NameItem, Name, textItem, text) as
(
-- anchor member: peel the first item off each list (appending ',' guarantees a match)
select id,
cast(left(Name, charindex(',',Name+',')-1) as varchar(50)) NameItem,
stuff(Name, 1, charindex(',',Name+','), '') Name,
cast(left(text, charindex(',',text+',')-1) as varchar(50)) textItem,
stuff(text, 1, charindex(',',text+','), '') text
from yt
union all
-- recursive member: repeat on the remainders while both still have content
select id,
cast(left(Name, charindex(',',Name+',')-1) as varchar(50)) NameItem,
stuff(Name, 1, charindex(',',Name+','), '') Name,
cast(left(text, charindex(',',text+',')-1) as varchar(50)) textItem,
stuff(text, 1, charindex(',',text+','), '') text
from cte
where Name > ''
and text > ''
)
-- keep only the SID/DOB pairs and pivot the names into columns, writing the result to table1
select id, SID, DOB
into table1
from
(
select id, nameitem, textitem
from cte
where nameitem in ('SID', 'DOB')
) d
pivot
(
max(textitem)
for nameitem in (SID, DOB)
) piv;
See SQL Fiddle with Demo. The recursive version will work great but if you have a large dataset, you could have some performance issues so you could also use a user defined function to split the data:
create FUNCTION [dbo].[Split](#String1 varchar(MAX), #String2 varchar(MAX), #Delimiter char(1))
returns #temptable TABLE (colName varchar(MAX), colValue varchar(max))
as
begin
    /*
      Splits two delimited lists in step and returns one row per item of
      #String1 (colName) paired with the item at the same position of #String2
      (colValue).

      #String1   : list of names; NULL or empty returns no rows. Empty names are skipped.
      #String2   : list of values. If it has fewer items than #String1, the
                   missing values are returned as NULL.
      #Delimiter : single-character separator used by both lists.
    */
    declare #idx1 int
    -- varchar(max) so long items are not silently cut at 8000 characters
    declare #slice1 varchar(max)
    declare #idx2 int
    declare #slice2 varchar(max)
    select #idx1 = 1
    if len(#String1)<1 or #String1 is null return
    while #idx1 != 0
    begin
        set #idx1 = charindex(#Delimiter,#String1)
        set #idx2 = charindex(#Delimiter,#String2)

        -- next name: up to the delimiter, or the whole remainder for the last item
        if #idx1 != 0
            set #slice1 = left(#String1,#idx1 - 1)
        else
            set #slice1 = #String1

        if #idx1 != 0 and #idx2 != 0
        begin
            -- both lists have a further item: take the value and advance #String2
            set #slice2 = left(#String2,#idx2 - 1)
            set #String2 = right(#String2,len(#String2) - #idx2)
        end
        else
        begin
            -- last name, or #String2 has no delimiter left: use what remains.
            -- Previously LEFT(#String2, -1) raised an error when #String2 ran out first.
            set #slice2 = #String2
            -- values exhausted: later names get NULL
            if #idx2 = 0 set #String2 = null
        end

        if(len(#slice1)>0)
            insert into #temptable(colName, colValue) values(#slice1, #slice2)

        set #String1 = right(#String1,len(#String1) - #idx1)
        if len(#String1) = 0 break
    end
    return
end;
Then you can use a CROSS APPLY to get the result for each row:
-- Split each row's name/text lists into (colname, colvalue) pairs, keep the
-- SID/DOB pairs, and pivot the names into columns; the result is written to table1.
select id, SID, DOB
into table1
from
(
select t.id,
c.colname,
c.colvalue
from yt t
cross apply dbo.split(t.name, t.text, ',') c
where c.colname in ('SID', 'DOB')
) src
pivot
(
max(colvalue)
for colname in (SID, DOB)
) piv;
See SQL Fiddle with Demo
You'd need to approach this as a multi-step ETL project. I'd probably start with exporting the two types of rows into a couple staging tables. So, for example:
-- Two ways to separate the row types: by the first character of [text], or by the Name prefix.
select * from yourtable /* rows that start with a number */
where substring(text,1,1) in
('0','1','2','3','4','5','6','7','8','9')
select * from yourtable /* rows that don't start with a number */
where substring(text,1,1)
not in ('0','1','2','3','4','5','6','7','8','9')
/* or simply this to follow your example explicitly */
select * from yourtable where name like 'sid%'
select * from yourtable where name like 'city%'
Once you get the two types separated then you can split them out with one of the already written split functions found readily out on the interweb.
Aaron Bertrand (who is on here often) has written up a great post on the variety of ways to split comma-delimited strings using SQL. Each of the methods is compared and contrasted here.
http://www.sqlperformance.com/2012/07/t-sql-queries/split-strings
If your row count is minimal (under 50k, let's say) and it's going to be a one-time operation, then pick the easiest way and don't worry too much about all the performance numbers.
If you have a ton of rows or this is an ETL process that will run all the time then you'll really want to pay attention to that stuff.
A simple solution using cursors to build temporary tables. This has the limitation of making all columns VARCHAR and would be slow for large amounts of data.
--** Set up example data
DECLARE #Source TABLE (ID INT, Name VARCHAR(50), [text] VARCHAR(200));
INSERT INTO #Source
(ID, Name, [text])
VALUES (1, 'SID,DOB', '123,12/01/1990')
, (2, 'City,State,Zip', 'NewYork,NewYork,01234')
, (3, 'SID,DOB', '456,12/21/1990');
--** Declare variables
DECLARE #Name VARCHAR(200) = '';
DECLARE #Text VARCHAR(1000) = '';
DECLARE #SQL VARCHAR(MAX);
--** Set up cursor for the tables: one iteration per distinct column list
DECLARE cursor_table CURSOR FAST_FORWARD READ_ONLY FOR
SELECT s.Name
FROM #Source AS s
GROUP BY Name;
OPEN cursor_table
FETCH NEXT FROM cursor_table INTO #Name;
WHILE ##FETCH_STATUS = 0
BEGIN
    --** Dynamically create a temp table with the specified columns.
    --** Every column name is wrapped in [ ] (with ] doubled) so that names
    --** containing spaces, keywords or stray SQL cannot break the statement.
    SET #SQL = 'CREATE TABLE ##Table ([' + REPLACE(REPLACE(#Name, ']', ']]'), ',', '] VARCHAR(50),[') + '] VARCHAR(50));';
    EXEC(#SQL);
    --** Set up cursor to insert the rows that share this column list
    DECLARE row_cursor CURSOR FAST_FORWARD READ_ONLY FOR
    SELECT s.Text
    FROM #Source AS s
    WHERE Name = #Name;
    OPEN row_cursor;
    FETCH NEXT FROM row_cursor INTO #Text;
    WHILE ##FETCH_STATUS = 0
    BEGIN
        --** Dynamically insert the row. Single quotes in the data are doubled
        --** first, so a value such as O'Brien stays a literal and cannot
        --** terminate the string.
        SELECT #SQL = 'INSERT INTO ##Table VALUES (''' + REPLACE(REPLACE(#Text, '''', ''''''), ',', ''',''') + ''');';
        EXEC(#SQL);
        FETCH NEXT FROM row_cursor INTO #Text;
    END
    --** Display the table
    SELECT *
    FROM ##Table;
    --** Housekeeping
    CLOSE row_cursor;
    DEALLOCATE row_cursor;
    DROP TABLE ##Table;
    -- Fetch last, so the outer WHILE tests the status of cursor_table rather than row_cursor.
    FETCH NEXT FROM cursor_table INTO #Name;
END
CLOSE cursor_table;
DEALLOCATE cursor_table;
I am trying to compare a database field which stores list items (comma separated) with unfortunately a variable which is also a list item.
Example:
In this case, a user can belong to multiple groups, and content access is also allocated to multiple groups.
contentid | group
(1) (c,d)
(2) (a,c)
(3) (b)
So, I need to select all content where user is in group (a,c). In this case, contentid 1,2 should be returned.
Here's a safe but slow solution for SQL 2008
BEGIN
-- setup
DECLARE #tbl TABLE (
[contentid] INT
,[group] VARCHAR(MAX)
)
INSERT INTO #tbl VALUES
(1, 'c,d')
,(2, 'a,c')
,(3, 'd')
-- send your request as simple xml
DECLARE #param XML
SET #param = '<g>a</g><g>c</g>'
-- query
-- One row per requested group (<g> node). Wrapping both the stored list and the
-- requested name in commas makes LIKE match whole list items only.
-- DISTINCT removes content that matches more than one requested group.
SELECT DISTINCT contentid
FROM #tbl t
INNER JOIN #param.nodes('/g') AS t2(g)
ON ',' + t.[group] + ',' LIKE '%,' + t2.g.value('.', 'varchar(max)') + ',%'
END
You just pass your query in as an XML snippet instead of a comma separated list.
If your group names are single characters, or you can be sure that no name is a substring of another (e.g. GroupA and GroupAB), then the join condition can be simplified to:
ON t.[group] LIKE '%' + t2.g.value('.', 'varchar(max)') + '%'
If you're using an RDBMS without XML parsing capability, you'll have to split your query string into a temp table and work with it that way.
You really should not be using comma separated values inside your columns. It would be much better if the [group] column only contained one value and you had repeated entries with a UNIQUE constraint on the composite (contentid, group).
You might find this question and answer useful : How do I split a string so I can access item x?
Or you could always use something like this :
create function SplitString(
    #string varchar(max),
    #delimiter char(1)
)
returns #items table (item varchar(max))
as
begin
    /*
      Splits #string on the single character #delimiter and returns one row per item.

      #string    : text to split; NULL returns one NULL row, '' returns one empty row.
      #delimiter : separator character; NULL falls back to ','.

      Empty items are returned too, including the one after a trailing delimiter
      ('a,' gives 'a' and '').
    */
    if (#delimiter is null) set #delimiter = ','

    -- #start: 1-based position where the current item begins
    -- #pos  : position of the next delimiter at or after #start (0 = none left)
    declare #start int set #start = 1
    declare #pos int set #pos = charindex(#delimiter, #string, #start)

    while (#pos > 0) begin
        insert into #items (item)
        select substring(#string, #start, #pos - #start)
        set #start = #pos + 1
        set #pos = charindex(#delimiter, #string, #start)
    end

    --last item (or only if there were no delimiters); DATALENGTH is merely an
    --upper bound for the length, so SUBSTRING returns everything that is left
    insert into #items (item)
    select substring(#string, #start, datalength(#string))
    return
end
go
-- Sample content rows; [group] holds a comma-separated list of group names.
declare #content table(contentid int, [group] varchar(max))
insert into #content (contentid, [group])
select 1, 'c,d'
union all
select 2, 'a,c'
union all
select 3, 'b'
-- The user's groups, split once into a table variable.
declare #groups varchar(max) set #groups = 'a,c'
declare #grouptable table(item varchar(max))
insert into #grouptable (item)
select s.item from dbo.SplitString(#groups, ',') as s
-- Return content that shares at least one group with the user.
-- EXISTS stops at the first shared group instead of counting every match.
select c.contentid, c.[group]
from #content as c
where exists (
    select 1
    from #grouptable as g1
    inner join dbo.SplitString(c.[group], ',') as g2
        on g1.item = g2.item
)