Table from comma separated Column at two level - sql

I have one old db in which there are two columns which contain comma separated values like this,
SQL FIDDLE LINK for SCHEMA
Now my problem is that I am trying to import those values into another database which is normalized. So instead of comma separated values, I need to convert those values into a tabular format .
So my output should be look like this,

You need to define what those columns mean. In your example are you discarding the original ID column, in which case what does "1,2,3" & "A,B" actually mean?
I'd probably approach this by cursoring through each row and using a split function to convert each field to a table of values.
create FUNCTION dbo.fn_Split1 (#sep nchar(1), #s nvarchar(4000))
RETURNS table
/**************************************************************************************************
* Author: http://stackoverflow.com/questions/314824/
* Description: splits a string into a table of values, with single-char delimiter.
* Example Usage:
select * from dbo.fn_split1(',', '1,2,5,2,,dggsfdsg,456,df,1,2,5,2,,dggsfdsg,456,df,1,2,5,2,,')
**************************************************************************************************/
AS
RETURN (
WITH Pieces(pn, start, stop) AS (
SELECT 1, 1, CHARINDEX(#sep, #s)
UNION ALL
SELECT pn + 1, stop + 1, CHARINDEX(#sep, #s, stop + 1)
FROM Pieces
WHERE stop > 0
)
SELECT pn,
SUBSTRING(#s, start, CASE WHEN stop > 0 THEN stop-start ELSE 4000 END) AS s
FROM Pieces
)

CREATE TABLE #RegionDetail
(
Id int identity(1,1) not null,
RegionId nvarchar(50),
Zone nvarchar(50)
)
INSERT INTO #RegionDetail (RegionId,Zone) values ('1,2,3','A,B')
INSERT INTO #RegionDetail (RegionId,Zone) values ('1,2,3','X,Y')
INSERT INTO #RegionDetail (RegionId,Zone) values ('4,3,5','A,B')
GO
create FUNCTION [dbo].[Split](#String varchar(MAX), #Delimiter char(1))
returns #temptable TABLE (items varchar(MAX))
as
begin
declare #idx int
declare #slice varchar(8000)
select #idx = 1
if len(#String)<1 or #String is null return
while #idx!= 0
begin
set #idx = charindex(#Delimiter,#String)
if #idx!=0
set #slice = left(#String,#idx - 1)
else
set #slice = #String
if(len(#slice)>0)
insert into #temptable(Items) values(#slice)
set #String = right(#String,len(#String) - #idx)
if len(#String) = 0 break
end
return
end;
GO
SELECT Id,RegionId,Zone FROM #RegionDetail
select
r.Id,f.items as RegionId,z.items as Zone
from
#RegionDetail r
cross apply [dbo].[Split](r.RegionId,',') f
cross apply [dbo].[Split](r.Zone,',') z
order by Id,RegionId,Zone

Related

T-SQL query: find most frequent pair of values

I have text column with numeric values separated by semicolons. I'm trying to figure out how to get the most frequent pair of values that appeared together in the same row. I've found a solution for a very similar problem in Python Finding the most frequent occurrences of pairs in a list of lists, but I don't know how to rewrite it in using SQL In example below it returns 2 and 3 because this pair appeared 3 times in the input set:
Input rows Output
---------- -------
';1;2;3;5' | '2;3'
';2;3' | '1;2'
';3;4;5;1;2' | '1;3'
';1;5;2' | '1;5'
Orginal data:
You may try with the following approach. First, using OPENJSON(), get all possible combinations. When OPENJSON parses a JSON array the indexes of the elements in the JSON text are returned as keys (0-based). Then, count the most frequent pair with DENSE_RANK().
Input:
CREATE TABLE #Items (
Id int,
ItemValues varchar(max)
)
INSERT INTO #Items
(Id, ItemValues)
VALUES
(1, '1;2;3;5'),
(2, '2;3'),
(3, '3;4;5;1;2'),
(4, '1;5;2')
Statement:
;WITH combinationsCTE AS (
SELECT
CASE
WHEN s1.[value] <= s2.[value] THEN CONCAT(s1.[value], ';', s2.[value])
ELSE CONCAT(s2.[value], ';', s1.[value])
END AS PairValue
FROM #Items i
CROSS APPLY (SELECT [key], [value] FROM OPENJSON('["' + REPLACE(i.ItemValues,';','","') + '"]')) s1
CROSS APPLY (SELECT [key], [value] FROM OPENJSON('["' + REPLACE(i.ItemValues,';','","') + '"]')) s2
WHERE (s1.[key] < s2.[key])
), rankingCTE AS (
SELECT
PairValue,
DENSE_RANK() OVER (ORDER BY COUNT(PairValue) DESC) AS PairRank
FROM combinationsCTE
GROUP BY PairValue
)
SELECT PairValue
FROM rankingCTE
WHERE PairRank = 1
Output:
PairValue
1;2
1;5
2;3
2;5
First have a split function
CREATE FUNCTION Splitfn(#String varchar(8000), #Delimiter char(1))
returns #temptable TABLE (items varchar(8000))
as
begin
declare #idx int
declare #slice varchar(8000)
select #idx = 1
if len(#String)<1 or #String is null return
while #idx!= 0
begin
set #idx = charindex(#Delimiter,#String)
if #idx!=0
set #slice = left(#String,#idx - 1)
else
set #slice = #String
if(len(#slice)>0)
insert into #temptable(Items) values(#slice)
set #String = right(#String,len(#String) - #idx)
if len(#String) = 0 break
end
return
end
Second Step To get all rows in a single string
Declare #val Varchar(MAX);
Select #val = COALESCE(#val + '; ' + YourColumn, YourColumn)
From YourTable
Third step,
SELECT TOP 1 items, count(*)
FROM dbo.Splitfn(#Val, ';')
WHERE LTRIM(RTRIM(items)) <> ''
GROUP BY items
ORDER BY Count(*) DESC

How to compare two multiple values columns in SQLSERVER

I have a column that contains some value like : ;1;3;7;2;
And another column with values like : ;5;2;3;
I need to know if at least one of the number in the second column (5,2 or 3) is contained in the first column.
Of course this is an example, I have to do it for several records.
Do you have an idea ?
Here is my code :
SELECT *
FROM COMPANIES
WHERE F_SKILLS IN F_CONVENTION
Check This.
Using below query you can find all common numbers appeared in both columns.
First Create Function "[SplitLongString]":
create FUNCTION [dbo].[SplitLongString]
(
#DelimitedString VARCHAR(MAX),
#Delimiter VARCHAR(100)
)
RETURNS
#tblArray TABLE
(
ElementID INT IDENTITY(1,1),
Element VARCHAR(1000)
)
AS
BEGIN
DECLARE
#siIndex INT,
#siStart INT,
#siDelSize INT
SET #siDelSize = LEN(#Delimiter)
--loop through source string and add elements to destination table array
WHILE LEN(#DelimitedString) > 0
BEGIN
SET #siIndex = CHARINDEX(#Delimiter, #DelimitedString)
IF #siIndex = 0
BEGIN
INSERT INTO #tblArray VALUES(#DelimitedString)
BREAK
END
ELSE
BEGIN
INSERT INTO #tblArray VALUES(SUBSTRING(#DelimitedString, 1,#siIndex - 1))
SET #siStart = #siIndex + #siDelSize
SET #DelimitedString = SUBSTRING(#DelimitedString, #siStart , LEN(#DelimitedString) - #siStart + 1)
END
END
RETURN
END
after you can cross apply to seprate out commo or semi colon. You will get common element under column element. use these column for your further use.
select A.*,y.Element common_element--,X.Element
from #COMPANIES A
CROSS APPLY SplitLongString(F_SKILLS,';') y
CROSS APPLY SplitLongString(F_CONVENTION,';') X
where x.Element=y.Element and ( X.Element!=' ' or X.Element!= null)
Output :
let us know if you have any query.
You can use default function that is dbo.Split('5;2;3;',',')
if you don't have this function you can create your own
Create Function
CREATE FUNCTION SplitString
(
#Input NVARCHAR(MAX),
#Character CHAR(1)
)
RETURNS #Output TABLE (
Item NVARCHAR(1000)
)
AS
BEGIN
DECLARE #StartIndex INT, #EndIndex INT
SET #StartIndex = 1
IF SUBSTRING(#Input, LEN(#Input) - 1, LEN(#Input)) <> #Character
BEGIN
SET #Input = #Input + #Character
END
WHILE CHARINDEX(#Character, #Input) > 0
BEGIN
SET #EndIndex = CHARINDEX(#Character, #Input)
INSERT INTO #Output(Item)
SELECT SUBSTRING(#Input, #StartIndex, #EndIndex - 1)
SET #Input = SUBSTRING(#Input, #EndIndex + 1, LEN(#Input))
END
RETURN
END
GO
after creating function you can add Condition to your Query
select * from yourTableName tbl where (select * from dbo.SplitString(tbl.YourColumnWithSemicoluns,';')) in (select * from dbo.SplitString('5;2;3;',';'))
If you're using SQL Server 2016, check out the string_split function.
Assuming you're not, you can create a split function as per previous answer.
If this isn't an option you can do it with a CTE, but it is likely to be inefficient if you have a large dataset.
create table test(col1 varchar(100), col2 varchar(100));
insert into test values ('a;b;c', 'c;d;e'),('a;b;c','d;e;f'), ('a;b;c', 'b;a;d')
;WITH SplitSting AS
(
SELECT
col1, col2, LEFT(col1,CHARINDEX(';',col1)-1) AS value
,RIGHT(col1,LEN(col1)-CHARINDEX(';',col1)) AS remainder
FROM test
WHERE col1 IS NOT NULL AND CHARINDEX(';',col1)>0
UNION ALL
SELECT
col1, col2,LEFT(remainder,CHARINDEX(';',remainder)-1)
,RIGHT(remainder,LEN(remainder)-CHARINDEX(';',remainder))
FROM SplitSting
WHERE remainder IS NOT NULL AND CHARINDEX(';',remainder)>0
UNION ALL
SELECT
col1, col2,remainder,null
FROM SplitSting
WHERE remainder IS NOT NULL AND CHARINDEX(';',remainder)=0
)
SELECT distinct col1, col2 FROM SplitSting
where ';'+col2+';' like '%;'+value+';%'
If it's a finite set of just a few numbers, you might be able to get away with something as simple as:
SELECT * FROM companies
WHERE (f_skills LIKE '%;1;%' AND f_convention LIKE '%;1;%')
OR (f_skills LIKE '%;2;%' AND f_convention LIKE '%;2;%')
OR (f_skills LIKE '%;3;%' AND f_convention LIKE '%;3;%')
...
If that doesn't work... Well, looks like some of the other answers on the page may be bit more comprehensive... Almost embarrassingly so. Although, if the numbers are really just 1-9 like the question suggests, I stand by my answer. :) I know it looks a little pitiful in comparison, but, seriously, it just might work! If not, I'd start with the one from Mr. Bhosale.

Reverse Concat - Split function

I have a table and it has a value column that lists data as:
Row 1: '00','01','02','03'
Row 2: '03','02','09','08'
I have a couple of split functions
FUNCTION [dbo].[udf_Split](#String varchar(MAX), #Delimiter char(1))
returns #temptable TABLE (Item varchar(MAX))
as
begin
declare #idx int
declare #slice varchar(8000)
select #idx = 1
if len(#String)<1 or #String is null return
while #idx<>0
begin
set #idx = charindex(#Delimiter,#String)
if #idx!=0
set #slice = left(#String,#idx - 1)
else
set #slice = #String
if(len(#slice)>0)
insert into #temptable(Item) values(#slice)
set #String = right(#String,len(#String) - #idx)
if len(#String) = 0 break
end return end;
I'm trying to create a view of the table, with that column and then I'd like my view results to be a list of rows that have each value broken to its own row (and distinct) So would look like: (the tics can stay or go, don't care about them right now)
Row 1: 00
Row 2: 01
Row 3: 02
Row 4: 03
My view is pretty much a:
SELECT DISTINCT VALUE FROM TABLE
cross apply dbo.split(Value, ',') as Item
But it's not working. Can someone lend me some direction on how I should work this?
This is because you're SELECTing the field VALUE instead of Item.Item. You should do this:
SELECT DISTINCT x.Item
FROM TABLE
CROSS APPLY dbo.split(Value, ',') AS x
Additionally, your dbo.split function is not optimal. There are number of ways to split a string in a set-based fashion, instead of RBAR. Here is one way using XML:
CREATE FUNCTION dbo.SplitStrings_XML
(
#List NVARCHAR(MAX),
#Delimiter NVARCHAR(255)
)
RETURNS TABLE
WITH SCHEMABINDING
AS
RETURN
(
SELECT Item = y.i.value('(./text())[1]', 'nvarchar(4000)')
FROM
(
SELECT x = CONVERT(XML, '<i>'
+ REPLACE(#List, #Delimiter, '</i><i>')
+ '</i>').query('.')
) AS a CROSS APPLY x.nodes('i') AS y(i)
);
GO
Sample usage:
;WITH CteData(Value) AS(
SELECT '''00'',''01'',''02'',''03''' UNION ALL
SELECT '''03'',''02'',''09'',''08'''
)
SELECT DISTINCT x.Item
FROM CteData d
CROSS APPLY dbo.SplitStrings_XML(d.Value, ',') x
Result:
Item
--------
'00'
'01'
'02'
'03'
'08'
'09'
For other string splitter, you can read this article by Aaron Bertrand.

SQL Server - Compare Varchar values using IN

In my table, I have a varchar column whereby multi-values are stored. An example of my table:
RecNum | Title | Category
-----------------------------------------
wja-2012-000001 | abcdef | 4,6
wja-2012-000002 | qwerty | 1,3,7
wja-2012-000003 | asdffg |
wja-2012-000004 | zxcvbb | 2,7
wja-2012-000005 | ploiuh | 3,4,12
The values in the Category column points to another table.
How can I return the relevant rows if I want to retrieve the rows with value 1,3,5,6,8 in the Category column?
When I tried using IN, I get the 'Conversion failed when converting the varchar value '1,3,5,6,8' to data type int' error.
Breaking the Categories out into a separate table would be a better design if that's a change you can make... otherwise, you could create a function to split the values into a table of integers like this:
CREATE FUNCTION dbo.Split(#String varchar(8000), #Delimiter char(1))
returns #temptable TABLE (id int)
as
begin
declare #idx int
declare #slice varchar(8000)
select #idx = 1
if len(#String)<1 or #String is null return
while #idx!= 0
begin
set #idx = charindex(#Delimiter,#String)
if #idx!=0
set #slice = left(#String,#idx - 1)
else
set #slice = #String
if(len(#slice)>0)
insert into #temptable(id) values(convert(int, #slice))
set #String = right(#String,len(#String) - #idx)
if len(#String) = 0 break
end
return
end
Then call it from your query:
SELECT ...
FROM ...
WHERE #SomeID IN (SELECT id FROM dbo.Split(Category, ','))
Or if you're looking to provide a list of categories as an input parameter (such as '1,3,5,6,8'), and return all records in your table that contain at least one of these values, you could use a query like this:
SELECT ...
FROM ...
WHERE
EXISTS (
select 1
from dbo.Split(Category, ',') s1
join dbo.Split(#SearchValues, ',') s2 ON s1.id = s2.id
)
you can do like this
declare #var varchar(30); set #var='2,3';
exec('select * from category where Category_Id in ('+#var+')')
Try this solution:
CREATE TABLE test4(RecNum varchar(20),Title varchar(10),Category varchar(15))
INSERT INTO test4
VALUES('wja-2012-000001','abcdef','4,6'),
('wja-2012-000002','qwerty','1,3,7'),
('wja-2012-000003','asdffg',null),
('wja-2012-000004','zxcvbb','2,7'),
('wja-2012-000005','ploiuh','3,4,12')
select * from test4
Declare #str varchar(25) = '1,3,5,6,8'
;WITH CTE as (select RecNum,Title,Category from test4)
,CTE1 as (
select RecNum,Title,RIGHT(#str,LEN(#str)-CHARINDEX(',',#str,1)) as rem from CTE where category like '%'+LEFT(#str,1)+'%'
union all
select c.RecNum,c.Title,RIGHT(c1.rem,LEN(c1.rem)-CHARINDEX(',',c1.rem,1)) as rem from CTE1 c1 inner join CTE c
on c.category like '%'+LEFT(c1.rem,1)+'%' and CHARINDEX(',',c1.rem,1)>0
)
select RecNum,Title from CTE1
As mentioned by others, your table design violates basic database design principles and if there is no way around it, you could normalize the table with little code (example below) and then join away with the other table. Here you go:
Data:
CREATE TABLE data(RecNum varchar(20),Title varchar(10),Category varchar(15))
INSERT INTO data
VALUES('wja-2012-000001','abcdef','4,6'),
('wja-2012-000002','qwerty','1,3,7'),
('wja-2012-000003','asdffg',null),
('wja-2012-000004','zxcvbb','2,7'),
('wja-2012-000005','ploiuh','3,4,12')
This function takes a comma separated string and returns a table:
CREATE FUNCTION listToTable (#list nvarchar(MAX))
RETURNS #tbl TABLE (number int NOT NULL) AS
BEGIN
DECLARE #pos int,
#nextpos int,
#valuelen int
SELECT #pos = 0, #nextpos = 1
WHILE #nextpos > 0
BEGIN
SELECT #nextpos = charindex(',', #list, #pos + 1)
SELECT #valuelen = CASE WHEN #nextpos > 0
THEN #nextpos
ELSE len(#list) + 1
END - #pos - 1
INSERT #tbl (number)
VALUES (convert(int, substring(#list, #pos + 1, #valuelen)))
SELECT #pos = #nextpos
END
RETURN
END
Then, you can do something like this to "normalize" the table:
SELECT *
FROM data m
CROSS APPLY listToTable(m.Category) AS t
where Category is not null
And then use the result of the above query to join with the "other" table. For example (i did not test this query):
select * from otherTable a
join listToTable('1,3,5,6,8') b
on a.Category = b.number
join(
SELECT *
FROM data m
CROSS APPLY listToTable(m.Category) AS t
where Category is not null
) c
on a.category = c.number

SQL why wont this let me select a blank value and a value?

I have the following UDF.
CREATE FUNCTION [dbo].[udf_GenerateVarcharTableFromStringList]
(#list varchar(MAX),
#delimiter char(1) = N',')
RETURNS #tbl TABLE ([Value] varchar(200))
WITH SCHEMABINDING
AS
BEGIN
DECLARE #chrind INT
DECLARE #Piece nvarchar(4000)
SELECT #chrind = 1
WHILE #chrind > 0
BEGIN
SELECT #chrind = CHARINDEX(#delimiter,#list)
IF #chrind > 0
SELECT #Piece = LEFT(#list,#chrind - 1)
ELSE
SELECT #Piece = #list
INSERT #tbl([Value]) VALUES(#Piece)
SELECT #list = RIGHT(#list,LEN(#list) - #chrind)
IF LEN(#list) = 0 BREAK
END
RETURN
END
I call this function with the following code example from a where clause in my sprocs:
WHERE u.[Owner] IN
(SELECT [VALUE]
FROM dbo.udf_GenerateVarcharTableFromStringList(#Owners, ','))
I use this where statement in sprocs that I use for reporting services multivalue parameters. When a user selects just blank for the value it returns the correct data but if a user selects a blank value and another value that actually has text it does not return the blank value owners anymore, just the owners from the text? Any ideas? I take it something is wrong with the function?
Well, from what I could gather from your comments it seems that you need a new splitting function. Here's what I use:
create FUNCTION [dbo].[fnSplitString] (#s varchar(512),#sep char(1))
RETURNS table
AS
RETURN (
WITH Pieces(pn, start, stop) AS (
SELECT 1, 1, CHARINDEX(#sep, #s)
UNION ALL
SELECT pn + 1, stop + 1, CHARINDEX(#sep, #s, stop + 1)
FROM Pieces
WHERE stop > 0
)
SELECT pn,
SUBSTRING(#s, start, CASE WHEN stop > 0 THEN stop-start ELSE 512 END) AS s
FROM Pieces
)
It has a nice enumeration feature to it and it doesn't eat blanks. :)