How to insert a hyphen between blocks of alpha and numeric characters - sql

I need to insert hyphens between blocks of alpha and numeric text in a string.
I am not even sure how to start on this problem.
ABC123 -> ABC-123
ABC123XYZ -> ABC-123-XYZ
D123 -> D-123
123C -> 123-C

This will work for a single value.
-- Insert a hyphen at every boundary between a run of digits and a run of
-- non-digit characters in @CODE, e.g. '12ABC123XYZ' -> '12-ABC-123-XYZ'.
--
-- The string is walked one character at a time so the output order is
-- deterministic (concatenating with SELECT @v = @v + ... over a row set has
-- no guaranteed order). Digits are detected with LIKE '[0-9]' rather than
-- ISNUMERIC, because ISNUMERIC also returns 1 for characters such as
-- '.', ',', '+', '-' and '$'.
DECLARE @CODE VARCHAR(50) = '12ABC123XYZ'
    ,@NEWCODE VARCHAR(100) = ''
    ,@POS INT = 1
    ,@CH CHAR(1)
    ,@IS_DIGIT BIT
    ,@PREV_IS_DIGIT BIT;   -- stays NULL until the first character is processed

-- LEN ignores trailing spaces, so trailing blanks in @CODE are not copied.
WHILE @POS <= LEN(@CODE)
BEGIN
    SET @CH = SUBSTRING(@CODE, @POS, 1);
    SET @IS_DIGIT = CASE WHEN @CH LIKE '[0-9]' THEN 1 ELSE 0 END;

    -- A change of character class relative to the previous character marks
    -- the start of a new block.
    IF @POS > 1 AND @IS_DIGIT <> @PREV_IS_DIGIT
        SET @NEWCODE = @NEWCODE + '-';

    SET @NEWCODE = @NEWCODE + @CH;
    SET @PREV_IS_DIGIT = @IS_DIGIT;
    SET @POS = @POS + 1;
END;

SELECT @NEWCODE;
Result : 12-ABC-123-XYZ
If you want this to work on a table column, you need to create a scalar function.
-- dbo.CODE_SPLIT
-- Returns @CODE with a hyphen inserted at every boundary between a run of
-- digits and a run of non-digit characters:
--   'ABC123' -> 'ABC-123', 'ABC123XYZ' -> 'ABC-123-XYZ', '123C' -> '123-C'.
--
-- Parameters:
--   @CODE  the text to split (up to 50 characters).
-- Returns:
--   VARCHAR(100); '' when @CODE is NULL or empty.
--
-- Notes:
--   * The string is walked character by character so the output order is
--     deterministic; concatenating with SELECT @v = @v + ... over a row set
--     has no guaranteed order.
--   * Digits are detected with LIKE '[0-9]'. ISNUMERIC is not used because it
--     also returns 1 for characters such as '.', ',', '+', '-' and '$'.
--   * LEN ignores trailing spaces, so trailing blanks are not copied.
CREATE FUNCTION dbo.CODE_SPLIT
(
    @CODE VARCHAR(50)
)
RETURNS VARCHAR(100)
AS
BEGIN
    DECLARE @NEWCODE VARCHAR(100) = ''
        ,@POS INT = 1
        ,@CH CHAR(1)
        ,@IS_DIGIT BIT
        ,@PREV_IS_DIGIT BIT;   -- stays NULL until the first character is processed

    WHILE @POS <= LEN(@CODE)
    BEGIN
        SET @CH = SUBSTRING(@CODE, @POS, 1);
        SET @IS_DIGIT = CASE WHEN @CH LIKE '[0-9]' THEN 1 ELSE 0 END;

        -- A change of character class starts a new block.
        IF @POS > 1 AND @IS_DIGIT <> @PREV_IS_DIGIT
            SET @NEWCODE = @NEWCODE + '-';

        SET @NEWCODE = @NEWCODE + @CH;
        SET @PREV_IS_DIGIT = @IS_DIGIT;
        SET @POS = @POS + 1;
    END;

    RETURN @NEWCODE
END
GO
And call it on your actual table
Schema:
-- Build a throw-away sample table holding the four codes from the question.
SELECT SAMPLE.CODE
INTO #TAB
FROM (
              SELECT 'ABC123' AS CODE
    UNION ALL SELECT 'ABC123XYZ'
    UNION ALL SELECT 'D123'
    UNION ALL SELECT '123C'
) AS SAMPLE

-- Show each code next to its hyphenated form.
SELECT T.CODE,
       dbo.CODE_SPLIT(T.CODE) AS NEWCODE
FROM #TAB AS T
Result:
+-----------+-------------+
| CODE | NEWCODE |
+-----------+-------------+
| ABC123 | ABC-123 |
| ABC123XYZ | ABC-123-XYZ |
| D123 | D-123 |
| 123C | 123-C |
+-----------+-------------+

patindex('%[0-9]%', s) returns the index of the first digit in s.
patindex('%[^0-9]%', s) returns the index of the first non-digit character in s.
You could use recursive CTE and PATINDEX like this.
-- Split every TextValue into alternating digit / non-digit blocks joined by
-- hyphens, using a recursive CTE that peels one block off the front of
-- CurrentText per recursion level and appends it to Result.
DECLARE @SampleData AS TABLE
(
    TextValue varchar(100)
)

INSERT INTO @SampleData (TextValue)
VALUES ('ABC124'), ('ABC123XYZ'), ('123C'), ('ABC'), ('1A2B3C')

;WITH cte AS
(
    -- Anchor: nothing consumed yet.
    -- CurrentText is right-trimmed because LEN ignores trailing spaces while
    -- RIGHT counts them; mixing the two on padded input loses the last block.
    -- Result is varchar(200): 100 one-character blocks, each with a leading '-'.
    SELECT sd.TextValue AS RootText,
           CAST(RTRIM(sd.TextValue) AS varchar(100)) AS CurrentText,
           CAST('' AS varchar(200)) AS Result
    FROM @SampleData sd

    UNION ALL

    SELECT c.RootText,
           -- Remainder after removing the leading block.
           CAST(
               CASE
                   -- Only one kind of character left: this is the last block.
                   WHEN patindex('%[0-9]%', c.CurrentText) = 0 OR patindex('%[^0-9]%', c.CurrentText) = 0
                       THEN ''
                   -- Text starts with non-digits: keep from the first digit onward.
                   WHEN patindex('%[0-9]%', c.CurrentText) > patindex('%[^0-9]%', c.CurrentText)
                       THEN RIGHT(c.CurrentText, len(c.CurrentText) - patindex('%[0-9]%', c.CurrentText) + 1)
                   -- Text starts with digits: keep from the first non-digit onward.
                   ELSE RIGHT(c.CurrentText, len(c.CurrentText) - patindex('%[^0-9]%', c.CurrentText) + 1)
               END AS varchar(100)
           ) AS CurrentText,
           -- Result so far plus '-' and the block just removed.
           CAST(
               CASE
                   WHEN patindex('%[0-9]%', c.CurrentText) = 0 OR patindex('%[^0-9]%', c.CurrentText) = 0
                       THEN c.Result + '-' + c.CurrentText
                   WHEN patindex('%[0-9]%', c.CurrentText) > patindex('%[^0-9]%', c.CurrentText)
                       THEN c.Result + '-' + LEFT(c.CurrentText, patindex('%[0-9]%', c.CurrentText) - 1)
                   ELSE c.Result + '-' + LEFT(c.CurrentText, patindex('%[^0-9]%', c.CurrentText) - 1)
               END AS varchar(200)
           ) AS Result
    FROM cte c
    WHERE LEN(c.CurrentText) > 0
)
-- Keep only the fully consumed rows; STUFF removes the leading '-'.
SELECT cte.RootText,
       STUFF(cte.Result, 1, 1, '') AS Result
FROM cte
WHERE cte.CurrentText = ''
Demo link: http://rextester.com/FTYA72053

Related

SQL Function - Fuzzy Matching with Levenshtein Distance Algorithm - Return Lowest Value Only

Problem: Need SQL function to return the 'lowest' matching value using the Levenshtein algorithm.
Code:
-- dbo.ufn_levenshtein
-- Levenshtein (edit) distance between @s1 and @s2: the minimum number of
-- single-character insertions, deletions and substitutions needed to turn
-- one string into the other.
--
-- Parameters:
--   @s1, @s2  strings to compare. The limit of 3999 characters keeps one
--             matrix row (2 bytes per cell, LEN + 1 cells) inside
--             varbinary(8000).
-- Returns:
--   int distance; NULL when either argument is NULL.
--
-- Notes:
--   * Only two rows of the distance matrix are kept, packed as 2-byte
--     integers in @cv1 (previous row) and @cv0 (row being built).
--   * Lengths come from LEN, which ignores trailing spaces.
--   * Character equality uses the database collation.
CREATE FUNCTION dbo.ufn_levenshtein(@s1 nvarchar(3999), @s2 nvarchar(3999))
RETURNS int
AS
BEGIN
    DECLARE @s1_len int, @s2_len int
    DECLARE @i int, @j int, @s1_char nchar, @c int, @c_temp int
    DECLARE @cv0 varbinary(8000), @cv1 varbinary(8000)

    -- A distance to an unknown value is unknown.
    IF @s1 IS NULL OR @s2 IS NULL
        RETURN NULL

    SELECT
        @s1_len = LEN(@s1),
        @s2_len = LEN(@s2),
        @cv1 = 0x0000,
        @j = 1, @i = 1, @c = 0

    -- With an empty @s1 the outer loop below never runs and @c would stay 0;
    -- the real distance is the number of characters to insert.
    IF @s1_len = 0
        RETURN @s2_len

    -- Row 0 of the matrix: distance from '' to each prefix of @s2 (0, 1, 2, ...).
    WHILE @j <= @s2_len
        SELECT @cv1 = @cv1 + CAST(@j AS binary(2)), @j = @j + 1

    WHILE @i <= @s1_len
    BEGIN
        -- Start row @i; its first cell (distance to '') is @i.
        SELECT
            @s1_char = SUBSTRING(@s1, @i, 1),
            @c = @i,
            @cv0 = CAST(@i AS binary(2)),
            @j = 1

        WHILE @j <= @s2_len
        BEGIN
            -- Candidate 1: cell to the left + 1.
            SET @c = @c + 1
            -- Candidate 2: diagonal cell + substitution cost (0 when equal).
            SET @c_temp = CAST(SUBSTRING(@cv1, @j+@j-1, 2) AS int) +
                CASE WHEN @s1_char = SUBSTRING(@s2, @j, 1) THEN 0 ELSE 1 END
            IF @c > @c_temp SET @c = @c_temp
            -- Candidate 3: cell above + 1.
            SET @c_temp = CAST(SUBSTRING(@cv1, @j+@j+1, 2) AS int)+1
            IF @c > @c_temp SET @c = @c_temp
            SELECT @cv0 = @cv0 + CAST(@c AS binary(2)), @j = @j + 1
        END

        -- The finished row becomes the previous row for the next character.
        SELECT @cv1 = @cv0, @i = @i + 1
    END

    RETURN @c
END
GO
-- Fixture: customers already on file.
IF OBJECT_ID('tempdb..#ExistingCustomers') IS NOT NULL
    DROP TABLE #ExistingCustomers;

CREATE TABLE #ExistingCustomers
(
    Customer VARCHAR(255),
    ID INT
)

INSERT INTO #ExistingCustomers (Customer, ID) VALUES ('Ed''s Barbershop', 1002)
INSERT INTO #ExistingCustomers (Customer, ID) VALUES ('GroceryTown', 1003)
INSERT INTO #ExistingCustomers (Customer, ID) VALUES ('Candy Place', 1004)
INSERT INTO #ExistingCustomers (Customer, ID) VALUES ('Handy Man', 1005)

-- Fixture: incoming names to be matched against the existing customers.
IF OBJECT_ID('tempdb..#POTENTIALCUSTOMERS') IS NOT NULL
    DROP TABLE #POTENTIALCUSTOMERS;

CREATE TABLE #POTENTIALCUSTOMERS (Customer VARCHAR(255));

INSERT INTO #POTENTIALCUSTOMERS (Customer) VALUES ('Eds Barbershop')
INSERT INTO #POTENTIALCUSTOMERS (Customer) VALUES ('Grocery Town')
INSERT INTO #POTENTIALCUSTOMERS (Customer) VALUES ('Candy Place')
INSERT INTO #POTENTIALCUSTOMERS (Customer) VALUES ('Handee Man')
INSERT INTO #POTENTIALCUSTOMERS (Customer) VALUES ('The Apple Farm')
INSERT INTO #POTENTIALCUSTOMERS (Customer) VALUES ('Ride-a-Long Bikes')

-- Pair every potential customer with each existing customer whose
-- space-stripped name is within an edit distance of 15; potential customers
-- with no such pair are kept with NULLs (LEFT JOIN).
SELECT
    pc.Customer,
    ec.ID,
    ec.Customer AS cust,
    dbo.ufn_levenshtein(REPLACE(pc.Customer, ' ', ''), REPLACE(ec.Customer, ' ', '')) AS ValueLev
FROM #POTENTIALCUSTOMERS AS pc
LEFT JOIN #ExistingCustomers AS ec
    ON dbo.ufn_levenshtein(REPLACE(pc.Customer, ' ', ''), REPLACE(ec.Customer, ' ', '')) < 15;
This returns:
What I would like to return:
Explanation: The results are the 'lowest' values from the Levenshtein algorithm. There are two rows where the Levenshtein scores are the same (The Apple Farm and Ride-a-Long Bikes), in which case any one of the values is fine, as long as only one row is returned.
References:
SQL Fuzzy Join - MSSQL
http://www.kodyaz.com/articles/fuzzy-string-matching-using-levenshtein-distance-sql-server.aspx
You can use CTE to get the result you want if you partition by the potential customer and use the ValueLev to order the results:
-- Return, for each potential customer, the single existing customer with the
-- lowest Levenshtein distance (under 15).
--
-- ROW_NUMBER with b.ID as a tie-breaker picks exactly one whole row per
-- potential customer. Ranking with RANK and then taking MIN(ID) and
-- MIN(cust) separately could combine the ID of one tied row with the name of
-- another tied row.
;WITH CTE AS
(
    SELECT ROW_NUMBER() OVER (
               PARTITION BY a.Customer
               ORDER BY dbo.ufn_levenshtein(REPLACE(a.Customer, ' ', ''), REPLACE(b.Customer, ' ', '')) ASC,
                        b.ID ASC
           ) AS RowNbr,
           a.Customer,
           b.ID,
           b.Customer AS cust,
           dbo.ufn_levenshtein(REPLACE(a.Customer, ' ', ''), REPLACE(b.Customer, ' ', '')) AS ValueLev
    FROM #POTENTIALCUSTOMERS a
    -- LEFT JOIN keeps potential customers that have no match under 15.
    LEFT JOIN #ExistingCustomers b
        ON dbo.ufn_levenshtein(REPLACE(a.Customer, ' ', ''), REPLACE(b.Customer, ' ', '')) < 15
)
SELECT Customer,
       ID,
       cust,
       ValueLev
FROM CTE
WHERE CTE.RowNbr = 1
As you don't mind which result is returned in the case of duplicate ValueLev, use GROUP BY and MIN to scale the results down to one per potential customer.
Output:
Customer ID cust ValueLev
Candy Place 1004 Candy Place 0
Grocery Town 1003 GroceryTown 0
Eds Barbershop 1002 Ed's Barbershop 1
Handee Man 1005 Handy Man 2
The Apple Farm 1004 Candy Place 9
Ride-a-Long Bikes 1003 Candy Place 14

Converting multiple delimited fields into rows in SQL Server

I have a data source which contains data in delimited fields which exist in a staging area in SQL Server. I'd like to transform this data into many rows so it is easier to work with. This differs from the numerous other questions and answers on similar topics in that I have multiple fields where this delimited data exists. Here is an example of what my data looks like:
ID | Field | Value
---+-------+------
1 | a,b,c | 1,2,3
2 | a,c | 5,2
And this is the desired output:
ID | Field | Value
---+-------+------
1 | a | 1
1 | b | 2
1 | c | 3
2 | a | 5
2 | c | 2
My code so far uses the XML parsing method like the one mentioned here: Turning a Comma Separated string into individual rows I needed to extend it to join each field to its corresponding value which I have done by generating a row_number for each ID and then matching based on the ID and this row_number.
My issue is that it is painfully slow so I wondered if anyone has any more performant methods?
-- Turn the comma-separated [Field] and [Value] columns of #source_table into
-- one row per (ID, Field, Value) element.
-- Each list is converted to XML ('a,b' -> '<M>a</M><M>b</M>'), shredded with
-- nodes('/M'), and its elements are numbered per ID; the two shredded sets
-- are then joined on ID and element number.
select
[Value].ID, [Field], [Value]
from
-- Derived table [Value]: one row per element of the [Value] list.
(select
A.ID, Split.a.value('.', 'varchar(100)') as [Value],
-- NOTE(review): ordering by the nodes() column is presumably meant to number
-- elements in document order -- confirm this is guaranteed.
row_number() over (partition by ID order by Split.a) as RowNumber
from
(select
ID, cast('<M>' + replace([Value], ',', '</M><M>') + '</M>' as xml) as [Value]
from
#source_table
where
-- Skip rows where either column contains <, >, & or %.
[Field] not like '%[<>&%]%' and [Value] not like '%[<>&%]%') as A
cross apply
[Value].nodes ('/M') as Split(a)
) [Value]
inner join
-- Derived table [Field]: one row per element of the [Field] list, built the
-- same way as [Value] above.
(
select
A.ID, Split.a.value('.', 'varchar(100)') as [Field],
row_number() over (partition by A.ID order by Split.a) as RowNumber
from
(select
ID, cast('<M>' + replace([Field], ',', '</M><M>') + '</M>' as xml) as [Field]
from
#source_table
where
[Field] not like '%[<>&%]%' and [Value] not like '%[<>&%]%') as A
cross apply
[Field].nodes ('/M') as Split(a)
-- Match the n-th field of an ID with the n-th value of the same ID.
) [Field] on [Value].ID = [Field].ID and [Value].RowNumber = [Field].RowNumber
Here is an approach using the splitter from Jeff Moden. http://www.sqlservercentral.com/articles/Tally+Table/72993/ One nice feature of that splitter is that it returns the ordinal position of each element so you can use it for joins and such.
Starting with some data.
-- Sample data: each row carries a comma-separated list of field names and a
-- parallel comma-separated list of values.
declare @Something table
(
    ID int
    , Field varchar(50)
    , Value varchar(50)
)

insert @Something (ID, Field, Value) values
(1, 'a,b,c', '1,2,3')
, (2, 'a,c', '5,2')
;
Since you have two sets of delimited data you will be forced to split this for each set of delimited values. Here is how you can leverage this splitter to accomplish this.
-- Split Field and Value separately with dbo.DelimitedSplit8K, then pair the
-- n-th field with the n-th value of the same ID using the splitter's
-- ItemNumber column.
with Fields as
(
    -- One row per element of the Field list.
    select s.ID
        , f.ItemNumber
        , f.Item
    from @Something s
    cross apply dbo.DelimitedSplit8K(s.Field, ',') f
)
, Value as
(
    -- One row per element of the Value list.
    select s.ID
        , v.ItemNumber
        , v.Item
    from @Something s
    cross apply dbo.DelimitedSplit8K(s.Value, ',') v
)
select f.ID
    , Field = f.Item
    , Value = v.Item
from Fields f
join Value v on v.ItemNumber = f.ItemNumber and v.ID = f.ID
If at all possible it would be best to see if you can change whatever process it is that is populating your source data so it is normalized and not delimited because it is a pain to work with.
Based on @Gordon Linoff's query, here is another recursive CTE:
-- Split the parallel comma-separated Field / Value lists of @t into one row
-- per element with a recursive CTE. Rows whose Field has no comma are not
-- fed into the recursion; they are appended unchanged by the final UNION ALL.
DECLARE @t TABLE(
    ID int
    ,Field VARCHAR(MAX)
    ,Value VARCHAR(MAX)
)

INSERT INTO @t (ID, Field, Value) VALUES
 (1, 'a,b,c', '1,2,3')
,(2, 'a,c', '5,2')
,(3, 'x', '7');

with cte as (
    -- Anchor: first element of each list plus the remaining list text.
    -- The WHERE clause guarantees Field contains a comma; Value may not.
    select ID
        ,SUBSTRING(Field, 1, CHARINDEX(',', Field) - 1) AS Field
        ,SUBSTRING(Value, 1, CASE WHEN CHARINDEX(',', Value) > 0 THEN CHARINDEX(',', Value) - 1 ELSE LEN(Value) END) AS Value
        ,SUBSTRING(Field, CHARINDEX(',', Field) + 1, LEN(Field) - CHARINDEX(',', Field)) as field_list
        -- When Value has no comma nothing is left after its first element;
        -- returning '' (as the recursive member does) keeps that single value
        -- from being emitted again for the second field.
        ,CASE WHEN CHARINDEX(',', Value) > 0 THEN SUBSTRING(Value, CHARINDEX(',', Value) + 1, LEN(Value) - CHARINDEX(',', Value)) ELSE '' END as value_list
        ,0 as lev
    from @t
    WHERE CHARINDEX(',', Field) > 0

    UNION ALL

    -- Recursive member: peel the next element off both remaining lists.
    select ID
        ,SUBSTRING(field_list, 1, CASE WHEN CHARINDEX(',', field_list) > 0 THEN CHARINDEX(',', field_list) - 1 ELSE LEN(field_list) END) AS Field
        ,SUBSTRING(value_list, 1, CASE WHEN CHARINDEX(',', value_list) > 0 THEN CHARINDEX(',', value_list) - 1 ELSE LEN(value_list) END) AS Value
        ,CASE WHEN CHARINDEX(',', field_list) > 0 THEN SUBSTRING(field_list, CHARINDEX(',', field_list) + 1, LEN(field_list) - CHARINDEX(',', field_list)) ELSE '' END as field_list
        ,CASE WHEN CHARINDEX(',', value_list) > 0 THEN SUBSTRING(value_list, CHARINDEX(',', value_list) + 1, LEN(value_list) - CHARINDEX(',', value_list)) ELSE '' END as value_list
        ,lev + 1
    from cte
    WHERE LEN(field_list) > 0
)
select ID, Field, Value
from cte
UNION ALL
-- Single-element rows never entered the recursion.
SELECT ID, Field, Value
FROM @t
WHERE CHARINDEX(',', Field) = 0
ORDER BY ID, Field
-- MAXRECURSION 0 removes the default limit of 100 recursion levels.
OPTION (MAXRECURSION 0)
One method is a recursive CTE:
-- Split the parallel comma-separated lists t.field / t.value into one row
-- per element.
-- The anchor row (lev = 0) carries the whole lists; each recursion level
-- peels the next element off both lists. Appending ',' inside CHARINDEX
-- guarantees a match even for the last element.
with cte as (
    select id,
           cast(NULL as varchar(max)) as field,
           cast(NULL as varchar(max)) as value,
           -- Cast the lists to varchar(max) so every column has the same type
           -- in the anchor and the recursive member, which a recursive CTE
           -- requires whatever the declared column types of t are.
           cast(field as varchar(max)) as field_list,
           cast(value as varchar(max)) as value_list,
           0 as lev
    from t
    union all
    select id,
           left(field_list, charindex(',', field_list + ',') - 1),
           left(value_list, charindex(',', value_list + ',') - 1),
           substring(field_list, charindex(',', field_list + ',') + 1, len(field_list)),
           substring(value_list, charindex(',', value_list + ',') + 1, len(value_list)),
           1 + lev
    from cte
    -- Stop as soon as either list is exhausted.
    where field_list <> '' and value_list <> ''
)
select id, field, value, field_list, value_list, lev
from cte
-- lev = 0 is the anchor row, which holds no split element.
where lev > 0;
Here is an example of how it works.

How to convert a variable with value '1,2,3' to a table (every number as a record)

Working on SQL Server (2005 and 2008).
The variable holding the value '1,2,3' is called @cedis, and it can contain any number of values, for example
set @cedis='1' or set @cedis='1,2,3,4,5,6,7' or set @cedis='125,98,91'
Importantly, this must be done with a SELECT only; a loop cannot be used.
It must return a table (result set) with one row per value. For example,
set @cedis='1,2,3,4' must return the result
number 1 2 3 4
declare #cedis varchar(max)
set #cedis='1,58,123,8'
;with datos as
(
-- my SELECT, which is going to return the table, goes here
)
select * from datos
result set is
number
1
58
123
8
If I am not wrong, this is what you need:
-- Split the comma-separated list in @cedis into one row per element by
-- rewriting it as XML ('1,2' -> '<M>1</M><M>2</M>') and shredding the <M>
-- nodes.
-- DECLARE and SET are separate statements because SQL Server 2005 does not
-- accept an initializer in DECLARE.
-- The CAST to XML fails if @cedis contains XML-special characters such as
-- '<' or '&'.
DECLARE @cedis VARCHAR(500)
SET @cedis = '1,2,3,4'

SELECT Split.a.value('.', 'VARCHAR(100)') AS Numbers
FROM (SELECT CAST('<M>' + REPLACE(@cedis, ',', '</M><M>') + '</M>' AS XML) AS Numbers) AS A
CROSS APPLY Numbers.nodes('/M') AS Split(a)
Result:
Numbers
-------
1
2
3
4
A table valued function would do it.
-- dbo.fn_Split
-- Splits @text on @delimiter and returns one row per non-empty token.
--
-- Parameters:
--   @text       the text to split.
--   @delimiter  separator of up to 5 characters; defaults to ','.
-- Returns a table:
--   position  1-based running number (IDENTITY) in insertion order.
--   value     the token text.
--
-- Notes:
--   * Empty tokens (leading or consecutive delimiters) are skipped.
--   * The delimiter length comes from DATALENGTH, not LEN, because LEN
--     ignores trailing spaces: LEN(' ') is 0, which would keep the loop from
--     ever consuming a space delimiter.
--   * The processed prefix is removed with STUFF rather than
--     RIGHT(@text, LEN(@text) - n): LEN ignores trailing spaces while RIGHT
--     counts from the true end, so padded input would lose its last token.
CREATE FUNCTION [dbo].[fn_Split](@text VARCHAR(MAX), @delimiter VARCHAR(5) = ',')
RETURNS @Strings TABLE
(
    position int IDENTITY PRIMARY KEY,
    value VARCHAR(8000)
)
AS
BEGIN
    DECLARE @index int
    DECLARE @delimiterLength int

    SET @index = -1
    SET @delimiterLength = DATALENGTH(@delimiter)

    WHILE (LEN(@text) > 0)
    BEGIN
        SET @index = CHARINDEX(@delimiter, @text)

        -- No further delimiter: the rest of the text is the last token.
        IF (@index = 0)
        BEGIN
            INSERT INTO @Strings (value) VALUES (@text)
            BREAK
        END

        -- Emit the token in front of the delimiter unless it is empty.
        IF (@index > 1)
        BEGIN
            INSERT INTO @Strings (value) VALUES (LEFT(@text, @index - 1))
        END

        -- Drop the token and the delimiter that followed it.
        SET @text = STUFF(@text, 1, @index + @delimiterLength - 1, '')
    END
    RETURN
END
You can call it as follows:
-- One row per element of the comma-separated list held in @cedis.
SELECT position, value
FROM dbo.fn_Split(@cedis, ',')
Here is a more generic solution that breaks any given string into a table based on any given separator:
http://rextester.com/VSRDLS48817
Not an original idea, but I've found it very useful.
-- dbo.SplitString
-- Inline table-valued function that splits @str on a one-character
-- separator.
--
-- Parameters:
--   @str        the text to split (up to 255 characters).
--   @separator  the single separator character.
-- Returns a table:
--   ItemIndex  0-based position of the item.
--   Item       the item text; empty items are returned as ''.
--
-- How it works: the recursive CTE "tokens" walks the separator positions.
--   p = 1-based item number,
--   a = start position of the item,
--   b = position of the separator that ends it (0 when there is none).
--
-- Note: each item costs one recursion level, so inputs with more items than
-- the default MAXRECURSION of 100 need OPTION (MAXRECURSION n) on the
-- calling query.
create function [dbo].[SplitString]
(
    @str nvarchar(255),
    @separator char(1)
)
returns table
AS
return (
    with tokens(p, a, b) AS (
        -- First item starts at position 1.
        select
            cast(1 as int),
            cast(1 as int),
            charindex(@separator, @str)
        union all
        -- Next item starts right after the previous separator.
        select
            p + 1,
            b + 1,
            charindex(@separator, @str, b + 1)
        from tokens
        where b > 0
    )
    select
        p-1 ItemIndex,
        substring(
            @str,
            a,
            -- The last item has no closing separator: take the rest of @str.
            case when b > 0 then b-a ELSE LEN(@str) end)
        AS Item
    from tokens
);
This is another approach to get the required output:
-- Split the delimited list in @cedis using only SELECT statements.
--   1. A delimiter is appended so every element is followed by one.
--   2. "datos" generates the character positions 1 .. length of @cedis.
--   3. "cte" keeps the positions where a delimiter starts and numbers them.
--   4. Each element is the text between two consecutive delimiter positions
--      (or from position 1 for the first element).
DECLARE @cedis VARCHAR(MAX) ,
    @delimeter VARCHAR(10) ,
    @delimeter_len INT

SET @cedis = '1,58,123,8,14144,15,155231,15,3647,2347,45,76,68,2354,577,5'
SET @delimeter = ','
-- DATALENGTH instead of LEN: LEN ignores trailing spaces, so a space
-- delimiter would have length 0.
SET @delimeter_len = DATALENGTH(@delimeter)
SET @cedis = @cedis + @delimeter;

WITH    datos
          AS ( SELECT   n = 1
               UNION ALL
               SELECT   n + 1
               FROM     datos
               -- Stop at the last character of @cedis.
               WHERE    n < DATALENGTH(@cedis)
             ),
        cte
          AS ( SELECT   T.n ,
                        ROW_NUMBER() OVER ( ORDER BY T.n ) AS RN
               FROM     datos AS T
               WHERE    SUBSTRING(@cedis, T.n, @delimeter_len) = @delimeter
             )
    SELECT  SUBSTRING(@cedis, COALESCE(R.n + @delimeter_len, 1),
                      L.n - COALESCE(R.n + @delimeter_len, 1)) AS part ,
            L.RN AS ID
    FROM    cte AS L
            -- R is the delimiter preceding element L; NULL for the first element.
            LEFT JOIN cte AS R ON L.RN = R.RN + 1
    -- Return the elements in their original order.
    ORDER BY L.RN
-- One recursion level per character: lift the limit so long lists do not fail.
OPTION  ( MAXRECURSION 0 )

Pivot Header Data in row using sql server

Is there any way to pivot a table in SQL Server like this?
I have data like
| OldItem | NewItem |
---------------------
| HD1 | 365 |
I need output like below.
| Name | Value1 |
---------------------
| OldItem | HD1 |
| NewItem | 365 |
Thanks in advance.
Please try using UNPIVOT. Sample given is for static two rows.
-- Turn the OldItem / NewItem columns of tbl into (Name, Value1) rows.
-- UNPIVOT requires every listed column to have the same data type and
-- length, so both columns are cast to a common type first.
-- NOTE(review): VARCHAR(100) is an assumed upper bound; widen it if tbl
-- holds longer values.
SELECT Name, Value1
FROM
    (SELECT CAST(OldItem AS VARCHAR(100)) AS OldItem,
            CAST(NewItem AS VARCHAR(100)) AS NewItem
     FROM tbl) p
UNPIVOT
    (Value1 FOR Name IN
        (OldItem, NewItem)
    ) AS unpvt;
The following works for me:
-- Transpose three (OldItem, NewItem) rows into two rows ('OldItem' and
-- 'NewItem') with the columns Value1..Value3, using conditional aggregation.
-- The drop guard makes the script re-runnable in the same session.
IF OBJECT_ID('tempdb..#Values') IS NOT NULL
    DROP TABLE #Values;

Create Table #Values (OldItem char(3), NewItem int);

INSERT INTO #Values (OldItem, NewItem)
VALUES ('HD1',365)
    ,('HD2',300)
    ,('HD3',200);

-- Number the rows so each one can be routed to its own output column.
With Values_Ordered AS
(
    SELECT OldItem, NewItem, row_number() OVER (ORDER BY OldItem) AS Sequence
    FROM #Values
)
SELECT 'OldItem' AS Name,
    min(CASE WHEN Sequence = 1 THEN OldItem ELSE NULL END) AS Value1,
    min(CASE WHEN Sequence = 2 THEN OldItem ELSE NULL END) AS Value2,
    min(CASE WHEN Sequence = 3 THEN OldItem ELSE NULL END) AS Value3
FROM Values_Ordered
UNION ALL
-- NewItem is cast to VARCHAR(11), wide enough for any int. CHAR(3) would
-- turn values above 999 into '*'.
SELECT 'NewItem' AS Name,
    min(CASE WHEN Sequence = 1 THEN CAST(NewItem AS VARCHAR(11)) ELSE NULL END) AS Value1,
    min(CASE WHEN Sequence = 2 THEN CAST(NewItem AS VARCHAR(11)) ELSE NULL END) AS Value2,
    min(CASE WHEN Sequence = 3 THEN CAST(NewItem AS VARCHAR(11)) ELSE NULL END) AS Value3
FROM Values_Ordered
And here is my little code :D
-- Transpose selected (OldItem, NewItem) rows into the rows 'OldItem' and
-- 'NewItem' with one column per source row (Value1, Value2, ...), using a
-- dynamically built PIVOT.
DECLARE @dataTable TABLE (OldItem VARCHAR(10), NewItem INT)

INSERT INTO @dataTable (OldItem, NewItem) SELECT 'HD1', 365
INSERT INTO @dataTable (OldItem, NewItem) SELECT 'HD2', 300
INSERT INTO @dataTable (OldItem, NewItem) SELECT 'HD3', 200
INSERT INTO @dataTable (OldItem, NewItem) SELECT 'HD4', 200

-- The drop guard makes the script re-runnable in the same session.
IF OBJECT_ID('tempdb..#SelectedData') IS NOT NULL
    DROP TABLE #SelectedData;

--first select the data you need and add the upcoming new column name
SELECT 'Value' + CAST(ROW_NUMBER() OVER (ORDER BY OldItem) AS VARCHAR(10)) AS NewColumn, 'OldItem' as RowName, OldItem AS Item
INTO #SelectedData
FROM @dataTable
WHERE OldItem IN ('HD1', 'HD2', 'HD3')
UNION ALL
SELECT 'Value' + CAST(ROW_NUMBER() OVER (ORDER BY OldItem) AS VARCHAR(10)) AS NewColumn, 'NewItem' as RowName, CAST(NewItem AS VARCHAR(11)) AS Item
FROM @dataTable
WHERE OldItem IN ('HD1', 'HD2', 'HD3')

--Collect what the column names will be.
--QUOTENAME delimits each name safely; GROUP BY de-duplicates the names.
DECLARE @columns NVARCHAR(MAX) = (
    SELECT STUFF(
        (SELECT ', ' + QUOTENAME(NewColumn)
         FROM #SelectedData
         GROUP BY NewColumn
         ORDER BY NewColumn
         FOR XML PATH ('')),
        1, 2, '' )
)

-- create dynamic code for pivot
DECLARE @dynamicSQL AS NVARCHAR(MAX);
SET @dynamicSQL = N'
SELECT RowName, ' + @columns + N'
FROM #SelectedData
PIVOT (MIN(Item) FOR NewColumn IN (' + @columns + N')) AS T
';

EXEC sp_executesql @dynamicSQL

How do I join all values from multiple rows into a single row?

Lets say I have the query:
SELECT Foo FROM Bar
Which returns
Foo
A
B
C
What I really want is:
Foo
A,B,C
So all of the values from all of the rows have been collapsed into a single row (the commas are optional).
Is there a way to use a select statement to do this because I do not want to use cursors?
-- Concatenate every Bar.Foo into one comma-separated string.
-- COALESCE leaves out the leading comma while @foos is still NULL.
-- NULL values are filtered out: adding NULL would turn @foos back into NULL
-- and discard everything collected so far.
-- NOTE(review): the order in which rows are appended is not guaranteed.
DECLARE @foos VARCHAR(4000)

SELECT @foos = COALESCE(@foos + ',', '') + Foo
FROM Bar
WHERE Foo IS NOT NULL

SELECT @foos AS Foo
-- Concatenate every Bar.Foo, ordered by Foo, into one ', '-separated string.
-- Table and column names follow the question (column Foo in table Bar).
-- Every value is prefixed with ', ' and STUFF removes the first separator.
-- FOR XML PATH(''), TYPE with .value() returns the text without XML
-- entitization, so characters such as '&' and '<' come back unchanged.
SELECT STUFF(
    (
        SELECT ', ' + CAST(Foo AS VARCHAR(MAX))
        FROM Bar
        ORDER BY Foo
        FOR XML PATH(''), TYPE
    ).value('.', 'VARCHAR(MAX)'),
    1, 2, ''
) AS Foo
Ross,
this should get you started.
-- Concatenate the DISTINCT values of Bar.Foo into one ', '-separated string.
-- Each value is prefixed with ', ' and STUFF removes the first separator, so
-- no trailing-separator clean-up is needed. @r stays NULL when Bar is empty.
DECLARE @r VARCHAR(8000)

SELECT @r = STUFF((SELECT DISTINCT ', ' + Foo FROM Bar FOR XML PATH('')), 1, 2, '')

SELECT @r
Try the following
-- Concatenate every non-NULL Bar.Foo into one comma-separated string.
-- IsNull(Foo + ',', '') contributes nothing for NULL values.
-- NOTE(review): NOLOCK allows dirty reads; the hint is written as
-- WITH (NOLOCK) because specifying table hints without WITH is deprecated.
declare @joined varchar(max)
set @joined = ''

select @joined = @joined + IsNull(Foo + ',', '')
from Bar with (nolock)

--; Drop last "," if necessary
set @joined = substring(@joined, 1, len(@joined) - (case when len(@joined) > 0 then 1 else 0 end))

select @joined as foo
-- Collapse three rows, each populating a different column, into a single
-- row: MAX ignores NULLs, so each column keeps its one non-NULL value.
SELECT
    MAX(src.a),
    MAX(src.b),
    MAX(src.c)
FROM
(
    SELECT 'a' AS a, NULL AS b, NULL AS c
    UNION
    SELECT NULL, 'b', NULL
    UNION
    SELECT NULL, NULL, 'c'
) AS src