Extract & Expand Numbers - sql

Using SQL with Microsoft SQL Server. I have a column that has values like this:
5-7(A-C) 15(A-C)
3(A-C)
I am trying to extract the numbers and if there is a dash then I need those numbers plus all the numbers in between. So for this example the output would be 5, 6, 7, 15 for the first row and 3 for the second row. I will use the results to see if they exist in another table.
I have been using this but it does not get the numbers between the dash:
-- Asker's attempt: looks only at the text before the first '(' of cc_EXPRESSION
-- and returns ONE integer from it (it never expands a range).
-- NOTE(review): the snippet has no FROM clause as posted; cc_EXPRESSION has to
-- come from the table being queried.
SELECT
CASE
-- The prefix before '(' contains a dash: keep only the part left of the dash.
WHEN CHARINDEX('-', SUBSTRING(cc_EXPRESSION, 1, CHARINDEX('(', cc_EXPRESSION) - 1)) > 0
THEN CAST(LEFT(SUBSTRING(cc_EXPRESSION, 1, CHARINDEX('(', cc_EXPRESSION) - 1), CHARINDEX('-', SUBSTRING(cc_EXPRESSION, 1, CHARINDEX('(', cc_EXPRESSION) - 1)) - 1) AS INT)
-- No dash: the whole prefix before '(' is converted.
ELSE CAST(SUBSTRING(cc_EXPRESSION, 1, CHARINDEX('(', cc_EXPRESSION) - 1) AS INT)
END AS extracted_number

Here is an option that illustrates how you can "stack" expressions via a CROSS APPLY and JOIN an ad-hoc tally/numbers table.
You may notice I used TOP 1000 ... feel free to pick a more reasonable number
Example
-- Expands every "N" or "N-M" token of cc_Expression into the individual integers
-- it covers and returns them as one comma-separated list per expression.
Select A.cc_Expression
      -- WITHIN GROUP makes the list order deterministic (ascending numeric order);
      -- without it string_agg may emit the values in any order.
      ,NewValue = string_agg(D.R1 + E.N, ',') within group (order by D.R1 + E.N)
 From YourTable A
 -- One row per space-separated token, e.g. '5-7(A-C)'.
 Cross Apply string_split(A.cc_Expression, ' ') B
 -- Keep the text before '(' and turn '-' into '.' so parsename() can split it.
 -- Searching in B.value + '(' keeps left() from failing on a token that has no '('.
 Cross Apply (values ( replace(left(B.value, charindex('(', B.value + '(') - 1), '-', '.') ) ) C(Rng)
 -- R1 = range start, R2 = range end (equal to R1 when the token has no dash).
 Cross Apply (values ( try_convert(int, coalesce(parsename(C.Rng, 2), parsename(C.Rng, 1)))
                     , try_convert(int, parsename(C.Rng, 1)) ) ) D(R1, R2)
 -- Ad-hoc tally 0..999: one row per offset inside the range, so a range may span
 -- at most 1000 numbers; raise TOP if longer ranges are expected.
 Join ( Select Top 1000 N = -1 + Row_Number() Over (Order By (Select NULL))
          From master..spt_values n1
         Cross Join master..spt_values n2 ) E
   on E.N <= D.R2 - D.R1
 Group By A.cc_Expression
Results
cc_Expression NewValue
3(A-C) 3
5-7(A-C) 15(A-C) 5,6,7,15

With the following table :
-- Demo table holding the raw range expressions, one expression per row.
CREATE TABLE I_have_a_column_that_has_values_like_this
(
    COL VARCHAR(256)
);

-- The two sample rows taken from the question.
INSERT INTO I_have_a_column_that_has_values_like_this (COL)
SELECT '5-7(A-C) 15(A-C)'
UNION ALL
SELECT '3(A-C)';
You can do it like :
-- Splits each COL into its space-separated tokens, normalises every token to the
-- form "start-end", then emits one row per integer in that range.
WITH
tokens AS
(
    -- Text in front of '(' for every token of COL.
    SELECT src.COL,
           LEFT(parts.value, CHARINDEX('(', parts.value) - 1) AS VAL
    FROM   I_have_a_column_that_has_values_like_this AS src
           CROSS APPLY STRING_SPLIT(src.COL, ' ') AS parts
),
ranges AS
(
    -- A single number "n" becomes the one-element range "n-n".
    SELECT COL,
           CASE WHEN VAL LIKE '%-%' THEN VAL ELSE VAL + '-' + VAL END AS VAL
    FROM   tokens
)
SELECT r.COL,
       G.value AS VALS
FROM   ranges AS r
       -- Position of the dash counted from the left and from the right.
       CROSS APPLY (VALUES (CHARINDEX('-', r.VAL),
                            CHARINDEX('-', REVERSE(r.VAL)))) AS pos(from_left, from_right)
       CROSS APPLY GENERATE_SERIES(CAST(LEFT(r.VAL, pos.from_left - 1) AS INT),
                                   CAST(RIGHT(r.VAL, pos.from_right - 1) AS INT)) AS G
The result will be :
COL VALS
---------------------- -----------
5-7(A-C) 15(A-C) 5
5-7(A-C) 15(A-C) 6
5-7(A-C) 15(A-C) 7
5-7(A-C) 15(A-C) 15
3(A-C) 3

Related

SQL Substring and Charindex

I have 111-1001-0000-0000 this record in one column and 453200-0000-000 in second column
I want output as 111-1001-0000-453200-0000-0000-000
That means 111-1001-0000 from 1st column and 453200 from 2nd column and again 0000 from 1st column and 0000-000
from 2nd column
I tried below query but getting 111-1001-453200-0000-0000-000.
-0000 is missing from 1st column
-- Asker's original attempt, logic unchanged: it is the query reported to return
-- 111-1001-453200-0000-0000-000 (one '-0000' of the first value goes missing).
-- Only the variable names are repaired: T-SQL local variables must start with '@'.
Declare @1stcolumn nvarchar(30), @2ndcolumn nvarchar(30)
set @1stcolumn = '111-1001-0000-0000'
set @2ndcolumn = '453200-0000-000'
select substring(@1stcolumn,1,charindex(right(@1stcolumn,charindex('-',reverse(@1stcolumn))),@1stcolumn))
+substring(@2ndcolumn,1,charindex('-',@2ndcolumn))+reverse(substring(reverse(@1stcolumn),0,charindex('-',reverse(@1stcolumn))))
+'-'+substring(@2ndcolumn,charindex('-',@2ndcolumn)+1,len(@2ndcolumn))
Find the positions at which to split column 1 and column 2. Use LEFT() and RIGHT() to split the strings, then concatenate the pieces back together in the order that you want.
-- Splits col1 after its 3rd '-' and col2 after its 1st '-', then interleaves the
-- four pieces as col1-head, col2-head, col1-tail, col2-tail.
-- Expects @1stcolumn and @2ndcolumn to be declared and assigned earlier in the batch.
; with tbl as
(
    select col1 = @1stcolumn, col2 = @2ndcolumn
)
select *,
       -- named so the combined string can be referenced by callers
       new_value = c1.s1 + '-' + c2.s1 + '-' + c1.s2 + '-' + c2.s2
from tbl t
cross apply
(
    select s1 = left(col1, p - 1),
           s2 = right(col1, len(col1) - p)
    from (
        -- find the position of 3rd '-' by cascading charindex
        select p = charindex('-', col1,
                     charindex('-', col1,
                       charindex('-', col1) + 1) + 1)
    ) p
) c1
cross apply
(
    select s1 = left(col2, p - 1),
           s2 = right(col2, len(col2) - p)
    from (
        -- position of the 1st '-' in col2
        select p = charindex('-', col2)
    ) p
) c2
A small modification to the first SUBSTRING: to get the correct length I used LEN.
-- Builds <col1 up to and including its last '-'> + <col2 up to and including its
-- first '-'> + <last segment of col1> + '-' + <rest of col2>.
-- Expects @1stcolumn and @2ndcolumn to be declared and assigned earlier in the batch.
-- The search string must be exactly '-': with a trailing space ('- ') charindex
-- finds nothing and the whole first value is returned by the first substring.
select substring(@1stcolumn, 1, len(@1stcolumn) - charindex('-', reverse(@1stcolumn)) + 1)
+ substring(@2ndcolumn, 1, charindex('-', @2ndcolumn))
+ reverse(substring(reverse(@1stcolumn), 0, charindex('-', reverse(@1stcolumn))))
+ '-' + substring(@2ndcolumn, charindex('-', @2ndcolumn) + 1, len(@2ndcolumn))
I'd probably do this with PARSENAME, as it's quite concise then:
-- Interleaves the dash-separated parts of Column1 (4 parts) and Column2 (3 parts).
-- PARSENAME splits on '.', so the dashes are swapped for dots first; PARSENAME
-- numbers the parts from the right, hence 4..1 and 3..1 below.
WITH YourTable AS (
    SELECT '111-1001-0000-0000' AS Column1,
           '453200-0000-000'    AS Column2
)
SELECT CONCAT_WS('-', c1.P1, c1.P2, c1.P3, c2.P1, c1.P4, c2.P2, c2.P3) AS NewString
FROM YourTable AS src
CROSS APPLY (VALUES (REPLACE(src.Column1, '-', '.'))) AS d1(Dotted)
CROSS APPLY (VALUES (REPLACE(src.Column2, '-', '.'))) AS d2(Dotted)
CROSS APPLY (VALUES (PARSENAME(d1.Dotted, 4),
                     PARSENAME(d1.Dotted, 3),
                     PARSENAME(d1.Dotted, 2),
                     PARSENAME(d1.Dotted, 1))) AS c1(P1, P2, P3, P4)
CROSS APPLY (VALUES (PARSENAME(d2.Dotted, 3),
                     PARSENAME(d2.Dotted, 2),
                     PARSENAME(d2.Dotted, 1))) AS c2(P1, P2, P3);
-- Builds part1-part2-part3-part4 where
--   part1 = col1 without its last segment, part3 = last segment of col1,
--   part2 = first segment of col2,         part4 = col2 after its first '-'.
-- LEFT/RIGHT are used instead of SUBSTRING(x, 0, n): a start position of 0 makes
-- SUBSTRING return only n-1 characters, which cut the last character off part1.
WITH
test AS
(
    select '111-1001-0000-0000' as col1, '453200-0000-000' as col2
)
,cte as
(
    select
        col1,
        col2,
        -- everything before the LAST '-' of col1
        left(col1, len(col1) - charindex('-', reverse(col1))) as part1,
        -- everything before the FIRST '-' of col2 (the appended '-' covers a value with no dash)
        left(col2, charindex('-', col2 + '-') - 1) as part2
    from test
),
cte2 as
(
    select
        part1,
        part2,
        -- last segment of col1, taken from the un-reversed string so it is not mirrored
        right(col1, charindex('-', reverse(col1) + '-') - 1) as part3,
        -- everything after the FIRST '-' of col2
        substring(col2, charindex('-', col2 + '-') + 1, len(col2)) as part4
    from cte
)
select part1+'-'+part2+'-'+part3+'-'+part4
from cte2

Some numbers are getting truncated. I would like to pull all numbers

I'm trying to parse only numbers from a string. My code must be pretty close, but something is off here, because several numbers in the last string are being truncated, although the first two strings seem fine.
Here is my code.
-- Rebuild the sample table; the guard lets the script run when the table does not exist yet.
IF OBJECT_ID('SampleData', 'U') IS NOT NULL
    Drop Table SampleData;
Create table SampleData
(id int, factor varchar(100));
-- Load the rows into SampleData itself, the table every later query reads from
-- (the inserts previously targeted a different table, #source_Policy).
insert into SampleData (id, factor) values (1, 'AAA 1.058 (Protection Class)');
insert into SampleData (id, factor) values (2, 'BBB0.565 (Construction) ');
insert into SampleData (id, factor) values (3, 'CCCCC 1.04890616 (Building Limit Rel');
Select id, factor
From SampleData
-- Asker's extractor: concatenates, in position order, every character of factor
-- that is a digit or a '.'.
-- Only positions 1..12 are listed in the inline number list, so characters past
-- the 12th position of factor are never examined.
;with processTable as (
    select src.id, src.factor, picked.num
    from SampleData as src
    cross apply (
        select (
            -- "+ ''" leaves the column unnamed so FOR XML PATH('') emits bare text
            select ch.C + ''
            from (
                select pos.N, substring(src.factor, pos.N, 1) as C
                from (values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12)) as pos(N)
                where pos.N <= datalength(src.factor)
            ) as ch
            where patindex('%[0-9.]%', ch.C) > 0
            order by ch.N
            for xml path('')
        )
    ) as picked(num)
)
SELECT id, factor, num
FROM processTable
This is the result that I get.
In the num column, instead of 1.04, I would like to see the full precision, so: 1.04890616
I would think something like this:
-- Returns every sampledata row plus numstr: the first run of digits and dots in factor.
SELECT s.*, tail.numstr
FROM sampledata AS s
CROSS APPLY (
    -- drop everything in front of the first digit
    SELECT STUFF(s.factor, 1, PATINDEX('%[0-9]%', s.factor) - 1, '') AS str
) AS head
CROSS APPLY (
    -- keep characters up to the first one that is neither a digit nor '.';
    -- the appended 'x' guarantees such a character exists
    SELECT LEFT(head.str, PATINDEX('%[^0-9.]%', head.str + 'x') - 1) AS numstr
) AS tail;
Here is a SQL Fiddle.

Converting multiple delimited fields into rows in SQL Server

I have a data source which contains data in delimited fields which exist in a staging area in SQL Server. I'd like to transform this data into many rows so it is easier to work with. This differs from the numerous other questions and answers on similar topics in that I have multiple fields where this delimited data exists. Here is an example of what my data looks like:
ID | Field | Value
---+-------+------
1 | a,b,c | 1,2,3
2 | a,c | 5,2
And this is the desired output:
ID | Field | Value
---+-------+------
1 | a | 1
1 | b | 2
1 | c | 3
2 | a | 5
2 | c | 2
My code so far uses the XML parsing method like the one mentioned here: Turning a Comma Separated string into individual rows I needed to extend it to join each field to its corresponding value which I have done by generating a row_number for each ID and then matching based on the ID and this row_number.
My issue is that it is painfully slow so I wondered if anyone has any more performant methods?
-- Asker's query: shreds the comma-separated [Value] and [Field] columns of
-- #source_table into rows via XML, numbers the pieces per ID, and pairs the
-- n-th field with the n-th value.
select
[Value].ID, [Field], [Value]
from
-- Derived table [Value]: one row per element of the [Value] list.
(select
A.ID, Split.a.value('.', 'varchar(100)') as [Value],
-- NOTE(review): the numbering orders by the XML node column itself; confirm this
-- reliably reflects the element's position in the list.
row_number() over (partition by ID order by Split.a) as RowNumber
from
(select
-- Turn 'x,y,z' into <M>x</M><M>y</M><M>z</M>.
ID, cast('<M>' + replace([Value], ',', '</M><M>') + '</M>' as xml) as [Value]
from
#source_table
where
-- Skip rows where either column contains <, >, & or %.
[Field] not like '%[<>&%]%' and [Value] not like '%[<>&%]%') as A
cross apply
[Value].nodes ('/M') as Split(a)
) [Value]
inner join
-- Derived table [Field]: the same shredding applied to the [Field] list.
(
select
A.ID, Split.a.value('.', 'varchar(100)') as [Field],
row_number() over (partition by A.ID order by Split.a) as RowNumber
from
(select
ID, cast('<M>' + replace([Field], ',', '</M><M>') + '</M>' as xml) as [Field]
from
#source_table
where
[Field] not like '%[<>&%]%' and [Value] not like '%[<>&%]%') as A
cross apply
[Field].nodes ('/M') as Split(a)
-- Pair elements that share the same ID and the same position number.
) [Field] on [Value].ID = [Field].ID and [Value].RowNumber = [Field].RowNumber
Here is an approach using the splitter from Jeff Moden. http://www.sqlservercentral.com/articles/Tally+Table/72993/ One nice feature of that splitter is that it returns the ordinal position of each element so you can use it for joins and such.
Starting with some data.
-- Sample data held in a table variable (table variables are named with a leading '@').
declare @Something table
(
    ID int
    , Field varchar(50)
    , Value varchar(50)
)

insert @Something (ID, Field, Value) values
(1, 'a,b,c', '1,2,3')
, (2, 'a,c', '5,2')
;

-- Since you have two sets of delimited data you will be forced to split this for each set of delimited values. Here is how you can leverage this splitter to accomplish this.
with Fields as
(
    -- one row per element of Field, with its ordinal position
    select s.ID, f.ItemNumber, f.Item
    from @Something s
    cross apply dbo.DelimitedSplit8K(s.Field, ',') f
)
, Value as
(
    -- one row per element of Value, with its ordinal position
    select s.ID, v.ItemNumber, v.Item
    from @Something s
    cross apply dbo.DelimitedSplit8K(s.Value, ',') v
)
select f.ID
    , Field = f.Item
    , Value = v.Item
from Fields f
-- pair the n-th field of a row with the n-th value of the same row
inner join Value v on v.ItemNumber = f.ItemNumber and v.ID = f.ID
If at all possible it would be best to see if you can change whatever process it is that is populating your source data so it is normalized and not delimited because it is a pain to work with.
Based on @Gordon Linoff's query, here is another recursive CTE:
-- Sample data in a table variable (named with '@'; '#t' is not valid in DECLARE ... TABLE).
DECLARE @t TABLE(
ID int
,Field VARCHAR(MAX)
,Value VARCHAR(MAX)
)
INSERT INTO @t (ID, Field, Value) VALUES
(1, 'a,b,c', '1,2,3')
,(2, 'a,c', '5,2')
,(3, 'x', '7');
-- Recursive split: each level takes the first element of the remaining field/value
-- lists and carries the rest (field_list / value_list) to the next level.
with cte as (
-- Anchor: rows that hold more than one element; first element plus the remainder.
select ID
,SUBSTRING(Field, 1, CASE WHEN CHARINDEX(',', Field) > 0 THEN CHARINDEX(',', Field)-1 ELSE LEN(Field) END) AS Field
,SUBSTRING(Value, 1, CASE WHEN CHARINDEX(',', Value) > 0 THEN CHARINDEX(',', Value)-1 ELSE LEN(Value) END) AS Value
,SUBSTRING(Field, CASE WHEN CHARINDEX(',', Field) > 0 THEN CHARINDEX(',', Field)+1 ELSE 1 END, LEN(Field)-CASE WHEN CHARINDEX(',', Field) > 0 THEN CHARINDEX(',', Field) ELSE 0 END) as field_list
,SUBSTRING(Value, CASE WHEN CHARINDEX(',', Value) > 0 THEN CHARINDEX(',', Value)+1 ELSE 1 END, LEN(Value)-CASE WHEN CHARINDEX(',', Value) > 0 THEN CHARINDEX(',', Value) ELSE 0 END) as value_list
,0 as lev
from @t
WHERE CHARINDEX(',', Field) > 0
UNION ALL
-- Recursive step: next element of each list; the remainder becomes '' after the last one.
select ID
,SUBSTRING(field_list, 1, CASE WHEN CHARINDEX(',', field_list) > 0 THEN CHARINDEX(',', field_list)-1 ELSE LEN(field_list) END) AS Field
,SUBSTRING(value_list, 1, CASE WHEN CHARINDEX(',', value_list) > 0 THEN CHARINDEX(',', value_list)-1 ELSE LEN(value_list) END) AS Value
,CASE WHEN CHARINDEX(',', field_list) > 0 THEN SUBSTRING(field_list, CHARINDEX(',', field_list)+1, LEN(field_list)-CHARINDEX(',', field_list)) ELSE '' END as field_list
,CASE WHEN CHARINDEX(',', value_list) > 0 THEN SUBSTRING(value_list, CHARINDEX(',', value_list)+1, LEN(value_list)-CHARINDEX(',', value_list)) ELSE '' END as value_list
,lev + 1
from cte
WHERE LEN(field_list) > 0
)
select ID, Field, Value
from cte
UNION ALL
-- Rows with a single element never enter the recursion; add them as they are.
SELECT ID, Field, Value
FROM @t
WHERE CHARINDEX(',', Field) = 0
ORDER BY ID, Field
-- 0 lifts the default recursion limit of 100 levels.
OPTION (MAXRECURSION 0)
One method is a recursive CTE:
-- Peels one element off the front of field_list and value_list per recursion level.
-- Level 0 only seeds the lists (field/value are NULL there), so it is filtered out at the end.
WITH cte (id, field, value, field_list, value_list, lev) AS (
    SELECT id,
           CAST(NULL AS varchar(max)),
           CAST(NULL AS varchar(max)),
           field,
           value,
           0
    FROM t
    UNION ALL
    SELECT id,
           -- the appended ',' lets the last element be cut the same way as the others
           LEFT(field_list, CHARINDEX(',', field_list + ',') - 1),
           LEFT(value_list, CHARINDEX(',', value_list + ',') - 1),
           SUBSTRING(field_list, CHARINDEX(',', field_list + ',') + 1, LEN(field_list)),
           SUBSTRING(value_list, CHARINDEX(',', value_list + ',') + 1, LEN(value_list)),
           lev + 1
    FROM cte
    WHERE field_list <> ''
      AND value_list <> ''
)
SELECT *
FROM cte
WHERE lev > 0;
Here is an example of how it works.

Semicolon seperated value to other column in sql server

I have a table with a column whose values are separated by semicolons.
The concern is that the number of values in the column is not fixed: a row can hold anywhere from 1 to 80 semicolon-separated values.
I am trying to put each individual value into a separate column.
SQL SERVER 2008 code
-- Asker's attempt: splits Val at its FIRST semicolon into two columns only.
-- The table variable is named with '@' ('#Table' is not valid in DECLARE ... TABLE).
DECLARE @Table TABLE(
Val VARCHAR(50)
)
INSERT INTO @Table (Val) SELECT '2Xcalcium; kidney' union all SELECT '3XMagnessium; liver' union all SELECT '2-ECG;3XSODIUM;DIALYSIS'
SELECT *,
-- VARCHAR(50) matches the column; a bare VARCHAR in CAST defaults to 30 characters.
CAST(LEFT(Val,CHARINDEX(';',Val)-1) AS VARCHAR(50)) FirstValue,
CAST(RIGHT(Val,LEN(Val) - CHARINDEX(';',Val)) AS VARCHAR(50)) SecondValue
FROM @Table
I tried the above code, but it only handles two values (a single semicolon). Please share your expertise.
Try it like this:
-- Table variable with the sample rows (named with '@'; '#Table' is not valid in DECLARE ... TABLE).
DECLARE @Table TABLE(
Val VARCHAR(50)
)
INSERT INTO @Table (Val) SELECT '2Xcalcium; kidney' union all SELECT '3XMagnessium; liver' union all SELECT '2-ECG;3XSODIUM;DIALYSIS';
-- Turn 'a;b;c' into <x>a</x><x>b</x><x>c</x>, then read the n-th <x> as the n-th column.
;WITH Splitted AS
(
SELECT *
,CAST('<x>' + REPLACE(Val,';','</x><x>') + '</x>' AS XML) ValuesAsXML
FROM @Table
)
SELECT *
-- A position with no element yields NULL; add one line per further column needed.
,ValuesAsXML.value('x[1]','varchar(max)') AS FirstCol
,ValuesAsXML.value('x[2]','varchar(max)') AS SecondCol
,ValuesAsXML.value('x[3]','varchar(max)') AS ThirdCol
,ValuesAsXML.value('x[4]','varchar(max)') AS FourthCol
,ValuesAsXML.value('x[5]','varchar(max)') AS FifthCol
FROM Splitted
The result
Val FirstCol SecondCol ThirdCol FourthCol FifthCol
2Xcalcium; kidney 2Xcalcium kidney NULL NULL NULL
3XMagnessium; liver 3XMagnessium liver NULL NULL NULL
2-ECG;3XSODIUM;DIALYSIS 2-ECG 3XSODIUM DIALYSIS NULL NULL
Most of the links provided extract the elements into rows.
If you prefer to use your existing logic and extract the individual element into separate column, you can use multiple cascaded CROSS APPLY.
-- Each CROSS APPLY cuts the first ';'-separated element (V) off the remaining
-- string and hands the rest (Val) to the next step; add one step per extra column.
-- Expects the table variable @Table (column Val) to be declared earlier in the batch.
SELECT t.Val,
       v1.V as V1,
       v2.V as V2,
       v3.V as V3
FROM @Table t
cross apply
(
    -- the appended ';' makes the last element end with a delimiter as well
    select V   = LEFT(t.Val, CHARINDEX(';', t.Val + ';') - 1),
           Val = STUFF(t.Val, 1, CHARINDEX(';', t.Val + ';'), '')
) v1
cross apply
(
    select V   = LEFT(v1.Val, CHARINDEX(';', v1.Val + ';') - 1),
           Val = STUFF(v1.Val, 1, CHARINDEX(';', v1.Val + ';'), '')
) v2
cross apply
(
    select V   = LEFT(v2.Val, CHARINDEX(';', v2.Val + ';') - 1),
           Val = STUFF(v2.Val, 1, CHARINDEX(';', v2.Val + ';'), '')
) v3
From your question, it seems that you have data in the format below. This can be done easily with a numbers table.
-- Splits a comma-separated string into rows with a numbers table.
-- The string is wrapped in commas; every position n that holds a comma marks the
-- start of an element, which runs up to the next comma.
-- Expects a table "numbers" with an integer column n — TODO confirm it starts at 1
-- and covers at least len(@string).
declare @string varchar(max)
set @string='s,t,a,c,k'
select substring(','+@string+',',n+1,charindex(',',','+@string+',',n+1)-n-1)
from
numbers
where n<=len(@string)
and substring(','+@string+',',n,1)=','
Output:
s
t
a
c
k
Few more Gems:
https://dba.stackexchange.com/questions/11506/why-are-numbers-tables-invaluable
http://sqlperformance.com/2012/07/t-sql-queries/split-strings

extract text from the string

I need to extract the text between the slashes from the string KWR/50X50X5/1.4301, i.e. 50X50X5, in T-SQL. I've tried using SUBSTRING, but I could not get it to work.
Ultimately, I need to add up the values between the slashes, ignoring the X separators (for example, 50 + 50 + 5 = 105). I would be grateful for your help.
Try this:
-- Sample rows in a table variable (named with '@'; '#t' is not valid in DECLARE ... TABLE).
DECLARE @t TABLE (id INT, v VARCHAR(100) )
INSERT INTO @t (id, v)
VALUES ( 1, 'PWPQ/80X20/1.4301' ) ,
( 2, 'PWO/120/1.4404' ),
( 3, 'PWOI/120X9X90X80/1.4404' )
-- cte1: the text between the first and the second '/'.
;WITH cte1 AS(SELECT id, SUBSTRING(v,
CHARINDEX('/', v) + 1,
CHARINDEX('/', v, CHARINDEX('/', v) + 1) - CHARINDEX('/', v) - 1) AS v
FROM @t),
-- cte2: '80X20' becomes <X>80</X><X>20</X> so each number is its own XML node.
cte2 AS(SELECT id, CAST ('<X>' + REPLACE(v, 'X', '</X><X>') + '</X>' AS XML) AS v FROM cte1)
-- One row per <X> node, summed per id.
SELECT id, SUM(Split.a.value('.', 'int')) AS v
FROM cte2 a CROSS APPLY v.nodes ('/X') AS Split(a)
GROUP BY id
Output:
id v
1 100
2 120
3 299
First cte is for extracting value between /.
Second cte for casting those values to xml format.
The last statement is the standard trick for transposing a delimited string into separate rows.
-- Returns the text between the first and the second '/' of pattern.
select substring(tail.firstpart, 1, charindex('/', tail.firstpart) - 1)
from tessst as src
cross apply (
    -- everything after the first '/'
    values (substring(src.pattern, charindex('/', src.pattern) + 1, datalength(src.pattern)))
) as tail(firstpart);