Extract string using SQL Server 2012 - sql

I have a string in the form of
<div>#FIRST#12345#</div>
How do I extract the number part from this string using T-SQL in SQL Server 2012? Note the number has variable length

Using just t-sql string functions you can try:
-- Sample data: the wanted number sits between the second and third '#'.
create table t(col varchar(50));
insert into t (col) values
    ('<div>#FIRST#12345#</div>'),
    ('<div>#THIRD#543#</div>'),
    ('<div>#SECOND#3690123#</div>');

-- Returns each row with the text between the second and third '#',
-- or '' when either delimiter is missing.
select col,
       case when p1.v is null or p2.v <= p1.v then ''
            else Substring(col, p1.v, p2.v - p1.v)
       end as ExtractedNumber
from t
-- p1.v: position just after the second '#'. The search starts at 7 to skip
-- the '#' at position 6 that follows '<div>'. NullIf turns CharIndex's
-- "not found" (0) into NULL; without it the +1 would hide the miss as 1.
cross apply (values (NullIf(CharIndex('#', col, 7), 0) + 1)) p1(v)
-- p2.v: position of the closing '#', searched from p1.v itself so an empty
-- value ('##') is still detected; 0 when there is no closing '#'.
cross apply (values (CharIndex('#', col, p1.v))) p2(v);
Output:
Caveat, this doesn't handle any "edge" cases and assumes data is as described.

Shooting from the hip, since the question lacks a minimal reproducible example.
Assuming that it is an XML data type column.
SQL
-- DDL and sample data population, start
-- Table variables must be named with a leading '@'.
DECLARE @tbl TABLE (ID INT IDENTITY PRIMARY KEY, xmldata XML);
INSERT INTO @tbl (xmldata) VALUES
('<div>#FIRST#12345#</div>'),
('<div>#FIRST#770770#</div>');
-- DDL and sample data population, end

-- Reads the text node of <div>, strips the literal '#FIRST#' prefix and keeps
-- everything before the next '#'.
-- NOTE: only the '#FIRST#' prefix is handled; any other tag is left in place.
SELECT t.*
    , LEFT(x, CHARINDEX('#', x) - 1) AS Result
FROM @tbl t
CROSS APPLY xmldata.nodes('/div/text()') AS t1(c)
CROSS APPLY (SELECT REPLACE(c.value('.', 'VARCHAR(100)'), '#FIRST#' ,'')) AS t2(x);
Output
+----+---------------------------+--------+
| ID | xmldata | Result |
+----+---------------------------+--------+
| 1 | <div>#FIRST#12345#</div> | 12345 |
| 2 | <div>#FIRST#770770#</div> | 770770 |
+----+---------------------------+--------+

Related

Replace a specific character with blank

How can I replace 'a' to blank?
`Name` `ID`
----------------------------------
`b,c,d,e,abb,a` `1`
`b,c,d,a,e,abb` `2`
`a,b,c,d,a,e,abb` `3`
One way to do it would be to add a , to the beginning and end of each Name, then replace every occurrence of ',a,' with ',', and finally trim the leading and trailing , from the result:
-- Wrap the list in commas so every element, including the first and the last,
-- appears as ',<element>,'; replace ',a,' with ','; then trim the outer commas.
UPDATE table_name
SET
    Name = TRIM(
        ',' FROM REPLACE(CONCAT(',', Name, ','), ',a,', ',')
    );
Fiddle
Or if you just want to do a select without changing the rows:
-- Read-only version: same comma-wrapping trick, stored rows are not modified.
SELECT
    TRIM(
        ',' FROM REPLACE(CONCAT(',', Name, ','), ',a,', ',')
    ) AS Name,
    ID
FROM table_name;
To address @Iptr's comment: if the value a can occur consecutively (such as a,a,...), you could use STRING_SPLIT to turn the comma-separated values into rows, filter out the rows where the value is a, and then use STRING_AGG with GROUP BY to rebuild the comma-separated list:
-- Split each Name into one row per element, drop the elements equal to 'a',
-- and glue the remaining elements back together per ID.
-- NOTE(review): no ordering is specified for the aggregation, so the element
-- order of the rebuilt list is presumably not guaranteed - confirm.
SELECT
    ID,
    STRING_AGG(u.Value, ',') AS Name
FROM table_name
CROSS APPLY STRING_SPLIT(Name, ',') AS u
WHERE u.Value <> 'a'
GROUP BY ID
Fiddle
Here is a solution based on tokenization via XML/XQuery.
It will work starting from SQL Server 2012 onwards.
Steps:
We are tokenizing a string of tokens via XML.
XQuery FLWOR expression is filtering out the 'a' token.
Reverting it back to a string of tokens.
SQL
-- DDL and sample data population, start
-- Table variables and scalar variables must be named with a leading '@'.
DECLARE @tbl TABLE (ID INT IDENTITY PRIMARY KEY, tokens VARCHAR(1000));
INSERT INTO @tbl (tokens) VALUES
('b,c,d,e,abb,a'),
('b,c,d,a,e,abb'),
('a,b,c,d,a,e,abb');
-- DDL and sample data population, end

DECLARE @separator CHAR(1) = ',';

-- t1.c holds the string as <root><r>token</r>...</root>. CDATA sections keep
-- XML special characters in the tokens from breaking the cast.
-- The FLWOR expression drops every token equal to "a". .value() joins the
-- surviving tokens with single spaces, and REPLACE turns those spaces back
-- into the separator.
-- NOTE: tokens that themselves contain a space would also get that space
-- replaced by the separator.
SELECT t.*
    , REPLACE(c.query('
        for $x in /root/r/text()
        return if ($x = "a") then ()
               else data($x)
      ').value('.', 'VARCHAR(MAX)'), SPACE(1), @separator) AS Result
FROM @tbl AS t
CROSS APPLY (SELECT TRY_CAST('<root><r><![CDATA[' +
        REPLACE(tokens, @separator, ']]></r><r><![CDATA[') +
        ']]></r></root>' AS XML)) AS t1(c);
Output
+----+-----------------+-------------+
| ID | tokens | Result |
+----+-----------------+-------------+
| 1 | b,c,d,e,abb,a | b,c,d,e,abb |
| 2 | b,c,d,a,e,abb | b,c,d,e,abb |
| 3 | a,b,c,d,a,e,abb | b,c,d,e,abb |
+----+-----------------+-------------+
Try the following:
-- Removes every 'a' character from name, including the ones inside longer
-- elements such as 'abb'; the commas around removed elements stay in place.
SELECT
    REPLACE(name, N'a', N'') AS RepName,
    ID
FROM yourTable
Try this.
-- Shows the original Name next to a copy in which every 'a' character
-- (also inside longer elements such as 'abb') is replaced by a single space.
SELECT
    ID,
    Name,
    REPLACE(Name, 'a', ' ')
FROM tableName;

Removing a part of a URL from a column in SQL

I have a URL column in the table, and below are the URLs. I want to remove everything that follows the Location segment.
https://xyz.sharepoint.com/sites/tender/lp/46/Lists/PlaceDetails/Location3
https://xyz.sharepoint.com/sites/tender/lp/50/Lists/PlaceDetails/Location2/4_.000
https://xyz.sharepoint.com/sites/tender/lp/52/Lists/PlaceDetails/Location5
https://xyz.sharepoint.com/sites/tender/lp/50/Lists/PlaceDetails/Location6/8_.000
Expected OutPut
https://xyz.sharepoint.com/sites/tender/lp/46/Lists/PlaceDetails/Location3
https://xyz.sharepoint.com/sites/tender/lp/50/Lists/PlaceDetails/Location2
https://xyz.sharepoint.com/sites/tender/lp/52/Lists/PlaceDetails/Location5
https://xyz.sharepoint.com/sites/tender/lp/50/Lists/PlaceDetails/Location6
I tried with charindex but couldn't get it to work. Any suggestion would be greatly appreciated.
You detect the index of "/" after Location, something like this
-- Variables must be named with a leading '@'.
DECLARE @data NVARCHAR(200) = 'https://xyz.sharepoint.com/sites/tender/lp/50/Lists/PlaceDetails/Location2/4_.000';

-- loc.pos: start of 'Location' (0 when absent).
-- cut.pos: first '/' at or after loc.pos (0 when there is nothing to cut).
-- The URL is returned unchanged when 'Location' is absent or is already the
-- last segment; otherwise everything from the following '/' on is dropped.
-- Without the loc.pos guard a URL lacking 'Location' would be cut at its
-- very first '/'.
SELECT CASE
           WHEN loc.pos = 0 OR cut.pos = 0 THEN @data
           ELSE LEFT(@data, cut.pos - 1)
       END AS TrimmedUrl
FROM (VALUES (CHARINDEX('Location', @data))) AS loc(pos)
CROSS APPLY (VALUES (CHARINDEX('/', @data, loc.pos))) AS cut(pos);
Please try the following solution.
It is using the following algorithm:
Tokenize URL as XML.
Get the position of the XML element that contains "Location".
Retrieve XML elements up to the position from the step above, and reassemble URL back.
SQL
-- DDL and sample data population, start
-- Table variables and scalar variables must be named with a leading '@'.
DECLARE @tbl TABLE (ID INT IDENTITY PRIMARY KEY, [URL] VARCHAR(MAX));
INSERT INTO @tbl ([URL]) VALUES
('https://xyz.sharepoint.com/sites/tender/lp/46/Lists/PlaceDetails/Location3'),
('https://xyz.sharepoint.com/sites/tender/lp/50/Lists/PlaceDetails/Location2/4_.000'),
('https://xyz.sharepoint.com/sites/tender/lp/52/Lists/PlaceDetails/Location5'),
('https://xyz.sharepoint.com/sites/tender/lp/50/Lists/PlaceDetails/Location6/8_.000');
-- DDL and sample data population, end

DECLARE @separator CHAR(1) = '/';

-- t1.c:   the URL split on '/' into <root><r>segment</r>...</root>.
-- t2.pos: 1-based position of the <r> element containing "Location",
--         computed as the number of elements preceding it plus one.
-- Result: the segments up to and including that position. .value() joins
--         them with single spaces, which REPLACE turns back into '/'.
SELECT t.*
    , REPLACE(c.query('data(/root/r[position() le sql:column("t2.pos")]/text())')
        .value('.', 'VARCHAR(MAX)')
        ,SPACE(1),@separator) AS Result
FROM @tbl AS t
CROSS APPLY (SELECT TRY_CAST('<root><r><![CDATA[' +
        REPLACE([URL], @separator, ']]></r><r><![CDATA[') +
        ']]></r></root>' AS XML)) AS t1(c)
CROSS APPLY (SELECT c.query('for $i in /root/r[contains(.,"Location")]
        let $pos := count(root/*[. << $i]) + 1
        return $pos').value('.','INT')) AS t2(pos) ;
Output
+----+----------------------------------------------------------------------------+
| ID | Result |
+----+----------------------------------------------------------------------------+
| 1 | https://xyz.sharepoint.com/sites/tender/lp/46/Lists/PlaceDetails/Location3 |
| 2 | https://xyz.sharepoint.com/sites/tender/lp/50/Lists/PlaceDetails/Location2 |
| 3 | https://xyz.sharepoint.com/sites/tender/lp/52/Lists/PlaceDetails/Location5 |
| 4 | https://xyz.sharepoint.com/sites/tender/lp/50/Lists/PlaceDetails/Location6 |
+----+----------------------------------------------------------------------------+

SQL REPLACE with Multiple [0-9]

I have a string that I want to replace a group of numbers.
The string contains groupings of numbers (and a few letters). 'A12 456 1 65 7944'
I want to replace the group of 3 numbers with 'xxx', and the group of 4 numbers with 'zzzz'
I thought something like REPLACE(@str, '%[0-9][0-9][0-9]%', 'xxx') would work, but it doesn't. I can't even get '%[0-9]%' to replace anything.
If REPLACE is not suitable, how can I replace groups of numbers?
Please try the following solution based on XML and XQuery.
Notable points:
We are tokenizing input string as XML in the CROSS APPLY clause.
XQuery's FLWOR expression is checking for numeric integer values with
a particular length, and substitutes them with a replacement string.
XQuery .value() method outputs back a final result.
SQL
-- DDL and sample data population, start
-- Table variables and scalar variables must be named with a leading '@'.
DECLARE @tbl TABLE (ID INT IDENTITY PRIMARY KEY, tokens VARCHAR(MAX));
INSERT INTO @tbl (tokens) VALUES
('A12 456 1 65 7944');
-- DDL and sample data population, end

DECLARE @separator CHAR(1) = SPACE(1);

-- t1.c holds the string split on the separator as <root><r>token</r>...</root>.
-- The FLWOR expression emits "xxx" for a 3-character token and "zzzz" for a
-- 4-character token when the xs:int test passes, and the token itself
-- otherwise. .value() joins the emitted items with single spaces, which is
-- the original separator here.
-- NOTE(review): the xs:int cast/instance-of pair is presumably what keeps
-- non-numeric tokens such as 'A12' untouched - confirm against the output.
SELECT t.*
    , c.query('
        for $x in /root/r/text()
        return if (xs:int($x) instance of xs:int and string-length($x)=3) then "xxx"
               else if (xs:int($x) instance of xs:int and string-length($x)=4) then "zzzz"
               else data($x)
      ').value('.', 'VARCHAR(MAX)') AS Result
FROM @tbl AS t
CROSS APPLY (SELECT TRY_CAST('<root><r><![CDATA[' +
        REPLACE(tokens, @separator, ']]></r><r><![CDATA[') +
        ']]></r></root>' AS XML)) AS t1(c);
Output
+----+-------------------+-------------------+
| ID | tokens | Result |
+----+-------------------+-------------------+
| 1 | A12 456 1 65 7944 | A12 xxx 1 65 zzzz |
+----+-------------------+-------------------+

Select substring from column but each record has different pattern

I have column with different pattern of string for each records let's call it [Description]. Here's the sample of records
[Description]
-qwetw MANN/1234556/DATE/030621/B/C/ACC/DIFF+AA11000532
-qwerty 123456789/06/29/2021/ACC./DONE/CLOSED+06+AA11001234
-qwert 123456789101213/-/BACK/300621/Rekening/Tutup+06+ZZZ21001123A
I want only pick the last number order - so the output would be:
[Description]
-11000532
-11001234
-21001123
Is there any function how to get it?
Thanks
Please try the following solutions.
First solution is for SQL Server 2017 onwards.
It is working by implementing the following steps:
It is tokenizing column value as XML.
Last token is our goal, i.e. (/root/r[last()]/text())[1]
TRIM() removes unwanted chars.
The second solution is for SQL Server 2012. It is much more involved.
SQL 2017
-- DDL and sample data population, start
-- Table variables and scalar variables must be named with a leading '@'.
DECLARE @tbl TABLE (ID INT IDENTITY PRIMARY KEY, Tokens VARCHAR(4000));
INSERT INTO @tbl (Tokens) VALUES
('-qwetw MANN/1234556/DATE/030621/B/C/ACC/DIFF+AA11000532'),
('-qwert 123456789101213/-/BACK/300621/Rekening/Tutup+06+ZZZ21001123A'),
('-qwerty 123456789/06/29/2021/ACC./DONE/CLOSED+06+AA11001234');
-- DDL and sample data population, end

DECLARE @separator CHAR(1) = '+'
    , @CharsToRemove VARCHAR(100) = 'AZ';

-- t.c:      the string split on '+' into <root><r>token</r>...</root>.
-- t2.token: the text of the last <r> element, i.e. everything after the
--           final '+'.
-- Result:   that token with the characters listed in @CharsToRemove trimmed
--           from both ends. Only 'A' and 'Z' are listed, which is what the
--           sample rows need.
SELECT ID, tokens
    , TRIM(@CharsToRemove FROM token) AS Result
FROM @tbl
CROSS APPLY (VALUES (TRY_CAST('<root><r><![CDATA[' +
        REPLACE(tokens, @separator, ']]></r><r><![CDATA[') +
        ']]></r></root>' AS XML))) AS t(c)
CROSS APPLY (VALUES (c.value('(/root/r[last()]/text())[1]', 'VARCHAR(256)'))) AS t2(token);
SQL 2012
-- Must run in the same batch as the declarations of the table variable and
-- the separator variable used below.
--
-- t.c / t2.token: the string split on the separator, and its last token.
-- The correlated subquery explodes that token into one <r> element per
-- character, using the numbers in master..spt_values as positions. The first
-- .query() keeps only the characters that pass the xs:int test, and the
-- second one concatenates the survivors, which are then read as a BIGINT.
SELECT ID, tokens
    , (
        SELECT SUBSTRING(token, number, 1)
        FROM @tbl AS c
        CROSS APPLY (
            SELECT DISTINCT number
            FROM master..spt_values
            WHERE number BETWEEN 1 AND LEN(token)
        ) V
        WHERE c.ID = p.ID
        -- Emit the characters in positional order; without an ORDER BY the
        -- order of the generated <r> elements is not guaranteed.
        ORDER BY V.number
        FOR XML PATH('r'), TYPE, ROOT('root')
      )
      .query('for $x in /root/r
          return if (xs:int($x) instance of xs:int) then $x
                 else ()')
      .query('/r/text()').value('.', 'BIGINT') AS Result
FROM @tbl AS p
CROSS APPLY (VALUES (TRY_CAST('<root><r><![CDATA[' +
        REPLACE(tokens, @separator, ']]></r><r><![CDATA[') +
        ']]></r></root>' AS XML))) AS t(c)
CROSS APPLY (VALUES (c.value('(/root/r[last()]/text())[1]', 'VARCHAR(256)'))) AS t2(token);
Output
+----+---------------------------------------------------------------------+----------+
| ID | tokens | Result |
+----+---------------------------------------------------------------------+----------+
| 1 | -qwetw MANN/1234556/DATE/030621/B/C/ACC/DIFF+AA11000532 | 11000532 |
| 2 | -qwert 123456789101213/-/BACK/300621/Rekening/Tutup+06+ZZZ21001123A | 21001123 |
| 3 | -qwerty 123456789/06/29/2021/ACC./DONE/CLOSED+06+AA11001234 | 11001234 |
+----+---------------------------------------------------------------------+----------+
Your example is tricky, because the order number also contains letters.
I am assuming your datatype is VARCHAR(4000) and accordingly, I have applied substring in the end with 4000, to make it simpler in the end.
-- Table variables must be named with a leading '@'.
DECLARE @table TABLE (Description VARCHAR(4000));
INSERT INTO @table (Description) VALUES
 ('-qwetw MANN/1234556/DATE/030621/B/C/ACC/DIFF+AA11000532')
,('-qwert 123456789101213/-/BACK/300621/Rekening/Tutup+06+ZZZ21001123A')
,('-qwerty 123456789/06/29/2021/ACC./DONE/CLOSED+06+AA11001234');

-- tail.txt:    the part of Description from the last '+' to the end, found
--              by reversing the string, cutting at the first '+' and
--              reversing back.
-- ordernumber: tail.txt from its first digit in 1-9 onwards. 4000 is the
--              column's maximum length, so "to the end". Trailing letters
--              are kept.
SELECT SUBSTRING(tail.txt, PATINDEX('%[1-9]%', tail.txt), 4000) AS ordernumber
FROM @table
CROSS APPLY (VALUES (
    REVERSE(SUBSTRING(REVERSE(Description), 1, CHARINDEX('+', REVERSE(Description))))
)) AS tail(txt);
ordernumber
11000532
21001123A
11001234

How to SELECT string between second and third instance of ",,"?

I am trying to get string between second and third instance of ",," using SQL SELECT.
Apparently functions substring and charindex are useful, and I have tried them but the problem is that I need the string between those specific ",,"s and the length of the strings between them can change.
Can't find working example anywhere.
Here is an example:
Table: test
Column: Column1
Row1: cat1,,cat2,,cat3,,cat4,,cat5
Row2: dogger1,,dogger2,,dogger3,,dogger4,,dogger5
Result: cat3dogger3
Here is my closest attempt, it works if the strings are same length every time, but they aren't:
-- Asker's attempt, kept as posted. The CHARINDEX searches start at the
-- hard-coded positions 12 and 20, so the result depends on fixed offsets
-- into the string rather than on where the separators actually are.
-- NOTE(review): the table is called "test" in the description above but
-- "testi" here - confirm which name is intended.
SELECT SUBSTRING(column1,LEN(LEFT(column1,CHARINDEX(',,', column1,12)+2)),LEN(column1) - LEN(LEFT(column1,CHARINDEX(',,', column1,20)+2)) - LEN(RIGHT(column1,CHARINDEX(',,', (REVERSE(column1)))))) AS column1
FROM testi
Just repeat sub-string 3 times, each time moving onto the next ",," e.g.
-- Peel the string one separator at a time:
--   src          - the full value
--   after_first  - everything after the first ',,'
--   after_second - everything after the second ',,'
-- The selected column is after_second cut off just before its next ',,'.
SELECT
    SUBSTRING(after_second.col1, 1, PATINDEX('%,,%', after_second.col1) - 1)
FROM (
    VALUES
        ('cat1,,cat2,,cat3,,cat4,,cat5'),
        ('dogger1,,dogger2,,dogger3,,dogger4,,dogger5')
) AS src (col1)
CROSS APPLY (
    VALUES (SUBSTRING(src.col1, PATINDEX('%,,%', src.col1) + 2, LEN(src.col1)))
) AS after_first (col1)
CROSS APPLY (
    VALUES (SUBSTRING(after_first.col1, PATINDEX('%,,%', after_first.col1) + 2, LEN(after_first.col1)))
) AS after_second (col1);
And just to reiterate, this is a terrible way to store data, so the best solution is to store it properly.
Here is an alternative solution using charindex. The basic idea is the same as in Dale K's answer, but instead of cutting the string, we specify the start_location for the search by using the optional third parameter of charindex. This way, we get the location of each separator and can slice each value out of the main string.
-- Table variables and scalar variables must be named with a leading '@'.
declare @vtest table (column1 varchar(200));
insert into @vtest (column1) values
    ('dogger1,,dogger2,,dogger3,,dogger4,,dogger5'),
    ('cat1,,cat2,,cat3,,cat4,,cat5');

declare @separator char(2) = ',,';

-- FI/SI/TI: positions of the first, second and third separator. Each search
-- starts 2 characters (the separator length) after the previous hit. nullif
-- turns charindex's "not found" (0) into NULL, and that NULL propagates
-- through the later searches.
-- ThirdToken: the text between the second and third separator, or NULL when
-- fewer than three separators exist.
select
    t.column1
    , FI.FirstInstance
    , SI.SecondInstance
    , TI.ThirdInstance
    , case
          when TI.ThirdInstance is not null
              then substring(t.column1, SI.SecondInstance + 2, TI.ThirdInstance - SI.SecondInstance - 2)
          else null
      end as ThirdToken
from
    @vtest t
    cross apply (select nullif(charindex(@separator, t.column1), 0) FirstInstance) FI
    cross apply (select nullif(charindex(@separator, t.column1, FI.FirstInstance + 2), 0) SecondInstance) SI
    cross apply (select nullif(charindex(@separator, t.column1, SI.SecondInstance + 2), 0) ThirdInstance) TI;
For transparency, I saved the separator string in a variable.
By default the charindex returns 0 if the search string is not present, so I overwrite it with the value null, by using nullif
IMHO, SQL Server 2016 with its JSON support is the best option here.
SQL
-- DDL and sample data population, start
-- Table variables must be named with a leading '@'.
DECLARE @tbl TABLE (ID INT IDENTITY PRIMARY KEY, Tokens VARCHAR(500));
INSERT INTO @tbl (Tokens) VALUES
('cat1,,cat2,,cat3,,cat4,,cat5'),
('dogger1,,dogger2,,dogger3,,dogger4,,dogger5');
-- DDL and sample data population, end

-- rs.jsondata rewrites 'a,,b,,c' as the JSON array '["a","b","c"]';
-- '$[2]' then addresses the third element (JSON arrays are zero-based).
-- NOTE: the tokens are concatenated into the JSON text unescaped, so this
-- assumes they contain no double quotes or backslashes.
WITH rs AS
(
    SELECT *
        , '["' + REPLACE(Tokens
            , ',,', '","')
          + '"]' AS jsondata
    FROM @tbl
)
SELECT rs.ID, rs.Tokens
    , JSON_VALUE(jsondata, '$[2]') AS ThirdToken
FROM rs;
Output
+----+---------------------------------------------+------------+
| ID | Tokens | ThirdToken |
+----+---------------------------------------------+------------+
| 1 | cat1,,cat2,,cat3,,cat4,,cat5 | cat3 |
| 2 | dogger1,,dogger2,,dogger3,,dogger4,,dogger5 | dogger3 |
+----+---------------------------------------------+------------+
It's the same as @"Yitzhak Khabinsky"'s answer, but I think it looks clearer.
-- Inline sample rows, then the same JSON trick in a single expression:
-- turn 'a,,b,,c' into '["a","b","c"]' and read the element at index 2.
WITH CTE_Data AS (
    SELECT 'cat1,,cat2,,cat3,,cat4,,cat5' AS [String]
    UNION
    SELECT 'dogger1,,dogger2,,dogger3,,dogger4,,dogger5' AS [String]
)
SELECT
    d.[String],
    JSON_VALUE('["' + REPLACE(d.[String], ',,', '","') + '"]', '$[2]') AS Value3
FROM CTE_Data AS d