Extract Only Specified Data in a string SQL Server - sql

I have a review column in a table which has multiple strings formats like below,
Example strings:
Reviews_Column_Data [INPUT]
'05012:000000: :0:00000000|00647:000000: :0:00000000|00283:000000: :0:00000000|'
'05012:000000: :0:00000000|00025:000000: :0:00000000|00647:000000: :0:00000000|'
'05012:000000: :0:00000000|02095:000000: :0:00000000|00647:000000: :0:00000000|'
'05012:000000: :0:00000000|00647:000000: :0:00000000|'
'05081:023931:DF:9:20230111|00604:023931:XX:9:20230111|02470:023931:XX:9:20230111|00655:023931:XX:9:20230111|00464:023931:XX:9:20230111|02130:023931:XX:9:20230111|'
'05081:023931:DF:9:20230131|02229:023931:XX:9:20230131|02130:023931:XX:9:20230131|00692:023931:XX:9:20230131|02170:023931:XX:9:20230131|05084:000000: :0:00000000|00647:000000: :0:00000000| '
I need to extract the review in Application table in the below format...
Application_Review_Column_Data [OUTPUT]
'05012,00647,00283'
'05012,00025,00647'
'05012,02095,00647'
'05012,00647',
'05081,00604,02470,00655,00464,02130'
'05081,02229,02130,00692,02170,05084,00647'
The key to finding the review codes: each code starts either at the beginning of the string or immediately after a "|" symbol, and ends at the next ":".
I have tried with the below code, but it didn't work
-- Rebuild the scratch table that holds the sample review strings.
DROP TABLE IF EXISTS #Temp_Tbl;

CREATE TABLE #Temp_Tbl
(
    Comments varchar(500)
);

-- Each value is a '|'-terminated list of ':'-separated review records.
INSERT INTO #Temp_Tbl (Comments)
VALUES
    ('05012:000000: :0:00000000|00647:000000: :0:00000000|00283:000000: :0:00000000|'),
    ('05012:000000: :0:00000000|00025:000000: :0:00000000|00647:000000: :0:00000000|'),
    ('05012:000000: :0:00000000|02095:000000: :0:00000000|00647:000000: :0:00000000|'),
    ('05081:023931:DF:9:20230131|02229:023931:XX:9:20230131|02130:023931:XX:9:20230131|00692:023931:XX:9:20230131|02170:023931:XX:9:20230131|');

As I have commented, use string_split to split on |. Then charindex() and left() to extract the required string. Finally string_agg() to concatenate it back
-- For each Comments value, take the text before the first ':' of every '|'-separated piece
-- and join those prefixes back together with ','.
-- NOTE(review): no ordering is requested from string_split or string_agg here, so the order of
-- the joined codes is presumably not guaranteed to follow the input order - confirm against the
-- STRING_SPLIT documentation for the target server version.
select Comments,
-- pieces with no ':' (p = 0) produce NULL from the CASE, since it has no ELSE branch
string_agg(case when p > 0 then left(c.value, p - 1) end, ',')
from #Temp_Tbl t
cross apply string_split(Comments, '|') c
-- p = position of the first ':' inside the piece, 0 when the piece has none
cross apply (select p = charindex(':', c.value) ) p
group by Comments

Related

Selecting text between 2nd and 3rd occurrence of delimiter

I'm trying to select the text between the second and third occurrence of a delimiter (-) in SQL Server.
For example, if I have the string aaa-bbbb-cccc-dddd I would like to return cccc, but I can't understand how to make a substring work when I have more than 2 of the delimiters.
Thanks for any help
If you always have the same number of elements (PARSENAME handles at most four parts) you could leverage PARSENAME like this.
-- Swap the '-' delimiters for '.' so PARSENAME can address the parts; part 2 is counted from the right-hand end.
select parsename(replace('aaa-bbbb-cccc-dddd', '-', '.'), 2)
But if your real data is not that consistent you need to use a real splitter.
If parsename() (+1) is not a valid option, perhaps a little XML.
Here are two illustrations, both return the same results
Example
-- Sample data held in a table variable (table variables take the '@' prefix; '#' is for temp tables).
Declare @YourTable table (SomeCol varchar(500))
Insert Into @YourTable (SomeCol) values
('aaa-bbbb-cccc-dddd')

-- Illustration 1: turn the delimited string into <x>..</x> elements inline and read elements 2 and 3.
Select SomeCol
      ,Pos2 = cast('<x>' + replace(A.SomeCol,'-','</x><x>')+'</x>' as xml).value('/x[2]','varchar(50)')
      ,Pos3 = cast('<x>' + replace(A.SomeCol,'-','</x><x>')+'</x>' as xml).value('/x[3]','varchar(50)')
 From @YourTable A

-- Illustration 2: build the XML once per row inside CROSS APPLY, then read both positions from it.
Select SomeCol
      ,B.*
 From @YourTable A
 Cross Apply (
                Select Pos2 = XMLData.value('/x[2]','varchar(50)')
                      ,Pos3 = XMLData.value('/x[3]','varchar(50)')
                 From (values (cast('<x>' + replace(A.SomeCol,'-','</x><x>')+'</x>' as xml))) B1(XMLData)
             ) B
Returns
SomeCol Pos2 Pos3
aaa-bbbb-cccc-dddd bbbb cccc

SQL - Split string multiple values, multiple values

I'm looking to split a string of delimited dates into their respective values and place them within columns of a results table. I've had some success with the following, not quite there for my specific application
SQL - Split string to columns by multiple delimiters
String (from & to dates)
tblTempDates.tmpString
2019-2-11--2019-2-15,2019-2-20--2019-2-20,2019-2-23--2019-2-23,2019-3-19--2019-3-24
Delimiters
-- separates from & to
, new record
Output required
tblTempDates2
dtFrom dtTo
2019-02-11 2019-02-15
2019-02-20 2019-02-20
2019-02-23 2019-02-23
2019-03-19 2019-03-24
Many thanks in advance :-)
Can use while statement to split the csv,
-- Splits a comma-separated list of 'from--to' date pairs into one (fromdate, todate) row per pair
-- using a WHILE loop. Local variables and table variables use the '@' prefix.
declare @a table (datefromto varchar(500));
declare @b table (fromdate varchar(500), todate varchar(500));

insert into @a (datefromto)
values ('2019-2-11--2019-2-15,2019-2-20--2019-2-20,2019-2-23--2019-2-23,2019-3-19--2019-3-24');

declare @datefromto varchar(500) = (select max(datefromto) from @a);
declare @datesplit int;          -- position of the next ',' in what is left of the list
declare @pair varchar(500);      -- the current 'from--to' element

while len(@datefromto) > 0
begin
    -- Recompute the comma position on every pass so pairs of different lengths are handled.
    set @datesplit = charindex(',', @datefromto);

    -- No comma left means the remainder is the last pair.
    set @pair = case when @datesplit > 0
                     then left(@datefromto, @datesplit - 1)
                     else @datefromto
                end;

    -- Cut the pair at '--'; elements without the separator are skipped instead of raising an error.
    insert into @b (fromdate, todate)
    select left(@pair, charindex('--', @pair) - 1),
           substring(@pair, charindex('--', @pair) + 2, len(@pair))
    where charindex('--', @pair) > 0;

    -- Drop the processed pair (and its comma) from the front of the list.
    set @datefromto = case when @datesplit > 0
                           then substring(@datefromto, @datesplit + 1, len(@datefromto))
                           else ''
                      end;
end

select fromdate, todate from @b;
Here is one nice way to do this using STRING_SPLIT, if your version of SQL Server supports it:
-- Split each row's comma-separated list into one row per 'from--to' pair, then cut each pair at '--'.
-- STRING_SPLIT is a table-valued function, so it is applied in the FROM clause via CROSS APPLY
-- rather than called in the SELECT list.
SELECT
    LEFT(s.value, CHARINDEX('--', s.value) - 1) AS dtFrom,
    SUBSTRING(s.value, CHARINDEX('--', s.value) + 2, 20) AS dtTo
FROM yourTable AS t
CROSS APPLY STRING_SPLIT(t.input, ',') AS s;
This approach first splits each from/to date substring to a separate row by comma using STRING_SPLIT. Then, it uses base string functions to isolate the from and to dates.
with versions before STRING_SPLIT() (or OPENJSON) I'd suggest this approach:
-- Sample data in a table variable (declared with '@').
DECLARE @tbl TABLE(ID INT IDENTITY, YourString VARCHAR(1000));
INSERT INTO @tbl (YourString) VALUES('2019-2-11--2019-2-15,2019-2-20--2019-2-20,2019-2-23--2019-2-23,2019-3-19--2019-3-24');

-- Casted: rewrite the two-level delimited string as XML, one <x> per pair holding two <y> values.
WITH Casted AS
(
    SELECT *
          ,CAST('<x><y>' + REPLACE(REPLACE(YourString,'--','</y><y>'),',','</y></x><x><y>') + '</y></x>' AS XML) AsXml
    FROM @tbl
)
-- One output row per <x>; the two dates are read by position and typed as date.
SELECT ID
      ,x.value('y[1]','date') AS Date1
      ,x.value('y[2]','date') AS Date2
FROM Casted
CROSS APPLY AsXml.nodes('/x') A(x);
The idea in short:
Some string replacements transform your multi-level-CSV into an XML like this
<x>
<y>2019-2-11</y>
<y>2019-2-15</y>
</x>
<x>
<y>2019-2-20</y>
<y>2019-2-20</y>
</x>
<x>
<y>2019-2-23</y>
<y>2019-2-23</y>
</x>
<x>
<y>2019-3-19</y>
<y>2019-3-24</y>
</x>
We can then use .nodes() to get all repeating <x> as derived table. Then we use .value() to fetch the <y> by their position.

Remove Characters in a String in SQL

I have a column u_manualdoc which contains the values are like this CGY DR# 7405. I want to remove the CGY DR#.
Here's the code:
select u_manualdoc, cardcode, cardname from ODLN
I want only the 7405 number. Thanks!
Try this:
--sample data you provided in comments (table variable, declared with '@')
declare @tbl table(codes varchar(20))
insert into @tbl (codes) values
('CGY PST - 58277') , ('CGY RMC PST # 58083'), ('CGY DR # 7443'), ('CSI # 1304'), ('PO# 0568 , 0570'), ('CGY DR# 7446')
--actual query that you can apply to your table:
--start at the first digit found by patindex and take the rest of the string
select SUBSTRING(codes, PATINDEX('%[0-9]%', codes), len(codes)) from @tbl
The key point here is to use patindex, which searches for a pattern and returns index where such pattern occur. I specified %[0-9]% which means that we search for any digit - it will return first occurrence of a digit. Now- since this would be our starting point to substring, we pass it to such function. Third parameter of substring is length. Since we want the rest of a string, len function makes sure that we get that :)
Applying to your naming:
-- Return the part of u_manualdoc that begins at its first digit, next to the card columns.
SELECT
    SUBSTRING(u_manualdoc, PATINDEX('%[0-9]%', u_manualdoc), LEN(u_manualdoc)),
    cardcode,
    cardname
FROM ODLN
You should use string functions charindex,len and substring to get it.
See the code below.
-- Take everything after the first '#' and trim the blank that follows it
-- (for 'CGY DR# 7405' the raw substring would be ' 7405').
select ltrim(SUBSTRING(u_manualdoc,CHARINDEX('#',u_manualdoc)+1,LEN(u_manualdoc)- CHARINDEX('#',u_manualdoc)))
from ODLN
EDIT
In addition to the other answers, you can use this simple method:
-- Return the run of digits at the end of u_manualdoc.
-- The string is reversed so patindex finds the first non-digit counting from the right.
-- A non-digit sentinel ('x') is appended so that a value made only of digits still gets a
-- match position; without it patindex returns 0 and the substring comes back empty.
select
    substring(
        u_manualdoc,
        len(u_manualdoc) - patindex('%[^0-9]%', reverse(u_manualdoc) + 'x') + 2,
        len(u_manualdoc)
    ),
    cardcode, cardname
from ODLN
In this example, patindex finds the first non-digit (as specified by [^0-9]) from the right side of the string, and the position just after it is used as the starting point of the substring.
This will work on all of your sample strings (including 'PO# 0568 , 0570 CGY DR# 7446').
Or use SQL Server Regex, which lets you use more powerful regular expressions within your queries.
TRY THIS
-- Sample data in a table variable (declared with '@').
DECLARE @table TABLE(DirtyCol VARCHAR(100));
INSERT INTO @table (DirtyCol)
VALUES('AB ABCDE # 123'), ('ABCDE# 123'), ('AB: ABC# 123 AB: ABC# 123'), ('AB#'), ('AB # 1 000 000'), ('AB # 1`234`567'), ('AB # (9)(876)(543)');

-- tally: the numbers 1..100, used as character positions.
-- @@spid is a constant per session, so the ORDER BY only exists to satisfy ROW_NUMBER().
WITH tally
     AS (
     SELECT TOP (100) N = ROW_NUMBER() OVER(ORDER BY @@spid)
     FROM sys.all_columns),
-- data: for every row, keep only the characters '0'..'9' and glue them back together in
-- position order with FOR XML PATH(''); rows that contain no digit give NULL and are dropped.
     data
     AS (
     SELECT DirtyCol,
            Col
     FROM @table
          CROSS APPLY
     (
         SELECT
         (
             SELECT C+''
             FROM
             (
                 SELECT N,
                        SUBSTRING(DirtyCol, N, 1) C
                 FROM tally
                 WHERE N <= DATALENGTH(DirtyCol)
             ) [1]
             WHERE C BETWEEN '0' AND '9'
             ORDER BY N FOR XML PATH('')
         )
     ) p(Col)
     WHERE p.Col IS NOT NULL)
     SELECT DirtyCol,
            CAST(Col AS INT) IntCol
     FROM data;

Select rows using in with comma-separated string parameter

I'm converting a stored procedure from MySql to SQL Server. The procedure has one input parameter nvarchar/varchar which is a comma-separated string, e.g.
'1,2,5,456,454,343,3464'
I need to write a query that will retrieve the relevant rows, in MySql I'm using FIND_IN_SET and I wonder what the equivalent is in SQL Server.
I also need to order the ids as in the string.
The original query is:
SELECT *
FROM table_name t
WHERE FIND_IN_SET(id,p_ids)
ORDER BY FIND_IN_SET(id,p_ids);
The equivalent is like for the where and then charindex() for the order by:
-- Stand-in for FIND_IN_SET: both the list and the id are wrapped in commas so that only
-- whole ids match (e.g. 464 does not match inside 3464).
select *
from table_name t
where ','+p_ids+',' like '%,'+cast(id as varchar(255))+',%'
-- sort rows by the position at which the comma-wrapped id is found in the comma-wrapped list
order by charindex(',' + cast(id as varchar(255)) + ',', ',' + p_ids + ',');
Well, you could use charindex() for both, but the like will work in most databases.
Note that I've added delimiters to the beginning and end of the string, so 464 will not accidentally match 3464.
You would need to write a FIND_IN_SET function as it does not exist. The closest mechanism I can think of to convert a delimited string into a joinable object would be to create a table-valued function and use the result in a standard IN statement. It would need to be similar to:
-- The delimited list is held in a local variable (declared with '@').
DECLARE @MyParam NVARCHAR(3000)
SET @MyParam='1,2,5,456,454,343,3464'

-- Filter MyTable by the ids returned from the user-written split function
-- dbo.MySplitDelimitedString, which is expected to return a table with an ID column.
SELECT
    *
FROM
    MyTable
WHERE
    MyTableID IN (SELECT ID FROM dbo.MySplitDelimitedString(@MyParam,','))
And you would need to create a MySplitDelimitedString type table-valued function that would split a string and return a TABLE (ID INT) object.
A set based solution that splits the id's into ints and join with the base table which will make use of index on the base table id. I assumed the id would be an int, otherwise just remove the cast.
-- Splits a comma-separated id list with a numbers table and joins the ids to table_name,
-- returning rows in the order the ids appear in the list.
declare @ids nvarchar(100) = N'1,2,5,456,454,343,3464';

with nums as ( -- Generate the numbers 1..len(@ids), one per character position
    select top (len(@ids)) row_number() over (order by (select 0)) n
    from sys.messages
)
, pos1 as ( -- Get comma positions: the next ',' at or after each character position
    select c.ci
    from nums n
    cross apply (select charindex(',', @ids, n.n) as ci) c
    group by c.ci
)
, pos2 as ( -- Distinct positions plus virtual delimiters before the start and after the end
    select ci
    from pos1
    union select 0
    union select len(@ids) + 1
)
, pos3 as ( -- Add a row number so that neighbouring delimiters can be joined
    select ci, row_number() over (order by ci) as r
    from pos2
)
, ids as ( -- The text between two neighbouring delimiters is one id; r keeps the list order
    select cast(substring(@ids, p1.ci + 1, p2.ci - p1.ci - 1) as int) id, row_number() over (order by p1.ci) r
    from pos3 p1
    inner join pos3 p2 on p2.r = p1.r + 1
)
select *
from ids i
inner join table_name t on t.id = i.id
order by i.r;
You can also try this by using regex to get the input values from comma separated string :
-- NOTE(review): regexp_substr, dual and CONNECT BY ... level look like Oracle syntax rather than
-- SQL Server - confirm the target engine before using this.
-- The subquery yields one row per comma-separated token of p_ids; the outer query keeps matching ids.
select * from table_name where id in (
select regexp_substr(p_ids,'[^,]+', 1, level) from dual
connect by regexp_substr(p_ids, '[^,]+', 1, level) is not null );

using regex to find same digit phone numbers

I need to find same digit phone numbers from a table and delete these. Phone Numbers are like below :
+999999999999
11111111
0000000000000
44444444
I am following this answer to solve this. I am trying :
select * from tblPhone where PhoneNo like '^([0-9a-z])\1+$'
But it does not succeed. PhoneNo is a varchar column. How can I achieve this?
Try this:
-- Strip '+' once per row, then compare the value without its last character against
-- the value without its first character; the two match only when all characters are identical.
SELECT p.*
FROM tblPhone AS p
CROSS APPLY (SELECT REPLACE(p.PhoneNo, '+', '') AS digits) AS d
WHERE SUBSTRING(d.digits, 1, LEN(d.digits) - 1)
    = SUBSTRING(d.digits, 2, LEN(d.digits))
The idea is that if substring from 1st to second-last position matches the one from 2nd to last, the string must be composed of identical characters.
Demo
Here is an idea. There is a problem with the first character, which can be a + or number. Let's substitute the second character for an empty string and look at the result:
-- Removing every occurrence of the second character must leave nothing (all-digit number)
-- or just the leading '+'. The second condition restricts the value to either
-- digits only, or a '+' followed by digits only; each alternative is parenthesised
-- so the AND/OR grouping is explicit and the parentheses balance.
where replace(PhoneNo, substring(PhoneNo, 2, 1), '') in ('+', '') and
      ((PhoneNo not like '%[^0-9]%' and PhoneNo like '[0-9]%') or
       (PhoneNo not like '+%[^0-9]%' and PhoneNo like '+%')
      )
You can do a pattern matching in SQL Server using patindex but regular expressions as such are not directly supported. There is hope, however, if you use .Net and CLR user-defined functions.
-- Demo: delete phone numbers made of one repeated digit, inside a transaction that is rolled back.
create table #temp(col1 varchar(25))
insert into #temp (col1) values ('+9999999')
insert into #temp (col1) values ('+123456789')
insert into #temp (col1) values ('+444444444')
insert into #temp (col1) values ('+9840536987')
select * from #temp
begin tran
select * from #temp
-- Remove '+' and then every occurrence of the last character; only a same-digit number leaves ''.
-- (Replacing the last digit with 0 and comparing the number to 0 would also match values such as
-- '+9009', and the cast would fail on non-numeric text.)
delete from #temp
where replace(replace(col1,'+',''), right(replace(col1,'+',''),1), '') = ''
  and col1 not like '%[^0-9+]%'   -- only consider values built from digits and '+'
select * from #temp
rollback tran
You can try this:
-- Keeps rows whose numeric value equals the last character of PhoneNo repeated once for each
-- character of that numeric value's text form.
-- NOTE(review): the CAST to BIGINT presumably raises an error for values that are not valid
-- integers - confirm the column only holds digits with an optional leading '+'.
SELECT *
FROM tblPhone
WHERE
CAST(PhoneNo AS BIGINT) = REPLICATE(RIGHT(PhoneNo, 1), LEN(CAST(PhoneNo AS BIGINT)))
OR
-- Remove every occurrence of the last character of PhoneNo; the row is kept when
-- nothing is left or only a '+' is left.
SELECT *
FROM tblPhone
WHERE REPLACE(PhoneNo, RIGHT(PhoneNo, 1),'') IN ('+','')
This generates the set all the possible invalid phone numbers (which is a very small number) and joins that to tblPhone. This is more efficient than doing string manipulations on every phone number in the table.
-- Build every candidate value (prefix '' or '+', followed by one digit repeated n times)
-- and delete the tblPhone rows whose PhoneNo equals one of them.
DELETE t1
FROM tblPhone t1
INNER JOIN (VALUES (''),('+')) t2(prefix)
CROSS JOIN (VALUES('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7'),('8'),('9')) t3(digit)
-- t4 supplies the repeat counts: row numbers over the first 13 rows of master.dbo.spt_values
CROSS JOIN (SELECT TOP (13) ROW_NUMBER() OVER(ORDER BY (SELECT 1)) FROM master.dbo.spt_values) t4(n)
ON PhoneNo = prefix+REPLICATE(digit,n)