SQL Server remove part of string between characters - sql

I have emails that look like this:
john.doe.946a9979-2951-4852-9e79-ad03eb0c1e5d#gmail.com
I am trying to get this output:
john.doe#gmail.com
I have this so far.... it's close.
SELECT
Caller = REPLACE(Caller,
SUBSTRING(Caller,
CHARINDEX('.', Caller),
CASE WHEN CHARINDEX('#', Caller, CHARINDEX('.', Caller)) > 1 THEN
CHARINDEX('#', Caller, CHARINDEX('.', Caller)) - CHARINDEX('.', Caller)
ELSE
LEN(Caller)
END ) , '')
FROM
some.table

Hmmm. I suspect the string you want to remove is fixed in length. So how about:
select stuff(caller, charindex('#',caller ) - 37, 37, '')

If you have SS 2016 or later, you can use R code to do it - granted I don't know about speed on larger data so be wary if its a production environment. Also be warned Regexp isn't my strongest area so you may want to check that portion.
DECLARE #dummyScript NVARCHAR(1000) = '
SELECT * FROM
(VALUES (''john.doe.946a9979-2951-4852-9e79-ad03eb0c1e5d#gmail.com''),
(''jane whoever 1234453-534343#yahoo.com'') ) t (Email)
'
DECLARE #myRcode NVARCHAR(600)
SET #myRcode = 'OutputDataset <- data.frame(Email_Cleaned = gsub("[0-9]+.+#", "#", InputDataSet$Email)) '
DECLARE #CleanedTable TABLE (Email_Cleaned VARCHAR(500))
INSERT INTO #CleanedTable
EXEC sp_execute_external_script
#language = N'R'
,#script = #myRcode
,#input_data_1 = #dummyScript
,#input_data_1_name = N'InputDataSet'
,#output_data_1_name = N'OutputDataset'
SELECT * FROM #CleanedTable

Here is a simpler method using LEFT(), RIGHT(), and CHARINDEX() functions
DECLARE #Caller VARCHAR(MAX) = 'john.doe.946a9979-2951-4852-9e79-ad03eb0c1e5d#gmail.com'
SELECT
LEFT(#Caller, CHARINDEX('.', #Caller, CHARINDEX('.', #Caller) + 1) - 1) + RIGHT(#Caller, LEN(#Caller) - CHARINDEX('#', #Caller) + 1) Email
The left side will get all the characters until the second dot, and the right side will get the characters from # sign to the end of characters.

Try like below
SELECT
REPLACE(CALLER,
Substring(CALLER,
PATINDEX('.[0-9]%#', CALLER),
PATINDEX('#', CALLER ) )
,'#')
From Table

Related

SQL String - remove a substring between 2 occurences

I have some strings like :
0#11001#2017#308
0#1#2018#327
0#200510#2020#3022
I need to remove the year value (between last 2 '#').
So the results should be like :
0#11001#308
0#1#327
0#200510#3022
Is there a simple query to do it?
UPDATE
yourTable
SET
col = STUFF(yourTable.col, AtSign2.pos, atSign3.pos - atSign2.pos, '')
FROM
yourTable
OUTER APPLY
(SELECT CHARINDEX('#', yourTable.col, 0)) AS atSign1(pos)
OUTER APPLY
(SELECT CHARINDEX('#', yourTable.col, atSign1.pos+1)) AS atSign2(pos)
OUTER APPLY
(SELECT CHARINDEX('#', yourTable.col, atSign2.pos+1)) AS atSign3(pos)
The best thing to do here would be to normalize your data, and stop storing multiple data points embedded in a single string, in a single column. That being said, if you must proceed, we can try using the base string functions to splice together the update:
UPDATE yourTable
SET col = LEFT(col, CHARINDEX('#', col, CHARINDEX('#', col) + 1)) +
REVERSE(LEFT(REVERSE(col), CHARINDEX('#', REVERSE(col)) - 1));
Demo
Note that this answer assumes that every record would have three # separators in it. If not, then we would have to add additional logic.
DECLARE #STRING VARCHAR(50) = '0#200510#2020#3022'
DECLARE #NEED_TO_REMOVE VARCHAR(50) = PARSENAME(REPLACE(#STRING,'#','.'),2) + '#'
SELECT REPLACE(#STRING,#NEED_TO_REMOVE,'') --result: 0#200510#3022

Select only characters in SQL

I have strings in a database like this:
firstname.lastname#email.com
And I only need the characters that appear after the # symbol and before (.) symbol i.e. (email) from the above example
I am trying to find a simple way to do this in SQL.
Do this:
use [your_db_name];
go
create table dbo.test
(
string varchar(max) null
)
insert into dbo.test values ('firstname.lastname#email.com')
select
string,
substring(
string,
charindex('#', string, 0) + 1,
charindex('.', string, charindex('#', string, 0)) - charindex('#', string, 0) - 1
) as you_need
from dbo.test
String manipulations are such a pain in SQL Server. Here is one method:
select t.*,
left(en.emailname, charindex('.', en.emailname + '.') - 1)
from t outer apply
(select stuff(email, 1, charindex('#', email + '#'), '') as emailname) en;
That that in the charindex() calls, the character being searched for is placed at the end of the string. This allows the code to work even for malformed emails -- it returns an empty string when the email is not of the form '%#%.%'.
DECLARE #col char(200)
set #col = 'firstname.lastname#email.com'
SELECT SUBSTRING(#col, LEN(LEFT(#col, CHARINDEX ('#', #col))) + 1, LEN(#col) - LEN(LEFT(#col, CHARINDEX ('#', #col))) - LEN(RIGHT(#col, LEN(#col) - CHARINDEX ('.', #col))) - 4);
DECLARE #str varchar(50) = 'firstname.lastname#email.com';
SELECT LEFT(
RIGHT(#str, LEN(#str) - CHARINDEX('#', #str))
,CHARINDEX('.', RIGHT(#str, LEN(#str) - CHARINDEX('#', #str))
) - 1) AS OUTPUT
Above query gives only domain-name from Email. The query can be applied for column in a table
Try This:-
DECLARE #Text varchar(100)
SET #Text = 'firstname.lastname#email.com'
SELECT SUBSTRING(STUFF(#Text, 1, CHARINDEX('#',#Text), ''), 0,
CHARINDEX('.', STUFF(#Text, 1, CHARINDEX('#',#Text), '')))
Result:-
email
DECLARE #myStr varchar(100) = 'firstname.lastname#email.com'
SELECT
SUBSTRING(SUBSTRING(#myStr,CHARINDEX('#',#myStr)+1,LEN(#myStr)-CHARINDEX('#',#myStr)+1),0,CHARINDEX('.',SUBSTRING(#myStr,CHARINDEX('#',#myStr)+1,LEN(#myStr)-CHARINDEX('#',#myStr)+1)))
That can be useful but I really recommend you to build user defined function in C#/Visaul basic they could be much more faster that this.
Using charindex, len and reverse to search for the positions of the # and the last dot.
And substring to get the name based on those positions:
create table test (id int identity(1,1), email varchar(60));
insert into test (email) values
('jane.doe#email.com'),
('not an email'),
('#invalid.email.xxx'),
('john.doe#longer.domainname.net');
select *,
(case
when email like '[a-z]%#%.%'
then substring(email,
charindex('#',email)+1,
len(email) - charindex('#',email) - charindex('.',reverse(email))
)
end) as email_domain_without_extension
from test;
The CASE WHEN is used to return NULL when it's not an email (instead of an empty string).

Get substring between second and fourth slash

I have a string that looks like this:
Y:\Data\apples\oranges\Scott\notes
I need a column that looks like this:
apples\oranges
This is what I have so far and it does not work:
SELECT SUBSTRING(
[Group],
CHARINDEX('\', [Group]) + 1,
LEN([Group]) - CHARINDEX('\', [Group]) - CHARINDEX('\', REVERSE([Group]))
) from datamap.finaltest
The strings will not always have a finite amount of slashes. For example you could have:
Y:\Data\Apples\bananas
Y:\Apples\Pears\oranges\peanuts
The data will always have:
drive letter + '\' + '1st level folder' + '\' + 'Second level folder'
It may have more than two levels though.
I have searched the forum but can't find anything specific.
Thanks
A blatant approach by converting your input into XML and taking the values by node and re-concatenating the nodes you want in output
;WITH MyTempData
AS
(
SELECT Convert(xml,'<n>'+Replace('Y:\Data\Apples','\','</n><n>')+'</n>') XMLString
)
SELECT COALESCE(XMLString.value('(/n[3])', 'varchar(20)'),'') + '\' +
COALESCE(XMLString.value('(/n[4])', 'varchar(20)'),'') MyFinalOutput
FROM MyTempData
Probably not the best way, but this will get you there.
DECLARE #string varchar(255) = 'Y:\data\apples\oranges\Scott\notes'
SELECT LEFT(RIGHT(#string,LEN(#string)-CHARINDEX('\', #string, CHARINDEX('\', #string,1) + 1)),CHARINDEX('\', RIGHT(#string,LEN(#string)-CHARINDEX('\', #string, CHARINDEX('\', #string,1) + 1)), CHARINDEX('\',RIGHT(#string,LEN(#string)-CHARINDEX('\', #string, CHARINDEX('\', #string,1) + 1)),1)+1)-1)
Here is a way using recursive CHARINDEX
declare #var varchar(4000) = 'Y:\Data\apples\oranges\Scott\notes'
declare #firstSlash int = (select CHARINDEX('\',#var,CHARINDEX('\',#var) + 1))
declare #fourthSlash int = (select CHARINDEX('\',#var,CHARINDEX('\',#var,CHARINDEX('\',#var,CHARINDEX('\',#var) + 1)+1)+1))
select SUBSTRING(#var,#firstSlash + 1,#fourthSlash - #firstSlash - 1)
Or, for your data table...
select SUBSTRING([Group],CHARINDEX('\',[Group],CHARINDEX('\',[Group]) + 1) + 1,CHARINDEX('\',[Group],CHARINDEX('\',[Group],CHARINDEX('\',[Group],CHARINDEX('\',[Group]) + 1)+1)+1) - CHARINDEX('\',[Group],CHARINDEX('\',[Group]) + 1) - 1)
If this is something you need to do often, or is prone to changing, it may be beneficial to implement a function which will make your code more readable/maintainable:
SELECT SUBSTRING(#t, dbo.CHARINDEX2('\', #t, 2) + 1, dbo.CHARINDEX2('\', #t, 3));
Using this 'find nth occurence' function:
http://www.sqlservercentral.com/scripts/Miscellaneous/30497/

Get everything after and before certain character in SQL Server

I got the following entry in my database:
images/test.jpg
I want to trim the entry so I get: test
So basically, I want everything after / and before .
How can I solve it?
use the following function
left(#test, charindex('/', #test) - 1)
If you want to get this out of your table using SQL, take a look at the following functions that will help you: SUBSTRING and CHARINDEX. You can use those to trim your entries.
A possible query will look like this (where col is the name of the column that contains your image directories:
SELECT SUBSTRING(col, LEN(SUBSTRING(col, 0, LEN(col) - CHARINDEX ('/', col))) + 1,
LEN(col) - LEN(SUBSTRING(col, 0, LEN(col) - CHARINDEX ('/', col))) - LEN(SUBSTRING(
col, CHARINDEX ('.', col), LEN(col))));
Bit of an ugly beast. It also depends on the standard format of 'dir/name.ext'.
Edit:
This one (inspired by praveen) is more generic and deals with extensions of different length:
SELECT SUBSTRING(col, LEN(LEFT(col, CHARINDEX ('/', col))) + 1, LEN(col) - LEN(LEFT(col,
CHARINDEX ('/', col))) - LEN(RIGHT(col, LEN(col) - CHARINDEX ('.', col))) - 1);
Before
SELECT SUBSTRING(ParentBGBU,0,CHARINDEX('/',ParentBGBU,0)) FROM dbo.tblHCMMaster;
After
SELECT SUBSTRING(ParentBGBU,CHARINDEX('-',ParentBGBU)+1,LEN(ParentBGBU)) FROM dbo.tblHCMMaster
----select characters before / including /
select SUBSTRING ('abcde/wxyz',0,CHARINDEX('/','abcde/wxyz')+1)
--select characters after / including /
select SUBSTRING('abcde/wxyz',CHARINDEX('/','abcde/wxyz'),LEN('abcde/wxyz'))
declare #T table
(
Col varchar(20)
)
insert into #T
Select 'images/test1.jpg'
union all
Select 'images/test2.png'
union all
Select 'images/test3.jpg'
union all
Select 'images/test4.jpeg'
union all
Select 'images/test5.jpeg'
Select substring( LEFT(Col,charindex('.',Col)-1),charindex('/',Col)+1,len(LEFT(Col,charindex('.',Col)-1))-1 )
from #T
I have made a method which is much more general :
so :
DECLARE #a NVARCHAR(MAX)='images/test.jpg';
--Touch here
DECLARE #keysValueToSearch NVARCHAR(4000) = '/'
DECLARE #untilThisCharAppears NVARCHAR(4000) = '.'
DECLARE #keysValueToSearchPattern NVARCHAR(4000) = '%' + #keysValueToSearch + '%'
--Nothing to touch here
SELECT SUBSTRING(
#a,
PATINDEX(#keysValueToSearchPattern, #a) + LEN(#keysValueToSearch),
CHARINDEX(
#untilThisCharAppears,
#a,
PATINDEX(#keysValueToSearchPattern, #a) + LEN(#keysValueToSearch)
) -(PATINDEX(#keysValueToSearchPattern, #a) + LEN(#keysValueToSearch))
)
SELECT Substring('ravi1234#gmail.com', 1, ( Charindex('#', 'ravi1234#gmail.com')
- 1 ))
Before,
RIGHT('ravi123#gmail.com', ( Charindex('#', 'ravi123#gmail.com') + 1 ))
After
I just did this in one of my reports and it was very simple.
Try this:
=MID(Fields!.Value,8,4)
Note: This worked for me because the value I was trying to get was a constant not sure it what you are trying to get is a constant as well.
I know this has been a while.. but here is an idea
declare #test varchar(25) = 'images/test.jpg'
select
#test as column_name
, parsename(replace(#test,'/','.'),1) as jpg
,parsename(replace(#test,'/','.'),2) as test
,parsename(replace(#test,'/','.'),3) as images
I found Royi Namir's answer useful but expanded upon it to create it as a function. I renamed the variables to what made sense to me but you can translate them back easily enough, if desired.
Also, the code in Royi's answer already handled the case where the character being searched from does not exist (it starts from the beginning of the string), but I wanted to also handle cases where the character that is being searched to does not exist.
In that case it acts in a similar manner by starting from the searched from character and returning the rest of the characters to the end of the string.
CREATE FUNCTION [dbo].[getValueBetweenTwoStrings](#inputString
NVARCHAR(4000), #stringToSearchFrom NVARCHAR(4000), #stringToSearchTo
NVARCHAR(4000))
RETURNS NVARCHAR(4000)
AS
BEGIN
DECLARE #retVal NVARCHAR(4000)
DECLARE #stringToSearchFromSearchPattern NVARCHAR(4000) = '%' +
#stringToSearchFrom + '%'
SELECT #retVal = SUBSTRING (
#inputString,
PATINDEX(#stringToSearchFromSearchPattern, #inputString) + LEN(#stringToSearchFrom),
(CASE
CHARINDEX(
#stringToSearchTo,
#inputString,
PATINDEX(#stringToSearchFromSearchPattern, #inputString) + LEN(#stringToSearchFrom))
WHEN
0
THEN
LEN(#inputString) + 1
ELSE
CHARINDEX(
#stringToSearchTo,
#inputString,
PATINDEX(#stringToSearchFromSearchPattern, #inputString) + LEN(#stringToSearchFrom))
END) - (PATINDEX(#stringToSearchFromSearchPattern, #inputString) + LEN(#stringToSearchFrom))
)
RETURN #retVal
END
Usage:
SELECT dbo.getValueBetweenTwoStrings('images/test.jpg','/','.') AS MyResult
I got some invalid length errors. So i made this function, this should not give any length problems. Also when you do not find the searched text it will return a NULL.
CREATE FUNCTION [FN].[SearchTextGetBetweenStartAndStop](#string varchar(max),#SearchStringToStart varchar(max),#SearchStringToStop varchar(max))
RETURNS varchar(max)
BEGIN
SET #string = CASE
WHEN CHARINDEX(#SearchStringToStart,#string) = 0
OR CHARINDEX(#SearchStringToStop,RIGHT(#string,LEN(#string) - CHARINDEX(#SearchStringToStart,#string) + 1 - LEN(#SearchStringToStart))) = 0
THEN NULL
ELSE SUBSTRING(#string
,CHARINDEX(#SearchStringToStart,#string) + LEN(#SearchStringToStart) + 1
,(CHARINDEX(#SearchStringToStop,RIGHT(#string,LEN(#string) - CHARINDEX(#SearchStringToStart,#string) + 1 - LEN(#SearchStringToStart)))-2)
)
END
RETURN #string
END
if Input= pg102a-wlc01s.png.intel.com and Output should be pg102a-wlc01s
we can use below query :
select Substring(pc.name,0,charindex('.',pc.name,0)),pc.name from tbl_name pc
You can try this:
Declare #test varchar(100)='images/test.jpg'
Select REPLACE(RIGHT(#test,charindex('/',reverse(#test))-1),'.jpg','')
Below query gives you data before '-'
Ex- W12345A-4S
SELECT SUBSTRING(Column_Name,0, CHARINDEX('-',Column_Name)) as 'new_name'
from [abc].
Output - W12345A
Inspired by the work of Josien, I wondered about a simplification.
Would this also work? Much shorter:
SELECT SUBSTRING(col, CHARINDEX ('/', col) + 1, CHARINDEX ('.', col) - CHARINDEX ('/', col) - 1);
(I can't test right now because of right issues at my company SQL server, which is a problem in its own right)
Simply Try With LEFT ,RIGHT ,CHARINDEX
select
LEFT((RIGHT(a.name,((CHARINDEX('/', name))+1))),((CHARINDEX('.', (RIGHT(a.name,
((CHARINDEX('/', name))+1)))))-1)) splitstring,
a.name
from
(select 'images/test.jpg' as name)a
declare #searchStart nvarchar(100) = 'search ';
declare #searchEnd nvarchar(100) = ' ';
declare #string nvarchar(4000) = 'This is a string to search (hello) in this text ';
declare #startIndex int = CHARINDEX(#searchStart, #string,0) + LEN(#searchStart);
declare #endIndex int = CHARINDEX(#searchEnd, #string, #startIndex + 1);
declare #length int = #endIndex - #startIndex;
declare #sub nvarchar(4000) = SUBSTRING(#string, #startIndex, #length)
select #startIndex, #endIndex, #length, #sub
This is a little more legible than the one-liners in this answer which specifically answer the question, but not in a generic way that would benefit all readers. This could easily be made into a function as well with a slight modification.
If there are more than one or none occurences of given character use this:
DECLARE #rightidx int = CASE
WHEN 'images/images/test.jpg' IS NULL OR (CHARINDEX('.', 'images/images/test.jpg')) <= 0 THEN LEN('images/images/test.jpg')
ELSE (CHARINDEX('.', REVERSE('images/images/test.jpg')) - 1)
END
SELECT RIGHT('images/images/test.jpg', #rightidx)
This was the approach I took.
CREATE FUNCTION dbo.get_text_before_char(#my_string nvarchar(255),#my_char char(1))
RETURNS nvarchar(255)
AS
BEGIN;
return IIF(#my_string LIKE '%' + #my_char + '%',left (#my_string, IIF(charindex(#my_char, #my_string) - 1<1,1,charindex(#my_char, #my_string) - 1)),'');
END;
CREATE FUNCTION dbo.get_text_after_char(#my_string nvarchar(255),#my_char char(1))
RETURNS nvarchar(255)
AS
BEGIN;
return IIF ( #my_string LIKE '%' + #my_char + '%' ,RIGHT ( #my_string , IIF ( charindex ( #my_char ,reverse(#my_string) )-1 < 1 ,1 ,charindex ( #my_char ,reverse(#my_string) )-1 ) ) , '' )
END;
SELECT
dbo.get_text_before_char('foo-bar','-')
, dbo.get_text_after_char('foo-bar','-')
declare #test varchar(100)='images/test.jpg'
select right(left(#test, charindex('.', #test) - 1),4)

Using PATINDEX to find varying length patterns in T-SQL

I'm looking to pull floats out of some varchars, using PATINDEX() to spot them. I know in each varchar string, I'm only interested in the first float that exists, but they might have different lengths.
e.g.
'some text 456.09 other text'
'even more text 98273.453 la la la'
I would normally match these with a regex
"[0-9]+[.][0-9]+"
However, I can't find an equivalent for the + operator, which PATINDEX accepts. So they would need to be matched (respectively) with:
'[0-9][0-9][0-9].[0-9][0-9]' and '[0-9][0-9][0-9][0-9][0-9].[0-9][0-9][0-9]'
Is there any way to match both of these example varchars with one single valid PATINDEX pattern?
I blogged about this a while ago.
Extracting numbers with SQL server
Declare #Temp Table(Data VarChar(100))
Insert Into #Temp Values('some text 456.09 other text')
Insert Into #Temp Values('even more text 98273.453 la la la')
Insert Into #Temp Values('There are no numbers in this one')
Select Left(
SubString(Data, PatIndex('%[0-9.-]%', Data), 8000),
PatIndex('%[^0-9.-]%', SubString(Data, PatIndex('%[0-9.-]%', Data), 8000) + 'X')-1)
From #Temp
Wildcards.
SELECT PATINDEX('%[0-9]%[0-9].[0-9]%[0-9]%','some text 456.09 other text')
SELECT PATINDEX('%[0-9]%[0-9].[0-9]%[0-9]%','even more text 98273.453 la la la')
Yes you need to link to the clr to get regex support. But if PATINDEX does not do what you need then regex was designed exactly for that.
http://msdn.microsoft.com/en-us/magazine/cc163473.aspx
Should be checked for robustness (what if you only have an int, for example), but this is just to put you on a track:
if exists (select routine_name from information_schema.routines where routine_name = 'GetFirstFloat')
drop function GetFirstFloat
go
create function GetFirstFloat (#string varchar(max))
returns float
as
begin
declare #float varchar(max)
declare #pos int
select #pos = patindex('%[0-9]%', #string)
select #float = ''
while isnumeric(substring(#string, #pos, 1)) = 1
begin
select #float = #float + substring(#string, #pos, 1)
select #pos = #pos + 1
end
return cast(#float as float)
end
go
select dbo.GetFirstFloat('this is a string containing pi 3.14159216 and another non float 3 followed by a new fload 5.41 and that''s it')
select dbo.GetFirstFloat('this is a string with no float')
select dbo.GetFirstFloat('this is another string with an int 3')
Given that the pattern is going to be varied in length, you're not going to have a rough time getting this to work with PATINDEX. There is another post that I wrote, which I've modified to accomplish what you're trying to do here. Will this work for you?
CREATE TABLE #nums (n INT)
DECLARE #i INT
SET #i = 1
WHILE #i < 8000
BEGIN
INSERT #nums VALUES(#i)
SET #i = #i + 1
END
CREATE TABLE #tmp (
id INT IDENTITY(1,1) not null,
words VARCHAR(MAX) null
)
INSERT INTO #tmp
VALUES('I''m looking for a number, regardless of length, even 23.258 long'),('Maybe even pi which roughly 3.14159265358,'),('or possibly something else that isn''t a number')
UPDATE #tmp SET words = REPLACE(words, ',',' ')
;WITH CTE AS (SELECT ROW_NUMBER() OVER (ORDER BY ID) AS rownum, ID, NULLIF(SUBSTRING(' ' + words + ' ' , n , CHARINDEX(' ' , ' ' + words + ' ' , n) - n) , '') AS word
FROM #nums, #tmp
WHERE ID <= LEN(' ' + words + ' ') AND SUBSTRING(' ' + words + ' ' , n - 1, 1) = ' '
AND CHARINDEX(' ' , ' ' + words + ' ' , n) - n > 0),
ids AS (SELECT ID, MIN(rownum) AS rownum FROM CTE WHERE ISNUMERIC(word) = 1 GROUP BY id)
SELECT CTE.rownum, cte.id, cte.word
FROM CTE, ids WHERE cte.id = ids.id AND cte.rownum = ids.rownum
The explanation and origin of the code is covered in more detail in the origional post
PATINDEX is not powerful enough to do that. You should use regular expressions.
SQL Server has Regular expression support since SQL Server 2005.