SQL split the first name with underscore - sql

I'm trying to split the first name from the middle name or middle initial when there's an underscore in the name. I was able to split the first name from the middle name when there is a space, but having trouble with the underscore.
I would like to keep it all together in the case statement if possible.
-- Splits [first name] into a first name and a one-character middle initial.
-- An initial is recognised only when the second-to-last character is a space.
SELECT
    n.[first name],
    SUBSTRING(
        n.[first name],
        1,
        CASE
            WHEN SUBSTRING(REVERSE(n.[first name]), 2, 1) = ' '
                THEN CHARINDEX(' ', n.[first name]) - 1   -- stop just before the first space
            ELSE LEN(n.[first name])                      -- no initial: keep the whole value
        END
    ) AS FirstName,
    CASE
        WHEN SUBSTRING(REVERSE(n.[first name]), 2, 1) = ' '
            THEN SUBSTRING(n.[first name], LEN(n.[first name]), 1)   -- the last character
    END AS MiddleName,
    n.[Last Name]
FROM nametable AS n

The following example uses two case expressions to separate one column into two. It makes use of a feature of Substring: no error is raised if the specified length exceeds the length of the input string.
Note that the sample data is not an image of data, but useful data.
-- Sample data lives in a temp table. "declare #Samples as Table" is not valid
-- T-SQL: a #name denotes a temp table, which has to be created with CREATE TABLE
-- (table variables are declared with an @ prefix instead).
if Object_Id( 'tempdb..#Samples' ) is not NULL
  drop table #Samples;
create table #Samples ( Name VarChar(20) );
insert into #Samples ( Name ) values
  ( 'Billy' ), ( 'Billy Bob' ), ( 'Billy_Joe' ), ( 'Edgar_7' ),
  ( '_' ), ( 'X_' ), ( '_Y' ), ( '' );

-- Split Name at the first underscore.
select Name,
  -- Everything before the first underscore, or the whole value when there is none.
  case
    when CharIndex( '_', Name ) > 0 then Left( Name, CharIndex( '_', Name ) - 1 )
    else Name end as FirstName,
  -- Everything after the first underscore; NULL when there is no underscore.
  -- The length 20 is the column width: Substring stops at the end of the string
  -- without raising an error when the requested length overshoots.
  case
    when CharIndex( '_', Name ) > 0 then Substring( Name, CharIndex( '_', Name ) + 1, 20 )
    else NULL end as MiddleName
  from #Samples;

Add the REPLACE function inline:
-- Same split as the space-only version, but underscores are treated as spaces.
-- The normalised string is computed once per row and reused below.
SELECT
    n.[first name],
    SUBSTRING(
        n.[first name],
        1,
        CASE
            WHEN SUBSTRING(REVERSE(s.spaced), 2, 1) = ' '
                THEN CHARINDEX(' ', s.spaced) - 1   -- stop before the first separator
            ELSE LEN(n.[first name])
        END
    ) AS FirstName,
    CASE
        WHEN SUBSTRING(REVERSE(s.spaced), 2, 1) = ' '
            THEN SUBSTRING(s.spaced, LEN(n.[first name]), 1)
    END AS MiddleName,
    n.[Last Name]
FROM nametable AS n
CROSS APPLY (VALUES (REPLACE(n.[first name], '_', ' '))) AS s (spaced)

Thanks to the sample data provided by @HABO, you can try this too:
-- Step 1: make '_' the only separator (spaces become '_'; a '_' is appended
--         when the name has none, so a separator always exists).
-- Step 2: locate the first separator.
-- Step 3: cut the name at that position and strip the separators.
;WITH Underscored AS
(
    SELECT Name,
           REPLACE(CASE WHEN Name LIKE N'%[_]%' THEN Name ELSE Name + '_' END, ' ', '_') AS UnderscoredName
    FROM #Samples
),
Located AS
(
    SELECT Name,
           UnderscoredName,
           CHARINDEX('_', UnderscoredName) AS UnderscoreIndex
    FROM Underscored
),
Parts AS
(
    SELECT Name,
           REPLACE(SUBSTRING(UnderscoredName, 1, UnderscoreIndex), '_', '') AS FirstName,
           REPLACE(SUBSTRING(UnderscoredName, UnderscoreIndex, LEN(UnderscoredName) - UnderscoreIndex + 1), '_', '') AS MiddleName
    FROM Located
)
SELECT Name,
       FirstName,
       NULLIF(MiddleName, '') AS MiddleName   -- an empty middle name is reported as NULL
FROM Parts;

Related

I'm having trouble separating the first name from the middle name column in SQL Server 2017

I created this query to separate the first name, middle name and last name from the investor_name column of a table called investornames. Note that the investor names are in Arabic and their middle names are more than 3 words long. It works, but the first name is also being included in the middle name, as you can see in the image below.
This is the query I wrote:
-- Attempts to split investor_name into first, middle and last name.
SELECT
-- FirstName: the text that starts right after the first ', ' and runs up to the next space (or to the end of the string when no later space exists).
SUBSTRING(investor_name, CHARINDEX(', ', investor_name) + 2, CASE WHEN CHARINDEX(' ', investor_name, CHARINDEX(', ', investor_name) + 2) = 0 THEN LEN(investor_name) + 1 ELSE CHARINDEX(' ', investor_name, CHARINDEX(', ', investor_name) + 2) END - CHARINDEX(', ', investor_name) - 2)AS FirstName,
-- MiddleName: investor_name with every occurrence of its first word (text before the first space) and of its last word (text after the last space) replaced by '', then trimmed.
RTRIM(LTRIM(REPLACE(REPLACE(investor_name,SUBSTRING(investor_name , 1, CHARINDEX(' ', investor_name) -1),''),REVERSE( LEFT( REVERSE(investor_name), CHARINDEX(' ', REVERSE(investor_name))-1 ) ),''))) AS MiddleName,
-- LastName: the rightmost characters starting at the last space (the space itself is included).
RIGHT(investor_name, CHARINDEX(' ', REVERSE(investor_name))) AS LastName
FROM
investornames
If you need any data to try it please let me know.
You may try this. I treat the first word as the FirstName, the second word as the MiddleName, and the remaining words as the LastName.
For the case of arabic names. Software will not automatically detect that for which name calculation start from the front and for which it is taken in reverse. So I guess you need to maintain a flag for same.
In case of arabic name in the portion of cte use reverse to arrange them in left to right order instead of right to left order. And at the end use reverse function again to convert them into their original state.
Hope I am clear about what I am explaining. Sample code is following :-
-- Sample names with one to four words.
; with cte as (
select 'Deepak kumar singh' as names
union
select 'Deep'
union
select 'deep kumar'
union
select 'Deepak kumar singh chandel')
SELECT
-- Spaces are turned into dots so ParseName can pick a word by number.
-- The string is reversed before ParseName and the picked part reversed back,
-- so part 1 is the first word (ParseName numbers parts from the right - see its documentation).
Reverse(ParseName(Replace(Reverse(names), ' ', '.'), 1)) As [FirstName]
-- Part 2 is the second word; NULL when the name has only one word.
, Reverse(ParseName(Replace(Reverse(names), ' ', '.'), 2)) As [MiddleName]
-- LastName is produced only when a third word exists: it is a substring of names
-- whose start and length are computed from the position and length of the second word.
, case when len( Reverse(ParseName(Replace(Reverse(names), ' ', '.'), 3)))>0 then substring ( names ,
charindex ( Reverse(ParseName(Replace(Reverse(names), ' ', '.'), 2)) + ' ', names) + len(Reverse(ParseName(Replace(Reverse(names), ' ', '.'), 2)) + ' ') + 1
, len(names) - charindex ( Reverse(ParseName(Replace(Reverse(names), ' ', '.'), 2)) + ' ', names) + len(Reverse(ParseName(Replace(Reverse(names), ' ', '.'), 2)) + ' ')) else null end
As [LastName]
FROM (Select names from cte ) As [x]
Result of above query is:
FirstName MiddleName LastName
Deep NULL NULL
deep kumar NULL
Deepak kumar singh
deepak kumar singh chandel
Edit
Updated this ans check this
-- Arabic sample names split into first / middle / last with ParseName.
-- Every literal carries the N prefix so it is an nvarchar value; without it the
-- Arabic characters are converted to the database's varchar code page and can
-- come back as '?' characters.
; with cte as (
    select N' شركة عبدالمحسن عبدالعزيزالبابطين ' as names
    union
    select N'شركة'
    union
    select N'عبدالمحسن عبدالعزيز'
    union
    select N'البابطين')
-- Trim surrounding spaces so they are not turned into empty dot-separated parts.
, ct as (
    select RTRIM(LTRIM(names)) as Names from cte )
SELECT
    -- Spaces become dots; the string is reversed before ParseName and the picked
    -- part reversed back, so part 1 is the first word and part 2 the second.
    Reverse(ParseName(Replace(Reverse(names), N' ', N'.'), 1)) As [FirstName]
    , Reverse(ParseName(Replace(Reverse(names), N' ', N'.'), 2)) As [MiddleName]
    -- LastName is produced only when a third word exists; it is cut out of names
    -- relative to the position and length of the second word.
    , case when len( Reverse(ParseName(Replace(Reverse(names), N' ', N'.'), 3)))>0 then substring ( names ,
        charindex ( Reverse(ParseName(Replace(Reverse(names), N' ', N'.'), 2)) + N' ', names) + len(Reverse(ParseName(Replace(Reverse(names), N' ', N'.'), 2)) + N' ') + 1
        , len(names) - charindex ( Reverse(ParseName(Replace(Reverse(names), N' ', N'.'), 2)) + N' ', names) + len(Reverse(ParseName(Replace(Reverse(names), N' ', N'.'), 2)) + N' ')) else null end
      As [LastName]
FROM (Select names from ct ) As [x]
Result
FirstName MiddleName LastName
???? ????????? ?????????????????
???? NULL NULL
???????? NULL NULL
????????? ????????? NULL
Here I am expecting on place of ? you'll get your result. BTW I've updated my query, have you tried this one. Just give one more try.

Extract forename if the character is more then 2 letter

I have to get the forename from the c.forename if c.known_as column is null or blank.
This i achieved with case when statement using
CASE
WHEN IND.KNOWN_AS IS NULL OR ind.KNOWN_AS=''
THEN ind.FORENAMES
ELSE ind.KNOWN_AS
END AS 'Known As'
My issue is in the forename column i have name like Jhon Smith where i would like to extract only John, below is an example what i want to achieve
Desire output c.forename
John Mr John
Jhon Jhon Smith
blank Jo
blank J
So , basically it will only take forname skipping 'Mr', 2nd it should take only forename which has more than 2 character.
My current query is:
-- For members without a KNOWN_AS value, FORENAMES2 is the first word of
-- FORENAMES, provided the first space sits at position 3 or later.
SELECT
    ind.FORENAMES,
    ind.KNOWN_AS,
    CASE
        WHEN COALESCE(known_as, '') = ''
             AND CHARINDEX(' ', forenames) > 2
            THEN LEFT(forenames, CHARINDEX(' ', forenames) - 1)
    END AS FORENAMES2,
    output
FROM individual AS ind
INNER JOIN member AS m
    ON m.individual_ref = ind.individual_ref
WHERE m.member_status = 33
  AND m.MEMBERSHIP_NO IN ('001','002','003','004','005','006','007')
You could use following case when statement to verify your conditions:
For SQL Server:
case when (c.known_as is null or c.known_as = '' )
and charindex(' ', c.forename) > 3 then substring(c.forename, 1, charindex(' ', c.forename) - 1) end
For MySQL:
case when (c.known_as is null or c.known_as = '' )
and locate(' ', c.forename) > 3 then substring(c.forename, 1, locate(' ', c.forename) - 1) end
A little explanation: if the first name must be longer than 2 characters, the first space must occur at index 4 or later. That is what the condition checks: locate(' ', c.forename) > 3 in MySQL, or charindex(' ', c.forename) > 3 in SQL Server.
NOTE
You have to first strip down all occurences of Mr, Mrs, Ms in c.forename column, like this (syntax for MySQL and SQL Server):
replace(replace(replace(c.forename, 'Mrs ', ''), 'Mr ', ''), 'Ms ', '')
You have to include it in your query like this:
-- FORENAMES2 is the first forename, with the titles 'Mrs ', 'Mr ' and 'Ms '
-- removed, for members that have no KNOWN_AS value. It is NULL when KNOWN_AS is
-- set or when the first forename has 2 characters or fewer.
SELECT
    src.FORENAMES,
    src.KNOWN_AS,
    CASE
        -- A space is appended so a single-word forename ('John' after 'Mr ' is
        -- removed) still ends in a separator. A separator position above 3
        -- means the first word has at least 3 characters.
        WHEN (src.KNOWN_AS IS NULL OR src.KNOWN_AS = '')
             AND CHARINDEX(' ', src.stripped_forenames + ' ') > 3
            THEN SUBSTRING(src.stripped_forenames, 1, CHARINDEX(' ', src.stripped_forenames + ' ') - 1)
    END AS FORENAMES2,
    src.output
FROM (
    SELECT
        ind.FORENAMES,
        ind.KNOWN_AS,
        -- 'Mrs ' is removed before 'Mr ' so each title is matched whole.
        REPLACE(REPLACE(REPLACE(ind.FORENAMES, 'Mrs ', ''), 'Mr ', ''), 'Ms ', '') AS stripped_forenames,
        output
    FROM individual ind
    JOIN member m ON m.individual_ref = ind.individual_ref
    WHERE m.member_status = 33
      AND m.MEMBERSHIP_NO IN ('001','002','003','004','005','006','007')
) AS src   -- a derived table must have an alias
Try this:
-- Sample names held in a table variable. Table variables must be named with a
-- leading @; "DECLARE #DataSource TABLE" does not parse.
DECLARE @DataSource TABLE
(
    [name] VARCHAR(32)
);

INSERT INTO @DataSource ([name])
VALUES (' Mr John ')
      ,('Jhon Smith')
      ,(' Jo ')
      ,(' J ');

-- Trim each name and keep a reversed copy, so the last word can be found by
-- looking for the first space of the reversed string.
WITH SanitizeDataSoruce ([name], [name_reversed]) AS
(
    SELECT LTRIM(RTRIM([name]))
          ,REVERSE(LTRIM(RTRIM([name])))
    FROM @DataSource
)
SELECT [name]
      ,CASE
           -- Names that contain a space after trimming yield the text after
           -- their last space. SUBSTRING with start 0 and length n returns the
           -- first n - 1 characters, which leaves the space itself out.
           WHEN CHARINDEX(' ', [name]) > 1 THEN REVERSE(SUBSTRING([name_reversed], 0, CHARINDEX(' ', [name_reversed])))
           ELSE ''
       END
FROM SanitizeDataSoruce;

Split Strings into columns in SQL Server

I have a name field in Students table which is a comma separated string in format "LastName, FirstName, Middle Name".While doing a select statement in SQL query I need to break this up into separate fields.How can I achieve this in SQL?.Some times Middle intial won't be available.
SUBSTRING(Name,CHARINDEX(',',Name,1)+2,LEN(Name)) AS FirstName,
SUBSTRING(Name,1,CHARINDEX(',',Name,1)-1) AS LastName,
Above code works fine when there is no Middle name.
This should give you what you need:
-- Sample data in a table variable. The name must start with @;
-- "declare #tmp table" is not valid T-SQL.
declare @tmp table (fullname varchar(100));
insert into @tmp (fullname) values ('James, Billy, L'), ('John, Snow');

-- The inner query cuts fullname at its first comma into [Last Name] and
-- [Remainder]; the outer query cuts [Remainder] at its own first comma.
select
    fullname
    , [Last Name]
    -- First name: text before the next comma, or the whole remainder when there is none.
    , case
        when charindex(',', Remainder, 0) > 0
            then ltrim(substring(Remainder, 0, charindex(',', Remainder, 0)))
        else ltrim(Remainder)
      end [First Name]
    -- Middle name: text after that comma; NULL when the remainder has no comma.
    , case
        when charindex(',', Remainder, 0) = 0
            then NULL
        else ltrim(substring(Remainder, charindex(',', Remainder, 0) + 1, len(Remainder)))
      end [Middle Name]
from
    (select
        fullname
        -- substring with start 0 and length n returns the first n - 1 characters,
        -- i.e. the text before the first comma.
        , substring(fullname, 0, charindex(',', fullname, 0)) [Last Name]
        , substring(fullname, charindex(',', fullname, 0) + 1, len(fullname)) [Remainder]
    from @tmp) result;
First, count the occurrences of the comma (,) in the string. Then use a CASE expression on that count: if there are 2 commas, we can assume that a middle name is present; if there is 1, there are only a first name and a last name. Then extract the parts with combinations of the LEFT, RIGHT, SUBSTRING and CHARINDEX string functions.
Query
-- Splits "last, first[, middle]" values according to how many commas they hold.
select
    c.name,
    -- Text before the first comma.
    left(c.name, charindex(',', c.name) - 1) as last_name,
    case
        -- Two commas: the text between the first and the last comma.
        when c.comma_num = 2 then
            substring(
                c.name,
                charindex(',', c.name) + 1,
                len(c.name) - (charindex(',', c.name) + 1) - charindex(',', reverse(c.name)) + 1
            )
        -- One comma: the text after it.
        when c.comma_num = 1 then
            right(c.name, charindex(',', reverse(c.name)) - 1)
    end as first_name,
    case
        -- Only present with two commas: the text after the last comma.
        when c.comma_num = 2 then
            right(c.name, charindex(',', reverse(c.name)) - 1)
    end as middle_name
from (
    -- The number of commas is the length lost when the commas are removed.
    select
        src.name,
        len(src.name) - len(replace(src.name, ',', '')) as comma_num
    from [your_table_name] as src
) as c;
Find demo here
Use a CTE with the SUBSTRING and CHARINDEX functions
-- Input to split. Local variables must be named with a leading @;
-- "DECLARE #Name VARCHAR(100)" does not parse.
DECLARE @Name VARCHAR(100) = 'James, Billy, L'
-- Alternative input to try: DECLARE @Name VARCHAR(100) = 'James, '', L'

-- Recursive CTE: each level peels the text before the next comma off RemainStr
-- and carries the rest forward, until nothing remains.
;WITH _CTE ( SplitedNames ,RemainStr) AS
(
    -- Anchor: the part before the first comma, and everything after it.
    SELECT SUBSTRING(@Name,0,CHARINDEX(',',@Name)),
           SUBSTRING(@Name,CHARINDEX(',',@Name)+1,LEN(@Name))
    UNION ALL
    -- Recursive step: when no comma is left, the remainder is the final part.
    SELECT CASE WHEN CHARINDEX(',',RemainStr) = 0 THEN RemainStr ELSE
               SUBSTRING(RemainStr,0,CHARINDEX(',',RemainStr)) END,
           CASE WHEN CHARINDEX(',',RemainStr) = 0 THEN '' ELSE
               SUBSTRING(RemainStr,CHARINDEX(',',RemainStr)+1,LEN(RemainStr))
           END
    FROM _CTE
    WHERE RemainStr <> ''
)
SELECT SplitedNames FROM _CTE

How to find second value inside a column

How do I list the names of all band members with the same last name?
The column has values like this
band_NAME
-------------------
Carla Thomas
Stephen E. Rice
Cynthia P. Tree
Richard Anthony Paul
Ann Frances Smith
Lorace Black
Timothy Adam Paul
I know we would have to use instr and substr. I just don't get how we would determine the position.
I know the basic format is going to be like
SELECT band_NAME
FROM TABLE
where substr(band_name, ?, instr( ) IN
(select substr(band_name, ?, instr( )-1)
from table
group by SUBSTR(band_NAME , ?, INSTR( )-1 )
HAVING COUNT(* ) > 1 );
But what goes in the question marks and inside the instr?
Would appreciate any help on this!
I'm assuming that your delimiter between first and last name is a single space. More spaces in the string are a part of last name. Thus, you probably want to search for the first space character.
Return position of the first occurence of substring with instr(str, substr).
Then, use substring(str, pos) to return the substring starting at a given position (feed by instr function).
-- Returns band_name from its first space onward (the space itself is included).
SELECT substring(band_name, instr(band_name, ' '))
FROM yourtable
Try this:
-- Lists every band member whose last word (taken as the last name) is shared
-- with at least one other, differently named member.
-- EXISTS replaces the original LEFT JOIN whose WHERE clause on t2 made it behave
-- as an inner join; a member who matches several others is now returned once
-- instead of once per match.
-- TABLE is the question's placeholder for the real table name.
SELECT t1.band_NAME
FROM TABLE t1
WHERE EXISTS (
    SELECT 1
    FROM TABLE t2
    WHERE SUBSTRING_INDEX(t2.band_name, ' ', -1) = SUBSTRING_INDEX(t1.band_name, ' ', -1)
      AND t2.band_name <> t1.band_name
)
And this like your pseudocode MySQL:
-- Lists band members whose last word (taken as the last name) occurs more than once.
-- IN is used instead of FIND_IN_SET: FIND_IN_SET expects one comma-separated
-- string as its second argument, while the subquery returns one row per
-- repeated last name.
-- TABLE is the question's placeholder for the real table name.
SELECT band_NAME
FROM TABLE
WHERE SUBSTRING_INDEX(band_name, ' ', -1) IN (
    SELECT SUBSTRING_INDEX(band_name, ' ', -1)
    FROM TABLE
    GROUP BY SUBSTRING_INDEX(band_name, ' ', -1)
    HAVING COUNT(*) > 1
)
SQL Server
-- Lists band members whose last name is shared by more than one member.
-- The last name is the text after the LAST space. A space is prepended before
-- reversing so single-word names are handled too, and RTRIM keeps trailing
-- spaces from being mistaken for the separator.
-- The original put "AS [Last Name]" inside WHERE, grouped by a column alias
-- (neither is valid in SQL Server), had an unmatched ')' and compared
-- everything after the FIRST space.
;WITH member_last_name AS
(
    SELECT
        band_NAME,
        RIGHT(RTRIM(band_NAME), CHARINDEX(' ', REVERSE(' ' + RTRIM(band_NAME))) - 1) AS [Last Name]
    FROM [TABLE]
)
SELECT band_NAME
FROM member_last_name
WHERE [Last Name] IN
(
    SELECT [Last Name]
    FROM member_last_name
    GROUP BY [Last Name]
    HAVING COUNT(*) > 1
)
Additionally, I think you can benefit from STRING_SPLIT in some way
Try This
-- Numbers the members within each last name, then returns every member whose
-- last name has a second row.
-- lastname is the text after the LAST space (a space is prepended before
-- reversing so single-word names work). The original took everything from the
-- FIRST space, so 'Richard Anthony Paul' and 'Timothy Adam Paul' never matched.
;with cte as
(
    select
        b.band_name,
        ROW_NUMBER() over (partition by ln.lastname order by b.band_name) as cnt,
        ln.lastname
    from your_table as b
    cross apply (
        values (RIGHT(RTRIM(b.band_name), CHARINDEX(' ', REVERSE(' ' + RTRIM(b.band_name))) - 1))
    ) as ln (lastname)
)
select band_name
from cte
where lastname in (select lastname from cte where cnt > 1)
okay,
The best solution is to change your schema and store last name in a separate column.
In the mean time you could get the last name like this,
-- Returns each band_NAME with its last word as LastName.
SELECT
    [band_NAME],
    CASE
        WHEN CHARINDEX(' ', [band_NAME]) > 0
            -- CHARINDEX on the reversed name is the length of the last word
            -- plus the space before it; subtract 1 so the space is not kept.
            THEN RIGHT([band_NAME], CHARINDEX(' ', REVERSE([band_NAME])) - 1)
        -- No space: the whole value is the last name.
        ELSE [band_NAME]
    END [LastName]
FROM
    [TABLE]
You could then group them like this
-- Counts the band members per last name.
SELECT
    [LastName],
    COUNT(*) AS [MemberCount]
FROM
(
    SELECT
        [band_NAME],
        CASE
            WHEN CHARINDEX(' ', [band_NAME]) > 0
                -- Subtract 1 so the space before the last word is not kept;
                -- otherwise ' Paul' and a single-word 'Paul' form separate groups.
                THEN RIGHT([band_NAME], CHARINDEX(' ', REVERSE([band_NAME])) - 1)
            ELSE [band_NAME]
        END [LastName]
    FROM
        [TABLE]
) [TABLEWithLastName]
GROUP BY
    [LastName];

How to split a single column values to multiple column values?

I have a problem splitting single column values to multiple column values.
For Example:
Name
------------
abcd efgh
ijk lmn opq
asd j. asdjja
asb (asdfas) asd
asd
and I need the output something like this:
first_name last_name
----------------------------------
abcd efgh
ijk opq
asd asdjja
asb asd
asd null
The middle name can be omitted (no need for a middle name) The columns are already created and need to insert the data from that single Name column.
Your approach won't deal with lot of names correctly but...
-- Column 1: the first word of name (the whole value when it has no space).
-- Column 2: the last word of name, or NULL when it has no space.
SELECT
    CASE
        WHEN CHARINDEX(' ', name) > 0
            THEN LEFT(name, CHARINDEX(' ', name) - 1)
        ELSE name
    END,
    CASE
        WHEN CHARINDEX(' ', name) > 0
            THEN RIGHT(name, CHARINDEX(' ', REVERSE(name)) - 1)
    END
FROM YourTable
An alternative to Martin's
-- Column 1: text before the first space; a space is appended for the search so a
-- name without spaces is returned whole.
select LEFT(name, CHARINDEX(' ', name + ' ') -1),
-- Column 2: STUFF deletes the leading Len(Name) + 1 - CHARINDEX(' ', Reverse(name))
-- characters, i.e. everything up to and including the last space when the name
-- contains one and has no trailing spaces.
STUFF(name, 1, Len(Name) +1- CHARINDEX(' ',Reverse(name)), '')
from somenames
Sample table
-- Sample table covering two-word, three-word, single-word, empty and NULL names.
CREATE TABLE somenames (Name varchar(100))
INSERT INTO somenames (Name)
VALUES
    ('abcd efgh'),
    ('ijk lmn opq'),
    ('asd j. asdjja'),
    ('asb (asdfas) asd'),
    ('asd'),
    (''),
    (NULL)
-- Splits Name on spaces by rewriting it as XML: every space becomes a
-- </name><name> boundary, so each word ends up in its own <name> element.
;WITH Split_Names (Name, xmlname)
AS
(
SELECT
Name,
CONVERT(XML,'<Names><name>'
+ REPLACE(Name,' ', '</name><name>') + '</name></Names>') AS xmlname
FROM somenames
)
SELECT
-- name[1] is the first word.
xmlname.value('/Names[1]/name[1]','varchar(100)') AS first_name,
-- name[2] is the second word: for a three-word name this is the middle word,
-- not the final one.
xmlname.value('/Names[1]/name[2]','varchar(100)') AS last_name
FROM Split_Names
and also check the link below for reference
http://jahaines.blogspot.in/2009/06/converting-delimited-string-of-values.html
What you need is a split user-defined function. With that, the solution looks like
-- Splits every Name on spaces with dbo.udf_Split and numbers the words in order
-- of position. The first word is the first name; the last name is the third
-- word when there is one, otherwise the second word.
-- The join conditions originally referenced aliases S1/S2 that are not defined
-- and compared table aliases instead of their Value columns.
-- [Table] is the placeholder for the real table name.
With SplitValues As
(
    Select T.Name, Z.Position, Z.Value
        , Row_Number() Over ( Partition By T.Name Order By Z.Position ) As Num
    From [Table] As T
        Cross Apply dbo.udf_Split( T.Name, ' ' ) As Z
)
Select FirstName.Name
    , FirstName.Value
    , Coalesce( ThirdName.Value, SecondName.Value ) As LastName
From SplitValues As FirstName
    Left Join SplitValues As SecondName
        On SecondName.Name = FirstName.Name
        And SecondName.Num = 2
    Left Join SplitValues As ThirdName
        On ThirdName.Name = FirstName.Name
        And ThirdName.Num = 3
Where FirstName.Num = 1
Here's a sample split function:
-- Inline table-valued function that splits @DelimitedList on @Delimiter.
-- Returns one row per item: Position (where the item starts inside the
-- delimiter-wrapped list) and Value (the item text).
--
-- Parameters are named with a leading @ (a # prefix is not valid for
-- parameters). The delimiter length is taken from DataLength / 2 instead of
-- Len, because Len ignores trailing spaces and reports 0 for a space delimiter.
Create Function [dbo].[udf_Split]
(
    @DelimitedList nvarchar(max)
    , @Delimiter nvarchar(2) = ','
)
RETURNS TABLE
AS
RETURN
(
    -- Wrap the list so it both starts and ends with the delimiter.
    With CorrectedList As
    (
        Select Case When Left(@DelimitedList, DataLength(@Delimiter) / 2) <> @Delimiter Then @Delimiter Else '' End
            + @DelimitedList
            + Case When Right(@DelimitedList, DataLength(@Delimiter) / 2) <> @Delimiter Then @Delimiter Else '' End
            As List
            , DataLength(@Delimiter) / 2 As DelimiterLen   -- nvarchar: 2 bytes per character
    )
    -- Candidate character positions 1..n, n being the character count of the input.
    , Numbers As
    (
        Select TOP( Coalesce(DataLength(@DelimitedList)/2,0) ) Row_Number() Over ( Order By c1.object_id ) As Value
        From sys.columns As c1
            Cross Join sys.columns As c2
    )
    -- For every position that holds a delimiter, emit the text between that
    -- delimiter and the next one.
    Select CharIndex(@Delimiter, CL.list, N.Value) + CL.DelimiterLen As Position
        , Substring (
            CL.List
            , CharIndex(@Delimiter, CL.list, N.Value) + CL.DelimiterLen
            , CharIndex(@Delimiter, CL.list, N.Value + 1)
                - ( CharIndex(@Delimiter, CL.list, N.Value) + CL.DelimiterLen )
            ) As Value
    From CorrectedList As CL
        Cross Join Numbers As N
    Where N.Value <= DataLength(CL.List) / 2
        And Substring(CL.List, N.Value, CL.DelimiterLen) = @Delimiter
)
-- currency: the first space-separated word of rent.
-- rent:     the third space-separated word of rent.
SELECT
    SUBSTRING_INDEX(rent, ' ', 1) AS currency,
    SUBSTRING_INDEX(SUBSTRING_INDEX(rent, ' ', 3), ' ', -1) AS rent
FROM tolets
I used it recently:
-- Col1: text before the first space (the whole name when it has no space).
-- Col2: text after the first space; NULL when the name has no space.
-- The CASE guards matter: without them a name with no space makes CHARINDEX
-- return 0, and SUBSTRING is then called with length -1, which raises an error.
select
    case
        when charindex(' ', name) > 0 then substring(name, 1, charindex(' ', name) - 1)
        else name
    end as Col1,
    case
        when charindex(' ', name) > 0 then substring(name, charindex(' ', name) + 1, len(name))
    end as Col2
from TableName
Here is how I did this on a SQLite database:
-- Firstname: text before the first space (the whole name when it has no space).
-- Lastname:  text after the first space; NULL when the name has no space.
-- String literals use single quotes: SQLite treats a double-quoted token as an
-- identifier first and only falls back to a string as a compatibility quirk.
-- The CASE guards keep a single-word name in Firstname instead of Lastname.
SELECT
    CASE
        WHEN INSTR(name, ' ') > 0 THEN SUBSTR(name, 1, INSTR(name, ' ') - 1)
        ELSE name
    END AS Firstname,
    CASE
        WHEN INSTR(name, ' ') > 0 THEN SUBSTR(name, INSTR(name, ' ') + 1)
    END AS Lastname
FROM YourTable;
Hope it helps.