How to get the first letter of each word in SQL - sql

I tried to run this query to get the initial letter of each word, and it worked for strings of 4 words, yet if the string has only two words, it duplicates the second word's initial.
-- Asker's attempt: concatenates the first character of up to four
-- space-separated words of column_name.
-- Why the second initial is repeated for short strings: charindex returns 0
-- when no further space exists, and the next nested search then starts at
-- 0 + 1, i.e. from the beginning of the string again.
select
substring(column_name, 1, 1) +
case
-- charindex(...) + 1 is at least 1 for any non-NULL value, so this test never fails
when 0 <> charindex(' ', column_name) + 1
then substring(column_name, charindex(' ',column_name) + 1, 1)
else ''
end +
case
-- position of the second space; 0 (so '') when the string has at most two words
when 0 <> charindex(' ', column_name, charindex(' ', column_name) + 1)
then substring(column_name, charindex(' ', column_name, charindex(' ', column_name) + 1) + 1, 1)
else ''
end +
case
-- position of the third space; when the second-space search returned 0 this
-- search restarts at position 1 and finds the first space again, so the
-- character after the first space is appended a second time
when 0 <> charindex(' ', column_name, charindex(' ', column_name, charindex(' ', column_name) + 1) + 1)
then substring(column_name, charindex(' ', column_name, charindex(' ', column_name, charindex(' ', column_name) + 1) + 1) + 1, 1)
else ''
end
from table_name

You didn't specify which RDBMS you are using. This should work in SQL Server:
-- Demo: build the initials of every space-separated word in column_name.
drop table if exists table_name;

create table table_name (
    column_name varchar(255)
);

insert into table_name (column_name)
values ('See Jane')
     , ('See Jane run')
     , ('See Jane run and jump over the lazy dog.');

-- For each row: split into words, take each word's first character, and glue
-- the characters back together ordered by where the word occurs in the text.
-- FOR XML PATH('') on its own entitizes '&', '<' and '>' (e.g. '&' -> '&amp;');
-- the TYPE directive plus .value() returns the plain characters instead.
select (
           select '' + t2.fc
           from (
               select left(str.value, 1) as fc
                      -- position of the first whole-word occurrence; repeated
                      -- words share a position but also share their initial
                    , charindex(' ' + str.value + ' ', ' ' + t.column_name + ' ') as idx
               from string_split(t.column_name, ' ') as str
           ) as t2
           order by t2.idx
           for xml path(''), type
       ).value('.', 'nvarchar(max)') as FirstChars
from table_name as t;
The idx column is used to order the output because string_split does not promise to return the results in any particular order. Thanks to Aaron Bertrand - https://dba.stackexchange.com/questions/207274/string-split-and-ordered-results

Given the use of charindex in your question, I'm assuming you are using SQL Server. The CTE generates a tall view of your data using the string_split function, with the first letter of each word on its own row. We then select from it, group by id, and apply the string_agg function to put the letters back into a single row.
Password guessing?
-- Sample data: one free-text comment per id.
CREATE TABLE my_data (
    id       integer,
    comments varchar(50)
);

INSERT INTO my_data (id, comments)
VALUES
    (1, 'Thank goodness its friday'),
    (2, 'I want 2 scoops of ice cream');

-- Show the rows that were just loaded.
SELECT id, comments
FROM my_data;
id
comments
1
Thank goodness its friday
2
I want 2 scoops of ice cream
-- One row per word: its first character plus the position of the word in the
-- original text. string_split gives no ordering guarantee and string_agg
-- without WITHIN GROUP concatenates in arbitrary order, so the position is
-- carried along and used to order the aggregation explicitly.
-- Limitation: words that compare equal under the column collation share one
-- position (on SQL Server 2022+ the string_split ordinal column avoids this).
with cte (id, first_char, word_pos) as (
    select d.id,
           substring(ss.value, 1, 1) as first_char,
           charindex(' ' + ss.value + ' ', ' ' + d.comments + ' ') as word_pos
    from my_data as d
    cross apply string_split(d.comments, ' ') as ss
    -- consecutive spaces yield empty tokens; skip them so no stray delimiters appear
    where ss.value <> ''
)
select t.id,
       string_agg(t.first_char, ',') within group (order by t.word_pos) as letters_delimited,
       string_agg(t.first_char, '') within group (order by t.word_pos) as letters_not_delimited
from cte as t
group by t.id;
id
letters_delimited
letters_not_delimited
1
T,g,i,f
Tgif
2
I,w,2,s,o,i,c
Iw2soic
fiddle here

You can extend your approach with recursion
-- Recursive walk over each string, one space at a time.
--   conc : initials collected so far
--   n    : the original string, carried unchanged
--   pos  : position of the space found most recently (0 = no more spaces)
WITH cte_name AS (
    -- Anchor: first character of the string and the position of the first space.
    SELECT
        CAST(LEFT(column_name, 1) AS nvarchar(max)) AS conc,
        column_name                                 AS n,
        CHARINDEX(' ', column_name)                 AS pos
    FROM table_name

    UNION ALL

    -- Step: append the character after the current space, then look for the next space.
    SELECT
        prev.conc + SUBSTRING(prev.n, prev.pos + 1, 1) AS conc,
        prev.n,
        CHARINDEX(' ', prev.n, prev.pos + 1)           AS pos
    FROM cte_name AS prev
    WHERE prev.pos > 0
)
-- Only the final step of each walk (no spaces left) holds the complete result.
SELECT conc, n, pos
FROM cte_name
WHERE pos = 0;

Related

How to query only first letters of name and surname in CONTACTS column in SQL Server

I was asked to query only first letters of name and surname from a column in SQL Server. And the rest should be "*" instead of letters
For example: Waldemar Fisar, should be queried like. W******* F****
Updated question:
I am getting this:
John Snow after query becomes J S
Lora White after query becomes L W
But need to get:
-John Snow should become J*** S***
-Jonathan Conan J******* C****
Lastly, both names and surnames are in the same column
-- Asker's attempt: returns only the two initials ("J S"), i.e. the first
-- character of [Primary Contact] and the character right after its first space.
SELECT
-- NOTE(review): unbracketed, this parses as column Personal aliased as info;
-- presumably [Personal info] was meant - confirm against the table definition
Personal info, SUBSTRING([Primary Contact], 1, 1) + ' ' +
SUBSTRING([Primary Contact], CHARINDEX(' ', [Primary Contact]) + 1, 1) AS CI
FROM
xx
You can write a function for that task like the example below:
-- Masks a value: keeps its first @n characters and replaces every remaining
-- character with @ch.
--   @text : value to mask
--   @ch   : masking character
--   @n    : number of leading characters to keep
-- Returns NULL when @text is NULL. Values with @n characters or fewer are
-- returned unmasked (a negative REPLICATE count would otherwise turn the
-- whole result into NULL).
create function dbo.hide_name(@text nvarchar(max), @ch nchar(1), @n int)
returns nvarchar(max)
as
begin
    return left(@text, @n)
         + replicate(
               -- a max-typed input keeps REPLICATE from truncating at 8,000 bytes
               cast(@ch as nvarchar(max)),
               case when len(@text) > @n then len(@text) - @n else 0 end
           );
end
go
-- Mask the given name and the family name separately (first letter kept,
-- the rest replaced by '*') and join the two with a single space.
SELECT
    dbo.hide_name(src.yourNameColumn, '*', 1)
    + ' '
    + dbo.hide_name(src.yourFamilyNameColumn, '*', 1)
FROM yourTableName AS src;
Not recommended, but someone might need
-- Inline variant without a function; table variables are named with '@'.
declare @Person table (
    name    nvarchar(max),
    surname nvarchar(max)
);

insert into @Person (name, surname)
values ('John', 'Snow'), ('Lora', 'White');

-- First letter of each part followed by one '*' per remaining character of
-- THAT part; empty parts contribute nothing, and the separating space is
-- only emitted when both parts are present.
select concat(
           iif(len(name) > 0, concat(left(name, 1), replicate('*', len(name) - 1)), ''),
           iif(len(name) > 0 and len(surname) > 0, ' ', ''),
           -- the star count must come from the surname, not from the name
           iif(len(surname) > 0, concat(left(surname, 1), replicate('*', len(surname) - 1)), '')
       ) as HiddenName
from @Person;
-- CI      : the two initials separated by a space (unchanged from the question).
-- CI_Star : each of the first two words reduced to its initial followed by '*'
--           for every remaining character, e.g. 'John Snow' -> 'J*** S***'.
-- m.sp is the position of the first space (0 when there is none) and m.ln the
-- length without trailing spaces. Every star count is clamped at 0 because
-- REPLICATE returns NULL for a negative count, which would make the whole
-- concatenation NULL for single-word values or values with a leading or
-- trailing space.
SELECT
    -- NOTE(review): parses as column Personal aliased as info; presumably
    -- [Personal info] was meant - confirm against the table definition
    Personal info,
    SUBSTRING([Primary Contact], 1, 1) + ' ' +
        SUBSTRING([Primary Contact], CHARINDEX(' ', [Primary Contact]) + 1, 1) AS CI,
    CASE
        WHEN m.sp = 0
        THEN SUBSTRING([Primary Contact], 1, 1)
             + REPLICATE('*', CASE WHEN m.ln > 1 THEN m.ln - 1 ELSE 0 END)
        ELSE SUBSTRING([Primary Contact], 1, 1)
             + REPLICATE('*', CASE WHEN m.sp > 2 THEN m.sp - 2 ELSE 0 END)
             + ' '
             + SUBSTRING([Primary Contact], m.sp + 1, 1)
             + REPLICATE('*', CASE WHEN m.ln - m.sp > 1 THEN m.ln - m.sp - 1 ELSE 0 END)
    END AS CI_Star
FROM
    xx
CROSS APPLY (
    VALUES (CHARINDEX(' ', [Primary Contact]), LEN([Primary Contact]))
) AS m (sp, ln)
A pure positional solution
DROP TABLE IF EXISTS #names;
GO

CREATE TABLE #names (thename NVARCHAR(50));

INSERT INTO #names (thename)
VALUES
    ('Alison Arnold'),
    ('Dorothy Jones'),
    ('Christopher Mackay'),
    ('Jason H Paterson'),
    ('Thomas Johnson'),
    ('Dave');

-- Split every name into its parts, mask each part (initial + '*' per remaining
-- character) and stitch the parts back together per name.
-- The grouping key has to be the full name: grouping by the part while
-- selecting the full name is rejected by SQL Server. STRING_AGG is given an
-- explicit order because neither STRING_SPLIT nor STRING_AGG guarantees one.
-- Note: rows holding exactly the same name collapse into one group.
SELECT
    parts.thename,
    STRING_AGG(parts.maskedsubname, ' ') WITHIN GROUP (ORDER BY parts.subname_pos) AS maskedname
FROM (
    SELECT
        n.thename,
        LEFT(s.value, 1) + REPLICATE('*', LEN(s.value) - 1) AS maskedsubname,
        -- position of the part inside the full name (whole-word match)
        CHARINDEX(' ' + s.value + ' ', ' ' + n.thename + ' ') AS subname_pos
    FROM #names AS n
    CROSS APPLY STRING_SPLIT(n.thename, ' ') AS s
    -- skip empty tokens produced by consecutive spaces
    WHERE s.value <> ''
) AS parts
GROUP BY parts.thename;

I'm having trouble separating the first name from the middle name column in SQL Server 2017

I created this syntax to separate first name, middle name and last name from a column called invertornames. Just to note that the investor names are in arabic and their middle names are more than 3 words. It worked fine but the first name is also being included in the middle name as you can see below in the image
This is the query I wrote:
-- Asker's attempt at splitting investor_name into three parts.
SELECT
-- FirstName: the text that starts two characters after the first ', ' and
-- runs up to the next space (or to the end of the string when there is none)
SUBSTRING(investor_name, CHARINDEX(', ', investor_name) + 2, CASE WHEN CHARINDEX(' ', investor_name, CHARINDEX(', ', investor_name) + 2) = 0 THEN LEN(investor_name) + 1 ELSE CHARINDEX(' ', investor_name, CHARINDEX(', ', investor_name) + 2) END - CHARINDEX(', ', investor_name) - 2)AS FirstName,
-- MiddleName: the full name with every occurrence of the text before the
-- first space and of the text after the last space removed, then trimmed
RTRIM(LTRIM(REPLACE(REPLACE(investor_name,SUBSTRING(investor_name , 1, CHARINDEX(' ', investor_name) -1),''),REVERSE( LEFT( REVERSE(investor_name), CHARINDEX(' ', REVERSE(investor_name))-1 ) ),''))) AS MiddleName,
-- LastName: the text after the last space, including that space itself
RIGHT(investor_name, CHARINDEX(' ', REVERSE(investor_name))) AS LastName
FROM
investornames
If you need any data to try it please let me know.
You may try this. It treats the first word as the FirstName, the second word as the MiddleName, and all remaining words as the LastName.
For Arabic names, the software cannot automatically detect for which names the parts should be counted from the front and for which they should be counted in reverse, so I guess you need to maintain a flag for that.
For an Arabic name, use REVERSE in the CTE to arrange the parts in left-to-right order instead of right-to-left order, and use REVERSE again at the end to convert them back to their original order.
I hope the explanation is clear. Sample code follows:
-- Demo: split a space-separated name into first word, second word and "the rest".
-- The expression Reverse(ParseName(Replace(Reverse(names), ' ', '.'), k))
-- reverses the string, turns spaces into dots, takes part k and reverses it
-- back; with the sample rows below this yields the k-th word counted from the left.
; with cte as (
select 'Deepak kumar singh' as names
union
select 'Deep'
union
select 'deep kumar'
union
select 'Deepak kumar singh chandel')
SELECT
Reverse(ParseName(Replace(Reverse(names), ' ', '.'), 1)) As [FirstName]
, Reverse(ParseName(Replace(Reverse(names), ' ', '.'), 2)) As [MiddleName]
-- LastName: only when a third word exists; everything that follows the first
-- occurrence of "<second word> " in names.
, case when len( Reverse(ParseName(Replace(Reverse(names), ' ', '.'), 3)))>0 then substring ( names ,
-- start = position of "<second word> " + len(...) + 1; len() does not count
-- the trailing space, so the extra + 1 is what steps over that space
charindex ( Reverse(ParseName(Replace(Reverse(names), ' ', '.'), 2)) + ' ', names) + len(Reverse(ParseName(Replace(Reverse(names), ' ', '.'), 2)) + ' ') + 1
-- length argument: an upper bound; substring simply stops at the end of names
, len(names) - charindex ( Reverse(ParseName(Replace(Reverse(names), ' ', '.'), 2)) + ' ', names) + len(Reverse(ParseName(Replace(Reverse(names), ' ', '.'), 2)) + ' ')) else null end
As [LastName]
FROM (Select names from cte ) As [x]
Result of above query is:
FirstName MiddleName LastName
Deep NULL NULL
deep kumar NULL
Deepak kumar singh
deepak kumar singh chandel
Edit
Updated this ans check this
-- Same split as above, applied to Arabic sample names.
-- The literals carry the N prefix: without it they are varchar literals, and
-- characters outside the database code page are stored as '?', which is
-- exactly the '????' seen in the result.
; with cte as (
    select N' شركة عبدالمحسن عبدالعزيزالبابطين ' as names
    union
    select N'شركة'
    union
    select N'عبدالمحسن عبدالعزيز'
    union
    select N'البابطين'
)
, ct as (
    -- strip leading/trailing spaces so they do not produce empty name parts
    select rtrim(ltrim(names)) as names
    from cte
)
select
    w.first_word  as [FirstName]
  , w.second_word as [MiddleName]
    -- LastName: only when a third word exists; everything that follows the
    -- first occurrence of "<second word> " in names. len() ignores the
    -- trailing space, so the extra + 1 steps over it; the length argument is
    -- an upper bound and substring stops at the end of the string.
  , case
        when len(w.third_word) > 0
        then substring(
                 x.names
               , charindex(w.second_word + ' ', x.names) + len(w.second_word + ' ') + 1
               , len(x.names) - charindex(w.second_word + ' ', x.names) + len(w.second_word + ' ')
             )
        else null
    end as [LastName]
from ct as x
-- reverse the string and turn spaces into dots so PARSENAME can pick the parts
cross apply (values (replace(reverse(x.names), ' ', '.'))) as d (dotted)
cross apply (
    values (
        reverse(parsename(d.dotted, 1))
      , reverse(parsename(d.dotted, 2))
      , reverse(parsename(d.dotted, 3))
    )
) as w (first_word, second_word, third_word);
Result
FirstName MiddleName LastName
???? ????????? ?????????????????
???? NULL NULL
???????? NULL NULL
????????? ????????? NULL
In place of the ? characters I expect you will get your actual values. By the way, I've updated my query; have you tried this version? Please give it one more try.

Trim extra white space within column

I have the following select that is converting a name from Lastname, Firstname format into Firstname Lastname format. It seems to be adding extra white space between the first name and the last name
-- Asker's attempt at turning "Lastname, Firstname" into "Firstname Lastname".
-- The comma position in the first operand is measured on a copy of NAME with
-- all spaces removed, but it is then used as an offset into the original NAME,
-- which still contains its spaces.
SELECT substring(D.NAME, charindex(',', replace(D.NAME, ' ', '')) + 1, len(D.NAME))
+ ' '
-- everything before the comma
+ left(D.NAME, charindex(',', D.NAME) -1) AS First_Last
FROM TEST_TABLE D
Here are a few examples of the output I'm getting now:
Johnnyyy Smithsonnn
Kimmey Test1
Denise Stuffing
Desired Format (single space between first and last name):
Johnnyyy Smithsonnn
Kimmey Test1
Denise Stuffing
I tend to like this technique. In this example we use a rare replacement pattern of †‡, but you can use <> and ><
Note: The outer ltrim(rtrim( ... )) is optional, I keep it as a "just in case".
Example
-- Collapse every run of spaces in [Name] into a single space:
--   1. each space becomes the marker pair '†‡'
--   2. every '‡†' (the seam between two adjacent pairs) is removed,
--      which leaves exactly one '†‡' per run of spaces
--   3. the remaining '†‡' is turned back into one space
-- The outer trim removes leading and trailing spaces.
SELECT
    NewValue = LTRIM(RTRIM(
        REPLACE(
            REPLACE(
                REPLACE([Name], ' ', '†‡'),
                '‡†', ''
            ),
            '†‡', ' '
        )
    ))
FROM YourTable
Returns
NewValue
Johnnyyy Smithsonnn
Kimmey Test1
Denise Stuffing
Maybe there are names with or without , or with or without spaces after the ,, or other inconsistencies.
Anyway you can use ltrim(rtrim()) before concatenating:
-- Swap the two parts of NAME around the comma (or, when there is no comma,
-- around the first space) and trim both parts before joining them with one
-- space. The LIKE guards make sure charindex(...) - 1 is never negative.
-- The statement needs its FROM clause, and every column reference is
-- qualified with the table alias.
select
    case
        when d.name like '%,%' then
            ltrim(rtrim(substring(d.name, charindex(',', d.name) + 1, len(d.name))))
            + ' ' +
            ltrim(rtrim(left(d.name, charindex(',', d.name) - 1)))
        when d.name like '% %' then
            ltrim(rtrim(substring(d.name, charindex(' ', d.name) + 1, len(d.name))))
            + ' ' +
            ltrim(rtrim(left(d.name, charindex(' ', d.name) - 1)))
        else ltrim(rtrim(d.name))
    end as First_Last
from TEST_TABLE d
Try:
-- Trim the text before and after the comma and join the two parts with one
-- space (the order of the parts is kept as in NAME).
-- * The comma is searched in NAME + ',' so a value without a comma is
--   returned whole instead of failing on LEFT(..., -1).
-- * The part after the comma is taken with SUBSTRING: RIGHT(NAME, LEN(NAME) - pos)
--   miscounts when NAME has trailing spaces, because LEN ignores them while
--   RIGHT counts from the physical end of the value.
select rtrim(
           ltrim(rtrim(left(D.NAME, charindex(',', D.NAME + ',') - 1)))
           + ' '
           + ltrim(rtrim(substring(D.NAME, charindex(',', D.NAME + ',') + 1, len(D.NAME))))
       )
from TEST_TABLE D
This worked on some test data I used below:
-- Three sample names in "part, part" form.
INSERT INTO #Test ([Name])
VALUES
    ('Johnnyyy, Smithsonnn'),
    ('Kimmey, Test1'),
    ('Denise, Stuffing');
Which gives the intended result:
Johnnyyy Smithsonnn
Kimmey Test1
Denise Stuffing
I think you intend to do the replace of spaces after extracting the name. So:
-- "Lastname, Firstname" -> "Firstname Lastname".
-- The first operand must be the text AFTER the comma (spaces stripped once it
-- has been extracted); LEFT(..., comma + 1) would return the last name again.
-- The comma is searched in NAME + ',' so it is optional: without a comma the
-- first operand is empty and LTRIM removes the leftover leading space.
SELECT LTRIM(
           REPLACE(SUBSTRING(D.NAME, CHARINDEX(',', D.NAME + ',') + 1, LEN(D.NAME)), ' ', '')
           + ' '
           + LEFT(D.NAME, CHARINDEX(',', D.NAME + ',') - 1)
       ) AS First_Last
FROM TEST_TABLE D;
This also adds a comma for the charindex() so the comma is optional.

Finding matching values in a field that are separated by spaces

In SQL Server I have a field that has delimited data (by space) in it.
E.g.
recid| Delimited data field
1| 1 2 3 4 5
2| 1 2 3 3 5
3| 1 1 1 1 1
I need to loop through all the records in the DB and interrogate the delimited data field and compare the third and fourth parts of data against each other and if they match, return the recid and the whole delimited field.
So from my example records 2 and 3 have matching data parts, so it would return:-
2|1 2 3 3 5
3|1 1 1 1 1
Because 3 3 matches, as does 1 1.
Thanks.
If it is always 1 digit and same format, you can try like following.
-- Compare the characters at positions 5 and 7 of [data]; with one-character
-- values separated by single spaces these are the third and fourth values.
SELECT *
FROM #table AS t
WHERE SUBSTRING(t.[data], 5, 1) = SUBSTRING(t.[data], 7, 1)
If not (Numbers are not single digit), you can try like following.
-- Split [data] on spaces via XML, number the pieces per record and return the
-- records whose piece 3 equals piece 4.
;WITH xml_rows AS (
    -- wrap every space-separated value in <X>...</X> so it can be shredded as XML
    SELECT
        F.*,
        CAST('<X>' + REPLACE(F.data, ' ', '</X><X>') + '</X>' AS XML) AS xmlfilter
    FROM #table AS F
),
cte AS (
    SELECT
        F1.recid,
        F1.[data],
        O.splitdata,
        -- ORDER BY (SELECT 1) imposes no real ordering; the numbering presumably
        -- follows the order in which nodes() returns the pieces - not guaranteed
        ROW_NUMBER() OVER (PARTITION BY F1.recid ORDER BY (SELECT 1)) AS rn
    FROM xml_rows AS F1
    CROSS APPLY (
        SELECT fdata.d.value('.', 'varchar(50)') AS splitdata
        FROM F1.xmlfilter.nodes('X') AS fdata(d)
    ) AS O
)
SELECT
    third.recid,
    third.data
FROM cte AS third
INNER JOIN cte AS fourth
    ON  third.recid = fourth.recid
    AND third.rn = 3
    AND fourth.rn = 4
    AND third.splitdata = fourth.splitdata
GROUP BY
    third.recid,
    third.data
Online Demo
Need to split the data, give the row number and then compare.
Schema:
-- Sample rows, materialised into the temp table #TAB.
SELECT A.recid, A.Delimited_data_field
INTO #TAB
FROM (
    VALUES
        (1, '1 2 3 4 5'),
        (2, '1 2 3 3 5'),
        (3, '1 1 1 1 1')
) AS A (recid, Delimited_data_field)
Solution :
-- Split Delimited_data_field on spaces via XML, number the values per record
-- and return the records whose third and fourth values are equal.
;WITH tokenised AS (
    -- wrap every space-separated value in <M>...</M> so it can be shredded as XML
    SELECT
        recid,
        Delimited_data_field,
        CAST('<M>' + REPLACE(Delimited_data_field, ' ', '</M><M>') + '</M>' AS XML) AS DATA
    FROM #TAB
),
CTE AS (
    SELECT
        A.recid,
        A.Delimited_data_field,
        -- ORDER BY (SELECT 1) imposes no real ordering; the numbering presumably
        -- follows the order in which nodes() returns the values - not guaranteed
        ROW_NUMBER() OVER (PARTITION BY A.recid ORDER BY (SELECT 1)) AS RNO,
        -- each value is read as an integer
        splt.x.value('.', 'INT') AS VAL
    FROM tokenised AS A
    CROSS APPLY A.DATA.nodes('/M') AS splt(x)
)
SELECT
    third.recid,
    fourth.Delimited_data_field
FROM CTE AS third
INNER JOIN CTE AS fourth
    ON  third.recid = fourth.recid
    AND third.RNO = 3
    AND fourth.RNO = 4
    AND third.VAL = fourth.VAL
Result :
recid Delimited_data_field
2 1 2 3 3 5
3 1 1 1 1 1
Your question has two parts, find nth split and then compare. Your first approach should be to break the problem until you find built in functions that can do the job.
here is one method inner query return result after split and outer compares:
-- Return the rows whose third and fourth space-separated values are equal.
-- The space positions are located one after the other, so values of any
-- length work (fixed +2 / +3 offsets are only right for one-character values).
--   s1..s3 : positions of the 1st, 2nd and 3rd space (0 = not present)
--   s4     : position of the 4th space, or one past the end when the fourth
--            value is the last one (hence the search in Delimited + ' ')
-- Each lookup is guarded by CASE so a missing space never wraps the next
-- search back to the start of the string, and SUBSTRING never receives a
-- negative length. Rows with fewer than four values are not returned.
SELECT t.recid, t.Delimited
FROM YourTable AS t
CROSS APPLY (VALUES (CHARINDEX(' ', t.Delimited))) AS s1 (pos)
CROSS APPLY (VALUES (CASE WHEN s1.pos > 0 THEN CHARINDEX(' ', t.Delimited, s1.pos + 1) ELSE 0 END)) AS s2 (pos)
CROSS APPLY (VALUES (CASE WHEN s2.pos > 0 THEN CHARINDEX(' ', t.Delimited, s2.pos + 1) ELSE 0 END)) AS s3 (pos)
CROSS APPLY (VALUES (CASE WHEN s3.pos > 0 THEN CHARINDEX(' ', t.Delimited + ' ', s3.pos + 1) ELSE 0 END)) AS s4 (pos)
WHERE s3.pos > 0
  AND CASE WHEN s3.pos > 0 THEN SUBSTRING(t.Delimited, s2.pos + 1, s3.pos - s2.pos - 1) END
    = CASE WHEN s3.pos > 0 THEN SUBSTRING(t.Delimited, s3.pos + 1, s4.pos - s3.pos - 1) END
See simple substring and charindex can do the job.
Here is one more solution to that.
I tweaked the split function from this link (T-SQL: Opposite to string concatenation - how to split string into multiple records) a bit to make it useful in your scenario.
Here is the function.
-- Returns the @pos-th (1-based) piece of @s, split on @sep, as an integer.
--   @sep : single-character separator
--   @s   : delimited string (up to 512 characters)
--   @pos : 1-based index of the piece to return
-- Returns NULL when @s has fewer than @pos pieces. The piece is read into a
-- varchar(10) and converted to INT on return.
-- Parameters and local variables are written with '@'; '#' is only valid for
-- temporary object names.
CREATE FUNCTION dbo.SplitAndGetNumberAt (@sep char(1), @s varchar(512), @pos int)
RETURNS INT
AS
BEGIN
    DECLARE @val AS varchar(10);

    -- Pieces: one row per piece with its number (pn), the position of its
    -- first character (start) and the position of the separator that ends it
    -- (stop, 0 for the last piece).
    WITH Pieces (pn, start, stop) AS (
        SELECT 1, 1, CHARINDEX(@sep, @s)
        UNION ALL
        SELECT pn + 1, stop + 1, CHARINDEX(@sep, @s, stop + 1)
        FROM Pieces
        WHERE stop > 0
    )
    -- the last piece has no closing separator: take everything that is left
    SELECT @val = SUBSTRING(@s, start, CASE WHEN stop > 0 THEN stop - start ELSE 512 END)
    FROM Pieces
    WHERE pn = @pos;

    RETURN @val;
END
Now you can use this function to get 3rd and 4th position of numbers and compare easily.
-- Keep the rows whose third and fourth space-separated numbers are equal.
SELECT s.recid, s.deldata
FROM so1 AS s
CROSS APPLY (
    VALUES (
        dbo.SplitAndGetNumberAt(' ', s.deldata, 3),
        dbo.SplitAndGetNumberAt(' ', s.deldata, 4)
    )
) AS p (third_val, fourth_val)
WHERE p.third_val = p.fourth_val
Hope it will help.
If you have SQL Server 2016 or higher, you may try one approach using OPENJSON() to split your input data. The important part here is the fact, that when OPENJSON parses a JSON array the indexes of the elements in the JSON text are returned as keys (0-based).
Input:
-- Sample input: one space-delimited list of values per record.
CREATE TABLE #Table (
    RecId int,
    Data  varchar(max)
);

INSERT INTO #Table (RecId, Data)
VALUES
    (1, '1 2 3 4 5'),
    (2, '1 2 3 3 5'),
    (3, '1 1 1 1 1');
Statement:
-- Turn Data into a JSON array of strings and compare the elements at the
-- 0-based indexes 2 and 3 (the third and fourth values).
-- * The array text is built once and reused by both lookups.
-- * STRING_ESCAPE keeps the JSON valid when Data contains characters such as
--   '"' or '\'; it does not alter spaces, so the split is unaffected.
-- * OPENJSON returns [key] as text, so it is compared with string literals.
SELECT
    t.RecId,
    t.Data
FROM #Table AS t
CROSS APPLY (
    VALUES ('["' + REPLACE(STRING_ESCAPE(t.Data, 'json'), ' ', '","') + '"]')
) AS a (json_array)
CROSS APPLY (SELECT [value] FROM OPENJSON(a.json_array) WHERE [key] = N'2') AS j3
CROSS APPLY (SELECT [value] FROM OPENJSON(a.json_array) WHERE [key] = N'3') AS j4
WHERE j3.[value] = j4.[value]
Output:
RecId Data
2 1 2 3 3 5
3 1 1 1 1 1
Just for fun, sort of crazy coding:
-- Table variables are named with '@' ('#' denotes a temp table, which cannot
-- be created with DECLARE).
DECLARE @Table TABLE (
    recid INT,
    DelimitedDataField VARCHAR(32)
);

INSERT INTO @Table (recid, DelimitedDataField)
VALUES
    (1, '1 2 3 4 5'),
    (2, '1 2 3 3 5'),
    (3, '1 1 1 1 1');

-- Same STUFF/CHARINDEX logic as the fully nested form, with every
-- intermediate string named once instead of being repeated inline:
--   p.padded : the field followed by ' - - -', so that short fields still
--              have a (dummy) third and fourth value
--   r1.rest  : padded with its first value (and the space after it) removed
--   r2.rest  : r1.rest with its first value removed -> begins with value 3
--   r3.rest  : r2.rest with its first value removed -> begins with value 4
-- A row qualifies when value 3 equals value 4 and value 3 is not the '-'
-- padding. SUBSTRING(..., 1, CHARINDEX(' ', ...)) keeps the trailing space of
-- the value; trailing spaces do not affect the = and <> comparisons.
SELECT
    t.recid,
    t.DelimitedDataField
FROM @Table AS t
CROSS APPLY (VALUES (t.DelimitedDataField + ' - - -')) AS p (padded)
CROSS APPLY (VALUES (STUFF(p.padded, 1, CHARINDEX(' ', p.padded), ''))) AS r1 (rest)
CROSS APPLY (VALUES (STUFF(r1.rest, 1, CHARINDEX(' ', r1.rest), ''))) AS r2 (rest)
CROSS APPLY (VALUES (STUFF(r2.rest, 1, CHARINDEX(' ', r2.rest), ''))) AS r3 (rest)
WHERE SUBSTRING(r2.rest, 1, CHARINDEX(' ', r2.rest))
    = SUBSTRING(r3.rest, 1, CHARINDEX(' ', r3.rest))
  AND SUBSTRING(r2.rest, 1, CHARINDEX(' ', r2.rest)) <> '-'

Extract forename if the character is more then 2 letter

I have to get the forename from the c.forename if c.known_as column is null or blank.
This i achieved with case when statement using
-- Use KNOWN_AS when it holds a value; fall back to FORENAMES when KNOWN_AS
-- is NULL or an empty string.
COALESCE(NULLIF(IND.KNOWN_AS, ''), ind.FORENAMES) AS 'Known As'
My issue is in the forename column i have name like Jhon Smith where i would like to extract only John, below is an example what i want to achieve
Desire output c.forename
John Mr John
Jhon Jhon Smith
blank Jo
blank J
So, basically, it should take only the forename, skipping 'Mr'; and second, it should take the forename only if it has more than 2 characters.
My current query is:
-- Asker's current query: for members with status 33 and one of the listed
-- membership numbers, derive FORENAMES2 from FORENAMES when KNOWN_AS is empty.
Select ind.FORENAMES,
ind.KNOWN_AS,
-- FORENAMES2: the text before the first space, produced only when known_as is
-- NULL or empty and the first space is at position 3 or later; NULL otherwise
-- (including when forenames contains no space at all)
case when (known_as is null or known_as = '' ) and charindex(' ', forenames) > 2
then substring(forenames, 1, charindex(' ', forenames) - 1) end as FORENAMES2,
-- NOTE(review): "output" is selected as a column here; presumably a
-- placeholder from the original post - confirm
output
from individual ind
join member m on m.individual_ref=ind.individual_ref
and m.MEMBERSHIP_NO in ('001','002','003','004','005','006','007')
where m.member_status=33
You could use following case when statement to verify your conditions:
For SQL Server:
-- SQL Server: when known_as is NULL or empty and the first space of forename
-- is at position 4 or later, return the text before that space; else NULL.
case
    when coalesce(c.known_as, '') = ''
     and charindex(' ', c.forename) > 3
    then left(c.forename, charindex(' ', c.forename) - 1)
end
For MySQL:
-- MySQL: when known_as is NULL or empty and the first space of forename is at
-- position 4 or later, return the text before that space; else NULL.
case
    when ifnull(c.known_as, '') = ''
     and locate(' ', c.forename) > 3
    then left(c.forename, locate(' ', c.forename) - 1)
end
A little explanation: if the first name must be longer than 2 characters, the first space must occur at index 4 or later. That is what the condition checks: locate(' ', c.forename) > 3 (MySQL) or charindex(' ', c.forename) > 3 (SQL Server).
NOTE
You first have to strip all occurrences of Mr, Mrs and Ms from the c.forename column, like this (the syntax is the same for MySQL and SQL Server):
-- Remove the titles 'Mrs ', 'Mr ' and 'Ms ' (each including its trailing
-- space) wherever they occur in forename.
replace(
    replace(
        replace(c.forename, 'Mrs ', ''),
        'Mr ', ''
    ),
    'Ms ', ''
)
You have to include it in your query like this:
-- Forename for members without a KNOWN_AS value.
-- Inner query : strips the titles 'Mrs ', 'Mr ' and 'Ms ' from FORENAMES.
-- Outer query : returns the first word of the stripped value when KNOWN_AS is
--               NULL or empty and that word has more than 2 characters;
--               NULL otherwise.
-- * The first space is searched in FORENAMES2 + ' ' so a single remaining
--   word ('Mr John' -> 'John') is still returned rather than NULL.
-- * "More than 2 characters" means the first space is at position 4 or later,
--   hence > 3.
-- * SQL Server requires an alias on the derived table.
select t.FORENAMES,
       t.KNOWN_AS,
       case
           when (t.KNOWN_AS is null or t.KNOWN_AS = '')
            and charindex(' ', t.FORENAMES2 + ' ') > 3
           then left(t.FORENAMES2, charindex(' ', t.FORENAMES2 + ' ') - 1)
       end as FORENAMES2,
       -- NOTE(review): "output" is carried over from the question as a column
       -- name; presumably a placeholder - confirm
       output
from (
    select ind.FORENAMES,
           ind.KNOWN_AS,
           replace(replace(replace(ind.FORENAMES, 'Mrs ', ''), 'Mr ', ''), 'Ms ', '') as FORENAMES2,
           output
    from individual ind
    join member m on m.individual_ref = ind.individual_ref
    where m.member_status = 33
      and m.MEMBERSHIP_NO in ('001','002','003','004','005','006','007')
) as t
Try this:
-- Table variables are named with '@' ('#' denotes a temp table, which cannot
-- be created with DECLARE).
DECLARE @DataSource TABLE
(
    [name] VARCHAR(32)
);

INSERT INTO @DataSource ([name])
VALUES (' Mr John ')
      ,('Jhon Smith')
      ,(' Jo ')
      ,(' J ');

-- Trimmed name plus its reversed form; the reversed form lets the LAST word be
-- cut off with a plain "up to the first space" search.
WITH SanitizedDataSource ([name], [name_reversed]) AS
(
    SELECT LTRIM(RTRIM([name]))
          ,REVERSE(LTRIM(RTRIM([name])))
    FROM @DataSource
)
SELECT [name]
      ,CASE
           -- More than one word: return the last word. SUBSTRING with start 0
           -- and length k yields the first k - 1 characters, i.e. everything
           -- before the first space of the reversed name.
           -- NOTE(review): for 'Jhon Smith' this returns 'Smith', while the
           -- question lists 'Jhon' as the desired value - confirm the intent.
           WHEN CHARINDEX(' ', [name]) > 1 THEN REVERSE(SUBSTRING([name_reversed], 0, CHARINDEX(' ', [name_reversed])))
           -- Single word (no space after trimming): empty string.
           ELSE ''
       END
FROM SanitizedDataSource;