Retrieve initials from a SQL Server Table - sql

I've been working on treating a sql table, and splitting the data. I've come to splitting some initials from the last name. The only problem is, the initials are spaced out. For example (data from my table)
Hanse J S P > J S P are the initials
Gerson B D V > B D V are the initials
J D Timberland > J D are the initials
So basically, there are up to four initials, which can be at the beginning, middle, or end of the string. I'm at a loss as to how I should split these into separate columns, where the result will be:
COL A | COL B
J S P | Hanse
B D V | Gerson
J D | Timberland
Can anyone please point me in the right direction? I'm using SQL Server.

Here's a rather hamfisted way of doing it by abusing the Parsename function. The big caveat here is that Parsename is limited to 4 tokens so J S P Jansen will work but J S P C Jansen or John J S P Jansen will not.
-- Split each name into at most four tokens: spaces are turned into dots so
-- that PARSENAME can address the pieces (part 1 is the right-most token,
-- part 4 the left-most).
SELECT
    -- Every one-character token is an initial; concatenate them left to right.
    CASE WHEN LEN(tok.name4) = 1 THEN tok.name4 ELSE '' END
  + CASE WHEN LEN(tok.name3) = 1 THEN tok.name3 ELSE '' END
  + CASE WHEN LEN(tok.name2) = 1 THEN tok.name2 ELSE '' END
  + CASE WHEN LEN(tok.name1) = 1 THEN tok.name1 ELSE '' END AS initials,
    -- The surname is the first multi-character token, scanning right to left.
    COALESCE(
        CASE WHEN LEN(tok.name1) > 1 THEN tok.name1 END,
        CASE WHEN LEN(tok.name2) > 1 THEN tok.name2 END,
        CASE WHEN LEN(tok.name3) > 1 THEN tok.name3 END,
        CASE WHEN LEN(tok.name4) > 1 THEN tok.name4 END
    ) AS surname
FROM yourtable AS src
CROSS APPLY (SELECT REPLACE(src.name, ' ', '.') AS dotted) AS d
CROSS APPLY (
    SELECT
        PARSENAME(d.dotted, 1) AS name1,
        PARSENAME(d.dotted, 2) AS name2,
        PARSENAME(d.dotted, 3) AS name3,
        PARSENAME(d.dotted, 4) AS name4
) AS tok;
Here is a sqlfiddle of this in action
-- Demo table holding one space-delimited full name per row.
CREATE TABLE NAMES (name varchar(50));

INSERT INTO NAMES (name)
VALUES
    ('J S P Jansen'),
    ('B D V Gerson'),
    ('J D Timberland');

-- Stage 1: replace spaces with dots so PARSENAME can pick the tokens apart.
WITH dotted AS (
    SELECT REPLACE(n.name, ' ', '.') AS dotted_name
    FROM names AS n
),
-- Stage 2: up to four tokens per name; name1 is the right-most token.
parsedname AS (
    SELECT
        PARSENAME(d.dotted_name, 1) AS name1,
        PARSENAME(d.dotted_name, 2) AS name2,
        PARSENAME(d.dotted_name, 3) AS name3,
        PARSENAME(d.dotted_name, 4) AS name4
    FROM dotted AS d
)
SELECT
    -- One-character tokens are initials, emitted left to right.
    CASE WHEN LEN(p.name4) = 1 THEN p.name4 ELSE '' END
  + CASE WHEN LEN(p.name3) = 1 THEN p.name3 ELSE '' END
  + CASE WHEN LEN(p.name2) = 1 THEN p.name2 ELSE '' END
  + CASE WHEN LEN(p.name1) = 1 THEN p.name1 ELSE '' END AS initials,
    -- First token longer than one character, searching from the right.
    CASE
        WHEN LEN(p.name1) > 1 THEN p.name1
        WHEN LEN(p.name2) > 1 THEN p.name2
        WHEN LEN(p.name3) > 1 THEN p.name3
        WHEN LEN(p.name4) > 1 THEN p.name4
    END AS surname
FROM parsedname AS p;
+----------+------------+
| initials | surname |
+----------+------------+
| JSP | Jansen |
| BDV | Gerson |
| JD | Timberland |
+----------+------------+
If a space is needed in between those letters you can just flip around that CASE statement to something like:
-- Append a space after every initial, then strip the single trailing space.
-- RTRIM is used rather than TRIM: TRIM is only available from SQL Server 2017
-- onwards, and a leading space can never occur in this expression.
RTRIM(CASE WHEN LEN(name4) = 1 THEN name4 + ' ' ELSE '' END +
CASE WHEN LEN(name3) = 1 THEN name3 + ' ' ELSE '' END +
CASE WHEN LEN(name2) = 1 THEN name2 + ' ' ELSE '' END +
CASE WHEN LEN(name1) = 1 THEN name1 + ' ' ELSE '' END) as initials
SQLFiddle with the spaces
+----------+------------+
| initials | surname |
+----------+------------+
| J S P | Jansen |
| B D V | Gerson |
| J D | Timberland |
+----------+------------+

This one uses CHARINDEX and recursive CTE to extract space delimited substrings from name:
Find the substring before the first space
Feed the remaining substring to the same CTE
Once you have the substrings, it is only a matter of gluing them back:
-- Sample input: one row per full name to be split.
WITH yourdata(FullName) AS (
    SELECT 'Hanse J S P' UNION
    SELECT 'Gerson B D V' UNION
    SELECT 'J D Timberland' UNION
    SELECT 'TEST 1 TEST 2 TEST 3'
),
-- Recursively peel one space-delimited token off the front of each name.
--   LeftPart : the token before the first space (or the whole remainder)
--   NextPart : the text after that space; NULL once nothing is left
--   PartSort : 1-based position of the token within FullName
cte AS (
    SELECT
        FullName,
        CASE WHEN Pos1 = 0 THEN FullName ELSE SUBSTRING(FullName, 1, Pos1 - 1) END AS LeftPart,
        CASE WHEN Pos1 = 0 THEN NULL ELSE SUBSTRING(FullName, Pos1 + 1, Pos2 - Pos1) END AS NextPart,
        1 AS PartSort
    FROM yourdata
    CROSS APPLY (SELECT CHARINDEX(' ', FullName) AS Pos1, LEN(FullName) AS Pos2) AS CA
    UNION ALL
    SELECT
        FullName,
        CASE WHEN Pos1 = 0 THEN NextPart ELSE SUBSTRING(NextPart, 1, Pos1 - 1) END,
        CASE WHEN Pos1 = 0 THEN NULL ELSE SUBSTRING(NextPart, Pos1 + 1, Pos2 - Pos1) END,
        PartSort + 1
    FROM cte
    CROSS APPLY (SELECT CHARINDEX(' ', NextPart) AS Pos1, LEN(NextPart) AS Pos2) AS CA
    WHERE NextPart IS NOT NULL   -- stop once the remainder has been consumed
)
-- Glue the tokens back together: one-character tokens form Initials, longer
-- tokens form Names. Each list is built with a leading space per token, which
-- STUFF then removes.
SELECT
    yourdata.FullName,
    STUFF(CA1.XMLStr, 1, 1, '') AS Initials,
    STUFF(CA2.XMLStr, 1, 1, '') AS Names
FROM yourdata
CROSS APPLY (
    -- TYPE + .value() returns the concatenated text unescaped; a plain
    -- FOR XML PATH('') would turn characters such as & and < into entities.
    SELECT (
        SELECT CONCAT(' ', LeftPart)
        FROM cte
        WHERE FullName = yourdata.FullName AND LEN(LeftPart) = 1
        ORDER BY PartSort
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)')
) AS CA1(XMLStr)
CROSS APPLY (
    SELECT (
        SELECT CONCAT(' ', LeftPart)
        FROM cte
        WHERE FullName = yourdata.FullName AND LEN(LeftPart) > 1
        ORDER BY PartSort
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)')
) AS CA2(XMLStr)
Result:
| FullName | Initials | Names |
|----------------------|----------|----------------|
| Gerson#B#D#V | B D V | Gerson |
| Hanse#J#S#P | J S P | Hanse |
| J#D#Timberland | J D | Timberland |
| TEST#1#TEST#2#TEST#3 | 1 2 3 | TEST TEST TEST |

Similar to JNevil's answer (+1), but not limited to 4 tokens.
Example
-- Sample data. A table variable name must start with @.
Declare @YourTable table (SomeCol varchar(50))
Insert Into @YourTable values
 ('Hanse J S P')
,('Gerson B D V')
,('J D Timberland')
,('J D Timberland / J R R Tolkien')

Select A.SomeCol
      -- ColA: every one-character token, in original order, space separated.
      -- CONCAT skips the NULLs produced for tokens that do not qualify.
      ,ColA = ltrim(
              concat(IIF(len(Pos1)=1,' '+Pos1,null)
                    ,IIF(len(Pos2)=1,' '+Pos2,null)
                    ,IIF(len(Pos3)=1,' '+Pos3,null)
                    ,IIF(len(Pos4)=1,' '+Pos4,null)
                    ,IIF(len(Pos5)=1,' '+Pos5,null)
                    ,IIF(len(Pos6)=1,' '+Pos6,null)
                    ,IIF(len(Pos7)=1,' '+Pos7,null)
                    ,IIF(len(Pos8)=1,' '+Pos8,null)
                    ,IIF(len(Pos9)=1,' '+Pos9,null)
                    )
              )
      -- ColB: every token that is not a single letter (so a lone '/' is kept
      -- here as well as in ColA).
      ,ColB = ltrim(
              concat(IIF(Pos1 not Like '[a-z]',' '+Pos1,null)
                    ,IIF(Pos2 not Like '[a-z]',' '+Pos2,null)
                    ,IIF(Pos3 not Like '[a-z]',' '+Pos3,null)
                    ,IIF(Pos4 not Like '[a-z]',' '+Pos4,null)
                    ,IIF(Pos5 not Like '[a-z]',' '+Pos5,null)
                    ,IIF(Pos6 not Like '[a-z]',' '+Pos6,null)
                    ,IIF(Pos7 not Like '[a-z]',' '+Pos7,null)
                    ,IIF(Pos8 not Like '[a-z]',' '+Pos8,null)
                    ,IIF(Pos9 not Like '[a-z]',' '+Pos9,null)
                    )
              )
 From  @YourTable A
 Cross Apply (
        -- Pos1..Pos9: the first nine space-delimited tokens (NULL when absent).
        Select Pos1 = xDim.value('/x[1]','varchar(max)')
              ,Pos2 = xDim.value('/x[2]','varchar(max)')
              ,Pos3 = xDim.value('/x[3]','varchar(max)')
              ,Pos4 = xDim.value('/x[4]','varchar(max)')
              ,Pos5 = xDim.value('/x[5]','varchar(max)')
              ,Pos6 = xDim.value('/x[6]','varchar(max)')
              ,Pos7 = xDim.value('/x[7]','varchar(max)')
              ,Pos8 = xDim.value('/x[8]','varchar(max)')
              ,Pos9 = xDim.value('/x[9]','varchar(max)')
        -- The inner FOR XML entity-encodes &, < and > first, so such
        -- characters in the data cannot break the cast to XML.
        From  (Select Cast('<x>' + replace((Select A.SomeCol as [*] For XML Path('')),' ','</x><x>')+'</x>' as xml) as xDim) as X
       ) B
Returns
SomeCol ColA ColB
Hanse J S P J S P Hanse
Gerson B D V B D V Gerson
J D Timberland J D Timberland
J D Timberland / J R R Tolkien J D / J R R Timberland / Tolkien

I used some built-in functions for this. The general idea is to use string_split to split the string into rows, use ROW_NUMBER to save the order according to length and the char(s) position in the string, then use FOR XML PATH() to concatenate from rows to a single column.
--Assume your data structure (a table variable name must start with @)
DECLARE @temp TABLE (thestring varchar(1000))
INSERT INTO @temp VALUES
('Hanse J S P'), ('Gerson B D V'), ('J D Timberland')

-- CTE: one row per space-delimited token of each string.
--   pos     : where the token is found in the original string (single
--             characters are searched with a leading space)
--   [order] : per-string ranking by token length, then by pos
;WITH CTE AS
(
    SELECT *
          ,ROW_NUMBER() OVER (PARTITION BY thestring ORDER BY LEN(value) ASC, pos ASC) [order]
    FROM (
        SELECT *
             , value AS [theval]
             , CHARINDEX(CASE WHEN len(value) = 1 THEN ' ' + value ELSE value END, thestring) AS [pos]
        FROM @temp CROSS APPLY string_split(thestring, ' ')
    ) AS dT
)
-- Concatenate the one-character tokens into [COL A] and the longer tokens into
-- [COL B]. ORDER BY [order] makes the concatenation order deterministic;
-- without it FOR XML PATH may emit the tokens in any order.
SELECT ( SELECT value + ' ' AS [text()]
         FROM cte
         WHERE cte.thestring = T.thestring
           AND LEN(theval) = 1
         ORDER BY cte.[order]
         FOR XML PATH('')
       ) AS [COL A]
      ,( SELECT value + ' ' AS [text()]
         FROM cte
         WHERE cte.thestring = T.thestring
           AND LEN(theval) > 1
         ORDER BY cte.[order]
         FOR XML PATH('')
       ) AS [COL B]
FROM @temp T
GROUP BY thestring
Produces output:
COL A COL B
----- -----
B D V Gerson
J S P Hanse
J D Timberland

Which version of SQL Server do you have? Is STRING_SPLIT() available?
If yes, split using the space as a delimiter, iterate through the resulting strings, evaluate their length and concatenate a result string with the string when said string is one character in length and is a letter.
Add a space before unless the result string is so far empty.
If STRING_SPLIT() is not available... Well... Here are a few solutions:
T-SQL split string based on delimiter
-- Addendum
To your second part of the question (which did not originally exist when I originally posted my reply) where you would like to isolate the non-initials part into a second column, I would basically separate two blocks of logic with two result strings based on the length of each element.
Note: this is not going to be very elegant in pre-2016 SQL Server and may even require a CURSOR (sigh)
I know I am going to be downvoted for mentioning a cursor.

Related

Pickup words in text field - SQL server

My data has a text field and I want to pick up the keywords in the list of: 'mr' , 'jr', 'dr', 'ii'
these words need to have space in front and at the end.
So with the data below, the output should be:
id|text|keyword1|keyword2|keyword3|keyword4
1, 'xxxx', 'jr','mr','ii'
2, 'xxxx','mr','',''
Thank you for helping.
HHC
-- Demo table: free text that may contain the keywords to pick up.
CREATE TABLE have
(
    id   int,
    text varchar(225)
);

-- Two sample rows, loaded in a single statement.
INSERT INTO have (id, text)
VALUES
    (1, 'monday jr due date mr ii final'),
    (2, 'happy new year mr J');
You can use CHARINDEX() function within conditionals such as
-- One fixed output column per keyword: the keyword when it occurs in the text
-- with a space on both sides, otherwise NULL.
SELECT
    h.id,
    h.text,
    CASE WHEN h.text LIKE '% jr %' THEN 'jr' END AS keyword1,
    CASE WHEN h.text LIKE '% mr %' THEN 'mr' END AS keyword2,
    CASE WHEN h.text LIKE '% ii %' THEN 'ii' END AS keyword3,
    CASE WHEN h.text LIKE '% dr %' THEN 'dr' END AS keyword4
FROM have AS h
First grab a copy of Ngrams8K.
Next you can do this:
-- Slide a 4-character window over each text and keep only the windows that
-- equal one of the space-padded keywords; number the hits per row in order of
-- their position in the text.
SELECT
    h.Id,
    h.[text],
    ng.Token,
    ROW_NUMBER() OVER (PARTITION BY h.Id ORDER BY ng.Position) AS Keyword
FROM dbo.have AS h
CROSS APPLY dbo.NGrams8k(h.[text], 4) AS ng
INNER JOIN (
    VALUES (' mr '), (' jr '), (' dr '), (' ii ')
) AS kw (Token)
    ON kw.Token = ng.token;
Returns:
Id text Token Keyword
---- -------------------------------- ----------------------------
1 monday jr due date mr ii final jr 1
1 monday jr due date mr ii final mr 2
1 monday jr due date mr ii final ii 3
2 happy new year mr J mr 1
A simple modification:
-- Step 1: find the keyword hits and number them per row by position.
WITH hits AS (
    SELECT
        h.Id,
        h.[text],
        ng.Token,
        ROW_NUMBER() OVER (PARTITION BY h.Id ORDER BY ng.Position) AS Keyword
    FROM dbo.have AS h
    CROSS APPLY dbo.NGrams8k(h.[text], 4) AS ng
    WHERE ng.token IN (' mr ' , ' jr ', ' dr ', ' ii ')
)
-- Step 2: pivot the first four hits of each row into columns; a slot with no
-- hit yields the empty string.
SELECT
    f.Id,
    f.[Text],
    MAX(CASE WHEN f.Keyword = 1 THEN f.Token ELSE '' END) AS Keyword1,
    MAX(CASE WHEN f.Keyword = 2 THEN f.Token ELSE '' END) AS Keyword2,
    MAX(CASE WHEN f.Keyword = 3 THEN f.Token ELSE '' END) AS Keyword3,
    MAX(CASE WHEN f.Keyword = 4 THEN f.Token ELSE '' END) AS Keyword4
FROM hits AS f
GROUP BY
    f.Id,
    f.[Text]
ORDER BY f.Id;
Returns:
Id Text Keyword1 Keyword2 Keyword3 Keyword4
---- ------------------------------- -------------- ------------ ------------- ------------
1 monday jr due date mr ii final jr mr ii
2 happy new year mr J mr

What is the SQL code for aggregating values?

I have the following table:
GR WORD NO.
1 A 4
2 B 5
3 C 6
1 G 5
2 H 5
3 I 5
I would like to get the following table:
GR 4 5 6
1 1 1 0
2 0 2 0
3 0 1 1
For each GR column value I count the NO. values.
Here's a dynamic solution:
--Sample data
--CREATE TABLE tbl (GR int, WORD char(1), [NO] int)
--INSERT INTO tbl values
--(1,'A',4),
--(2,'B',5),
--(3,'C',6),
--(1,'G',5),
--(2,'H',5),
--(3,'I',5)

-- Build the PIVOT statement at run time so the column list follows whatever
-- distinct [NO] values exist in tbl. A local variable name must start with @.
DECLARE @sql NVARCHAR(MAX)

SELECT @sql = N'
SELECT *
FROM tbl
PIVOT(
    COUNT(WORD) FOR [NO] IN (' +
    -- Comma-separated list of the distinct [NO] values, each wrapped by
    -- QUOTENAME so it is a valid column name; STUFF drops the leading comma.
    (SELECT STUFF(
        (
            SELECT DISTINCT ',' + QUOTENAME(CAST([NO] AS VARCHAR(10)))
            FROM tbl
            FOR XML PATH('')
        )
    , 1, 1, ''))
    + N')
) p
'

EXEC sp_executesql @sql
This is a conditional aggregation
-- Count, per GR, how many rows carry each NO. value.
-- The column name contains a dot, so it must be bracket-quoted: [NO.]
-- (write [NO] instead if the real column has no trailing dot).
select
GR
,[4] = count(case when [NO.] = 4 then WORD end)
,[5] = count(case when [NO.] = 5 then WORD end)
,[6] = count(case when [NO.] = 6 then WORD end)
from YourTable
group by GR
Or a pivot
-- Same counts via PIVOT. The column name contains a dot, so it must be
-- bracket-quoted: [NO.] (write [NO] if the real column has no trailing dot).
select *
from YourTable
pivot(
count(WORD) for [NO.] in ([4],[5],[6])
) p

Split comma separated values in sql based on condition [duplicate]

This question already has answers here:
T-SQL split string
(27 answers)
Closed 5 years ago.
Hi all, I am a newbie in SQL. I have a table with a column named dilution_name whose values are comma-separated, like A,B,C. The number of values varies: some rows contain A,B,C and others contain A,B,C,D. I want to split these values into multiple columns; if a row has only 3 comma-separated values, then 3 columns should be filled and the rest should be NULL.
I have tried
-- Split dilution_name into up to seven columns (val1..val7).
-- Seven sentinel commas are appended so that every charindex below finds a
-- delimiter; positions p1..p7 are therefore always > 0 and strictly increasing.
select ParsedData.*
from dilution_table mt
cross apply ( select str = mt.dilution_name + ',,,,,,,' ) f1
-- Each search starts one character after the previous delimiter.
cross apply ( select p1 = charindex( ',', str ) ) ap1
cross apply ( select p2 = charindex( ',', str, p1 + 1 ) ) ap2
cross apply ( select p3 = charindex( ',', str, p2 + 1 ) ) ap3
cross apply ( select p4 = charindex( ',', str, p3 + 1 ) ) ap4
cross apply ( select p5 = charindex( ',', str, p4 + 1 ) ) ap5
cross apply ( select p6 = charindex( ',', str, p5 + 1 ) ) ap6
cross apply ( select p7 = charindex( ',', str, p6 + 1 ) ) ap7
-- Each value is the text between two consecutive delimiters. NULLIF turns the
-- empty strings produced by the sentinel commas (and by empty items) into NULL.
cross apply ( select val1 = nullif( substring( str, 1, p1-1 ), '' )
, val2 = nullif( substring( str, p1+1, p2-p1-1 ), '' )
, val3 = nullif( substring( str, p2+1, p3-p2-1 ), '' )
, val4 = nullif( substring( str, p3+1, p4-p3-1 ), '' )
, val5 = nullif( substring( str, p4+1, p5-p4-1 ), '' )
, val6 = nullif( substring( str, p5+1, p6-p5-1 ), '' )
, val7 = nullif( substring( str, p6+1, p7-p6-1 ), '' )
) ParsedData
[sample data][1]
sample data
In SQL Server 2016+ you can use string_split() (though it has no ordinal number).
In SQL Server pre-2016, using a CSV Splitter table valued function by Jeff Moden:
-- Input string to split (a local variable name must start with @).
declare @str varchar(128) = 'a,b,c,d'
-- One row per item, with its ordinal position in ItemNumber.
select s.ItemNumber, s.Item
from dbo.delimitedsplit8k(@str,',') s;
rextester demo: http://rextester.com/EGZ24917
returns:
+------------+------+
| ItemNumber | Item |
+------------+------+
| 1 | a |
| 2 | b |
| 3 | c |
| 4 | d |
+------------+------+
To pivot the data after splitting, you can use conditional aggregation like so:
-- Pivot the split items into columns v1..v5 by ordinal position; positions
-- with no item come back as NULL. @str is the delimited input variable
-- declared for the previous query.
select
v1 = max(case when s.ItemNumber = 1 then s.Item end)
, v2 = max(case when s.ItemNumber = 2 then s.Item end)
, v3 = max(case when s.ItemNumber = 3 then s.Item end)
, v4 = max(case when s.ItemNumber = 4 then s.Item end)
, v5 = max(case when s.ItemNumber = 5 then s.Item end)
from dbo.delimitedsplit8k(@str,',') s;
returns:
+----+----+----+----+------+
| v1 | v2 | v3 | v4 | v5 |
+----+----+----+----+------+
| a | b | c | d | NULL |
+----+----+----+----+------+
splitting strings reference:
Tally OH! An Improved SQL 8K “CSV Splitter” Function - Jeff Moden
Splitting Strings : A Follow-Up - Aaron Bertrand
Split strings the right way – or the next best way - Aaron Bertrand
string_split() in SQL Server 2016 : Follow-Up #1 - Aaron Bertrand
Ordinal workaround for **string_split()** - Solomon Rutzky

Group count values in comma separated field in SQL Server 2008

I have a users table with a column product.
I would like to count how many products are in my table
Users table
+----------+
| Products |
+----------+
| A |
| B |
| A,c |
| C,B,A |
| D |
+----------+
i.e. count for A is: 3, count for B is: 2, count for C is: 2, count for D is: 1
Please try:
-- Step 1: wrap every comma-separated entry in an <M> element and cast to XML.
WITH as_xml AS (
    SELECT
        CAST('<M>' + REPLACE(src.Products, ',', '</M><M>') + '</M>' AS XML) AS CVS
    FROM YourTable AS src
),
-- Step 2: shred the XML so each entry becomes its own row.
items AS (
    SELECT
        node.item.value('.', 'VARCHAR(100)') AS Products
    FROM as_xml
    CROSS APPLY as_xml.CVS.nodes('/M') AS node(item)
)
-- Step 3: count the occurrences of each entry.
SELECT
    items.Products,
    COUNT(items.Products)
FROM items
GROUP BY items.Products
Use a recursive query: each step of the recursion splits the string into two columns, l (the first entry, i.e. the text before the first comma) and r (the tail of Products after that comma). Then GROUP BY the l column:
-- Recursively split each comma-separated Products value.
--   l : the entry in front of the first comma (or the whole string)
--   r : everything after that comma; NULL when no comma is left
WITH expandProd AS (
    -- Anchor: first entry and tail of every row in prods.
    SELECT
        CASE
            WHEN cut.pos >= 1 THEN LEFT(p.Products, cut.pos - 1)
            ELSE p.Products
        END AS l,
        CASE
            WHEN cut.pos >= 1 THEN RIGHT(p.Products, LEN(p.Products) - cut.pos)
        END AS r
    FROM prods AS p
    CROSS APPLY (SELECT charindex(',', p.Products) AS pos) AS cut

    UNION ALL

    -- Recursive step: apply the same split to the tail of the previous level.
    SELECT
        CASE
            WHEN cut.pos >= 1 THEN LEFT(e.r, cut.pos - 1)
            ELSE e.r
        END AS l,
        CASE
            WHEN cut.pos >= 1 THEN RIGHT(e.r, LEN(e.r) - cut.pos)
        END AS r
    FROM expandProd AS e
    CROSS APPLY (SELECT charindex(',', e.r) AS pos) AS cut
    WHERE e.r IS NOT NULL   -- nothing left to split once the tail is NULL
)
-- Count how often each individual entry occurs.
SELECT
    l,
    COUNT(l)
FROM expandProd
GROUP BY l

Generate comma separated value based on input in sql

I have a table called Rule_X_ListType in the following structure
Rue_ID ListType_ID Value
---------------------------
1 2 319
1 2 400
1 5 8150
1 5 1000
1 3 10211
2 2 400
2 6 10211
3 7 10211
3 3 8051
2 2 319
If I will give the input as Rule_ID = 1 and ListType_ID = 2, then I need the output as a string with values :
319,400
Anybody please help out...Thanks in advance...
I do not see the necessity for either the CTE or FOR XML PATH.
This can be accomplished using the much simpler COALESCE method:
-- Accumulates the comma-separated result (a local variable name must start
-- with @). varchar(max) avoids silent truncation of long lists.
DECLARE @List varchar(max)

-- @List starts as NULL, so COALESCE yields '' for the first row and
-- '<list so far>, ' for every later row.
SELECT
    @List = COALESCE(@List + ', ', '') + CAST(Value AS varchar(10))
FROM
    Rule_X_ListType
WHERE
    Rule_ID = 1 and ListType_ID = 2

SELECT @List
Try this
-- Rows for the requested rule / list type.
;WITH CTE AS
(
    SELECT Value FROM Rule_X_ListType WHERE Rue_ID = 1 AND ListType_ID = 2
)
-- Concatenate the values as ',v1,v2,...' and let STUFF drop the leading comma.
-- Value is cast explicitly: if the column is numeric (as the sample data
-- suggests), ',' + Value would try to convert ',' to a number and fail.
SELECT STUFF
(
    (SELECT ',' + CAST(A.Value AS varchar(max)) FROM CTE A ORDER BY A.VALUE FOR XML PATH('')),1,1,''
) AS CSVValues
-- One output row per (Rule_ID, ListType_ID) with its values as a CSV string.
-- DISTINCT collapses the identical rows produced for each source row.
SELECT DISTINCT T1.Rule_ID,T1.ListType_ID,STUFF(VAL,1,1,'') AS VALUE
FROM Rule_X_ListType T1
-- Builds ',v1,v2,...' for the current pair; STUFF above drops the leading comma.
-- VARCHAR is given an explicit length, and ORDER BY makes the order of the
-- values in the list deterministic.
CROSS APPLY (SELECT ',' + CONVERT(VARCHAR(30),T2.Value)
FROM Rule_X_ListType T2
WHERE T1.Rule_ID =T2.Rule_ID and T1.ListType_ID =T2.ListType_ID
ORDER BY T2.Value
FOR XML PATH(''))A(VAL)
WHERE T1.Rule_ID = 1 and T1.ListType_ID = 2
SQL Tips and Tricks in http://sqlbay.blogspot.in/