Remove the last character in a string in T-SQL? - sql

How do I remove the last character in a string in T-SQL?
For example:
'TEST STRING'
to return:
'TEST STRIN'

e.g.
-- Remove the last character of a string, leaving NULL and empty input untouched.
DECLARE @String VARCHAR(100);
SET @String = 'TEST STRING';

-- Chop off the end character.
-- LEN(NULL) is NULL and LEN('') is 0, so both fail the "> 0" test and fall
-- through to ELSE, which hands the value back unchanged. A simple CASE of the
-- form "CASE x WHEN NULL" can never match, because x = NULL evaluates to
-- UNKNOWN, so the NULL test has to be implicit (as here) or written IS NULL.
-- NOTE: LEN ignores trailing spaces, so a value that ends in blanks loses
-- those blanks together with the character in front of them.
SET @String =
    CASE
        WHEN LEN(@String) > 0 THEN LEFT(@String, LEN(@String) - 1)
        ELSE @String
    END;

SELECT @String;

If for some reason your column logic is complex (case when ... then ... else ... end), then the LEFT/LEN-based solutions force you to repeat the same logic inside the len() function. Duplicating the same logic becomes a mess. If this is the case, then this is a solution worth noting. This example gets rid of the last unwanted comma. I finally found a use for the REVERSE function.
select reverse(stuff(reverse('a,b,c,d,'), 1, 1, ''))

Try this:
select substring('test string', 1, (len('test string') - 1))

If your string is empty,
DECLARE #String VARCHAR(100)
SET #String = ''
SELECT LEFT(#String, LEN(#String) - 1)
then this code will cause error message 'Invalid length parameter passed to the substring function.'
You can handle it this way:
SELECT LEFT(#String, NULLIF(LEN(#String)-1,-1))
It will always return a result: NULL when the string is empty, and the string without its last character otherwise.

This will work even when source text/var is null or empty:
-- Drop the last character: reverse the value, skip its first character, and
-- reverse it back. DATALENGTH(@a) is a byte count, so it is never smaller than
-- the number of characters; using it instead of a fixed 9999 keeps values
-- longer than 9999 characters from being truncated.
-- NULL input yields NULL; an empty string yields an empty string.
SELECT REVERSE(SUBSTRING(REVERSE(@a), 2, DATALENGTH(@a)));

select left('TEST STRING', len('TEST STRING')-1)

-- Store @result back without its last character. An assignment needs the SET
-- keyword; the CASE guard keeps an empty (or NULL) value unchanged instead of
-- passing a length of -1 to SUBSTRING.
SET @result = CASE
                  WHEN LEN(@result) > 0 THEN SUBSTRING(@result, 1, LEN(@result) - 1)
                  ELSE @result
              END;

This is quite late, but interestingly never mentioned yet.
select stuff(x,len(x),1,'')
ie:
take a string x
go to its last character
remove one character
add nothing

If your column is text and not varchar, then you can use this:
SELECT SUBSTRING(#String, 1, NULLIF(DATALENGTH(#String)-1,-1))

If you want to do this in two steps, rather than the three of REVERSE-STUFF-REVERSE, you can have your list separator be one or two spaces. Then use RTRIM to trim the trailing spaces, and REPLACE to replace the double spaces with ','
select REPLACE(RTRIM('a b c d '),' ', ', ')
However, this is not a good idea if your original string can contain internal spaces.
Not sure about performance. Each REVERSE creates a new copy of the string, but STUFF is a third faster than REPLACE.
also see this

I can suggest this -hack- ;).
select
left(txt, abs(len(txt + ',') - 2))
from
t;
SQL Server Fiddle Demo

you can create function
-- Returns @string with its last @len characters removed.
--   @string  text to shorten
--   @len     number of trailing characters to drop (default 1)
-- Returns '' when @string holds fewer than @len characters and NULL when
-- @string is NULL.
CREATE FUNCTION [dbo].[TRUNCRIGHT] (@string NVARCHAR(max), @len int = 1)
RETURNS NVARCHAR(max)
AS
BEGIN
    -- LEN() does not count trailing spaces, so LEFT(@string, LEN(@string) - @len)
    -- would cut too much from a value such as N'ab '. Appending a sentinel
    -- character and subtracting one gives the full character count.
    DECLARE @charCount bigint = LEN(@string + N'.') - 1;

    IF @charCount < @len
        RETURN N'';

    RETURN LEFT(@string, @charCount - @len);
END

Get the last character
-- Last character of @string. The length arithmetic len - (len - 1) always
-- reduces to 1, so the count is written directly.
RIGHT(@string, 1)

Try this
-- Return the value without its final character.
DECLARE @String VARCHAR(100) = 'TEST STRING';

-- For an empty string LEN() - 1 is -1, which LEFT rejects with an
-- invalid-length error. NULLIF turns that -1 into NULL, so LEFT returns NULL.
SELECT LEFT(@String, NULLIF(LEN(@String) - 1, -1)) AS MyTrimmedColumn;

I encountered this problem and this way my problem was solved:
-- Return @name without its last character. Empty and NULL values are passed
-- through unchanged rather than handing LEFT a negative length.
DECLARE @name AS varchar(30) = 'TEST STRING';

SELECT CASE
           WHEN LEN(@name) > 0 THEN LEFT(@name, LEN(@name) - 1)
           ELSE @name
       END AS AfterRemoveLastCharacter;

My answer is similar to the accepted answer, but it also check for Null and Empty String.
DECLARE @String VARCHAR(100);
SET @String = 'asdfsdf1';

-- If string is null return null, else if string is empty return as it is,
-- else chop off the end character.
-- The NULL test uses IS NULL in a searched CASE: the simple form
-- "CASE x WHEN NULL" compares with "=", which is never true for NULL.
SET @String = CASE
                  WHEN @String IS NULL THEN NULL
                  WHEN LEN(@String) = 0 THEN @String
                  ELSE LEFT(@String, LEN(@String) - 1)
              END;

SELECT @String;

declare #string varchar(20)= 'TEST STRING'
Select left(#string, len(#string)-1) as Tada
output:
Tada
--------------------
TEST STRIN

I love @bill-hoenig's answer; however, I was using a subquery and I got caught up because the REVERSE function needed two sets of parentheses around the subquery. Took me a while to figure that one out!
-- For every row of Payments, build one string listing its visit reasons.
SELECT
-- Return comma delimited list of all payment reasons for this Visit.
-- The correlated subquery yields one 'code - name, ' fragment per distinct
-- reason of the current payment, and FOR XML PATH('') concatenates the
-- fragments into a single value.
-- REVERSE / STUFF(..., 1, 2, '') / REVERSE then removes the final ', ':
-- once reversed, that trailing separator occupies the first two positions.
-- The subquery carries its own pair of parentheses inside REVERSE( ... ) so
-- that it is passed as a scalar expression.
REVERSE(STUFF(REVERSE((
SELECT DISTINCT
CAST(CONVERT(varchar, r1.CodeID) + ' - ' + c.Name + ', ' AS VARCHAR(MAX))
FROM VisitReason r1
LEFT JOIN ReasonCode c ON c.ID = r1.ReasonCodeID
-- Correlates the list with the outer Payments row.
WHERE p.ID = r1.PaymentID
FOR XML PATH('')
)), 1, 2, '')) ReasonCode
FROM Payments p

To update the record by trimming the last N characters of a particular column:
UPDATE tablename SET columnName = LEFT(columnName , LEN(columnName )-N) where clause

Try It :
DECLARE #String NVARCHAR(100)
SET #String = '12354851'
SELECT LEFT(#String, NULLIF(LEN(#String)-1,-1))

declare #x varchar(20),#y varchar(20)
select #x='sam'
select
case when #x is null then #y
when #y is null then #x
else #x+','+#y
end
go
-- Join the non-NULL values of @x and @y with ', '.
DECLARE @x varchar(20), @y varchar(20);
SELECT @x = 'sam';
--,@y='john'

DECLARE @listStr varchar(max);

-- Each non-NULL value contributes ', value'; a NULL value contributes nothing
-- because ', ' + NULL is NULL and COALESCE swaps that for ''.
-- Putting the separator in front avoids depending on LEN(), which ignores a
-- trailing blank, to remove a trailing separator.
SET @listStr = COALESCE(', ' + @x, '') + COALESCE(', ' + @y, '');

-- Remove the leading ', '. STUFF returns NULL for an empty string, so the
-- result is NULL (not an error) when both inputs are NULL.
SELECT STUFF(@listStr, 1, 2, '');

Try this,
-- Strip a fixed-width prefix and suffix from @name.
DECLARE @name NVARCHAR(MAX);
DECLARE @prefixLen INT = 4;  -- width of the leading 'xxxx'
DECLARE @suffixLen INT = 4;  -- width of the trailing '****'

SET @name = 'xxxxTHAMIZHMANI****';

-- The guard returns '' for values too short to hold both decorations instead
-- of passing a negative length to SUBSTRING.
SELECT CASE
           WHEN LEN(@name) >= @prefixLen + @suffixLen
               THEN SUBSTRING(@name, @prefixLen + 1, LEN(@name) - @prefixLen - @suffixLen)
           ELSE ''
       END AS UserNames;
And the output will be like, THAMIZHMANI

Related

Removing zeros after a dash

I have data in a column formatted like:
012345-0001
001234-0011
123456-0111
000012-1234
i need a query to return the data like:
012345-1
001234-11
123456-111
000012-1234
i have created a function to remove a varying number of leading zeros, but can't figure out how to do it after the dash.
Thanks for any assistance
Function:
DECLARE #Output varchar(8000), #Value int, #Input varchar(8000)
SELECT
#Value = CASE
WHEN #Input LIKE '%[A-Z]%' THEN -1
ELSE CONVERT(int, RIGHT([matcode], 4))
END
FROM matter
WHERE matcode = #Input
IF (#Value = -1) SET #Output = #Input
ELSE IF (#Value BETWEEN 1 AND 999) SET #Output = RIGHT('000' + CONVERT(varchar(3), #Value), 3)
ELSE IF (#Value BETWEEN 1000 AND 9999) SET #Output = RIGHT('0000' + CONVERT(varchar(4), #Value), 4)
ELSE SET #Output = #Input
RETURN #Output
This should work, but can't test it..
DECLARE #Output varchar(8000), #Value int, #Input varchar(8000)
SELECT
#Value = CASE
WHEN #Input LIKE '%[A-Z]%' THEN -1
ELSE CONVERT(int, RIGHT([matcode], 4))
END
FROM matter
WHERE matcode = #Input
IF (#Value = -1) SET #Output = #Input
ELSE SET #Output = CONCAT( LEFT(#Input, 7), #Value )
RETURN #Output
Use below code to get the result.
-- Oracle: keep everything up to and including the dash, then append the
-- suffix converted to a number so that only its leading zeros disappear.
-- Replacing every '0' would also delete significant zeros ('-0101' -> '-11').
select substr('001234-0011', 1, instr('001234-0011', '-')) ||
       to_char(to_number(substr('001234-0011', instr('001234-0011', '-') + 1)))
from dual;
Use this if SqlServer as you have mentioned tSql
-- Keep the part up to and including the dash, then append the suffix cast to
-- int so that only leading zeros are dropped. Replacing every '0' would also
-- remove significant zeros, e.g. '-0101' would become '-11'.
select left('001234-0011', charindex('-', '001234-0011'))
     + cast(cast(substring('001234-0011', charindex('-', '001234-0011') + 1, 10) as int) as varchar(10));
The example below simply uses a CASE and a LIKE to check if the string ends with dash and 4 digits .
And a concat of an INT doesn't add leading zeros.
declare #Table table (col varchar(30));
insert into #Table (col) values
('012345-0001'),
('001234-0011'),
('123456-0111'),
('000012-1234');
select col as [in],
(case
when col like '%-0[0-9][0-9][0-9]'
then concat(left(col,len(col)-4), cast(right(col,4) as int))
else col
end) as [out]
from #Table;
Returns:
in out
012345-0001 012345-1
001234-0011 001234-11
123456-0111 123456-111
000012-1234 000012-1234
A test here
Using the varchar variables it could be used like this:
DECLARE #Input varchar(30), #Output varchar(30);
SET #Input = '012345-0001';
SET #Output = case when #Input like '%-0[0-9][0-9][0-9]' then concat(left(#Input,len(#Input)-4), cast(right(#Input,4) as int)) else #Input end;
Try this:
declare @var varchar(30);
set @var = '012345-0021';

-- Searching the reversed suffix for a '0' breaks when the suffix has no zero
-- (negative length) or ends in zeros ('-0100'). Casting the suffix to int
-- drops exactly the leading zeros and nothing else.
select concat(
    left(@var, charindex('-', @var)),                                  -- first part, dash included, kept 'as is'
    cast(substring(@var, charindex('-', @var) + 1, len(@var)) as int)  -- second part as a number
) as output;
output: 012345-21
Try it on db<>fiddle: link
Yet another option is to use parsename() with a concat
Example
Select *
,NewValue = concat( left(YourCol,charindex('-',YourCol+'-') )
,try_convert(int,parseName(replace(YourCol,'-','.'),1))
)
from YourTable
Returns
YourCol NewValue
012345-0001 012345-1
001234-0011 001234-11
123456-0111 123456-111
000012-1234 000012-1234
I noticed that most of the solutions assume that each numeric-looking part of the text is exactly 4 characters long.
Here is another solution Based on PARSENAME function which is a bit more flexible:
declare #Table table (col varchar(30));
insert into #Table (col) values
('012345-0001'),
('001234-0011'),
('123456-0111'),
('000012-1234')
SELECT
PARSENAME(REPLACE(col,'-','.'), 2) +
'-' +
CONVERT(VARCHAR(10), CONVERT(INT, PARSENAME(REPLACE(col,'-','.'), 1)))
FROM #Table
What about a one-liner using XQuery:
DECLARE #mockup TABLE (YourValue VARCHAR(30));
INSERT INTO #mockup VALUES
('012345-0001'),
('001234-0011'),
('123456-0111'),
('000012-1234');
--This is the query
SELECT YourValue
,CAST('<x>' + REPLACE(YourValue,'-','</x><x>') + '</x>' AS XML)
.value('concat(/x[1],"-",string(xs:int(/x[2])))','varchar(30)')
FROM #mockup;
The query will first transform your 012345-001 into <x>012345</x><x>001</x> with simple string replacements. The actual magic is happening within the XQuery expression.

Read each string that follows a char in sql

So let us say you have a string:
set #string = 'aaa,2,dqw,3,asdad,5,4'
I would like to read the chars that are after a char and a ","
So the result to this string would be:
Result
--------
2
3
5
How could I do this? Is there a way to use CHARINDEX for this?
If your string is just like your example, using Charindex(',', <string>) works too.
Otherwise, use PATINDEX. It functions similarly,but you can also set it to recognize all numeric characters.
-- Move @string forward to its first digit, then put into @var whatever
-- follows the leading run of digits.
-- T-SQL's IF takes no THEN keyword, and SUBSTRING always needs a length.
IF PATINDEX('%[0-9]%', @string) <> 0
BEGIN
    -- Drop everything in front of the first digit.
    SET @string = SUBSTRING(@string, PATINDEX('%[0-9]%', @string), LEN(@string));

    -- NOTE(review): the original call had no length argument; taking the rest
    -- of the string from the first non-digit is an assumption about the intent.
    -- The appended ',' guarantees a non-digit is found, so an all-digit value
    -- gives '' rather than a start position of 0.
    SET @var = SUBSTRING(@string, PATINDEX('%[^0-9]%', @string + ','), LEN(@string));
END
you now can do as you please with those variables.
To your exception, just use one more case statement and rid of it. No reason the code has to be too complicated.
One more Approach using numbers table and split string functions
declare #string varchar(max)
set #string = 'aaa,2,dqw,3,asdad,5,4'
;with cte
as
(
select * from [dbo].[SplitStrings_Numbers](#string,',')
)
select
* from cte where isnumeric(item)=1
Output:
2
3
5
4
If you are sure there are no special characters in your data, you can use the query above; however, ISNUMERIC sometimes reports non-numeric characters as numbers:
SELECT
ISNUMERIC('123') as '123' --1
,ISNUMERIC('.') as '.' --Period ---1
,ISNUMERIC(',') as ',' --Comma ---1
In this case,you can use TRY_Parse available from SQL server 2012..
declare #string varchar(max)
set #string = 'aaa,2,dqw,3,asdad,5,4'
;with cte
as
(
select b.* from [dbo].[SplitStrings_Numbers](#string,',') a
cross apply
(select try_parse(a.item as int) ) b(val)
)
select
* from cte where val is not null

Get everything after and before certain character in SQL Server

I got the following entry in my database:
images/test.jpg
I want to trim the entry so I get: test
So basically, I want everything after / and before .
How can I solve it?
use the following function
left(#test, charindex('/', #test) - 1)
If you want to get this out of your table using SQL, take a look at the following functions that will help you: SUBSTRING and CHARINDEX. You can use those to trim your entries.
A possible query will look like this (where col is the name of the column that contains your image directories:
SELECT SUBSTRING(col, LEN(SUBSTRING(col, 0, LEN(col) - CHARINDEX ('/', col))) + 1,
LEN(col) - LEN(SUBSTRING(col, 0, LEN(col) - CHARINDEX ('/', col))) - LEN(SUBSTRING(
col, CHARINDEX ('.', col), LEN(col))));
Bit of an ugly beast. It also depends on the standard format of 'dir/name.ext'.
Edit:
This one (inspired by praveen) is more generic and deals with extensions of different length:
SELECT SUBSTRING(col, LEN(LEFT(col, CHARINDEX ('/', col))) + 1, LEN(col) - LEN(LEFT(col,
CHARINDEX ('/', col))) - LEN(RIGHT(col, LEN(col) - CHARINDEX ('.', col))) - 1);
Before
SELECT SUBSTRING(ParentBGBU,0,CHARINDEX('/',ParentBGBU,0)) FROM dbo.tblHCMMaster;
After
SELECT SUBSTRING(ParentBGBU,CHARINDEX('-',ParentBGBU)+1,LEN(ParentBGBU)) FROM dbo.tblHCMMaster
----select characters before / including /
select SUBSTRING ('abcde/wxyz',0,CHARINDEX('/','abcde/wxyz')+1)
--select characters after / including /
select SUBSTRING('abcde/wxyz',CHARINDEX('/','abcde/wxyz'),LEN('abcde/wxyz'))
-- Sample data: image paths of the form 'dir/name.ext'.
declare @T table
(
    Col varchar(20)
);

insert into @T (Col)
values ('images/test1.jpg'),
       ('images/test2.png'),
       ('images/test3.jpg'),
       ('images/test4.jpeg'),
       ('images/test5.jpeg');

-- Text between the first '/' and the first '.' after it.
-- Searching Col + '.' guarantees a dot is found, so a row without an
-- extension yields everything after the slash instead of a negative length.
select substring(
           Col,
           charindex('/', Col) + 1,
           charindex('.', Col + '.', charindex('/', Col) + 1) - charindex('/', Col) - 1
       )
from @T;
I have made a method which is much more general :
so :
DECLARE @a NVARCHAR(MAX) = 'images/test.jpg';
--Touch here
DECLARE @keysValueToSearch NVARCHAR(4000) = '/';
DECLARE @untilThisCharAppears NVARCHAR(4000) = '.';
--Nothing to touch here

-- CHARINDEX matches the key literally; a LIKE-style pattern built from the key
-- would treat %, _ and [ inside it as wildcards.
DECLARE @keyPos BIGINT = CHARINDEX(@keysValueToSearch, @a);

-- First character after the key, or the start of the text when the key is
-- absent. DATALENGTH / 2 is the key length including trailing spaces, which
-- LEN would ignore.
DECLARE @startPos BIGINT =
    CASE WHEN @keyPos = 0 THEN 1
         ELSE @keyPos + DATALENGTH(@keysValueToSearch) / 2
    END;

DECLARE @endPos BIGINT = CHARINDEX(@untilThisCharAppears, @a, @startPos);

-- When the end marker is missing, take the rest of the text rather than
-- passing a negative length to SUBSTRING.
SELECT SUBSTRING(
           @a,
           @startPos,
           CASE WHEN @endPos = 0 THEN DATALENGTH(@a) ELSE @endPos - @startPos END
       );
SELECT Substring('ravi1234#gmail.com', 1, ( Charindex('#', 'ravi1234#gmail.com')
- 1 ))
Before,
-- Everything after the '#' separator. RIGHT needs the number of characters
-- that follow the separator (total length minus its position); using
-- "position + 1" happens to give the same number for this sample only.
SELECT RIGHT('ravi123#gmail.com', LEN('ravi123#gmail.com') - CHARINDEX('#', 'ravi123#gmail.com'))
After
I just did this in one of my reports and it was very simple.
Try this:
=MID(Fields!.Value,8,4)
Note: This worked for me because the value I was trying to get was a constant; I am not sure whether what you are trying to get is a constant as well.
I know this has been a while.. but here is an idea
declare #test varchar(25) = 'images/test.jpg'
select
#test as column_name
, parsename(replace(#test,'/','.'),1) as jpg
,parsename(replace(#test,'/','.'),2) as test
,parsename(replace(#test,'/','.'),3) as images
I found Royi Namir's answer useful but expanded upon it to create it as a function. I renamed the variables to what made sense to me but you can translate them back easily enough, if desired.
Also, the code in Royi's answer already handled the case where the character being searched from does not exist (it starts from the beginning of the string), but I wanted to also handle cases where the character that is being searched to does not exist.
In that case it acts in a similar manner by starting from the searched from character and returning the rest of the characters to the end of the string.
-- Returns the part of @inputString that lies between @stringToSearchFrom and
-- the next occurrence of @stringToSearchTo.
--   @inputString         text to search
--   @stringToSearchFrom  marker that precedes the wanted text; when it is not
--                        found, extraction starts at the beginning of the text
--   @stringToSearchTo    marker that follows the wanted text; when it is not
--                        found, the rest of the text is returned
-- Returns NULL when any argument is NULL.
CREATE FUNCTION [dbo].[getValueBetweenTwoStrings](@inputString
NVARCHAR(4000), @stringToSearchFrom NVARCHAR(4000), @stringToSearchTo
NVARCHAR(4000))
RETURNS NVARCHAR(4000)
AS
BEGIN
    -- CHARINDEX compares literally; a '%...%' pattern built from the marker
    -- would treat %, _ and [ inside the marker as wildcards.
    DECLARE @fromPos INT = CHARINDEX(@stringToSearchFrom, @inputString);

    -- DATALENGTH / 2 is the marker length in characters including trailing
    -- spaces, which LEN does not count.
    DECLARE @startPos INT =
        CASE WHEN @fromPos = 0 THEN 1
             ELSE @fromPos + DATALENGTH(@stringToSearchFrom) / 2
        END;

    DECLARE @toPos INT = CHARINDEX(@stringToSearchTo, @inputString, @startPos);

    -- No closing marker: behave as if it sat just past the last character.
    IF @toPos = 0
        SET @toPos = DATALENGTH(@inputString) / 2 + 1;

    RETURN SUBSTRING(@inputString, @startPos, @toPos - @startPos);
END
Usage:
SELECT dbo.getValueBetweenTwoStrings('images/test.jpg','/','.') AS MyResult
I got some invalid length errors. So i made this function, this should not give any length problems. Also when you do not find the searched text it will return a NULL.
-- Returns the text of @string found between @SearchStringToStart and the next
-- @SearchStringToStop after it.
--   @string               text to search
--   @SearchStringToStart  marker that precedes the wanted text
--   @SearchStringToStop   marker that follows the wanted text
-- Returns NULL when either marker is not found or any argument is NULL.
CREATE FUNCTION [FN].[SearchTextGetBetweenStartAndStop](@string varchar(max),@SearchStringToStart varchar(max),@SearchStringToStop varchar(max))
RETURNS varchar(max)
AS
BEGIN
    DECLARE @startPos bigint = CHARINDEX(@SearchStringToStart, @string);

    IF @startPos = 0
        RETURN NULL;

    -- First character after the start marker. LEN(x + '.') - 1 counts trailing
    -- spaces in the marker, which plain LEN would skip.
    DECLARE @contentPos bigint = @startPos + LEN(@SearchStringToStart + '.') - 1;

    -- Search for the stop marker only behind the start marker.
    DECLARE @stopPos bigint = CHARINDEX(@SearchStringToStop, @string, @contentPos);

    IF @stopPos = 0
        RETURN NULL;

    -- The previous arithmetic started one character late and stopped one
    -- character early ('images/test.jpg' gave 'est'), and produced a negative
    -- length when the stop marker directly followed the start marker.
    RETURN SUBSTRING(@string, @contentPos, @stopPos - @contentPos);
END
if Input= pg102a-wlc01s.png.intel.com and Output should be pg102a-wlc01s
we can use below query :
select Substring(pc.name,0,charindex('.',pc.name,0)),pc.name from tbl_name pc
You can try this:
Declare #test varchar(100)='images/test.jpg'
Select REPLACE(RIGHT(#test,charindex('/',reverse(#test))-1),'.jpg','')
Below query gives you data before '-'
Ex- W12345A-4S
SELECT SUBSTRING(Column_Name,0, CHARINDEX('-',Column_Name)) as 'new_name'
from [abc].
Output - W12345A
Inspired by the work of Josien, I wondered about a simplification.
Would this also work? Much shorter:
SELECT SUBSTRING(col, CHARINDEX ('/', col) + 1, CHARINDEX ('.', col) - CHARINDEX ('/', col) - 1);
(I can't test right now because of right issues at my company SQL server, which is a problem in its own right)
Simply Try With LEFT ,RIGHT ,CHARINDEX
-- File name between the first '/' and the following '.'.
select
    left(t.tail, charindex('.', t.tail + '.') - 1) as splitstring,
    a.name
from
    (select 'images/test.jpg' as name) as a
cross apply
    -- Characters after the slash: total length minus the slash position.
    -- "position + 1" only equals that count by coincidence for this sample.
    (select right(a.name, len(a.name) - charindex('/', a.name)) as tail) as t;
-- Extract the token that follows @searchStart, up to the next @searchEnd.
declare @searchStart nvarchar(100) = 'search ';
declare @searchEnd nvarchar(100) = ' ';
declare @string nvarchar(4000) = 'This is a string to search (hello) in this text ';

-- DATALENGTH / 2 counts the trailing space of @searchStart; LEN ignores it,
-- which made the result start on that space.
declare @startIndex int = CHARINDEX(@searchStart, @string) + DATALENGTH(@searchStart) / 2;
declare @endIndex int = CHARINDEX(@searchEnd, @string, @startIndex);

-- No terminator after the start position: take the rest of the text instead
-- of a negative length.
declare @length int =
    case when @endIndex = 0 then DATALENGTH(@string) / 2 - @startIndex + 1
         else @endIndex - @startIndex
    end;

declare @sub nvarchar(4000) = SUBSTRING(@string, @startIndex, @length);

select @startIndex, @endIndex, @length, @sub;
This is a little more legible than the one-liners in this answer which specifically answer the question, but not in a generic way that would benefit all readers. This could easily be made into a function as well with a slight modification.
If there is more than one occurrence of the given character, or none at all, use this:
DECLARE #rightidx int = CASE
WHEN 'images/images/test.jpg' IS NULL OR (CHARINDEX('.', 'images/images/test.jpg')) <= 0 THEN LEN('images/images/test.jpg')
ELSE (CHARINDEX('.', REVERSE('images/images/test.jpg')) - 1)
END
SELECT RIGHT('images/images/test.jpg', #rightidx)
This was the approach I took.
-- Returns the part of @my_string in front of the first @my_char.
--   @my_string  text to search
--   @my_char    single delimiter character
-- Returns '' when the delimiter is absent, when it is the first character, or
-- when @my_string is NULL.
CREATE FUNCTION dbo.get_text_before_char(@my_string nvarchar(255),@my_char char(1))
RETURNS nvarchar(255)
AS
BEGIN
    -- CHARINDEX matches the delimiter literally; a LIKE test would treat
    -- '%', '_' or '[' as wildcards.
    DECLARE @pos int = CHARINDEX(@my_char, @my_string);

    -- @pos - 1 is 0 for a leading delimiter, so LEFT returns '' there rather
    -- than the delimiter itself.
    RETURN CASE WHEN @pos > 0 THEN LEFT(@my_string, @pos - 1) ELSE '' END;
END;
-- Returns the part of @my_string behind the last @my_char.
--   @my_string  text to search
--   @my_char    single delimiter character
-- Returns '' when the delimiter is absent, when it is the last character, or
-- when @my_string is NULL.
CREATE FUNCTION dbo.get_text_after_char(@my_string nvarchar(255),@my_char char(1))
RETURNS nvarchar(255)
AS
BEGIN
    -- Position of the delimiter counted from the end of the text; searching the
    -- reversed text finds the last occurrence.
    DECLARE @posFromEnd int = CHARINDEX(@my_char, REVERSE(@my_string));

    -- @posFromEnd - 1 is 0 for a trailing delimiter, so RIGHT returns '' there
    -- rather than the delimiter itself.
    RETURN CASE WHEN @posFromEnd > 0 THEN RIGHT(@my_string, @posFromEnd - 1) ELSE '' END;
END;
SELECT
dbo.get_text_before_char('foo-bar','-')
, dbo.get_text_after_char('foo-bar','-')
declare @test varchar(100) = 'images/test.jpg';

-- Text between the first '/' and the next '.', whatever its length; taking a
-- fixed four characters only fits names that are exactly four long.
-- Searching @test + '.' keeps the length non-negative when there is no dot.
select substring(
           @test,
           charindex('/', @test) + 1,
           charindex('.', @test + '.', charindex('/', @test) + 1) - charindex('/', @test) - 1
       );

How to get the numeric part from a string using T-SQL?

I have the following string.
Input
--------------
2030031469-NAI
To get the numeric part, I am using the following script
declare #str varchar(50)= '2030031469-NAI'
Select
#str
,SUBSTRING(#str, 1, NULLIF(CHARINDEX('-', #str) - 1, -1))
,Left(#str,PATINDEX('%-%',#str)-1)
to get the following output
Output:
----------
2030031469
Is there any other easy/elegant way of doing the same?
select left(#str, patindex('%[^0-9]%', #str+'.') - 1)
In case your string start with alphabet and end with number like ERT-123
you can use this query:
(select substring(#str,patindex('%[0-9]%', #str),len(#str)))
Please check with this, i used in my project for extracting phone numbers
-- Returns @Temp with every character other than the digits 0-9 removed.
--   @Temp  text to clean, e.g. a formatted phone number
-- Returns NULL when @Temp is NULL.
CREATE FUNCTION [dbo].[RemoveNonNumericCharacters](@Temp VARCHAR(1000))
RETURNS VARCHAR(1000)
AS
BEGIN
    -- Position of the first character that is not a digit (0 when none is left).
    DECLARE @badPos INT = PATINDEX('%[^0-9]%', @Temp);

    WHILE @badPos > 0
    BEGIN
        -- Delete that one character, then look for the next offender.
        SET @Temp = STUFF(@Temp, @badPos, 1, '');
        SET @badPos = PATINDEX('%[^0-9]%', @Temp);
    END

    RETURN @Temp;
END
To extract number from an unformatted string
-- Return the first run of digits found in @Text ('' when there is none).
DECLARE @Text NVARCHAR(100) = 'extract only 23124R integer ##%%';

-- The inner SUBSTRING is the text from the first digit onwards; PATINDEX then
-- locates the first non-digit in it. Appending N'x' guarantees such a
-- character exists, so a number at the very end of the text no longer
-- produces a length of -1.
SELECT SUBSTRING(
           @Text,
           PATINDEX('%[0-9]%', @Text),
           PATINDEX('%[^0-9]%', SUBSTRING(@Text, PATINDEX('%[0-9]%', @Text), LEN(@Text)) + N'x') - 1
       ) AS [ONLY_INT];

SQL: problem word count with len()

I am trying to count words of text that is written in a column of table. Therefor I am using the following query.
SELECT LEN(ExtractedText) -
LEN(REPLACE(ExtractedText, ' ', '')) + 1 from EDDSDBO.Document where ID='100'.
I receive a wrong result that is much to high.
On the other hand, if I copy the text directly into the statement then it works, i.e.
SELECT LEN('blablabla text') - LEN(REPLACE('blablabla text', ' ', '')) + 1.
Now the datatype is nvarchar(max) since the text is very long. I have already tried to convert the column into text or ntext and to apply datalength() instead of len(). Nevertheless I obtain the same result that it does work as a string but does not work from a table.
You're counting spaces not words. That will typically yield an approximate answer.
e.g.
' this string will give an incorrect result '
Try this approach: http://www.sql-server-helper.com/functions/count-words.aspx
-- Counts the words in @InputString, where a word starts at every non-space
-- character that follows a space or begins the text.
--   @InputString  text to analyse (only the space character separates words)
-- Returns 0 for NULL or empty input.
CREATE FUNCTION [dbo].[WordCount] ( @InputString VARCHAR(4000) )
RETURNS INT
AS
BEGIN
    -- The length does not change inside the loop, so it is read once.
    DECLARE @Length INT = LEN(@InputString);
    DECLARE @Index INT = 1;
    DECLARE @WordCount INT = 0;
    DECLARE @Char CHAR(1);
    -- Treat the position before the first character as a space so that a word
    -- at the very start is counted.
    DECLARE @PrevChar CHAR(1) = ' ';

    WHILE @Index <= @Length
    BEGIN
        SET @Char = SUBSTRING(@InputString, @Index, 1);

        -- Space-to-non-space transition marks the start of a word.
        IF @PrevChar = ' ' AND @Char != ' '
            SET @WordCount = @WordCount + 1;

        -- Remember this character for the next pass instead of re-reading it.
        SET @PrevChar = @Char;
        SET @Index = @Index + 1;
    END

    RETURN @WordCount;
END
GO
usage
DECLARE #String VARCHAR(4000)
SET #String = 'Health Insurance is an insurance against expenses incurred through illness of the insured.'
SELECT [dbo].[WordCount] ( #String )
Leading spaces, trailing spaces, two or more spaces between the neighbouring words – these are the likely causes of the wrong results you are getting.
The functions LTRIM() and RTRIM() can help you eliminate the first two issues. As for the third one, you can use REPLACE(ExtractedText, ' ', ' ') to replace double spaces with single ones, but I'm not sure if you do not have triple ones (in which case you'd need to repeat the replacing).
UPDATE
Here's a UDF that uses CTEs and ranking to eliminate extra spaces and then counts the remaining ones to return the quantity as the number of words:
-- Counts the words in @Str by counting groups of consecutive spaces between
-- them.
--   @Str  text to analyse; leading and trailing spaces are ignored
-- Returns 0 for NULL, empty or blank input.
-- NOTE: master..spt_values serves as the number table; per the accompanying
-- explanation its type 'P' rows only reach 2047, so longer inputs need a
-- custom tally table.
CREATE FUNCTION fnCountWords (@Str varchar(max))
RETURNS int
AS BEGIN
    DECLARE @res int;

    SET @Str = RTRIM(LTRIM(@Str));

    -- Without this guard the "+ 1" below would report one word for input that
    -- contains none.
    IF @Str IS NULL OR @Str = ''
        RETURN 0;

    WITH split AS (
        -- One row per character of the trimmed text.
        SELECT
            idx = number,
            chr = SUBSTRING(@Str, number, 1)
        FROM master..spt_values
        WHERE type = 'P'
          AND number BETWEEN 1 AND LEN(@Str)
    ),
    ranked AS (
        -- Within a run of equal neighbouring characters idx and ROW_NUMBER grow
        -- together, so their difference is constant for the whole run.
        SELECT
            idx,
            chr,
            rnk = idx - ROW_NUMBER() OVER (PARTITION BY chr ORDER BY idx)
        FROM split
    )
    -- Number of distinct space runs = number of gaps; words = gaps + 1.
    SELECT @res = COUNT(DISTINCT rnk) + 1
    FROM ranked
    WHERE chr = ' ';

    RETURN @res;
END
With this function your query will be simply like this:
-- Word count of the document text.
-- A scalar user-defined function must be called with at least a two-part
-- name. NOTE(review): dbo is assumed to be the schema the function was
-- created in - adjust if your default schema differs.
SELECT dbo.fnCountWords(ExtractedText)
FROM EDDSDBO.Document
WHERE ID = '100';
UPDATE 2
The function uses one of the system tables, master..spt_values, as a tally table. The particular subset used contains only values from 0 to 2047. This means the function will not work correctly for inputs longer than 2047 characters (after trimming both leading and trailing spaces), as @t-clausen.dk has correctly noted in his comment. Therefore, a custom tally table should be used if longer input strings are possible.
Replace the spaces with something that never occur in your text like ' $!' or pick another value.
then replace all '$! ' and '$!' with nothing this way you never have more than 1 space after a word. Then use your current script. I have defined a word as a space followed by a non-space.
This is an example
DECLARE #T TABLE(COL1 NVARCHAR(2000), ID INT)
INSERT #T VALUES('A B C D', 100)
SELECT LEN(C) - LEN(REPLACE(C,' ', '')) COUNT FROM (
SELECT REPLACE(REPLACE(REPLACE(' ' + COL1, ' ', ' $!'), '$! ',''), '$!', '') C
FROM #T ) A
Here is a recursive solution
-- Count words per row by peeling one word off the front on every recursion.
DECLARE @T TABLE (COL1 NVARCHAR(2000), ID INT);
INSERT @T (COL1, ID) VALUES ('A B C D', 100);
INSERT @T (COL1, ID) VALUES ('have a nice day with 7 words', 100);

;WITH CTE AS
(
    -- Anchor: every row starts with a count of one word and its full text.
    SELECT 1 words, col1 c, col1 FROM @T WHERE id = 100
    UNION ALL
    -- Step: '% [^ ]%' finds a space followed by a non-space, i.e. the gap in
    -- front of the next word; keep only the text after that space.
    SELECT words + 1, right(c, len(c) - patindex('% [^ ]%', c)), col1 FROM cte
    WHERE patindex('% [^ ]%', c) > 0
)
-- The final row of each chain (no further gap) carries the total.
SELECT words, col1 FROM cte WHERE patindex('% [^ ]%', c) = 0
-- One recursion level is used per word; the default limit of 100 levels would
-- abort the query for longer texts.
OPTION (MAXRECURSION 0);
You should declare the column using the varchar data type, like:
create table emp(ename varchar(22));
insert into emp values('amit');
select ename,len(ename) from emp;
output : 4