Find special characters in all rows in specific columns in table - sql-server-2005

I have a database containing about 50 tables; each table has about 10-100 columns, with at most 1 million rows per table. (Quite big for a newbie :P)
Database is old and some rows contains special characters (invisible characters or some weird unicode) and I would like to remove those characters.
I was searching google and I found a small snippet that lists all columns with specific type:
-- List every varchar (user_type_id 167) / nvarchar (231) column of the
-- Orders table together with its type name.
SELECT
    OBJECT_NAME(c.OBJECT_ID) AS [TableName],
    c.[name] AS [ColName],
    t.[name] AS [TypeName]
FROM sys.all_columns AS c
INNER JOIN sys.types AS t
    ON c.user_type_id = t.user_type_id
WHERE c.user_type_id IN (167, 231)
    AND OBJECT_NAME(c.OBJECT_ID) = 'Orders'
This lists all columns that are varchar or nvarchar.
I found two functions, one that returns a table of all characters from a string and second that checks if string contains any special characters:
-- Inline table-valued function: explodes @str into one row per character,
-- returning each character and its UNICODE code point.
-- NOTE(review): the improvised numbers set comes from sys.all_objects, so
-- strings longer than that view's row count are silently truncated.
CREATE FUNCTION AllCharactersInString (@str nvarchar(max))
RETURNS TABLE
AS
RETURN
(
    SELECT
        SUBSTRING(B.main_string, C.int_seq, 1) AS character,
        UNICODE(SUBSTRING(B.main_string, C.int_seq, 1)) AS unicode_value
    FROM (SELECT @str AS main_string) AS B
    CROSS JOIN (
        SELECT A.int_seq
        FROM (
            SELECT ROW_NUMBER() OVER (ORDER BY name) AS int_seq
            FROM sys.all_objects
        ) AS A
        WHERE A.int_seq <= LEN(@str)
    ) AS C
)
And second:
-- Scalar UDF: returns 1 when @str contains any of the listed control /
-- invisible code points, otherwise 0.
CREATE FUNCTION ContainsInvisibleCharacter (@str nvarchar(max))
RETURNS int
AS
BEGIN
    DECLARE @Result int

    IF EXISTS (
        SELECT *
        FROM AllCharactersInString(@str)
        WHERE unicode_value IN (1, 9, 10, 11, 12, 13, 14, 28, 29, 31, 129, 141, 143, 144, 157, 160)
    )
        SET @Result = 1
    ELSE
        SET @Result = 0

    RETURN @Result
END
My question is how to combine those two functions into one (if that is possible, and if it will be faster), and second: how to run that function on all records in all columns (of a specific type) in a table.
I have this code:
-- Find orders whose Rn_Descriptor contains an invisible character.
-- NOTE(review): the scalar UDF is evaluated row by row, which is the main
-- reason this is slow; also, TOP 1000 with no ORDER BY makes the picked
-- subset of ids nondeterministic.
SELECT
O.Order_Id
,Rn_Descriptor
FROM
dbo.Order O
WHERE
dbo.ContainsInvisibleCharacter(O.Rn_Descriptor) = 1
AND
O.Order_Id IN (SELECT TOP 1000
Order.Order_Id
FROM
dbo.Order
WHERE
Order.Rn_Descriptor IS NOT NULL
)
But it works sooo slow :/
Maybe there is a faster way to remove unwanted characters?
What will be fine is to find rows containing those characters, list them, then I could manually check them.

You can do this more efficiently using LIKE.
-- Faster variant: a single LIKE with a character class avoids materialising
-- one row per character of the input.
CREATE FUNCTION ContainsInvisibleCharacter(@str nvarchar(max)) RETURNS int
AS
BEGIN
    RETURN
        (SELECT CASE WHEN @str LIKE
            '%[' + NCHAR(1) + NCHAR(9) + NCHAR(10) + NCHAR(11) + NCHAR(12)
                 + NCHAR(13) + NCHAR(14) + NCHAR(28) + NCHAR(29) + NCHAR(31)
                 + NCHAR(129) + NCHAR(141) + NCHAR(143) + NCHAR(144)
                 + NCHAR(157) + NCHAR(160) + ']%'
         THEN 1 ELSE 0 END)
END

Related

How to get a list of temporary tables created in a stored procedure?

I have a stored procedure where I create several temporary tables. How can I get the list of those temporary tables created in that stored procedure?
Something like this:
SELECT [# temporary table name]
FROM sys.procedures
WHERE name = '<Stored Procedure Name>'
I want this result
Temporary_Table_Name
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#TemporaryTable1
#TemporaryTable2
.
.
.
#TemporaryTableN
(N row(s) affected)
Then, with that list, I want to built DROP TABLE instructions dynamically.
 
Dinamic_DROP_Instruction
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
IF OBJECT_ID('tempdb..#TemporaryTable1') IS NOT NULL DROP TABLE #TemporaryTable1
IF OBJECT_ID('tempdb..#TemporaryTable2') IS NOT NULL DROP TABLE #TemporaryTable2
.
.
.
IF OBJECT_ID('tempdb..#TemporaryTableN') IS NOT NULL DROP TABLE #TemporaryTableN
(N row(s) affected)
I was able to construct a code to get the list of temporary tables and also set up the dynamic instruction to DROP each temporary table if it exists.
I leave the code and the links of the sources on which I was based.
CODE:
-- Scan a stored procedure's definition for "create ... #name(" patterns and
-- list the temp table names it creates. #Positions / #TemporalTableNames are
-- genuine temp tables; all scalar variables use @.
DECLARE @NameStoreProcedure AS VARCHAR(100) = 'Name_of_store_procedure' -- Do not include the schema

IF OBJECT_ID('tempdb..#Positions') IS NOT NULL
    DROP TABLE #Positions
IF OBJECT_ID('tempdb..#TemporalTableNames') IS NOT NULL
    DROP TABLE #TemporalTableNames

--Find all positions: http://dba.stackexchange.com/questions/41961/how-to-find-all-positions-of-a-string-within-another-string
DECLARE @term CHAR(20) = 'create'
DECLARE @string VARCHAR(MAX)

SELECT @string = OBJECT_DEFINITION(object_id)
FROM sys.procedures
WHERE NAME = @NameStoreProcedure

SET @string += '.' --Add any data here (different from the one searched) to get the position of the last character

--Range of numbers: http://stackoverflow.com/questions/21425546/how-to-generate-a-range-of-numbers-between-two-numbers-in-sql-server
DECLARE @min BIGINT
    , @max BIGINT

SELECT @min = 1
    , @max = LEN(@string)

--Get positions of 'CREATE'
SELECT pos = Number - LEN(@term)
INTO #Positions
FROM (
    SELECT Number
        , Item = LTRIM(RTRIM(SUBSTRING(@string, Number, CHARINDEX(@term, @string + @term, Number) - Number)))
    FROM (
        SELECT TOP (@max - @min + 1) @min - 1 + ROW_NUMBER() OVER (
            ORDER BY t1.number
            ) AS N
        FROM master..spt_values t1
        CROSS JOIN master..spt_values t2
    ) AS n(Number)
    WHERE Number > 1
        AND Number <= CONVERT(INT, LEN(@string))
        AND SUBSTRING(@term + @string, Number, LEN(@term)) = @term
) AS y

-- Keep only the matches that look like "#name(" and strip tab/CR/LF noise.
SELECT RTRIM(LTRIM(REPLACE(REPLACE(REPLACE(SUBSTRING(@string, pos - 1, CHARINDEX('(', @string, pos) - pos + 1), CHAR(9), ''), CHAR(13), ''), CHAR(10), ''))) AS NAME
INTO #TemporalTableNames
FROM #Positions
WHERE SUBSTRING(@string, pos - 1, CHARINDEX('(', @string, pos) - pos + 1) LIKE '#%'

--List of temporary tables
SELECT NAME
FROM #TemporalTableNames
/*
--Dynamic Instruction for DROP instructios
SELECT 'IF OBJECT_ID(''tempdb..' + NAME + ''') IS NOT NULL DROP TABLE ' + NAME
FROM #TemporalTableNames
*/
Too long to comment....
This isn't going to be easy and will need a parse function likely. To start, use OBJECT_DEFINITION or sp_helptext or look in sys.sql_modules or what ever other method you want to get the definition. Then you'll have to search for your temp table based on # or what ever other method you want, and split those. It's going to be extremely messy and error prone IMHO. Here's a start.
-- Crude starting point: grab 60 characters starting at the first '#' found
-- in the procedure's definition. 60 is arbitrary (see note below).
SELECT
SUBSTRING(
OBJECT_DEFINITION(OBJECT_ID('yourProcedure')),
CHARINDEX('#',OBJECT_DEFINITION(OBJECT_ID('yourProcedure'))),
60)
60 here is just a made-up number. You'd want to find the first white space after the # or something similar. Again, I don't think there is a fast way other than using CONTROL+F in your procedure and manually adding the DROP statements...

SQL separating address into multiple columns using spaces

I have over 7 million rows, otherwise I would use Excel.
My address column has a varying number of words. Some are as short as '123 bay street', while others can be as long as '1234 west spring hill drive apt 123'.
My goal is to put each word into its own column. I was able to get the first word, using the query below. But I can't create a query efficient enough to do the rest.
-- Copy the first word of phy_addr1 into Address_number for rows where it is
-- still NULL. When there is no space, the whole value plus a trailing space
-- is used so SUBSTRING/CHARINDEX do not return an empty string.
update X
set X.Address_number = Y.[address]
from
(SELECT
unique_id,
CASE
WHEN SUBSTRING(phy_addr1, 1, CHARINDEX(' ', phy_addr1)) = ''
THEN phy_addr1 + ' '
ELSE SUBSTRING(phy_addr1, 1, CHARINDEX(' ', phy_addr1))
END 'address'
FROM
[RD_GeoCode].[dbo].[PA_Stg_excel]) as Y
inner join
[RD_GeoCode].[dbo].[rg_ApplicationData_AllForms_20160401_address] as X on X.unique_id = Y.unique_id
where
X.Address_number is null
You need to have a Numbers table and one of the split-string functions mentioned here. Once you have that, it's simple...
-----String splitter function
-- Splits @List on @Delimiter using the dbo.Numbers tally table; returns one
-- row per item. Requires dbo.Numbers to cover at least LEN(@List) values.
CREATE FUNCTION dbo.SplitStrings_Numbers
(
    @List NVARCHAR(MAX),
    @Delimiter NVARCHAR(255)
)
RETURNS TABLE
WITH SCHEMABINDING
AS
RETURN
(
    SELECT Item = SUBSTRING(@List, Number,
        CHARINDEX(@Delimiter, @List + @Delimiter, Number) - Number)
    FROM dbo.Numbers
    WHERE Number <= CONVERT(INT, LEN(@List))
        AND SUBSTRING(@Delimiter + @List, Number, LEN(@Delimiter)) = @Delimiter
);
GO
you can use the above function like below..
-- Explode every address into one row per word.
select
*
from yourtable t
cross apply
dbo.SplitStrings_Numbers(t.address,' ') b
Instead of updating values in the same table, I suggest creating another table that links back to the table above. This requires some schema modification to your existing table.
-- Side table holding one row per (address, word) pair instead of widening
-- the original table with a column per word.
create table addressreferences
(
addresss varchar(300), -- NOTE(review): column name spelled with three s's in the original
delimitedvalue varchar(100)
)
-- Seed it from the existing data.
insert into addressreferences
select
address,b.*
from yourtable t
cross apply
dbo.SplitStrings_Numbers(t.address,' ') b
This is just a pseudo code to give an idea,you will have to take care of references...Updating same table will not work ,because you are not aware how many rows an address column can span
Update:
I think a trigger will be better suit for your scenario instead of references ..But you have to do an insert first on references table for existing values .here is some pseudo code..
-- NOTE(review): pseudo-code only, as the answer itself says. It will not
-- compile as written: the timing clause must follow ON
-- ("ON dbo.yourtable AFTER INSERT, UPDATE, DELETE"), "delete * from" is not
-- valid T-SQL (DELETE takes no *), and "adressreferences" below is a typo.
-- Kept verbatim to show the intended insert/delete/update maintenance flow.
create trigger trg_test
after insert,update,delete
on dbo.yourtable
as
begin
---check for inserts
if exists(Select * from inserted)
begin
-- split freshly inserted addresses into the references table
insert into addressreferences
select address,b.* from inserted i
cross apply
dbo.splitstrings(address,' ') b
--check for deletes
if exists(select 1 from deleted)
begin
delete * from
deleted d
join
adressreferences a
on a.address=d.address
end
if update(address)
begin
---here i recommend doing delete first since your old address and new one may not have equal rows
delete * from
deleted d
join
addressreferences a
on a.address=d.address
--then do a insert
insert into addressreferences
select address,a.* from
inserted i
join
addressreferences a
on a.address=i.address
end
end
end
A sequence table is a good thing. As in Louis Davidson's 'Pro Relational database design and implementation', you can create it
CREATE SCHEMA tools
go
-- Tally table holding the integers 0..99999 (after Louis Davidson,
-- "Pro Relational Database Design and Implementation").
CREATE TABLE tools.sequence
(
    i int CONSTRAINT PKtools_sequence PRIMARY KEY
)
-- Populate it: five cross-joined digit sets give 10^5 distinct values.
-- (Add a sixth digit set, weighted by 100000, to extend to 999999.)
;WITH digits (i) AS (
    SELECT i
    FROM (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) AS d (i)
),
sequence (i) AS (
    SELECT D1.i + (10 * D2.i) + (100 * D3.i) + (1000 * D4.i) + (10000 * D5.i)
    FROM digits AS D1
    CROSS JOIN digits AS D2
    CROSS JOIN digits AS D3
    CROSS JOIN digits AS D4
    CROSS JOIN digits AS D5
)
INSERT INTO tools.sequence (i)
SELECT i
FROM sequence
Then split your input, again code from L. Davidson's book
-- Split @delimitedList on commas with the tools.sequence tally table: each i
-- that lands on a delimiter marks the start of the next item.
DECLARE @delimitedList VARCHAR(100) = '1,2,3,4,5'

SELECT word = SUBSTRING(',' + @delimitedList + ',', i + 1,
        CHARINDEX(',', ',' + @delimitedList + ',', i + 1) - i - 1)
FROM tools.sequence
WHERE i >= 1
    AND i < LEN(',' + @delimitedList + ',') - 1
    AND SUBSTRING(',' + @delimitedList + ',', i, 1) = ','
ORDER BY i
using a space rather than a comma.
Finally, I would think of using the PIVOT operator to turn the rows into columns, but for it to work, you need to specify the maximum number of words.

Check to see if any combination of results when added will equal a variable

I need a select statement which will interrogate a list of totals and work out whether an adding combination exists within the result set that is equal to a local variable.
Example:
-- Fixture: five order totals to probe for subset sums.
create table #mytemptable
(
    totals Decimal (19,2)
)

insert into #mytemptable (totals)
values (57.83),
       (244.18),
       (239.23),
       (227.79),
       (563.12)

select *
from #mytemptable
I would now like to check if any combination(s) within the result when added will equal
285.62
Also, it would be nice if there were multiple instances where totals could be added to equal my variable then this would be handled and displayed in an appropriate fashion.
A bit convoluted but here it goes:
Basically my aim is to generate a dynamic query where one column will identify the value (first value on column A, second on column B, etc) and finally a column with the total.
After that we can do a group by with cube, which will sum all the permutations of values grouping them by the different columns. view example
The final result will show something like:
Total A B C D E
285.62 NULL NULL NA NA NA
This will indicate that 285.62 is the sum of the first and 2nd values, sorted by value
-- Build a dynamic UNION query that puts each total in its own lettered
-- column (A, B, C, ...); GROUP BY ... WITH CUBE then sums every combination,
-- and rows where Total = 285.62 reveal which values participate in the sum.
-- NOTE(review): @columnName is varchar(1), so this breaks past column 'Z'
-- (26 rows); the STUFF offset 2 + 10*(ord-1) assumes the fixed-width
-- ",'NA' AS X" fragments built above.
DECLARE @columns varchar(max) = ''
DECLARE @allcolumns varchar(max) = ''
DECLARE @columnName varchar(1) = 'A'
DECLARE @select varchar(max) = ''

-- One 'NA' placeholder column per row in the table, named A, B, C, ...
SELECT
    @columns = @columns + ',''NA'' AS ' + @columnName,
    @allcolumns = @allcolumns + ',' + @columnName,
    @columnName = CHAR(ASCII(@columnName) + 1)
FROM #mytemptable

SET @columnName = 'A'

-- One UNION branch per value: its own column carries the column letter,
-- every other column stays 'NA'.
SELECT
    @select = @select + CHAR(13) + 'UNION SELECT ' + CONVERT(varchar(100), totals) + ' AS totals' + STUFF(@columns, 2 + 10 * (ord - 1), 4, '''' + @columnName + ''''),
    @columnName = CHAR(ASCII(@columnName) + 1)
FROM
    (SELECT totals, ROW_NUMBER() OVER (ORDER BY totals) ord FROM #mytemptable) A

SET @select = STUFF(@select, 1, 6, '') -- drop the leading CR + "UNION "
SET @allcolumns = STUFF(@allcolumns, 1, 1, '') -- drop the leading comma

--PRINT (@select)
EXEC ( 'SELECT * FROM (
SELECT SUM(totals) AS Total, ' + @allcolumns + '
FROM (' + @select + ') A GROUP BY ' + @allcolumns + ' WITH CUBE
) sub WHERE Total = 285.62 ')
If you are willing to add an identity column to your table the following CTE solution will work for you:
-- Recursive CTE over a table with an added identity column (ID): each level
-- extends a partial sum with a higher-ID row, pruning any branch whose sum
-- already exceeds the 285.62 target. The final query counts exact hits.
WITH SumOfPermutations AS
(
SELECT
CONVERT(decimal(15,2), 0) SummedTotals,
0 id
UNION ALL
SELECT
CONVERT(decimal(15,2), A.SummedTotals + B.totals),
B.ID
FROM
SumOfPermutations A
INNER JOIN myTempTable B ON A.ID < B.ID AND A.SummedTotals + B.Totals <= 285.62
WHERE
A.SummedTotals + B.totals <= 285.62
)
SELECT
COUNT(*)
FROM
SumOfPermutations
WHERE
SummedTotals = 285.62
However, be advised that if you have a large number of small values, the performance will degrade massively. This is because once a permutation's sum is above 285.62 it is not included any more. If you have lots of small values, then you will have lots of permutations containing a large number of values before they reach the 285.62 threshold. If your real data is distributed similarly to the example data you gave, this should work well and quickly.
If you expect that the most numbers from your table that can be summed to a value below your 285.62 is of the order of 10, you should be OK. However if you have 20 values in your table below 30.0 you will probably have issues with this.

TSQL - Querying a table column to pull out popular words for a tag cloud

Just an exploratory question to see if anyone has done this or if, in fact it is at all possible.
We all know what a tag cloud is, and usually, a tag cloud is created by someone assigning tags. Is it possible, within the current features of SQL Server to create this automatically, maybe via trigger when a table has a record added or updated, by looking at the data within a certain column and getting popular words?
It is similar to this question: How can I get the most popular words in a table via mysql?. But, that is MySQL not MSSQL.
Thanks in advance.
James
Here is a good bit on parsing delimited string into rows:
http://anyrest.wordpress.com/2010/08/13/converting-parsing-delimited-string-column-in-sql-to-rows/
http://www.sqlteam.com/article/parsing-csv-values-into-multiple-rows
http://www.sqlteam.com/forums/topic.asp?TOPIC_ID=50648
T-SQL: Opposite to string concatenation - how to split string into multiple records
If you want to parse all words, you can use the space ' ' as your delimiter, Then you get a row for each word.
Next you would simply select the result set GROUPing by the word and aggregating the COUNT
Order your results and you're there.
IMO, the design approach is what makes this difficult. Just because you allow users to assign tags does not mean the tags must be stored as a single delimited list of words. You can normalize the structure into something like:
-- Sketch of a normalized tag schema; the '...' placeholders stand for a
-- concrete key type (e.g. int IDENTITY) to be filled in.
Create Table Posts ( Id ... not null primary key )
Create Table Tags( Id ... not null primary key, Name ... not null Unique )
Create Table PostTags
( PostId ... not null References Posts( Id )
, TagId ... not null References Tags( Id ) )
Now your question becomes trivial:
-- With normalized tags, popularity is a plain join + count.
Select T.Id, T.Name, Count(*) As TagCount
From PostTags As PT
Join Tags As T
On T.Id = PT.TagId
Group By T.Id, T.Name
Order By Count(*) Desc
If you insist on storing tags as delimited values, then only solution is to split the values on their delimiter by writing a custom Split function and then do your count. At the bottom is an example of a Split function. With it your query would look something like (using a comma delimiter):
-- Fallback for comma-delimited tag storage: split each row's Tags column
-- with dbo.Split, then count occurrences per word.
Select Tag.Value, Count(*) As TagCount
From Posts As P
Cross Apply dbo.Split( P.Tags, ',' ) As Tag
Group By Tag.Value
Order By Count(*) Desc
Split Function:
-- Inline TVF splitter supporting delimiters up to two characters; returns
-- (Position, Value) for each item of @DelimitedList.
Create Function [dbo].[Split]
(
      @DelimitedList nvarchar(max)
    , @Delimiter nvarchar(2) = ','
)
RETURNS TABLE
AS
RETURN
(
    With CorrectedList As
    (
        -- Guarantee the list both starts and ends with the delimiter so the
        -- scan below finds every boundary.
        Select Case When Left(@DelimitedList, DataLength(@Delimiter)/2) <> @Delimiter Then @Delimiter Else '' End
            + @DelimitedList
            + Case When Right(@DelimitedList, DataLength(@Delimiter)/2) <> @Delimiter Then @Delimiter Else '' End
            As List
            , DataLength(@Delimiter)/2 As DelimiterLen
    )
    , Numbers As
    (
        -- Ad-hoc numbers set large enough to index every character.
        Select TOP (Coalesce(Len(@DelimitedList),1)) Row_Number() Over ( Order By c1.object_id ) As Value
        From sys.objects As c1
        Cross Join sys.columns As c2
    )
    Select CharIndex(@Delimiter, CL.list, N.Value) + CL.DelimiterLen As Position
        , Substring (
              CL.List
            , CharIndex(@Delimiter, CL.list, N.Value) + CL.DelimiterLen
            , Case
                When CharIndex(@Delimiter, CL.list, N.Value + 1)
                    - CharIndex(@Delimiter, CL.list, N.Value)
                    - CL.DelimiterLen < 0 Then Len(CL.List)
                Else CharIndex(@Delimiter, CL.list, N.Value + 1)
                    - CharIndex(@Delimiter, CL.list, N.Value)
                    - CL.DelimiterLen
              End
          ) As Value
    From CorrectedList As CL
    Cross Join Numbers As N
    Where N.Value < Len(CL.List)
        And Substring(CL.List, N.Value, CL.DelimiterLen) = @Delimiter
)
Word or Tag clouds need two fields: a string and a value of how many times that word or string appeared in your collection. You can then pass the results into a tag cloud tool that will display the data as you require.
Not to take away from the previous answers, as they do answer the original challenge. However, I have a simpler solution using two functions (similar to @Thomas's answer), one of which uses regex to "clean" the words.
The two functions are:
dbo.fnStripChars(a, b) --use regex 'b' to cleanse a string 'a'
dbo.fnMakeTableFromList(a, b) --convert a single field 'a' into a tabled list, delimited by 'b'
I then apply them into a single SQL statement, using the TOP n feature to give me the top 10 words I want to pass onto PowerBI or some other graphical tool, for actually displaying a word or tag cloud.
-- Top 10 most frequent words: strip non-alphabetic characters, split on
-- spaces, upper-case, then group and count.
-- (Schema aligned to [dbo] to match the function definitions given below;
-- the original mixed in a "POTS" schema.)
SELECT TOP 10 b.[words], b.[total]
FROM
    (SELECT a.[words], count(*) AS [total]
     FROM
        (SELECT upper(l.item) AS [words]
         FROM dbo.MyTableWithWords AS c
         CROSS APPLY dbo.fnMakeTableFromList([dbo].fnStripChars(c.myColumnThatHasTheWords,'[^a-zA-Z ]'),' ') AS l) AS a
     GROUP BY a.[words]) AS b
ORDER BY 2 DESC
As you can see, the regex is [^a-zA-Z ], which is to give me only alphabetical characters and spaces. The space is then used as a delimiter to the make table function to separate each word individually. I apply a count(*), to give me the number of times that word appears, hence then I have everything I need to give me the TOP 10 results.
Note that CROSS APPLY is important here so I get only data with actual "words" in each record found. Otherwise it will go through every record with or without words to extract from the column I want.
fnStripChars()
-- Deletes every character of @String matching @MatchExpression (a PATINDEX
-- character class such as '[^a-zA-Z ]'), one character per loop iteration.
-- (CREATE keyword restored; the pasted snippet had dropped it.)
CREATE FUNCTION [dbo].[fnStripChars]
(
    @String NVARCHAR(4000),
    @MatchExpression VARCHAR(255)
)
RETURNS NVARCHAR(MAX)
AS
BEGIN
    SET @MatchExpression = '%' + @MatchExpression + '%'

    -- Remove one offending character at a time until none remain.
    WHILE PatIndex(@MatchExpression, @String) > 0
        SET @String = Stuff(@String, PatIndex(@MatchExpression, @String), 1, '')

    RETURN @String
END
fnMakeTableFromList()
-- XML-based splitter: rewrites @Delimiter as XML element boundaries so each
-- item becomes an <i> node, then shreds the nodes back into rows.
-- (CREATE keyword restored; the pasted snippet had dropped it.)
-- NOTE(review): CONVERT(VARCHAR, ...) with no length defaults to 30
-- characters, so items longer than 30 characters are truncated.
CREATE FUNCTION [dbo].[fnMakeTableFromList](
    @List VARCHAR(MAX),
    @Delimiter CHAR(1))
RETURNS TABLE
AS
RETURN (SELECT Item = CONVERT(VARCHAR, Item)
        FROM (SELECT Item = x.i.value('(./text())[1]','varchar(max)')
              FROM (SELECT [XML] = CONVERT(XML,'<i>' + REPLACE(@List,@Delimiter,'</i><i>') + '</i>').query('.')) AS a
              CROSS APPLY [XML].nodes('i') AS x(i)) AS y
        WHERE Item IS NOT NULL);
I've tested this with over 400K records and it's able to come back with my results in under 60 seconds. I think that's reasonable.

SQL: problem word count with len()

I am trying to count words of text that is written in a column of table. Therefor I am using the following query.
SELECT LEN(ExtractedText) -
LEN(REPLACE(ExtractedText, ' ', '')) + 1 from EDDSDBO.Document where ID='100'.
I receive a wrong result that is much to high.
On the other hand, if I copy the text directly into the statement then it works, i.e.
SELECT LEN('blablabla text') - LEN(REPLACE('blablabla text', ' ', '')) + 1.
Now the datatype is nvarchar(max) since the text is very long. I have already tried to convert the column into text or ntext and to apply datalength() instead of len(). Nevertheless I obtain the same result that it does work as a string but does not work from a table.
You're counting spaces not words. That will typically yield an approximate answer.
e.g.
' this string will give an incorrect result '
Try this approach: http://www.sql-server-helper.com/functions/count-words.aspx
-- Counts words in @InputString by scanning character-by-character and
-- counting space -> non-space transitions, so repeated/leading/trailing
-- spaces do not inflate the count.
CREATE FUNCTION [dbo].[WordCount] ( @InputString VARCHAR(4000) )
RETURNS INT
AS
BEGIN
    DECLARE @Index INT
    DECLARE @Char CHAR(1)
    DECLARE @PrevChar CHAR(1)
    DECLARE @WordCount INT

    SET @Index = 1
    SET @WordCount = 0

    WHILE @Index <= LEN(@InputString)
    BEGIN
        SET @Char = SUBSTRING(@InputString, @Index, 1)
        -- Treat position 1 as preceded by a space so a leading word counts.
        SET @PrevChar = CASE WHEN @Index = 1 THEN ' '
                             ELSE SUBSTRING(@InputString, @Index - 1, 1)
                        END

        -- A word starts wherever a non-space follows a space.
        IF @PrevChar = ' ' AND @Char != ' '
            SET @WordCount = @WordCount + 1

        SET @Index = @Index + 1
    END

    RETURN @WordCount
END
GO
usage
-- Example call: returns 14 words for this sentence.
DECLARE @String VARCHAR(4000)
SET @String = 'Health Insurance is an insurance against expenses incurred through illness of the insured.'
SELECT [dbo].[WordCount] ( @String )
Leading spaces, trailing spaces, two or more spaces between the neighbouring words – these are the likely causes of the wrong results you are getting.
The functions LTRIM() and RTRIM() can help you eliminate the first two issues. As for the third one, you can use REPLACE(ExtractedText, '  ', ' ') to replace double spaces with single ones, but I'm not sure whether you also have triple ones (in which case you'd need to repeat the replacing).
UPDATE
Here's a UDF that uses CTEs and ranking to eliminate extra spaces and then counts the remaining ones to return the quantity as the number of words:
-- Counts words by trimming @Str, grouping runs of consecutive spaces so each
-- run counts once, then adding 1 (n separator runs => n + 1 words).
-- NOTE(review): master..spt_values (type 'P') only covers 0..2047, so inputs
-- longer than 2047 characters after trimming are miscounted.
CREATE FUNCTION fnCountWords (@Str varchar(max))
RETURNS int
AS BEGIN
    -- (The original also declared an unused @xml variable; removed.)
    DECLARE @res int;

    SET @Str = RTRIM(LTRIM(@Str));

    WITH split AS (
        -- One row per character position of @Str.
        SELECT
            idx = number,
            chr = SUBSTRING(@Str, number, 1)
        FROM master..spt_values
        WHERE type = 'P'
            AND number BETWEEN 1 AND LEN(@Str)
    ),
    ranked AS (
        -- idx - ROW_NUMBER() is constant within a run of equal characters,
        -- so consecutive spaces share a single rnk value.
        SELECT
            idx,
            chr,
            rnk = idx - ROW_NUMBER() OVER (PARTITION BY chr ORDER BY idx)
        FROM split
    )
    SELECT @res = COUNT(DISTINCT rnk) + 1
    FROM ranked
    WHERE chr = ' ';

    RETURN @res;
END
With this function your query will be simply like this:
SELECT fnCountWords(ExtractedText)
FROM EDDSDBO.Document
WHERE ID='100'
UPDATE 2
The function uses one of the system tables, master..spt_values, as a tally table. The particular subset used contains only values from 0 to 2047. This means the function will not work correctly for inputs longer than 2047 characters (after trimming both leading and trailing spaces), as @t-clausen.dk has correctly noted in his comment. Therefore, a custom tally table should be used if longer input strings are possible.
Replace the spaces with something that never occurs in your text, like ' $!', or pick another value.
Then replace all '$! ' and '$!' occurrences with nothing; this way you never have more than one space after a word. Then use your current script. I have defined a word as a space followed by a non-space.
This is an example
-- Demo: collapse runs of spaces using the ' $!' sentinel token, then count
-- words as the number of remaining spaces (a word = space + non-space).
-- Table variables must use @ (DECLARE ... TABLE), fixed from the paste.
DECLARE @T TABLE(COL1 NVARCHAR(2000), ID INT)

INSERT @T VALUES('A B C D', 100)

SELECT LEN(C) - LEN(REPLACE(C,' ', '')) COUNT FROM (
    SELECT REPLACE(REPLACE(REPLACE(' ' + COL1, ' ', ' $!'), '$! ',''), '$!', '') C
    FROM @T ) A
Here is a recursive solution
-- Recursive word counter: each level strips the leading word (everything up
-- to the next space-then-non-space boundary); the level at which no boundary
-- remains gives the word count.
DECLARE @T TABLE(COL1 NVARCHAR(2000), ID INT)

INSERT @T VALUES('A B C D', 100)
INSERT @T VALUES('have a nice day with 7 words', 100)

;WITH CTE AS
(
    SELECT 1 words, col1 c, col1 FROM @T WHERE id = 100
    UNION ALL
    SELECT words +1, right(c, len(c) - patindex('% [^ ]%', c)), col1 FROM CTE
    WHERE patindex('% [^ ]%', c) > 0
)
SELECT words, col1 FROM CTE WHERE patindex('% [^ ]%', c) = 0
You should declare the column using the varchar data type, like:
-- Minimal LEN() demo on a varchar column: 'amit' has length 4.
create table emp(ename varchar(22));
insert into emp values('amit');
select ename, len(ename) from emp;
output : 4