Count the number of spaces in a string - sql

I am working on data validation and I am trying to count the number of spaces in a string. My problem is that when I count the spaces, any string with more than one space between words, or with trailing space(s), is not counted correctly.
I have tried the following queries without luck. Each query gives a different result, but none gives the desired output.
-- Sample data for the space-counting attempts below.
-- Expected number of spaces per row: 1, 4, 2, 1.
DECLARE @MyTbl TABLE (ID INT, Name VARCHAR(300))
INSERT INTO @MyTbl VALUES
(1, 'Alfreds Futterkiste'), -- 1 space
(2,'Mike James Ray  '), -- 4 spaces 1 space between each text and 2 spaces after text
(3,'Hanari  Carnes'), -- 2 spaces between text
(4,'James Michael')
-- 1: LEN() ignores trailing spaces, so row 2 is reported as 2 instead of 4
SELECT ID,
LEN(Name)-LEN(REPLACE(Name, ' ', '')) AS Count_Of_Spaces
FROM @MyTbl
-- 2: the ';' sentinel makes LEN() see trailing spaces, but it is only added on
--    the left-hand side, so every count comes out one too high
SELECT ID,
LEN(Name + ';')-LEN(REPLACE(Name,' ','')) AS Count_Of_Spaces2
FROM @MyTbl
-- 3: same expression as attempt 1 as written here, with the same trailing-space problem
SELECT ID,
LEN(Name)-LEN(REPLACE(Name,' ', '')) AS Count_Of_Spaces3
FROM @MyTbl
Current output based on the first query
ID Count_Of_Spaces
1 1
2 2
3 2
4 1
Desired output
ID Count_Of_Spaces
1 1
2 4
3 2
4 1

You could use DATALENGTH:
SELECT ID,
DATALENGTH(Name)-LEN(REPLACE(Name,' ', '')) AS Count_Of_Spaces
FROM #MyTBL;
DBFiddle Demo
LEN does not count trailing spaces.
If NVARCHAR then you need to divide by 2.
-- NVARCHAR variant: DATALENGTH() returns bytes (2 per character here), so halve it
-- to get the character count including trailing spaces.
DECLARE @MyTbl TABLE (ID INT, Name NVARCHAR(300))
INSERT INTO @MyTbl VALUES
(1, N'Alfreds Futterkiste'), -- 1 space
(2, N'Mike James Ray  '), -- 4 spaces 1 space between
-- each text and 2 spaces after text
(3, N'Hanari  Carnes'), -- 2 spaces between text
(4, N'James Michael');
SELECT ID,
DATALENGTH(Name)/2-LEN(REPLACE(Name,' ', '')) AS Count_Of_Spaces
FROM @MyTbl;
DBFiddle Demo2

You had the answer in your attempt #2. Probably just didn't realize to do the appending in the second part(REPLACE) of your query
DECLARE @MyTbl TABLE (ID INT, Name VARCHAR(300))
INSERT INTO @MyTbl VALUES
(1, 'Alfreds Futterkiste'), -- 1 space
(2,'Mike James Ray  '), -- 4 spaces 1 space between each text and 2 spaces after text
(3,'Hanari  Carnes'), -- 2 spaces between text
(4,'James Michael')
-- 2 (fixed): wrap the value in ';' sentinels on BOTH sides of the subtraction.
-- The sentinels stop LEN() from dropping trailing spaces and cancel each other out.
SELECT ID,
LEN(';' + Name + ';')-LEN(REPLACE(';' + Name + ';',' ','')) AS Count_Of_Spaces2
FROM @MyTbl

When I need the length of a field passed into a function or stored procedure and the field could have trailing spaces that are meant to be there, I use the following statement:-
-- Appending a non-space character makes LEN() count the trailing spaces of @Parm;
-- subtract 1 to remove the appended character again.
SET @Len = LEN(@Parm + '.') - 1

Related

Extracting last number within string

I have the below sample data and I'm trying to extract the last number within the string. I have the following, which gets me part of the way there, but I'm not sure how to get the value where it isn't the last word.
right(TextDescription, patindex('%[^0-9]%',reverse(TextDescription)) - 1)
The result should be:
ID
code
1
10015662
2
100040344
3
10015370
4
NULL
5
400337
Sample data
Create Table #TestData
(
ID int,
TextDescription varchar(100)
)
insert into #TestData Values (1,'Data From JOE BLOGGS 10015662 tree 10015662')
insert into #TestData Values (2,'Fast Data From JOHN SMITH 10004034 MARY SMITH 100040344 plant')
insert into #TestData Values (3,'Data In 10015370 pot JONES')
insert into #TestData Values (4,'Fast Data From LEE tree')
insert into #TestData Values (5,'Direct Data 106600 JANE GREEN 400337')
Just another option using a bit of JSON: the string is converted into a JSON array, and [key] preserves the sequence of the tokens.
-- For every row, return the last space-delimited token made up only of digits
-- (NULL when the text contains no such token).
Select A.ID
      ,B.Value
 From #TestData A
 Outer Apply (
    Select top 1 Value
     From OpenJSON( '["'+replace(string_escape(TextDescription,'json'),' ','","')+'"]' )
     -- Non-empty, digits only. try_convert(int,'') yields 0, so the empty tokens
     -- produced by doubled or trailing spaces would otherwise count as numbers.
     Where Value <> ''
       and Value not like '%[^0-9]%'
     -- [key] is a string column: sort it numerically so index 10 comes after index 9.
     Order by convert(int,[key]) Desc
 ) B
Results
ID Value
1 10015662
2 100040344
3 10015370
4 NULL
5 400337
The following will locate the last digit, trim the string at that point, find the preceding non-digit, and then perform another trim to get the final result. As in your original post, calculations are done on a reversed string to accommodate the lack of a LASTPATINDEX() function. CROSS APPLY is used to build up intermediate results and to avoid duplication of subexpressions.
-- Extract the last run of digits from TextDescription, one intermediate step per CROSS APPLY.
select T.*, P1.Pos1, P2.Pos2, N.Result
from #TestData T
-- R: the text reversed, so the last number in the original comes first
cross apply (select reverse(TextDescription) AS Reversed) R
-- P1: position of the first digit in the reversed text; NULL (via nullif) when there is none
cross apply (select nullif(patindex('%[0-9]%', R.Reversed), 0) AS Pos1) P1
-- T1: drop everything before that digit, so Trim1 starts with a digit
cross apply (select stuff(R.Reversed, 1, P1.Pos1 - 1, '') AS Trim1) T1
-- P2: position of the first non-digit in Trim1; the appended 'X' guarantees one is found
cross apply (select patindex('%[^0-9]%', T1.Trim1 + 'X') AS Pos2) P2
-- N: keep the leading digits and reverse them back into reading order
cross apply (select reverse(left(T1.Trim1, P2.Pos2 - 1)) AS Result) N
-- Partly reduced
select T.*, reverse(left(T1.Trim1, patindex('%[^0-9]%', T1.Trim1 + 'X') - 1)) AS Result
from #TestData T
cross apply (select reverse(TextDescription) AS Reversed) R
cross apply (select stuff(R.Reversed, 1, nullif(patindex('%[0-9]%', R.Reversed), 0) - 1, '') AS Trim1) T1
This will handle a variety of forms, not just space-delimited values.
See this db<>fiddle.
If the values you are interested in are always the last number in the text, and are always preceded by a non-digit, you can use SUBSTRING with:
lower boundary being the last non-digit before the last number location
length being the difference between last non-digit and first value of last number
-- Extract the last run of digits from TextDescription; NULL when the text has no digits.
WITH cte AS (
    SELECT ID, TextDescription,
           -- True character count: LEN() alone drops trailing spaces, while REVERSE()
           -- keeps them, which would shift the start offset computed below.
           LEN(TextDescription + 'x') - 1 AS text_len,
           -- In the reversed text: position of the first digit that is followed by a
           -- non-digit, i.e. the first digit of the last number in the original.
           -- The appended ' ' covers text that begins with that number.
           PATINDEX('%[0-9][^0-9]%', REVERSE(TextDescription) + ' ') AS first_space,
           -- In the reversed text: position of the first digit, i.e. the last digit of
           -- the last number. NULL when there is no digit, so SUBSTRING yields NULL.
           NULLIF(PATINDEX('%[0-9]%', REVERSE(TextDescription)), 0) AS last_digit
    FROM #TestData
)
SELECT ID,
       SUBSTRING(TextDescription,
                 text_len - first_space + 1,
                 first_space + 1 - last_digit) AS code
FROM cte
Check the demo here.
Please try the following solution leveraging XML and XQuery.
Notable points:
CROSS APPLY is tokenizing TextDescription column as XML.
XQuery FLWOR expression is checking every token if it is castable as
INTEGER data type. And filtering them out if they are not.
XPath predicate [last()] is giving us last INTEGER value.
SQL
-- DDL and sample data population, start
DECLARE @tbl Table (ID INT IDENTITY PRIMARY KEY, TextDescription varchar(100));
INSERT INTO @tbl (TextDescription) VALUES
('Data From JOE BLOGGS 10015662 tree 10015662'),
('Fast Data From JOHN SMITH 10004034 MARY SMITH 100040344 plant'),
('Data In 10015370 pot JONES'),
('Fast Data From LEE tree'),
('Direct Data 106600 JANE GREEN 400337');
-- DDL and sample data population, end
DECLARE @separator CHAR(1) = SPACE(1);
-- Each token is wrapped as <r><![CDATA[token]]></r>; the XQuery keeps only the
-- tokens castable to xs:int, and [last()] picks the final one (NULL if none).
SELECT t.*
, c.query('for $x in /root/r[not(empty(xs:int(.)))]
return $x
').value('(/r[last()]/text())[1]','INT') AS [code]
FROM @tbl AS t
CROSS APPLY (SELECT TRY_CAST('<root><r><![CDATA[' +
REPLACE(TextDescription, @separator, ']]></r><r><![CDATA[') +
']]></r></root>' AS XML)) AS t1(c)
ORDER BY t.ID;
Output
ID
TextDescription
code
1
Data From JOE BLOGGS 10015662 tree 10015662
10015662
2
Fast Data From JOHN SMITH 10004034 MARY SMITH 100040344 plant
100040344
3
Data In 10015370 pot JONES
10015370
4
Fast Data From LEE tree
NULL
5
Direct Data 106600 JANE GREEN 400337
400337
-- Returns the digits and spaces of @String in their original order, with leading
-- and trailing spaces removed. Every other character is dropped.
-- A NULL input yields an empty string (the loop never runs).
CREATE FUNCTION [ExtractInteger](@String VARCHAR(2000))
RETURNS VARCHAR(1000)
AS
BEGIN
    DECLARE @Pos INT = 1                    -- SUBSTRING positions are 1-based
    DECLARE @Length INT = LEN(@String)      -- evaluated once; trailing spaces are trimmed from the result anyway
    DECLARE @Char VARCHAR(1)
    DECLARE @IntNumbers VARCHAR(1000) = ''
    WHILE @Pos <= @Length
    BEGIN
        SET @Char = SUBSTRING(@String, @Pos, 1)
        -- keep separators and digits only
        IF @Char = ' ' OR (@Char >= '0' AND @Char <= '9')
        BEGIN
            SET @IntNumbers = @IntNumbers + @Char
        END
        SET @Pos = @Pos + 1
    END
    RETURN LTRIM(RTRIM(@IntNumbers))
END
The ExtractInteger function will fetch only numbers and spaces, and the below select will take the last word as number:
select right(dbo.ExtractInteger('My 3rd Phone Number is 323-111-CALL'), charindex(' ', reverse(dbo.ExtractInteger('My 3rd Phone Number is 323-111-CALL')) + ' ') - 1)

Storing and Query Blank Values in Hive Columns

I have a requirement for storing blank strings of length 1, 2, and 3 in some columns of my Hive table.
Storing:
If my column type is char, then I see that the data is always trimmed before storing. i.e. length(column) is always 0
If my column type is varchar then the data is not trimmed. so length(column) is 1, 2 and 3 respectively.
So that solves my storing problem.
Querying:
I am unable to query the column by value.
say. select * from hive table where column = ' ';
it only works if I do something like
select * from hive table where length(column) > 0 and trim(column) = '';
Is there a way to handle this separately ?
say I want to query those records where column value is of a blank string of length 3? How do I do this?
This is what i Tried (Note that the issues seems to be when the file is stored as parquet)
-- Parquet-backed table with a single varchar(3) column, used to reproduce the
-- blank-string comparison problem.
CREATE EXTERNAL TABLE IF NOT EXISTS DUMMY5 (
col1 varchar(3))
STORED AS PARQUET
LOCATION "/DUMMY5";
insert into DUMMY5 values ('  '); -- 2 character strings
insert into DUMMY5 values ('   '); -- 3 character strings
select col1, length(col1) from DUMMY5;
+-------+------+--+
| col1 | _c1 |
+-------+------+--+
| | 3 |
| | 2 |
+-------+------+--+
select col1, length(col1) from DUMMY5 where col1 = ' '; // 0 record
select col1, length(col1) from DUMMY5 where col1 = ' '; // 0 record
Running Hive 2.1.1
drop table dummy_tbl;
CREATE TABLE dummy_tbl (
col1 char(1),
col2 varchar(1),
col3 char(3),
col4 varchar(3)) ;
insert into dummy_tbl values (' ', ' ', ' ', ' ');
select length(col1), length(col2), length(col3), length(col4) from dummy_tbl;
Result:
c0 c1 c2 c3
0 1 0 2
Varchar column works absolutely correct. col2 was trimmed on insert, it is documented.
col4 varchar(2) works correctly, this query returns 1:
select count(*) from dummy_tbl where col4=' '; --returns 1
And length of all char columns shows 0 and comparison ignoring spaces like it is documented:
select count(*) from dummy_tbl where col1=' '; --single space --returns 1
select count(*) from dummy_tbl where col1=' '; --two spaces --also returns 1 because it is ignoring spaces
You can use varchar with proper length. Or STRING type if you not sure about length.

SQL: Extract last 5 digits in a string after special char

I am struggling to extract last 5 digits in title(free text field) after special char ': ' (with a space). Sample records are as follows:
title column
1 ABC Requirement1 - 1,500 - 3,000 sq m : 12345
2 10,000 sft shed requirement
3 OFFICES REQUIRED 500/700 SQ FT : 56789
4 Land Acquisition : 34567
5 Storage Requirement : 12345
6 Land Requirement :100 sq.m
my result set should be as follows:
ID
1 12345
3 56789
4 34567
5 12345
It should only pick up last 5 digits(ID) after special char ': ' and ignore other records with ': ' in between. I am trying to extract ID values to join with another table. Any help is highly appreciated!
This should get the query that you want.
-- Return the 5-digit ID of every title that ends in ': ' followed by five digits.
-- A LIKE character-class pattern replaces ISNUMERIC(), which also accepts values
-- such as '1,500', and anchors on the END of the title instead of the first ': '
-- found, which may sit in the middle of the text.
SELECT RIGHT(RTRIM(Title), 5)
FROM #table
WHERE RTRIM(Title) LIKE '%: [0-9][0-9][0-9][0-9][0-9]'
Try this query --
-- TitleID is the 5-digit ID at the end of the title (after ': '), or NULL when
-- the title does not end that way. A LIKE pattern is used instead of ISNUMERIC(),
-- which also accepts non-digit input such as '1,500'.
;WITH CTE
AS (
    SELECT Id
        ,CASE
            WHEN RTRIM(Title) LIKE '%: [0-9][0-9][0-9][0-9][0-9]'
                THEN RIGHT(RTRIM(Title), 5)
            END AS TitleID
    FROM RequirementTable
    )
SELECT ID
    ,TitleID
FROM CTE
WHERE TitleID IS NOT NULL;
First, you should seriously reconsider the way you're storing your data if you need to go to these lengths to form a relation between records. This is potentially disastrous should your data ever include ': ' naturally and without ending in a foreign key value. And you most likely won't figure that out until it's too late and processing and/or other applications fail as a result.
However, to answer the question as it was asked, I have the same thing as @ChesterLin, but with sample data and including the 'ID' column in the output.
DECLARE @Temp TABLE (ID int, Title varchar(255))
INSERT INTO @Temp
VALUES
(1, 'ABC Requirement1 - 1,500 - 3,000 sq m : 12345'),
(2, '10,000 sft shed requirement'),
(3, 'OFFICES REQUIRED 500/700 SQ FT : 56789'),
(4, 'Land Acquisition : 34567'),
(5, 'Storage Requirement : 12345'),
(6, 'Land Requirement :100 sq.m')
-- Keep only titles that END in ': ' plus five digits and return those digits.
-- The LIKE character classes replace ISNUMERIC(), which also accepts '1,500'.
SELECT ID, RIGHT(RTRIM(Title), 5) AS [Extracted Value]
FROM @Temp
WHERE RTRIM(Title) LIKE '%: [0-9][0-9][0-9][0-9][0-9]'
you can get last 5 digits
-- SUBSTR is 1-based: the last 5 characters start at LENGTH - 4, not LENGTH - 5
SUBSTR(column, LENGTH(column) - 4, 5)
OR
SELECT RIGHT('ABC Requirement1 - 1,500 - 3,000 sq m : 12345',5)
OR Full query
-- last 5 characters of title: start at length - 4 (1-based) and read to the end
SELECT substr(title, length(title) - 4) from table_name;
substr(column, -5, 5)
Starts from the last character in the string, and gives the five characters.
Then cast it as INT.
select cast(substr(column, -5, 5) as INT) as ID from table_name
where isnumeric(substr(column, -5, 5)) = 1
I hope this will work. Or, something like this.

deleting second comma in data

Ok so I have a table called PEOPLE that has a name column. In the name column is a name, but its totally a mess. For some reason its not listed such as last, first middle. It's sitting like last,first,middle and last first (and middle if there) are separated by a comma.. two commas if the person has a middle name.
example:
smith,steve
smith,steve,j
smith,ryan,tom
I'd like the second comma taken away (for parsing reason ) spaces put after existing first comma so the above would come out looking like:
smith, steve
smith, steve j
smith, ryan tom
Ultimately I'd like to be able to parse the names into first, middle, and last name fields, but that's for another post :_0. I appreciate any help.
thank you.
-- Rebuild the sample table; the existence check keeps the script re-runnable.
IF OBJECT_ID('T1', 'U') IS NOT NULL Drop table T1;
Create table T1(Name varchar(100));
Insert T1 Values
('smith,steve'),
('smith,steve,j'),
('smith,ryan,tom');
-- 'last,first[,middle]' -> 'last, first[ middle]':
-- the first comma gets a following space, the second comma (if any) becomes a space.
UPDATE t
SET Name =
    CASE
        WHEN c.second_comma > 0
            -- replace the second comma first so the first comma's position stays valid
            THEN STUFF(STUFF(t.Name, c.second_comma, 1, ' '), c.first_comma, 1, ', ')
        ELSE STUFF(t.Name, c.first_comma, 1, ', ')
    END
FROM T1 AS t
CROSS APPLY (
    SELECT CHARINDEX(',', t.Name) AS first_comma,
           CHARINDEX(',', t.Name, CHARINDEX(',', t.Name) + 1) AS second_comma
) AS c
WHERE c.first_comma > 0            -- names without a comma are left untouched
  AND t.Name NOT LIKE '%, %';      -- already formatted: running the update twice is harmless
Select Name from T1
This seems to work. Not the most concise but avoids cursors.
DECLARE @people TABLE (name varchar(50))
-- the sample rows are distinct, so UNION ALL avoids a pointless de-duplication
INSERT INTO @people
SELECT 'smith,steve'
UNION ALL
SELECT 'smith,steve,j'
UNION ALL
SELECT 'smith,ryan,tom'
UNION ALL
SELECT 'commaless'
-- name2: first comma becomes ', ', second comma (if present) becomes ' ';
-- names without a comma are returned unchanged.
SELECT name,
    CASE
        WHEN CHARINDEX(',',name) > 0 THEN
            CASE
                WHEN CHARINDEX(',',name,CHARINDEX(',',name) + 1) > 0 THEN
                    -- inner STUFF handles the second comma first so the position
                    -- of the first comma is still valid for the outer STUFF
                    STUFF(STUFF(name, CHARINDEX(',',name,CHARINDEX(',',name) + 1), 1, ' '),CHARINDEX(',',name),1,', ')
                ELSE
                    STUFF(name,CHARINDEX(',',name),1,', ')
            END
        ELSE name
    END AS name2
FROM @people
Using a table function to split apart the names with a delimiter and for XML Path to stitch them back together, we can get what you're looking for! Hope this helps!
Declare @People table(FullName varchar(200))
Insert Into @People Values ('smith,steve')
Insert Into @People Values ('smith,steve,j')
Insert Into @People Values ('smith,ryan,tom')
Insert Into @People Values ('smith,john,joseph Jr')
-- Split each name on ',' and stitch the parts back together: parts 1 and 2 are
-- prefixed with ', ', later parts with ' '; STUFF then removes the leading ', '.
-- The split is applied to the current row directly (no self-join on FullName, which
-- would duplicate parts for repeated names), and TYPE + .value() keeps characters
-- such as '&' from being returned as XML entities.
Select p.*,stuff(fn.FullName.value('.', 'varchar(max)'),1,2,'') as ModifiedFullName
From @People p
Cross Apply (
    Select (
        Select
            Case When np.posID<=2 Then ', ' Else ' ' End+np.Val
        From Custom.SplitValues(p.FullName,',') np
        Order By np.posID
        For XML Path(''), Type
    )
) fn(FullName)
Output:
ModifiedFullName
smith, steve
smith, steve j
smith, ryan tom
smith, john joseph Jr
SplitValues table function definition:
/*
This Function takes a delimited list of values and returns a table containing
each individual value and its position.

  @List      - the delimited text to split
  @Delimiter - the single-character separator
  Returns    - one row per value: posID (1-based position) and val (trimmed value)

Each value is wrapped in a CDATA section before the text is cast to XML, so values
containing '&' or '<' no longer make the cast fail. A value containing the literal
sequence ']]>' is still not supported.
*/
CREATE FUNCTION [Custom].[SplitValues]
(
    @List varchar(max)
  , @Delimiter varchar(1)
)
RETURNS
@ValuesTable table
(
    posID int
   ,val varchar(1000)
)
AS
BEGIN
    WITH Cte AS
    (
        SELECT CAST('<v><![CDATA[' + REPLACE(@List, @Delimiter, ']]></v><v><![CDATA[') + ']]></v>' AS XML) AS val
    )
    INSERT @ValuesTable (posID,val)
    -- NOTE(review): posID relies on ordering by the nodes() column following document
    -- order; confirm this holds on the target SQL Server version.
    SELECT row_number() over(Order By x) as posID, RTRIM(LTRIM(Split.x.value('.', 'VARCHAR(1000)'))) AS val
    FROM Cte
    CROSS APPLY val.nodes('/v') Split(x)
    RETURN
END
GO
String manipulation in SQLServer, outside of writing your own User Defined Function, is limited but you can use the PARSENAME function for your purposes here. It takes a string, splits it on the period character, and returns the segment you specify.
Try this:
DECLARE @name VARCHAR(100) = 'smith,ryan,tom'
-- PARSENAME splits on '.' and numbers the parts from the right, so the string is
-- reversed first (part 1 = last name) and each part is reversed back afterwards.
-- Part 3 is NULL for two-part names; ' ' + NULL is NULL, which COALESCE turns into ''.
SELECT REVERSE(PARSENAME(REPLACE(REVERSE(@name), ',', '.'), 1)) + ', ' +
REVERSE(PARSENAME(REPLACE(REVERSE(@name), ',', '.'), 2)) +
COALESCE(' ' + REVERSE(PARSENAME(REPLACE(REVERSE(@name), ',', '.'), 3)), '')
Result: smith, ryan tom
If you set #name to 'smith,steve' instead, you'll get:
Result: smith, steve
Segment 1 actually gives you the last segment, segment 2 the second to last etc. Hence I've used REVERSE to get the order you want. In the case of 'smith,steve', segment 3 will be null, hence the COALESCE to add an empty string if that is the case. The REPLACE of course changes the commas to periods so that the split will work.
Note that this is a bit of a hack. PARSENAME will not work if there are more than four parts and this will fail if the name happens to contain a period. However if your data conforms to these limitations, hopefully it provides you with a solution.
Caveat: it sounds like your data may be inconsistently formatted. In that case, applying any automated treatment to it is going to be risky. However, you could try:
-- 'last,first[,middle]' -> 'last, first[ middle]' in one pass:
-- keep everything before the first comma, add ', ', then append the remainder with
-- any further commas turned into spaces.
UPDATE people
SET name = LEFT(name, CHARINDEX(',', name) - 1) + ', '
         + REPLACE(SUBSTRING(name, CHARINDEX(',', name) + 1, LEN(name)), ',', ' ')
WHERE name LIKE '%,%'              -- only rows that contain a comma (avoids LEFT(name, -1))
  AND name NOT LIKE '%, %'         -- skip rows that are already formatted
That'll work for the three examples you give. What it will do to the rest of your set is another question.
Here's an example with CHARINDEX() and SUBSTRING
WITH yourTable
AS
(
SELECT names
FROM
(
VALUES ('smith,steve'),('smith,steve,j'),('smith,ryan,tom')
) A(names)
)
SELECT names AS old,
CASE
WHEN comma > 0
THEN SUBSTRING(spaced_names,0,comma + 1) --before the comma
+ SUBSTRING(spaced_names,comma + 2,1000) --after the comma
ELSE spaced_names
END AS new
FROM yourTable
CROSS APPLY(SELECT CHARINDEX(',',names,CHARINDEX(',',names) + 1),REPLACE(names,',',', ')) AS CA(comma,spaced_names)

Help with string formatting in SQL Server Query

I have the following SQL query:
SELECT DISTINCT ProductNumber, PageNumber FROM table
I am trying to modify the query so that PageNumber will be formatted. You see, PageNumber is in any of the following formats, where 'x' is a digit:
xxx, xxx
xxx
xxx-xxx
xx, xxx-xxx
xx-xx, xxx
xx-xx, xxx-xxx
I want to format PageNumber so that it is only in the format: xxx. To do so, I have parse out the following bolded numbers from the above formats:
xxx, xxx
xxx
xxx-xxx
xx, xxx-xxx
xx-xx, xxx
xx-xx, xxx-xxx
I want to do this all without writing any functions, but I don't know if that is possible. I am having trouble "detecting" all of the different formats, though:
Here is what I have so far:
SELECT ProductNumber,
CASE WHEN CHARINDEX(',', PageNumber) > 0
THEN SUBSTRING(PageNumber, 0, CHARINDEX('-', PageNumber))
WHEN CHARINDEX('-', PageNumber) > 0
THEN SUBSTRING(PageNumber, 0, CHARINDEX('-', PageNumber))
ELSE PageNumber
END AS PageNumber
FROM table
WHERE PageNumber IS NOT NULL
AND PageNumber <> ''
Can anyone offer me some help? Thanks!
Use pattern matching rather than CHARINDEX
CASE also forces ordering of evaluation which helps here for the 3rd case which overlaps with the first 2 cases.
Not tested, something like
-- WHEN branches are evaluated top to bottom, so the specific 'xx-xx, xxx' form
-- must come before the generic two-digit prefix.
CASE
    WHEN PageNumber LIKE '[0-9][0-9][0-9]%' THEN LEFT(PageNumber, 3)
    WHEN PageNumber LIKE '[0-9][0-9]-[0-9][0-9], [0-9][0-9][0-9]' THEN RIGHT(PageNumber, 3)
    WHEN PageNumber LIKE '[0-9][0-9]%' THEN LEFT(PageNumber, 2)
END
try this:
DECLARE @YourTable table (ProductNumber int, PageNumber varchar(20))
INSERT @YourTable VALUES (1,'123, 456')
INSERT @YourTable VALUES (2,'123')
INSERT @YourTable VALUES (3,'123-456')
INSERT @YourTable VALUES (4,'12, 345-678')
INSERT @YourTable VALUES (5,'12-34, 567')
INSERT @YourTable VALUES (6,'12-34, 567-789')
-- NewPage = the leading run of digits of PageNumber.
-- PATINDEX finds the first non-digit directly, per row; when there is none
-- (NULLIF turns 0 into NULL) the whole value is kept.
SELECT
    t.ProductNumber,
    LEFT(t.PageNumber,
         COALESCE(NULLIF(PATINDEX('%[^0-9]%', t.PageNumber), 0) - 1, LEN(t.PageNumber))) AS NewPage,
    t.PageNumber AS OldPage
FROM @YourTable t
OUTPUT:
ProductNumber NewPage OldPage
------------- -------------------- --------------------
1 123 123, 456
2 123 123
3 123 123-456
4 12 12, 345-678
5 12 12-34, 567
6 12 12-34, 567-789
(6 row(s) affected)