Find character occurrences in SQL at the end of string - sql

How can I find the number of occurrences of 'e' at the end only of a string in SQL?
For example:
abcdeee, occurrences: 3
aecdeae, occurrences: 1
Thanks.

The goal is going to be to avoid looping if at all possible, since SQL Server works much better with sets of data rather than processing things sequentially. With that in mind, I would generate a virtual table that gives you all of the counts that you might find. To be safe, it should be the same length as your column. In my example, I've stopped at 10 characters. I use a CTE to generate the virtual table. You can use a variable in there instead of the hard-coded 'e' of course. Also, the CAST()s are important to avoid data type mismatches with the recursive CTE, but you may need to adjust them, especially if you're using NVARCHAR.
-- Counts how many consecutive 'e' characters each my_string ends with.
-- CTE_Characters builds the candidate runs 'e', 'ee', ... up to 10 characters.
-- A reversed string that starts with a run of n 'e's matches runs 1..n, so
-- MAX(cnt) per string is the length of its trailing run.
;WITH CTE_Characters AS
(
    SELECT
        CAST('e' AS VARCHAR(10)) AS my_char,
        1 AS cnt
    UNION ALL
    SELECT
        CAST(my_char + 'e' AS VARCHAR(10)),
        cnt + 1
    FROM
        CTE_Characters
    WHERE
        cnt <= 9    -- last generated run has 10 characters, the VARCHAR(10) limit
)
SELECT
    MT.my_string,
    -- LEFT JOIN + COALESCE: strings that do not end in 'e' report 0
    -- instead of being dropped from the result.
    COALESCE(MAX(CTE.cnt), 0) AS number_of_occurrences
FROM
    My_Table MT
    LEFT JOIN CTE_Characters CTE ON REVERSE(MT.my_string) LIKE CTE.my_char + '%'
GROUP BY
    MT.my_string

Here is a loop that will count the instances of 'e' on the end of an incoming string:
-- Counts the consecutive 'e' characters at the end of @str by walking the
-- reversed string from its first character.
DECLARE @str nvarchar(50) = N'abcdeee';      -- incoming string
DECLARE @ctr int = 0;                        -- trailing 'e' characters found so far
DECLARE @rts nvarchar(50) = REVERSE(@str);   -- reversed, so the end of @str is examined first
WHILE LEFT(@rts, 1) = N'e'                   -- continue only while the run of 'e' is unbroken
BEGIN
    SET @ctr += 1;
    -- Drop the leading character. SUBSTRING is used instead of
    -- RIGHT(@rts, LEN(@rts) - 1): LEN ignores trailing spaces, so RIGHT removed
    -- the wrong characters whenever the original string began with spaces.
    SET @rts = SUBSTRING(@rts, 2, 50);
END
SELECT @ctr AS ctr;

Related

Count numeric chars in string

Using T-SQL I want to count the numeric characters in a string. For example, I've got the string 'kick0my234ass' and I want to count how many digits (4 in that example) are in that string. I can't use regex, just plain T-SQL.
You COULD do this I suppose:
-- Counts the digits in @c: length with spaces removed, minus length with spaces
-- and every digit 0-9 removed. Spaces are stripped on both sides so that LEN's
-- habit of ignoring trailing spaces cannot skew the difference.
declare @c varchar(30)
set @c = 'kick0my234ass'
declare @digit_count int =
    len(replace(@c, ' ', ''))
    - len(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(
          @c, '0', ''), '1', ''), '2', ''), '3', ''), '4', ''), '5', ''), '6', ''), '7', ''), '8', ''), '9', ''), ' ', ''))
select @c, @digit_count
You'll first have to split the character string in its individual characters, evaluate which are numeric, and finally count those that are. This will do the trick:
-- Counts the digits in each Example value by pairing the string with one
-- number per character position and testing that single character.
DECLARE @test TABLE (Example NVARCHAR(255))
INSERT @test (Example)
VALUES ('kick0my234ass')
SELECT COUNT(1)
FROM @test AS T
INNER JOIN master..spt_values v
    ON v.type = 'P'                  -- NOTE(review): presumably the 0-based integer sequence rows; confirm on the target server
    AND v.number < len(T.Example)    -- one row per 0-based character offset
WHERE SUBSTRING(T.Example, v.number + 1, 1) LIKE '[0-9]'
You could try this solution with regular expressions (if you'd allow them):
it uses recursive CTE, at every recursive step, one digit is removed from given string and the condition is to stop, when there are no digits in string. The rows are also numbered with consecutive ids, so the last id is the amount of removed digits from string.
-- Counts the digits in @str with a recursive CTE: each level removes the first
-- remaining digit, so the deepest level's id is the number of digits removed.
declare @str varchar(100) = 'kick0my123ass';
declare @digit_count int;
with cte as (
    -- Anchor: nothing removed yet (id 0). Starting from the untouched string
    -- makes a string without digits report 0; removing a digit in the anchor
    -- reported 1 for it.
    select 0 [id], cast(@str as varchar(100)) [col]
    union all
    -- Recursive step: strip the first digit still present. The cast keeps the
    -- column type identical to the anchor's, as a recursive CTE requires.
    select [id] + 1, cast(stuff([col], PATINDEX('%[0-9]%', [col]), 1, '') as varchar(100)) from cte
    where col like '%[0-9]%'
)
select @digit_count = max(id) from cte;
--this will give you number of digits in string
select @digit_count as id
Use a WHILE loop to check whether each character is numeric or not.
Query
-- Counts the digits in @text by testing one character at a time and prints the total.
declare @text as varchar(max) = 'kick0my234ass';
declare @len as int;
select @len = len(@text);
if(@len > 0)
begin
    declare @i as int = 1;        -- 1-based position of the character being tested
    declare @count as int = 0;    -- digits seen so far
    while(@i <= @len)
    begin
        if(substring(@text, @i, 1) like '[0-9]')
            set @count += 1;
        set @i += 1;
    end
    print 'Count of Numerics in ' + @text + ' : ' + cast(@count as varchar(100));
end
else
    print 'Empty string';
If simplicity & performance are important I suggest a purely set-based solution. Grab a copy of DigitsOnlyEE which will remove all non-numeric characters. Then use LEN against the output.
-- Returns the input string, its digits-only form and the number of digits.
-- dbo.DigitsOnlyEE is not defined in this file; the surrounding text describes it
-- as removing every non-numeric character.
DECLARE @string varchar(100) = '123xxx45ff678';
SELECT string = @string, digitsOnly, DigitCount = LEN(digitsOnly)
FROM dbo.DigitsOnlyEE(@string);
Results
string digitsOnly DigitCount
------------------ ----------- ------------
123xxx45ff678 12345678 8
using a Tally Table created by an rCTE:
-- Sample data: a temp table holding the strings to inspect.
CREATE TABLE #Sample (S varchar(100));
INSERT INTO #Sample (S)
VALUES ('kick0my234 ass');
GO
-- Tally yields the integers 1..100, one per possible character position in a
-- varchar(100). Each string is joined to the positions it actually has, and
-- the digits found at those positions are summed per string.
WITH Tally AS (
    SELECT 1 AS N
    UNION ALL
    SELECT N + 1
    FROM Tally
    WHERE N + 1 <= 100
)
SELECT
    smp.S,
    SUM(CASE WHEN SUBSTRING(smp.S, pos.N, 1) LIKE '[0-9]' THEN 1 ELSE 0 END) AS Numbers
FROM #Sample AS smp
INNER JOIN Tally AS pos
    ON pos.N <= LEN(smp.S)
GROUP BY smp.S;
For future reference, please also post your own attempts. We aren't (really) here to do your work for you.

Adding dynamic condition in where clause

I want to add condition in 'where' clause depends upon 'if' condition like below
-- Question code, kept as asked apart from restoring the '@' variable prefix.
-- Builds an optional extra filter fragment in @strSQLClause.
-- @QMasterCompanyId, @State, @LOB and @AppId are not declared in this snippet.
Declare @strSQLClause varchar(50)
If (@QMasterCompanyId='09' and @State='FL' and (@LOB='HO' or @LOB='HP'))
Begin
    Declare @strMonthsOccupied char(1)    -- holds a single character only
    select Distinct @strMonthsOccupied=sm.MonthsOccupiedDesc from HOStructureRating sr
    join HOSeleMonthsOccupied sm on sr.MonthsOccupied=sm.MonthsOccupiedCd
    where AppId=@AppId
    If(CONVERT(int,LTRIM(RTrim(@strMonthsOccupied))) > 9)
    Begin
        set @strSQLClause ='AND QuestionCd!=Q8'
    End
    Else
        set @strSQLClause =''
End
-- When the outer If is false, @strSQLClause is never assigned and stays NULL.
so that in my Query will work as
-- Question code (intent only): as written, @strSQLClause is concatenated onto the
-- literal '09' being compared, not onto the statement text.
select * from SHSeleQuestions where MasterCompanyId='09' + @strSQLClause
But this approach is not working fine, can anyone please help me on this.
There are two ways to do this: one is to use dynamic SQL, and the other is shown below:
-- Applies the QuestionCd filter only when @strSQLClause carries a fragment.
-- COALESCE treats an unassigned (NULL) @strSQLClause like an empty one; without
-- it LEN returned NULL, neither branch matched and the query returned no rows.
select *
from SHSeleQuestions
where MasterCompanyId = '09'
  and (
        coalesce(len(@strSQLClause), 0) = 0
        or QuestionCd != 'Q8'
      )
Using Dynamic SQL
-- Dynamic SQL variant: appends the optional fragment to the statement text.
-- The literal ends with a space so the fragment does not run into '09'.
-- The original had a stray quote after the variable, which was a syntax error.
-- @strSQLClause must contain trusted, complete SQL (for example
-- AND QuestionCd != 'Q8' with the value quoted); never build it from user input.
EXEC('select * from SHSeleQuestions where MasterCompanyId=''09'' ' + @strSQLClause)
You would need to use dynamic SQL, but why not just have two statements that execute SQL, so rather than set #strSQLClause = 'AND ...', simply have a select statement here with the condition(s) you need
-- Runs the filtered query when the months-occupied check passes, otherwise the
-- unfiltered one. The variable handling mirrors the question's code on purpose;
-- the surrounding text goes on to discuss its problems.
-- @QMasterCompanyId, @State, @LOB and @AppId are not declared in this snippet.
IF (@QMasterCompanyId='09' AND @State='FL' AND (@LOB='HO' OR @LOB='HP'))
BEGIN
    DECLARE @strMonthsOccupied CHAR(1)
    SELECT DISTINCT @strMonthsOccupied = sm.MonthsOccupiedDesc
    FROM HOStructureRating sr
    INNER JOIN HOSeleMonthsOccupied sm
        ON sr.MonthsOccupied=sm.MonthsOccupiedCd
    WHERE AppId=@AppId;
    IF(CONVERT(INT,LTRIM(RTRIM(@strMonthsOccupied))) > 9)
    BEGIN
        SELECT *
        FROM SHSeleQuestions
        WHERE MasterCompanyId='09'
        AND QuestionCd!='Q8';
        RETURN;    -- stop here so the unfiltered query below does not also run
    END
END
SELECT *
FROM SHSeleQuestions
WHERE MasterCompanyId='09';
That being said, there are so many issues with the above I don't really know where to begin. You declare your variable then assign it an indeterminate value:
-- Excerpt under discussion: assigns a column value to a scalar variable from a
-- query that is not restricted to a single row.
DECLARE @strMonthsOccupied CHAR(1)
SELECT DISTINCT @strMonthsOccupied = sm.MonthsOccupiedDesc
FROM HOStructureRating sr
INNER JOIN HOSeleMonthsOccupied sm
    ON sr.MonthsOccupied=sm.MonthsOccupiedCd
WHERE AppId=@AppId;
If the query returns multiple rows then there is no clear logic for which value the variable should be assigned. The next issue is that this CHAR(1) variable is clearly a number based on your attempted conversion:
IF(CONVERT(INT,LTRIM(RTRIM(#strMonthsOccupied))) > 9)
Why not just cut out the middle man and declare an INT to begin with. The next point is that it is a CHAR(1) so isn't actually big enough to store anything greater than 9, so your above condition will never be true.
Even if sm.MonthsOccupiedDesc, was 10, the variable would simply be truncated to 1, which is smaller than 9, so fails the condition, e.g.
-- Demonstration: a CHAR(1) variable keeps only the first character of '10',
-- so the comparison against 9 is made with 1.
DECLARE @strMonthsOccupied CHAR(1) = '10';
IF(CONVERT(INT,LTRIM(RTRIM(@strMonthsOccupied))) > 9)
    PRINT 'TRUE';
ELSE
    PRINT 'FALSE';

Parsing / Indexing a Binary String in SQL Server

I have searched extensively for a relevant answer, but none quite satisfy what I need to be doing.
For our purposes I have a column with a 50 character binary string. In our database, it is actually hundreds of characters long.
There is one string for each unique item ID in our database. The location of each '1' flags a specific criteria being true, and a '0' false, so the indexed location of the ones and zeros are very important. Mostly, I care about where the 1's are.
I am not updating any databases, so I first decided to try and make a loop to look through each string and create a list of the 1's locations.
-- Question code, kept as asked apart from restoring the '@' variable prefix.
-- Builds a comma-separated list of the positions of '1' in @binarystring.
declare @binarystring varchar(50) = '10000010000110000001000000000000000000000000000001'
declare @position int = 0
declare @list varchar(200) = ''
while (@position <= len(@binarystring))
begin
    -- charindex returns 0 when no further '1' exists; this version does not check for that
    set @position = charindex('1', @binarystring, @position)
    set @list = @list + ', ' + convert(varchar(10),@position)
    set @position = charindex('1', @binarystring, @position)+1
end
select right(@list, len(@list)-2)    -- drop the leading ', '
This creates the following list:
1, 7, 12, 13, 20, 50
However, the loop will bomb if there is not a '1' at the end of the string, as I am searching through the string via occurrences of 1's rather than one character at a time. I am not sure how to satisfy the break criteria when the loop would normally reach the end of the string, without there being a 1.
Is there a simple solution to my loop bombing, and should I even be looping in the first place?
I have tried other methods of parsing, union joining, indexing, etc, but given this very specific set of circumstances I couldn't find any combination that did quite what I needed. The above code is the best I've got so far.
I don't specifically need a comma delimited list as an output, but I need to know the location of all 1's within the string. The amount of 1's vary, but the string size is always the same.
This is my first time posting to stackoverflow, but I have used answers many times. I seek to give a clear question with relevant information. If there is anything I can do to help, I will try to fulfill any requests.
How about changing the while condition to this?
while (charindex('1', #binarystring, #position) > 0)
-- Collects the positions of '1' in @binarystring and stops as soon as none is left.
-- Uses @binarystring, @position and @list as declared in the question's script.
while (@position <= len(@binarystring))
begin
    set @position = charindex('1', @binarystring, @position)
    if @position = 0
        break    -- charindex found no further '1'
    set @list = @list + ', ' + convert(varchar(10), @position)
    -- @position already points at a '1', so the next search starts just after it
    set @position = @position + 1
end
It's often useful to have a source of large ranges of sequential integers handy. I have a table, dbo.range that has a single column, id containing all the sequential integers from -500,000 to +500,000. That column is a clustered primary key so lookups against are fast. With such a table, solving your problem is easy.
Assuming your table has a schema something like
-- Example schema: one row per item, with its flags held as a character string.
CREATE TABLE dbo.some_table_with_flags
(
    id    INT           NOT NULL PRIMARY KEY,
    flags VARCHAR(1000) NOT NULL
)
The following query should do you:
-- Returns one row per '1' in flags: the source row's id and the 1-based position.
-- Reads from dbo.some_table_with_flags, the table defined in the example schema;
-- the query previously named dbo.some_table, which that schema does not create.
-- dbo.range is the single-column table of sequential integers described in the text.
select row_id        = t.id ,
       flag_position = r.id
from dbo.some_table_with_flags t
join dbo.range r on r.id between 1 and len(t.flags)
                and substring(t.flags, r.id, 1) = '1'
For each 1 value in the flags column, you'll get a row containing the ID from your source table's ID column, plus the position in which the 1 was found in flags.
There are a number of techniques for generating such sequences. This link shows several:
http://sqlperformance.com/2013/01/t-sql-queries/generate-a-set-1
For instance, you could use common table expressions (CTEs) to generate your sequences, like this:
-- Generates the integers 1..1,000,000 without a physical numbers table, then
-- returns one row per '1' found in flags.
WITH
s1(n) AS -- 10 (10^1)
( SELECT 1
  UNION ALL SELECT 1
  UNION ALL SELECT 1
  UNION ALL SELECT 1
  UNION ALL SELECT 1
  UNION ALL SELECT 1
  UNION ALL SELECT 1
  UNION ALL SELECT 1
  UNION ALL SELECT 1
  UNION ALL SELECT 1
) ,
s2(n) as ( select 1 from s1 a cross join s1 b ) , -- 10^2 100
s3(n) as ( select 1 FROM s1 a cross join s2 b ) , -- 10^3 1,000
s4(n) as ( select 1 from s1 a cross join s3 b ) , -- 10^4 10,000
s5(n) as ( select 1 from s1 a cross join s4 b ) , -- 10^5 100,000
s6(n) as ( select 1 from s1 a cross join s5 b ) , -- 10^6 1,000,000
seq(n) as ( select row_number() over ( order by n ) from s6 ) -- number the rows 1..10^6
-- Explicit column list, and the table name from the example schema
-- (dbo.some_table_with_flags rather than the undefined dbo.some_table).
select t.id ,
       t.flags ,
       s.n
from dbo.some_table_with_flags t
join seq s on s.n between 1 and len(t.flags)
          and substring(t.flags, s.n, 1) = '1'

Split words with a capital letter in sql

Does anyone know how to split words starting with capital letters from a string?
Example:
-- Question template: <?> marks the expression being asked for.
DECLARE @var1 varchar(100) = 'OneTwoThreeFour'
DECLARE @var2 varchar(100) = 'OneTwoThreeFourFive'
DECLARE @var3 varchar(100) = 'One'
SELECT @var1 as Col1, <?> as Col2
SELECT @var2 as Col1, <?> as Col2
SELECT @var3 as Col1, <?> as Col2
expected result:
Col1 Col2
OneTwoThreeFour One Two three Four
OneTwoThreeFourFive One Two Three Four Five
One One
If this is not possible (or if too long) an scalar function would be okay as well.
Here is a function I created that is similar to the "removing non-alphabetic characters". How to strip all non-alphabetic characters from string in SQL Server?
This one uses a case sensitive collation which actively seeks out a non-space/capital letter combination and then uses the STUFF function to insert the space. This IS a scalar UDF, so some folks will immediately say that it will be slower than other solutions. To that notion, I say, please test it. This function does not use any table data and only loops as many times as necessary, so it will likely give you very good performance.
-- Inserts a space before every upper-case letter A-Z that directly follows a
-- non-space character.
--   @Temp   text to split (at most 1000 characters)
--   returns the text with the spaces inserted
Create Function dbo.Split_On_Upper_Case(@Temp VarChar(1000))
Returns VarChar(1000)
AS
Begin
    Declare @KeepValues as varchar(50)
    Declare @MatchAt as int
    -- non-space followed by a capital; the binary collation makes [A-Z] case-sensitive
    Set @KeepValues = '%[^ ][A-Z]%'
    Set @MatchAt = PatIndex(@KeepValues collate Latin1_General_Bin, @Temp)
    While @MatchAt > 0
    Begin
        -- @MatchAt is the non-space character; the space goes just before the capital
        Set @Temp = Stuff(@Temp, @MatchAt + 1, 0, ' ')
        Set @MatchAt = PatIndex(@KeepValues collate Latin1_General_Bin, @Temp)
    End
    Return @Temp
End
Call it like this:
-- Sample calls, one result set each.
SELECT dbo.Split_On_Upper_Case('OneTwoThreeFour');
SELECT dbo.Split_On_Upper_Case('OneTwoThreeFour');
SELECT dbo.Split_On_Upper_Case('One');
SELECT dbo.Split_On_Upper_Case('OneTwoThree');
SELECT dbo.Split_On_Upper_Case('stackOverFlow');
SELECT dbo.Split_On_Upper_Case('StackOverFlow');
Here is a function I have just created.
FUNCTION
-- Inserts a space before each upper-case letter of @String.
--   @String  text to split (at most 4000 characters)
--   returns  the text with spaces inserted and leading spaces trimmed
CREATE FUNCTION dbo.Split_On_Upper_Case
(
    @String VARCHAR(4000)
)
RETURNS VARCHAR(4000)
AS
BEGIN
    DECLARE @Char CHAR(1);
    DECLARE @i INT = 1;    -- SUBSTRING positions are 1-based; starting at 0 only processed an empty character
    DECLARE @OutString VARCHAR(4000) = '';
    WHILE (@i <= LEN(@String))
    BEGIN
        SELECT @Char = SUBSTRING(@String, @i, 1);
        -- An upper-case letter equals its UPPER form and differs from its LOWER form.
        -- The second test keeps digits, spaces and punctuation (which equal both
        -- forms) from being treated as capitals. No space is added when the output
        -- already ends in one.
        IF (@Char = UPPER(@Char) COLLATE Latin1_General_CS_AI
            AND @Char <> LOWER(@Char) COLLATE Latin1_General_CS_AI
            AND @OutString NOT LIKE '% ')
            SET @OutString = @OutString + ' ' + @Char;
        ELSE
            SET @OutString = @OutString + @Char;
        SET @i += 1;
    END
    SET @OutString = LTRIM(@OutString);    -- removes the space added before a leading capital
    RETURN @OutString;
END
Test Data
-- Test data: three sample strings in a table variable, then the function applied to each row.
DECLARE @TABLE TABLE (Strings VARCHAR(1000))
INSERT INTO @TABLE (Strings)
VALUES ('OneTwoThree') ,
       ('FourFiveSix') ,
       ('SevenEightNine')
-- Query
SELECT dbo.Split_On_Upper_Case(Strings) AS Vals
FROM @TABLE
Result Set
╔══════════════════╗
║ Vals ║
╠══════════════════╣
║ One Two Three ║
║ Four Five Six ║
║ Seven Eight Nine ║
╚══════════════════╝
If a single query is needed 26 REPLACE can be used to check every upper case letter like
-- Sketch only: one REPLACE per upper-case letter, 26 in total; the ... lines
-- stand for the letters B through W.
SELECT @var1 col1, REPLACE(
REPLACE(
REPLACE(
...
REPLACE(@var1, 'A', ' A')
, ...
, 'X', ' X')
, 'Y', ' Y')
, 'Z', ' Z') col2
Not the most beautiful thing but it'll work.
EDIT
Just to add another function to do the same thing in a different way of the other answers
-- Inserts a space before every upper-case letter of @param except the first character.
--   @param   text to split; only the first 10,000 characters are processed,
--            because the digit cross join below yields offsets 0..9999
--   returns  the rebuilt text, or NULL for NULL input
CREATE FUNCTION splitCapital (@param Varchar(MAX))
RETURNS Varchar(MAX)
AS
BEGIN
    IF @param IS NULL RETURN NULL;    -- TOP (NULL) below would raise an error
    Declare @ret Varchar(MAX) = '';
    declare @len int = len(@param);
    WITH Base10(N) AS (
        SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3
        UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7
        UNION ALL SELECT 8 UNION ALL SELECT 9
    ), Chars(pos, nthChar) As (
        -- One row per character. The CTE previously declared its column as N while
        -- the outer query read nthChar, which is an invalid column reference.
        Select TOP(@len)
               u.N + t.N*10 + h.N*100 + th.N*1000 + 1
             , substring(@param, u.N + t.N*10 + h.N*100 + th.N*1000 + 1, 1)
               Collate Latin1_General_CS_AI
        FROM Base10 u
        CROSS JOIN Base10 t
        CROSS JOIN Base10 h
        CROSS JOIN Base10 th
        WHERE u.N + t.N*10 + h.N*100 + th.N*1000 < @len
        ORDER BY u.N + t.N*10 + h.N*100 + th.N*1000
    )
    -- NOTE(review): this relies on the rows being concatenated in the TOP ... ORDER BY
    -- order of the CTE; confirm that this ordering holds on the target server.
    SELECT @ret += Case
                     -- a capital equals its UPPER form and differs from its LOWER form;
                     -- digits and spaces equal both, so they get no space
                     When pos > 1
                      And nthChar = UPPER(nthChar)
                      And nthChar <> LOWER(nthChar) Then ' '
                     Else ''
                   End + nthChar
    FROM Chars;
    RETURN @ret;
END
This one uses the possibility of TSQL to concatenate string variable, I had to use the TOP N trick to force the Chars CTE rows in the right order
Build a Numbers table. There are some excellent posts on SO to show you how to do this. Populate it with values up to the maximum length of your input string. Select the values from 1 through the actual length of the current input string. Cross join this list of numbers to the input string. Use the result to SUBSTRING() each character. Then you can either compare the resulting list of one-character values to a pre-populated table-valued variable or convert each character to an integer using ASCII() and choose only those between 65 ('A') and 90 ('Z'). At this point you have a list which is the position of each upper-case character in your input string. UNION the maximum length of your input string onto the end of this list. You'll see why in just a second. Now you can SUBSTRING() your input variable, starting at the Number given by row N and taking a length of (the Number given by row N+1) - (the Number given by row N). This is why you have to UNION the extra Number on the end. Finally concatenate all these substrings together, space-separated, using the algorithm of your choice.
Sorry, don't have an instance in front of me to try out code. Sounds like a fun task. I think doing it with nested SELECT statements will get convoluted and un-maintainable; better to lay it out as CTEs, IMHO.
I know that there are already some good answers out there, but if you wanted to avoid creating a function, you could also use a recursive CTE to accomplish this. It's certainly not a clean way of doing this, but it works.
-- Splits a camel-cased string with a recursive CTE: each level inserts one space
-- before the first capital that directly follows a non-space character, and the
-- deepest level holds the fully spaced string.
-- Each inserted space costs one recursion level, so the default recursion limit
-- applies to inputs with many capitals.
DECLARE
    @camelcase nvarchar(4000) = 'ThisIsCamelCased'
;
WITH
    split
AS
    (
        -- anchor: the untouched input
        SELECT
            [iteration] = 0
            ,[string] = @camelcase
        UNION ALL
        -- recursive step: insert one space after the matched non-space character
        SELECT
            [iteration] = split.[iteration] + 1
            ,[string] = STUFF(split.[string], pattern.[index] + 1, 0, ' ')
        FROM
            split
        CROSS APPLY
            -- the binary collation makes [A-Z] case-sensitive
            ( SELECT [index] = PATINDEX(N'%[^ ][A-Z]%' COLLATE Latin1_General_Bin, split.[string]) )
            pattern
        WHERE
            pattern.[index] > 0    -- stop once no such pair is left
    )
SELECT TOP (1)
    [spaced] = split.[string]
FROM
    split
ORDER BY
    split.[iteration] DESC
;
As I said, this isn't a pretty way to write a query, but I use things like this when I'm just writing up some ad-hoc queries where I would not want to add new artifacts to the database. You could also use this to create your function as an inline table valued function, which is always a tad nicer.
Please Try This:
-- Rebuilds @t one character at a time, putting a space before each character
-- whose ASCII code is in 65..90 ('A'..'Z'), then prints the result.
declare @t nvarchar (100) ='IamTheTestString'
declare @len int
declare @Counter int = 1          -- SUBSTRING positions are 1-based; position 0 only produced an empty character
declare @Final nvarchar (200) ='' -- twice the input size: every character may gain a space
declare @ch nchar(1)
set @len =len( @t)
while (@Counter <= @len)
begin
    set @ch = substring (@t,@Counter,1)
    set @Final = @Final + case when ascii(@ch) between 65 and 90 then ' ' + @ch else @ch end
    set @Counter = @Counter + 1
end
print ltrim(@Final)    -- removes the space added before a leading capital

Find all strings that share at least X characters, order by likeness

I'm working on a project that has the names of various drugs. Often, I will find something like Proscratinol and Proscratinol XR (extended release). I would like to find a query to pick up on all the names of this nature so I can put the 'parent' drug in a table and have these 'child' drugs reference it so when I write a query to do drug counts, I'm not double counting Proscratinol because it has an XR, CR, and whatever else version to it. I wrote the following in order to take a stab at it
-- Pairs of distinct drug names whose first five characters are equal.
;with x as
(
    -- one row per distinct drug name
    select drug_name
    from rx
    group by drug_name
)
select distinct *
from x
inner join x as x2
    on LEFT(x2.drug_name,5) = LEFT(x.drug_name,5)
where x.drug_name != x2.drug_name
This will give me a list of all the drugs whose names share the first five letters. Five is completely arbitrary here. What I've got so far does good enough, but I would like to order the results by descending likeness. So I would like to find their X-most characters reading from the left are the same.
e.g. Phenytoin and Phepil would be 3 (their first three letters are the same)
-- Pairs of distinct drug names sharing their first five characters, with a third
-- column that is the length of the six-character prefix when those prefixes
-- also match, and 0 otherwise.
;with x as
(
    -- one row per distinct drug name
    select drug_name
    from rx
    group by drug_name
)
select x.drug_name as xDrugName
     , x2.drug_name as x2DrugName
     , case when LEFT(x2.drug_name,6) = LEFT(x.drug_name,6)
            then LEN(left(x.drug_name,6))
            else '0'
       end
from x
inner join x as x2
    on LEFT(x2.drug_name,5) = LEFT(x.drug_name,5)
where x.drug_name != x2.drug_name
group by x.drug_name, x2.drug_name
Instead of hard coding an int into the left function in the above query, I need that integer expression to return how many similar characters the two strings share. Any good way to do this?
This approach uses a number generator and then just tests the length of overlap:
-- For every pair of drug names, returns the length of their longest common prefix.
-- x is the CTE of distinct drug names from the question; it is not defined here.
select x.drug_name, x2.drug_name, MAX(c.seqnum) as OverlapLen
from x cross join
     x x2 cross join
     -- number generator: 1, 2, 3, ... one per row of INFORMATION_SCHEMA.COLUMNS
     (select ROW_NUMBER() over (order by (select NULL)) seqnum
      from INFORMATION_SCHEMA.COLUMNS c
     ) c
where LEFT(x.drug_name, c.seqnum) = LEFT(x2.drug_name, c.seqnum) and
      -- both names must be at least seqnum characters long
      len(x.drug_name) >= c.seqnum and len(x2.drug_name) >= c.seqnum
-- group per pair: x2.drug_name is selected, so it must be grouped
-- (the original listed x.drug_name twice)
group by x.drug_name, x2.drug_name
order by x.drug_name, OverlapLen desc
This assumes that information_schema.columns has enough rows for the longer drug names.
This joins x to itself and then joins in a list of numbers. The where clause is checking two conditions: (1) that the left part of each drug name is the same up to seqnum; (2) that the length of each drug name is greater than or equal to seqnum.
The aggregation then takes each pair and chooses the highest value of seqnum -- this should be the longest substring match.
you want longest common sequence. here is a SQL server implementation:
-- Example call; @string1 and @string2 are assumed to be declared by the caller.
select dbo.lcs(@string1, @string2), len(@string1), len(@string2)
-- Returns the length computed by a dynamic-programming table over @s and @t:
-- a cell takes the diagonal neighbour's value + 1 when the two characters are
-- equal, otherwise the larger of its left and upper neighbours.
--   @s, @t   strings to compare
--   returns  the value of the final table cell; 0 when either string is empty
-- The table is stored in @d, one character per cell, as (@m+1) rows of (@n+1)
-- cells; cell (i, j) sits at string offset j*(@n+1) + i + 1.
-- Calls dbo.Max2, which is not defined in this file.
-- NOTE(review): a cell is one varchar character, so cell values are presumably
-- limited to what a single character code can hold; confirm before use on long inputs.
CREATE FUNCTION [dbo].[LCS]( @s varchar(MAX), @t varchar(MAX) )
RETURNS INT AS
BEGIN
    DECLARE @d varchar(MAX), @LD INT, @m INT, @n INT, @i INT, @j INT,
            @s_i NCHAR(1), @t_j NCHAR(1)
    SET @n = LEN(@s)
    IF @n = 0 RETURN 0
    SET @m = LEN(@t)
    IF @m = 0 RETURN 0
    -- Cast to varchar(MAX) first: REPLICATE truncates its result at 8,000 bytes
    -- when its input is not a MAX type, which cut the table short for larger inputs.
    SET @d = REPLICATE(CAST(CHAR(0) AS varchar(MAX)),(@n+1)*(@m+1))
    SET @i = 1
    WHILE @i <= @n BEGIN
        SET @s_i = SUBSTRING(@s,@i,1)
        SET @j = 1
        WHILE @j <= @m BEGIN
            SET @t_j = SUBSTRING(@t,@j,1)
            IF @s_i = @t_j
                -- match: cell (i, j) = cell (i-1, j-1) + 1
                SET @d = STUFF(@d,@j*(@n+1)+@i+1,1,
                    NCHAR(UNICODE(
                        SUBSTRING(@d, (@j-1)*(@n+1)+@i-1+1, 1)
                    )+1))
            ELSE
                -- no match: cell (i, j) = larger of cell (i-1, j) and cell (i, j-1)
                SET @d = STUFF(@d,@j*(@n+1)+@i+1,1,CHAR(dbo.Max2(
                    UNICODE(SUBSTRING(@d,@j*(@n+1)+@i-1+1,1)),
                    UNICODE(SUBSTRING(@d,(@j-1)*(@n+1)+@i+1,1)))))
            SET @j = @j+1
        END
        SET @i = @i+1
    END
    -- final cell (i = @n, j = @m); this offset equals @m*(@n+1) + @n + 1
    SET @LD = UNICODE(SUBSTRING(@d,@n*(@m+1)+@m+1,1))
    RETURN @LD
END