Count number of repeated character in a given string - sql

How do I count the number of occurrences of repeated $ character in the given strings.
For ex:
String = '$$$$ABC$$$DE$$$' --> Answer is 4,3,3
String = '###$$%%ANE$$$$$' --> Answer is 2,5
I have no idea how to do it so did not do any attempts.
Thanks for your help.
For Reproducing:
DDL and Inserts:
-- Sample table holding the strings whose runs of '$' are to be counted.
CREATE TABLE xyz ([text] VARCHAR(200));

INSERT INTO xyz ([text])
VALUES
    ('$$$$ABC$$$DE$$$'),
    ('###$$%%ANE$$$$$');
What I need to do: Count the repeated number of '$'
Desired output, based on the sample data in #1 above.
text = '$$$$ABC$$$DE$$$' --> Answer is 4,3,3
text = '###$$%%ANE$$$$$' --> Answer is 2,5
SQL Server version: Microsoft SQL Server 2019 (RTM) - 15.0.2000.5

Please try the following solution. It will work starting from SQL Server 2017 onwards.
It is based on use of the TRANSLATE() function, and XML and XQuery.
SQL
-- DDL and sample data population, start
-- Table variables and scalar variables use the @ sigil in T-SQL.
DECLARE @tbl TABLE (ID INT IDENTITY PRIMARY KEY, tokens VARCHAR(30));
INSERT INTO @tbl (tokens) VALUES
('$$$$ABC$$$DE$$$'), --> Answer is 4,3,3
('###$$%%ANE$$$$$'); --> Answer is 2,5
-- DDL and sample data population, end

-- Single character that every non-'$' character is translated to before splitting.
DECLARE @separator CHAR(1) = SPACE(1);

-- Step 1: remove every '$' (and any space) so that only the "junk" characters remain.
;WITH cte AS
(
    SELECT ID
        , tokens
        , REPLACE(TRANSLATE(tokens, '$', SPACE(1)), ' ', '') AS JunkCharacters
    FROM @tbl
)
-- Step 2: translate each junk character to the separator, wrap each remaining piece
-- in an <r> element, and let XQuery emit the string-length of every non-empty piece.
-- The space-separated lengths are finally turned into a comma-separated list.
SELECT ID
    , tokens
    , JunkCharacters
    , REPLACE(TRY_CAST('<root><r><![CDATA[' +
        REPLACE(TRANSLATE(tokens, TRIM(JunkCharacters), SPACE(LEN(TRIM(JunkCharacters)))), @separator, ']]></r><r><![CDATA[') +
        ']]></r></root>' AS XML)
        .query('
for $x in /root/r[text()]
return data(string-length($x))
').value('.', 'VARCHAR(20)'), SPACE(1), ',') AS CleansedTokensCounter
FROM cte;
Output
+----+-----------------+----------------+-----------------------+
| ID | tokens | JunkCharacters | CleansedTokensCounter |
+----+-----------------+----------------+-----------------------+
| 1 | $$$$ABC$$$DE$$$ | ABCDE | 4,3,3 |
| 2 | ###$$%%ANE$$$$$ | ###%%ANE | 2,5 |
+----+-----------------+----------------+-----------------------+

We can do this with a number of steps:
We use a tally/numbers table to shred the string into individual characters. The tally is calculated on the fly with a couple of cross-joins and ROW_NUMBER
We then calculate a grouping ID for each group of characters, using a standard gaps-and-islands technique: a windowed sum of each starting row
Filter down to the character we want, group it by ID and return a count of rows in each group.
This returns a new row for every group of $ characters
-- Sample data: one row per input string.
CREATE TABLE xyz ([text] VARCHAR(200));
INSERT INTO xyz ([text]) VALUES ('$$$$ABC$$$DE$$$');
INSERT INTO xyz ([text]) VALUES ('###$$%%ANE$$$$$');

-- On-the-fly tally: 16 seed rows, cross-joined with themselves (256 rows), then numbered.
WITH
L0 AS (
    SELECT 1 AS c
    FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),
                 (1),(1),(1),(1),(1),(1),(1),(1)) AS D(c)
),
L1 AS (
    SELECT 1 AS c
    FROM L0 AS A
    CROSS JOIN L0 AS B
),
-- you can allow for larger strings with more cross-joins
Nums AS (
    SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS rownum
    FROM L1
)
SELECT
    xyz.[text],
    r.numRepetitions
FROM xyz
CROSS APPLY (
    -- One output row per run of '$': count the characters in each run.
    SELECT COUNT(*) AS numRepetitions
    FROM (
        -- Shred the string into characters; groupId increases by one every time
        -- the character differs from the previous one (running sum of run starts).
        SELECT TOP (LEN(xyz.[text]))
            SUBSTRING(xyz.[text], rownum, 1) AS thisChar,
            SUM(CASE
                    WHEN rownum = 1
                      OR SUBSTRING(xyz.[text], rownum, 1) <> SUBSTRING(xyz.[text], rownum - 1, 1)
                    THEN 1
                    ELSE 0
                END) OVER (ORDER BY rownum ROWS UNBOUNDED PRECEDING) AS groupId
        FROM Nums
        ORDER BY rownum
    ) AS chars
    WHERE chars.thisChar = '$'
    GROUP BY chars.groupId
) AS r;
If you want a single comma-separated list of row-counts, you need to subquery again
CROSS APPLY (
    -- Collapse the per-run counts into one comma-separated list.
    -- WITHIN GROUP (ORDER BY groupId) pins the list to left-to-right run order;
    -- without it STRING_AGG may concatenate the counts in any order.
    SELECT numRepetitions = STRING_AGG(CAST(groups.numRepetitions AS varchar(10)), ',')
                            WITHIN GROUP (ORDER BY groups.groupId)
    FROM (
        -- One row per run of '$', carrying its run number and its length.
        SELECT
            chars.groupId,
            numRepetitions = COUNT(*)
        FROM (
            -- Shred the string into characters; groupId is a running count of run starts.
            SELECT TOP(LEN(xyz.[text]))
                thisChar = SUBSTRING(xyz.[text], rownum, 1),
                groupId = SUM(CASE WHEN rownum = 1 OR SUBSTRING(xyz.[text], rownum, 1) <> SUBSTRING(xyz.[text], rownum - 1, 1) THEN 1 ELSE 0 END)
                          OVER (ORDER BY rownum ROWS UNBOUNDED PRECEDING)
            FROM Nums
            ORDER BY rownum
        ) AS chars
        WHERE chars.thisChar = '$'
        GROUP BY chars.groupId
    ) AS groups
) AS r;

Related

SQL get average of a list in sql select

We have this column in the table named "pricehistory"
1634913730;48.38,1634916509;48.38,1635162352;37.96,1635177904;49.14,1635337722;1219.98,1635340811;27.17
that is an example data.
The first part is the timestamp; the part after the ; is the price at that timestamp.
But i want the average price from every timestamp in a select... is that possible?
I can't find any similar examples anywhere, and my attempts at a SELECT don't work... I am not so good with SQL.
so i want average of all prices behind that ; and before ,
The , split the timestamp and prices
Some test data :
-- Test fixture: pricehistory is a comma-separated list of "<timestamp>;<price>" items.
CREATE TABLE test (
    id           INT  NOT NULL,
    pricehistory TEXT NOT NULL
);

INSERT INTO test (id, pricehistory)
VALUES
    (1, '1634913730;48.38,1634916509;48.38,1635162352;37.96,1635177904;49.14,1635337722;1219.98,1635340811;27.17'),
    (2, '1634913731;42.42,1634916609;21.21');
If your RDBMS has some splitting function
Then it's quite easy, just split and use AVG. Here is an example using PostgreSQL :
-- Average price per row: split the history on ',' into "<timestamp>;<price>" items,
-- then average the part after the ';'. split_part avoids assuming that every
-- timestamp is exactly 10 characters long.
SELECT
    id,
    AVG(split_part(v, ';', 2)::decimal) AS average
FROM test
INNER JOIN LATERAL regexp_split_to_table(pricehistory, E',') AS t(v) ON TRUE
GROUP BY id;
Then you get:
id | average
----+----------------------
2 | 31.8150000000000000
1 | 238.5016666666666667
(2 rows)
Otherwise
You can use a CTE to split the values manually. This is a bit more involved. Here is an example using PostgreSQL again :
-- Manual split with a recursive CTE: the anchor yields the LAST item of each list,
-- the recursive member peels the remaining items off the front one at a time.
WITH RECURSIVE T AS (
    SELECT id,
           -- We get the last value ...
           -- (when the list has no comma the whole string is the only item;
           --  the plain SUBSTRING arithmetic would return '' in that case)
           CASE
               WHEN STRPOS(pricehistory, ',') = 0 THEN pricehistory
               ELSE SUBSTRING(pricehistory, LENGTH(pricehistory) - STRPOS(REVERSE(pricehistory), ',') + 2)
           END AS oneprice,
           pricehistory AS remaining
    FROM test
    UNION ALL
    -- ... as we get the other values from the recursive CTE.
    SELECT id,
           LEFT(remaining, STRPOS(remaining, ',') - 1),
           SUBSTRING(remaining, STRPOS(remaining, ',') + 1)
    FROM T
    WHERE STRPOS(remaining, ',') > 0
)
-- The price is whatever follows the ';' in each item, independent of timestamp length.
SELECT id, AVG(SUBSTRING(oneprice, STRPOS(oneprice, ';') + 1)::decimal) AS average
FROM T
GROUP BY id;
Then you get:
id | average
----+----------------------
2 | 31.8150000000000000
1 | 238.5016666666666667
(2 rows)
MySql >= 8.0
I used Recursive Common Table Expressions (cte) to split pricehistory string by ','. Then I split price from timestamp by ';', cast price as decimal(10,2) and group by id to get average price by id.
-- Peel one "<timestamp>;<price>" item off the front of the list per recursion step.
WITH RECURSIVE cte AS (
    -- Anchor: first item, plus everything after the first comma (NULL when there is none).
    SELECT
        id,
        SUBSTRING_INDEX(pricehistory, ',', 1) AS price,
        CASE
            WHEN POSITION(',' IN pricehistory) > 0
                THEN SUBSTR(pricehistory, POSITION(',' IN pricehistory) + 1)
        END AS rest
    FROM t

    UNION ALL

    -- Recursive step: repeat on the unprocessed remainder until nothing is left.
    SELECT
        id,
        SUBSTRING_INDEX(rest, ',', 1) AS price,
        CASE
            WHEN POSITION(',' IN rest) > 0
                THEN SUBSTR(rest, POSITION(',' IN rest) + 1)
        END AS rest
    FROM cte
    WHERE rest IS NOT NULL
)
-- Keep only the part after ';' of each item and average it per id.
SELECT
    id,
    AVG(CAST(SUBSTR(price, POSITION(';' IN price) + 1) AS decimal(10,2))) AS price_average
FROM cte
GROUP BY id;
A similar way to do the same (using regular expressions functions):
WITH RECURSIVE
-- Append a trailing comma so that every price is enclosed as ";<price>,".
cte AS (
    SELECT
        Id,
        concat(pricehistory, ',') AS pricehistory
    FROM t
),
-- Walk the occurrences of ";<price>," one by one: i is the occurrence number.
unnest AS (
    SELECT
        id,
        pricehistory,
        1 AS i,
        REGEXP_SUBSTR(pricehistory, ';[0-9.]*,', 1, 1) AS price
    FROM cte

    UNION ALL

    SELECT
        id,
        pricehistory,
        i + 1,
        REGEXP_SUBSTR(pricehistory, ';[0-9.]*,', 1, i + 1)
    FROM unnest
    WHERE REGEXP_SUBSTR(pricehistory, ';[0-9.]*,', 1, i + 1) IS NOT NULL
)
-- Strip the leading ';' and trailing ',' from each match before averaging.
SELECT
    id,
    AVG(CAST(SUBSTR(price, 2, LENGTH(price) - 2) AS decimal(10,2))) AS price_average
FROM unnest
GROUP BY id;
You don't say which DBMS you are using.
In MS SQL-SERVER you can write something like this.
Create a function to convert string to multiple rows, and then use that in the query.
-- Splits @CommadelimitedString on @Separator and returns one trimmed item per row.
--   @CommadelimitedString  the delimited list (VARCHAR(MAX) so long lists are not truncated)
--   @Separator             single-character delimiter
-- Returns: @Result(Column1), one row per item; the text after the last separator
--          (or the whole input when no separator is present) is always returned.
CREATE OR ALTER FUNCTION dbo.BreakStringIntoRows (@CommadelimitedString VARCHAR(MAX), @Separator VARCHAR(1))
RETURNS @Result TABLE (Column1 VARCHAR(MAX))
AS
BEGIN
    DECLARE @IntLocation INT;

    -- Consume the list from the left, one item per iteration.
    WHILE (CHARINDEX(@Separator, @CommadelimitedString) > 0)
    BEGIN
        SET @IntLocation = CHARINDEX(@Separator, @CommadelimitedString);

        INSERT INTO @Result (Column1)
        -- LTRIM and RTRIM to ensure blank spaces are removed
        SELECT RTRIM(LTRIM(LEFT(@CommadelimitedString, @IntLocation - 1)));

        -- Drop the item just emitted together with its separator.
        SET @CommadelimitedString = STUFF(@CommadelimitedString, 1, @IntLocation, '');
    END

    -- Whatever is left is the final item.
    INSERT INTO @Result (Column1)
    SELECT RTRIM(LTRIM(@CommadelimitedString)); -- LTRIM and RTRIM to ensure blank spaces are removed

    RETURN;
END
-- Test fixture: pricehistory is a comma-separated list of "<timestamp>;<price>" items.
CREATE TABLE test1 (id INT NOT NULL, pricehistory VARCHAR(MAX) NOT NULL);
INSERT INTO test1 (id, pricehistory) VALUES (1, '1634913730;48.38,1634916509;48.38,1635162352;37.96,1635177904;49.14,1635337722;1219.98,1635340811;27.17');
INSERT INTO test1 (id, pricehistory) VALUES (2, '1634913731;42.42,1634916609;21.21');

-- Average PRICE per row: the price is the text AFTER the ';' in each item
-- (the text before it is the timestamp). DECIMAL(18, 2) keeps the cents;
-- a bare DECIMAL would default to scale 0 and drop them.
SELECT
    t.id,
    t.pricehistory,
    (
        SELECT AVG(CAST(SUBSTRING(s.Column1, CHARINDEX(';', s.Column1) + 1, LEN(s.Column1)) AS DECIMAL(18, 2)))
        FROM dbo.BreakStringIntoRows(t.pricehistory, ',') AS s
    ) AS [AVG]
FROM test1 AS t;
sample output:

SQL Server Loop thru rows to form Groups

I am using SQL Server 2008 R2 / 2014. I wish to find a SQL query that can do the following:
Rules:
Each [Group] must have [Number] 1 to 6 to be complete group.
[Name] in each [Group] must be unique.
Each row only can use 1 time.
Table before sorting is...
Name Number Group
---- ------ -----
A 1
B 6
A 123
C 3
B 4
C 23
D 45
D 4
C 56
A 12
D 56
After sorting, result I want is below or similar....
Name Number Group
---- ------ -----
A 1 1
C 23 1
D 45 1
B 6 1
A 123 2
D 4 2
C 56 2
A 12 3
C 3 3
B 4 3
D 56 3
What I tried before is to find a subgroup that have [Number] consist of 1-6 with below concatenate method...
-- Attempt from the question: concatenate every [Number] in the table (ordered by its
-- first character) and keep rows only when that concatenation equals '123456'.
SELECT ST2.*
FROM [Table1] AS ST2
WHERE SUBSTRING(
          (
              SELECT ST1.[Number] AS [text()]
              FROM [Table1] AS ST1
              -- WHERE ST1.[Group] = ST2.[Group]
              ORDER BY LEFT(ST1.[Number], 1)
              FOR XML PATH ('')
          ),
          1,
          1000
      ) = '123456'
Maybe you should check ROW_NUMBER function.
-- Numbers the rows of each Name 1, 2, 3, ... in Number order and reports that as the group.
-- GROUP is a reserved keyword, so the alias has to be delimited.
SELECT
    Name,
    Number,
    ROW_NUMBER() OVER (PARTITION BY Name ORDER BY Number) AS [Group]
FROM [Table1]
If you have more than 6 rows with same NAME value then it will return more groups. You can filter additional groups out since you are interested in only 6 groups with unique values of NAME column.
I'm not sure if this can be done more simply or not, but here's my go at it...
Advanced warning, this requires some means of splitting strings. Since you're not on 2016, I've included a function at the beginning of the script.
The bulk of the work is a recursive CTE that builds the Name and Number columns into comma delimited groups. We then reduce our working set to only the groups where the numbers would create 123456, split the groups and use ROW_NUMBER() OVER... to identify them, and then select based on the new data.
Demo: http://rextester.com/NEXG53500
-- Inline table-valued splitter: turns @List into XML by replacing every @Delimiter
-- with an element boundary, then returns the text of each <i> element as Item.
--   @List       the delimited list to split
--   @Delimiter  the delimiter string
-- NOTE(review): the list is converted to XML as-is, so items containing XML special
-- characters (<, &) would presumably fail the conversion; confirm before reuse.
CREATE FUNCTION [dbo].[SplitStrings]
(
    @List NVARCHAR(MAX),
    @Delimiter NVARCHAR(255)
)
RETURNS TABLE
WITH SCHEMABINDING
AS
RETURN
(
    SELECT Item = y.i.value('(./text())[1]', 'nvarchar(4000)')
    FROM
    (
        SELECT x = CONVERT(XML, '<i>'
            + REPLACE(@List, @Delimiter, '</i><i>')
            + '</i>').query('.')
    ) AS a
    CROSS APPLY x.nodes('i') AS y(i)
);
GO
-- Working copy of the question's data (name, number) in a session temp table.
CREATE TABLE #temp
(
name VARCHAR(MAX),
number INT
)
INSERT INTO #temp
VALUES
('a',1),
('b',6),
('a',123),
('c',3),
('b',4),
('c',23),
('d',45),
('d',4),
('c',56),
('a',12),
('d',56);
/*** Recursively build groups based on information from #temp ***/
-- Each recursion step prepends one more #temp row to a comma-delimited list of names
-- and a parallel comma-delimited list of numbers.
WITH groupFinder AS
(
SELECT CAST(name AS VARCHAR(MAX)) AS [groupNames], CAST(number AS VARCHAR(max)) AS [groupNumbers] FROM #temp
UNION ALL
SELECT
cast(CONCAT(t.[Name],',',g.[groupNames]) as VARCHAR(MAX)),
CAST(CONCAT(CAST(t.[Number] AS VARCHAR(max)),',',CAST(g.[groupNumbers] AS VARCHAR(max))) AS VARCHAR(max))
FROM #temp t
JOIN groupFinder g
ON
-- a row may join a group only if its name is not already in the group ...
g.groupNames NOT LIKE '%' + t.name+'%'
-- ... and none of number/100, number/10 and number%10 already appears in the group's numbers
-- NOTE(review): these three values are not the individual digits for every input; confirm intent
AND g.[groupNumbers] NOT LIKE '%' + CAST(t.number/100 AS VARCHAR(10)) +'%'
AND g.[groupNumbers] NOT LIKE '%' + CAST(t.number/10 AS VARCHAR(10)) +'%'
AND g.[groupNumbers] NOT LIKE '%' + CAST(t.number%10 AS VARCHAR(10)) +'%'
)
/*** only get groups where the numbers form 123456 ***/
-- rn numbers the surviving groups in groupNames order; it becomes the output [Group].
, groupPruner AS
(
SELECT *, ROW_NUMBER() OVER (ORDER BY [groupNames]) AS [rn] FROM groupFinder WHERE REPLACE([groupNumbers],',','') = '123456'
)
/*** split the name group and give it identifiers ***/
-- rn1 numbers the names within each group (ORDER BY (SELECT NULL): no explicit ordering).
, nameIdentifier AS
(
SELECT g.*, c1.[item] AS [Name], ROW_NUMBER() OVER (PARTITION BY [rn] ORDER BY (SELECT NULL)) AS [rn1]
FROM groupPruner g
CROSS APPLY splitstrings(g.groupnames,',') c1
)
/*** split the number group and give it identifiers ***/
-- rn2 numbers the numbers within each (group, name) pair, again with no explicit ordering.
, numberIdentifier AS
(
SELECT g.*, c1.[item] AS [Number], ROW_NUMBER() OVER (PARTITION BY [rn], [rn1] ORDER BY (SELECT NULL)) AS [rn2]
FROM nameIdentifier g
CROSS APPLY splitstrings(g.groupNumbers,',') c1
)
-- Pair the n-th name of a group with the n-th number of the same group.
SELECT [Name], [Number], [rn] AS [Group]
--,groupnames, groupNumbers /*uncomment this line to see the groups that were built*/
FROM numberIdentifier
WHERE rn1 = rn2
ORDER BY rn, rn1
DROP TABLE #temp

SQL How to return column value based in other substring position delimited by commas

My first post! I hope that you help me :)
I'm working in SQL 2017 and I have a table like this:
+----+------------------+------------------+
| ID | Col1 | Col2 |
+-----+------------------+------------------+
| 110 | 450,2,50,110,600 | 3,45,30,901,1001 |
| 250 | 2,250,300,1 | 1,33,540,900 |
| 45 | 1,45,320 | 200,444,600 |
+-----+------------------+------------------+
The logic is to find the ID position in Col1 and return based in that position the substring in Col2.
Example:
ID 110 match 4th position in Col1 so should return 901 value in Col2.
ID 250 match 2nd position in Col1 so should return 33 value in Col2.
ID 45 match 2nd position in Col1 so should return 444 value in Col2.
I made different attempts without any success, probably I'm in wrong direction.
Can you please help with this?
The output that I want is the specific values from Col2.
Thanks!
For SQL Server 2016+ (I'm not going to do one for earlier versions because of the lack of STRING_SPLIT support):
-- Sample data in a table variable (table variables use the @ sigil).
DECLARE @BadDesign TABLE (ID int, Col1 varchar(200), Col2 varchar(200));
INSERT @BadDesign (ID, Col1, Col2) VALUES
(110,'450,2,50,110,600', '3,45,30,901,1001'),
(250,'2,250,300,1', '1,33,540,900'),
(45 ,'1,45,320', '200,444,600');

-- Split both lists, number the items of each, and keep the Col2 item whose
-- position equals the position at which ID occurs in Col1.
-- The numbering relies on STRING_SPLIT returning items in list order,
-- which this form of the function does not guarantee.
SELECT
    *
FROM
    @BadDesign B
    CROSS APPLY
    (SELECT
        rn = ROW_NUMBER() OVER (ORDER BY (SELECT 1)), value
     FROM
        STRING_SPLIT(B.Col1, ',')
    ) b1
    CROSS APPLY
    (SELECT
        rn = ROW_NUMBER() OVER (ORDER BY (SELECT 1)), value
     FROM
        STRING_SPLIT(B.Col2, ',')
    ) b2
WHERE
    B.ID = b1.value AND b1.rn = b2.rn
No guarantees on ROW_NUMBER consistency over the output of STRING_SPLIT.
Edit: also requires database compatibility to be 130 or above (SQL Server 2016)
The STRING_SPLIT function is available only under compatibility level
130. If your database compatibility level is lower than 130, SQL Server will not be able to find and execute STRING_SPLIT function. You
can change a compatibility level of database using the following
command: ALTER DATABASE DatabaseName SET COMPATIBILITY_LEVEL = 130
Using a Custom String Split Function (this answer is using one written by Aaron Bertrand), so not restricting the use on SQL2016+
-- Inline table-valued splitter that also returns the ordinal position of each item.
--   @List       the delimited list to split
--   @Delimiter  the delimiter string
-- Returns: rn (1-based position, ordered by the item's start offset) and Item (trimmed).
-- The numbers come from sys.all_objects, so the list length that can be handled is
-- bounded by the number of rows in that catalog view.
CREATE FUNCTION dbo.SplitStringsOrdered (
    @List NVARCHAR(2000)
    , @Delimiter NVARCHAR(32)
)
RETURNS TABLE
AS
RETURN (
    SELECT
        rn = ROW_NUMBER() OVER (ORDER BY Number)
        , Item
    FROM
    (
        SELECT
            Number
            -- The item runs from its start offset up to the next delimiter
            -- (a delimiter is appended so the last item terminates too).
            , Item = LTRIM(RTRIM(SUBSTRING(
                  @List
                , Number
                , CHARINDEX(@Delimiter, @List + @Delimiter, Number) - Number
              )))
        FROM
        (
            SELECT ROW_NUMBER() OVER (ORDER BY [object_id])
            FROM sys.all_objects
        ) AS n(Number)
        WHERE
            Number <= CONVERT(INT, LEN(@List))
            -- An item starts wherever the delimiter-prefixed list has a delimiter.
            AND SUBSTRING(@Delimiter + @List, Number, LEN(@Delimiter)) = @Delimiter
    ) AS y
);
GO
And amending the query created by @gbn in his/her answer - is this allowed on SO?
-- Sample data in a table variable (table variables use the @ sigil).
DECLARE @BadDesign TABLE (ID int, Col1 varchar(200), Col2 varchar(200));
INSERT @BadDesign (ID, Col1, Col2) VALUES
(110,'450,2,50,110,600', '3,45,30,901,1001'),
(250,'2,250,300,1', '1,33,540,900'),
(45 ,'1,45,320', '200,444,600');

-- For each row, find the position at which ID occurs in Col1 and return the Col2
-- item at that same position. dbo.SplitStringsOrdered already returns a
-- deterministic position (rn), so it is used directly instead of re-numbering
-- the items with an unordered ROW_NUMBER().
SELECT
    B.*, Col1Value = b1.Item, Col2Value = b2.Item
FROM
    @BadDesign B
    CROSS APPLY dbo.SplitStringsOrdered(B.Col1, ',') b1
    CROSS APPLY dbo.SplitStringsOrdered(B.Col2, ',') b2
WHERE
    B.ID = b1.Item      -- only the Col1 position that holds this row's ID
    AND b1.rn = b2.rn   -- ... paired with the Col2 item at the same position

SQL split-string as (key-identity,value)

I've added a function to my DB that splits a comma separated string into separate rows.
Now in my string I have: 1,55,2,56,3,57,etc... where (1) is the rowID and (55) the value I want to enter into row 1 of my table.
How can I modify this function to pull the 1st,3rd,5th,etc... values and 2nd,4th,6th,etc... values into two different columns?
-- Tally-table string splitter: returns one row (Item) per delimited value of @List.
--   @List       the delimited list to split
--   @Delimiter  the delimiter (compared against single characters of @List)
CREATE FUNCTION dbo.SplitStringToValues
(
    @List NVARCHAR(MAX),
    @Delimiter NVARCHAR(255)
)
RETURNS TABLE
WITH SCHEMABINDING AS
RETURN
    -- E1 = 10 rows; E2 = 100; E4 = 10,000; E42 = 1,000,000 candidate positions.
    WITH E1(N) AS ( SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
                    UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
                    UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1),
    E2(N) AS (SELECT 1 FROM E1 a CROSS JOIN E1 b),
    E4(N) AS (SELECT 1 FROM E2 a CROSS JOIN E2 b),
    E42(N) AS (SELECT 1 FROM E4 a CROSS JOIN E2 b),
    -- Positions 0..DATALENGTH(@List); position 0 stands for "before the first item".
    cteTally(N) AS (SELECT 0 UNION ALL SELECT TOP (DATALENGTH(ISNULL(@List,1)))
                    ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E42),
    -- N1 = start offset of each item: one past every delimiter, plus offset 1.
    cteStart(N1) AS (SELECT t.N+1 FROM cteTally t
                     WHERE (SUBSTRING(@List,t.N,1) = @Delimiter OR t.N = 0))
    -- Each item runs to the next delimiter, or 8000 characters when there is none.
    SELECT Item = SUBSTRING(@List, s.N1, ISNULL(NULLIF(CHARINDEX(@Delimiter,@List,s.N1),0)-s.N1,8000))
    FROM cteStart s;
go
-------------- Update
Thanks everyone for your examples. I'm going to try out each of these until I get something working. I will accept once I figure out which one I can make work.
Thank you,
Alexp
An attempt to help with batch script; please try it out:
-- Input list: alternating key,value,key,value,...
DECLARE @List NVARCHAR(MAX) = '1,55,2,56,3,57,10,65,11,88';
-- STRING_SPLIT requires a single-character separator type (nchar(1)/nvarchar(1)).
DECLARE @Delimiter NVARCHAR(1) = ',';

-- Every list element paired with the element that follows it; ID records insertion order.
DECLARE @ListDataTable TABLE
(
    ID INT IDENTITY (1, 1)
    ,DataKey INT
    ,DataValue INT
);

-- NOTE: both LEAD's ORDER BY (SELECT 1) and the IDENTITY numbering assume that
-- STRING_SPLIT returns the items in list order, which is not guaranteed.
INSERT INTO @ListDataTable (DataKey, DataValue)
SELECT
    value
    ,LEAD(value, 1, 0) OVER (ORDER BY (SELECT 1))
FROM STRING_SPLIT(@List, @Delimiter)
WHERE RTRIM(value) <> '';

-- Rows with an odd ID are the real (key, value) pairs; even IDs pair a value with the next key.

-- To get odd key values
SELECT DataKey, DataValue
FROM @ListDataTable
WHERE ID % 2 = 1
  AND DataKey % 2 = 1;

-- To get even key values
SELECT DataKey, DataValue
FROM @ListDataTable
WHERE ID % 2 = 1
  AND DataKey % 2 = 0;
Modify your function to return two columns: the position and the value. This is easy enough and keeps the function general purpose. Just change the select to:
-- Replacement for the function's final SELECT: also return each item's 1-based position.
SELECT Item = SUBSTRING(@List, s.N1, ISNULL(NULLIF(CHARINDEX(@Delimiter, @List, s.N1), 0) - s.N1, 8000)),
       ItemNum = row_number() over (order by s.N1)
FROM cteStart s;
Then you can use to get the information you want. Here is one method:
-- Items 1-2, 3-4, 5-6, ... fall into the same (ItemNum - 1) / 2 bucket;
-- within a bucket the odd-numbered item is the row id, the even-numbered one the value.
SELECT
    MAX(CASE WHEN ItemNum % 2 = 1 THEN Item END) AS rownum,
    MAX(CASE WHEN ItemNum % 2 = 0 THEN Item END) AS value
FROM dbo.SplitStringToValues('1,55,2,56,3,57', ',')
GROUP BY (ItemNum - 1) / 2
@Macwise was on to something with LEAD - you could do this:
-- Attach to every item the item that follows it, then keep only the odd positions:
-- those rows hold (row id, value).
SELECT
    split.item AS rownum,
    split.value
FROM (
    SELECT
        itemnumber,
        item,
        LEAD(item, 1) OVER (ORDER BY itemnumber) AS value
    FROM dbo.SplitStringToValues('1,44,2,55,3,456,4,123,5,0', ',')
) AS split
WHERE split.itemnumber % 2 = 1;
Gordon's solution is the best, most elegant pre-2012 solution. Here's another pre-2012 solution that does not require a sort in the execution plan:
-- Self-join of the split list: each odd-positioned item (the row id) is matched
-- with the item immediately after it (the value).
-- NOTE(review): @string must be declared by the caller, and the two sides presumably
-- should split the same list - confirm.
SELECT rownum = s1.Item, value = s2.Item
FROM DelimitedSplit8K(@string, ',') s1
INNER MERGE JOIN SplitStringToValues('1,44,2,55,3,456,4,123,5,0', ',') s2
    ON 1 = s1.itemNumber % 2 AND s1.ItemNumber = s2.ItemNumber-1;
Instead of changing that function, to get the next row's value next to the id use the LEAD function introduced in SQL SERVER 2012:
-- Number the items, attach to each the item that follows it, then keep the
-- odd-numbered rows that actually have a follower: those are the (Id, Value) pairs.
SELECT
    keyValue.Id,
    keyValue.Value
FROM (
    SELECT
        ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS cnt,
        t.item AS Id,
        LEAD(t.item) OVER (ORDER BY (SELECT 1)) AS Value
    FROM dbo.Splitstringtovalues('10,20,30,40,50,10,20,30,40,50,60,70', ',') AS t
) AS keyValue
WHERE keyValue.Value IS NOT NULL
  AND keyValue.cnt % 2 = 1

Count Of Distinct Characters In Column

Say I have the following data set
Column1 (VarChar(50 or something))
Elias
Sails
Pails
Plane
Games
What I'd like to produce from this column is the following set:
LETTER COUNT
E 3
L 4
I 3
A 5
S 5
And So On...
One solution I thought of was combining all strings into a single string, and then count each instance of the letter in that string, but that feels sloppy.
This is more an exercise of curiosity than anything else, but, is there a way to get a count of all distinct letters in a dataset with SQL?
I would do this by creating a table of your letters similar to:
-- Lookup table with one row per letter to be counted.
CREATE TABLE tblLetter (letter VARCHAR(1));

INSERT INTO tblLetter ([letter])
VALUES ('a'), ('b'), ('c'), ('d'); -- etc
Then you could join the letters to your table where your data is like the letter:
-- For each letter, counts the rows of names whose col contains that letter.
-- A row is counted once per letter even when the letter occurs in it several times,
-- so this is a count of matching rows, not of letter occurrences.
select l.letter, count(n.col) Total
from tblLetter l
inner join names n
on n.col like '%'+l.letter+'%'
group by l.letter;
See SQL Fiddle with Demo. This would give a result:
| LETTER | TOTAL |
|--------|-------|
| a | 5 |
| e | 3 |
| g | 1 |
| i | 3 |
| l | 4 |
| m | 1 |
| p | 2 |
| s | 4 |
If you create a table of letters, like this:
-- Lookup table holding the 26 upper-case letters.
CREATE TABLE letter (ch CHAR(1));

INSERT INTO letter (ch)
VALUES
    ('A'), ('B'), ('C'), ('D'), ('E'), ('F'), ('G'), ('H'),
    ('I'), ('J'), ('K'), ('L'), ('M'), ('N'), ('O'), ('P'),
    ('Q'), ('R'), ('S'), ('T'), ('U'), ('V'), ('W'), ('X'), ('Y'), ('Z');
you could do it with a cross join, like this:
-- Occurrences of a letter in a string = length lost when the letter is removed.
-- Summed over every string, and letters that never occur are dropped.
SELECT
    ch,
    SUM(LEN(str) - LEN(REPLACE(str, ch, '')))
FROM letter
CROSS JOIN test -- <<== test is the name of the table with the string
GROUP BY ch
HAVING SUM(LEN(str) - LEN(REPLACE(str, ch, ''))) <> 0
Here is a running demo on sqlfiddle.
You can do it without defining a table by embedding a list of letters into a query itself, but the idea of cross-joining and grouping by the letter would remain the same.
Note: see this answer for the explanation of the expression inside the SUM.
To me, this is a problem almost tailored for a CTE (Thanks, Nicholas Carey, for the original, my fiddle here: http://sqlfiddle.com/#!3/44f77/8):
-- Deconstructs every string into its characters with a recursive CTE and counts them.
-- One anchor row per table row (no GROUP BY there, so duplicate strings are each counted),
-- and the final grouping is on UPPER(Letter) so that 'e' and 'E' are counted together
-- even under a case-sensitive collation.
-- NOTE: the default recursion limit (100) caps the string length this handles.
WITH cteLetters
AS
(
    -- Anchor: first character of every non-empty string.
    SELECT
        1 AS CharPos,
        str,
        LEN(str) AS MaxLen,
        SUBSTRING(str, 1, 1) AS Letter
    FROM
        test
    WHERE
        LEN(str) > 0
    UNION ALL
    -- Recursive step: advance one character until the end of the string.
    SELECT
        CharPos + 1,
        str,
        MaxLen,
        SUBSTRING(str, CharPos + 1, 1) AS Letter
    FROM
        cteLetters
    WHERE
        CharPos + 1 <= MaxLen
)
SELECT
    UPPER(Letter) AS Letter,
    COUNT(*) AS CountOfLetters
FROM
    cteLetters
GROUP BY
    UPPER(Letter)
ORDER BY
    UPPER(Letter);
Use the CTE to calculate character positions and deconstruct each string. Then you can just aggregate from the CTE itself. No need for additional tables or anything.
This should work even if you have case sensitivity turned on.
The setup:
-- Sample words from the question.
CREATE TABLE _test (Column1 VARCHAR(50))

INSERT INTO _test (Column1)
VALUES
    ('Elias'),
    ('Sails'),
    ('Pails'),
    ('Plane'),
    ('Games')
The work:
-- Loops over the ASCII codes of 'A'..'Z' and, for each letter, sums over all rows the
-- number of characters removed when that letter is stripped from the upper-cased string.
DECLARE @counter AS INT;
DECLARE @results TABLE (LETTER VARCHAR(1), [COUNT] INT);

SET @counter = 65; --ascii value for 'A'
WHILE (@counter <= 90) -- ascii value for 'Z'
BEGIN
    INSERT @results (LETTER, [COUNT])
    SELECT
        CHAR(@counter),
        SUM(LEN(UPPER(Column1)) - LEN(REPLACE(UPPER(Column1), CHAR(@counter), '')))
    FROM _test;

    SET @counter = @counter + 1;
END

-- Only letters that actually occur.
SELECT * FROM @results WHERE [Count] > 0;
It's often useful to have a range or sequence table that gives you a source of large runs of contiguous sequential numbers, like this one covering the range -100,000–+100,000.
-- (Re)build dbo.range: one row per integer from -100,000 to +100,000.
-- The DROP is guarded so the script also runs when the table does not exist yet.
if object_id('dbo.range', 'U') is not null
    drop table dbo.range
go
create table dbo.range
(
    id int not null primary key clustered
)
go
set nocount on
go
declare @i int = -100000
while ( @i <= +100000 )
begin
    -- progress message every 1000 positive ids
    if ( @i > 0 and @i % 1000 = 0 ) print convert(varchar,@i) + ' rows'
    insert dbo.range values ( @i )
    set @i = @i + 1
end
go
set nocount off
go
Once you have such a table, you can do something like this:
-- Join every string to the positions 1..len(string) and count each character found there.
SELECT
    SUBSTRING(t.some_column, r.id, 1) AS [character],
    COUNT(*) AS frequency
FROM dbo.some_table AS t
INNER JOIN dbo.range AS r
    ON r.id BETWEEN 1 AND LEN(t.some_column)
GROUP BY SUBSTRING(t.some_column, r.id, 1)
ORDER BY [character]
If you want to ensure case-insensitivity, just mix in the desired upper() or lower():
-- Same character count, but upper-casing each character before grouping.
SELECT
    UPPER(SUBSTRING(t.some_column, r.id, 1)) AS [character],
    COUNT(*) AS frequency
FROM dbo.some_table AS t
INNER JOIN dbo.range AS r
    ON r.id BETWEEN 1 AND LEN(t.some_column)
GROUP BY UPPER(SUBSTRING(t.some_column, r.id, 1))
ORDER BY [character]
Given your sample data:
-- Sample words from the question.
CREATE TABLE dbo.some_table
(
    some_column VARCHAR(50) NOT NULL
)
go
INSERT INTO dbo.some_table (some_column) VALUES ('Elias')
INSERT INTO dbo.some_table (some_column) VALUES ('Sails')
INSERT INTO dbo.some_table (some_column) VALUES ('Pails')
INSERT INTO dbo.some_table (some_column) VALUES ('Plane')
INSERT INTO dbo.some_table (some_column) VALUES ('Games')
go
The latter query above produces the following results:
character frequency
A 5
E 3
G 1
I 3
L 4
M 1
N 1
P 2
S 5