Query rows where first_name contains at least 2 vowels, and the number of occurences of each vowel is equal - sql

I have the following problem: Show all rows in table where column first_name contains at least 2 vowels (a, e, i, o, u), and the number of occurences of each vowel is the same.
Valid example: Alexander, "e" appears 2 times, "a" appears 2 times. That is coreect.
Invalid example: Jonathan, it has 2 vowels (a, o), but "o" appears once, and "a" appears twice, the number of occurences is not equal.
I've solved this problem by calculating each vowel, and then verify every case (A E, A I, A O etc. Shortly, each combination of 2, 3, 4, 5). With that solution, I have a very long WHERE. Is there any shorter way and more elegant and simple?

This is how I solved it in TSQL in MS SQL Server 2019.
I know its not exactly what you wanted. Just an interesting thing to try. Thanks for that.
DROP TABLE IF EXISTS #Samples
SELECT n.Name
INTO #Samples
FROM
(
SELECT 'Ravi' AS Name
UNION
SELECT 'Tim'
UNION
SELECT 'Timothe'
UNION
SELECT 'Ian'
UNION
SELECT 'Lijoo'
UNION
SELECT 'John'
UNION
SELECT 'Jami'
) AS n
SELECT g.Name,
IIF(MAX (g.Repeat) = MIN (g.Repeat) AND SUM (g.Appearance) >= 2, 'Valid', 'Invalid') AS Validity
FROM
(
SELECT v.value,
s.Name,
SUM (LEN (s.Name) - LEN (REPLACE (s.Name, v.value, ''))) AS Repeat,
SUM (IIF(s.Name LIKE '%' + v.value + '%', 1, 0)) AS Appearance
FROM STRING_SPLIT('a,e,i,o,u', ',') AS v
CROSS APPLY #Samples AS s
GROUP BY v.value,
s.Name
) AS g
WHERE g.Repeat > 0
GROUP BY g.Name
Output
we can replace STRING_SPLIT with a temp table for supporting lower versions
DROP TABLE IF EXISTS #Vowels
SELECT C.Vowel
INTO #Vowels
FROM
(
SELECT 'a' AS Vowel
UNION
SELECT 'e'
UNION
SELECT 'i'
UNION
SELECT 'o'
UNION
SELECT 'u'
) AS C
SELECT g.Name,
IIF(MAX (g.Repeat) = MIN (g.Repeat) AND SUM (g.Appearance) >= 2, 'Valid', 'Invalid') AS Validity
FROM
(
SELECT v.Vowel,
s.Name,
SUM (LEN (s.Name) - LEN (REPLACE (s.Name, v.Vowel, ''))) AS Repeat,
SUM (IIF(s.Name LIKE '%' + v.Vowel + '%', 1, 0)) AS Appearance
FROM #Vowels AS v
CROSS APPLY #Samples AS s
GROUP BY v.Vowel,
s.Name
) AS g
WHERE g.Repeat > 0
GROUP BY g.Name

From Oracle 12, you can use:
SELECT name
FROM table_name
CROSS JOIN LATERAL(
SELECT 1
FROM (
-- Step 2: Count the frequency of each vowel
SELECT letter,
COUNT(*) As frequency
FROM (
-- Step 1: Find all the vowels
SELECT REGEXP_SUBSTR(LOWER(name), '[aeiou]', 1, LEVEL) AS letter
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT(LOWER(name), '[aeiou]')
)
GROUP BY letter
)
-- Step 3: Filter out names where the number of vowels are
-- not equal or the vowels do not occur at least twice
-- and there are not at least 2 different vowels.
HAVING MIN(frequency) >= 2
AND MIN(frequency) = MAX(frequency)
AND COUNT(*) >= 2
);
Which, for the sample data:
CREATE TABLE table_name (name) AS
SELECT 'Alexander' FROM DUAL UNION ALL
SELECT 'Johnaton' FROM DUAL UNION ALL
SELECT 'Anna' FROM DUAL;
Outputs:
NAME
Alexander
db<>fiddle here

Related

Is there an Oracle Function to determine the number of repeat character

I need to validate the number of repeat character in a email.
I try the next code who give me the percentage of repeat character, but only work if character are next to each other. So one posibily its order the email by character to get my result.
SELECT
round(((REGEXP_COUNT(regexp_replace(SUBSTR('999824123#HOTMAIL.COM',1,INSTR('989824123#HOTMAIL.COM', '#', 1)-1), '(.)\1+','&'),'&')+length(SUBSTR('989824123#HOTMAIL.COM',1,INSTR('989824123#HOTMAIL.COM', '#', 1)-1)) - length(regexp_replace(SUBSTR('989824123#HOTMAIL.COM',1,INSTR('989824123#HOTMAIL.COM', '#', 1)-1), '(.)\1+','\1')))* 100)/length(SUBSTR('989824123#HOTMAIL.COM',1,INSTR('989824123#HOTMAIL.COM', '#', 1)-1)),2) AS PORCENTAJE_IGUAL
FROM DUAL A;
I expect 60% of repeat character for this email 989824123#HOTMAIL.COM. not incluing domain.
please Help.
PD: sorry for the bad english
Numbers 9, 8, 2 repeats in email, so we have 6 characters (9, 9, 8, 8, 2, 2) which repeats and 3 unique (1, 3, 4). 6/9 gives us 66,67%.
You can use this query to count this:
with
t(email) as (select '989824123#hotmail.com' from dual),
a(email) as (select substr(email, 1,instr(email, '#', 1)-1) from t),
l as (select substr(email, level, 1) ltr from a connect by level <= length(email))
select sum(case when cnt <> 1 then cnt end) / sum(cnt)
from (select ltr, count(1) cnt from l group by ltr)
I cut domain, then in subquery l I divided string into one-letter rows, rest was only to count non-unique chars and divide by number of all chars.
edit:
how do you apply something like this in a update or select for a large
scale data base with many email?
You can create function:
create or replace function rpt_similarity(i_email in varchar2) return number is
v_email varchar2(100);
v_ret number;
begin
v_email := substr(i_email, 1, instr(i_email, '#', 1) - 1);
with l as (
select substr(v_email, level, 1) ltr
from dual
connect by level <= length(v_email))
select sum(case when cnt <> 1 then cnt end) / sum(cnt)
into v_ret
from (select ltr, count(1) cnt from l group by ltr);
return v_ret;
end;
and use it like here:
select rpt_similarity('abxabc#pqr.com') from dual;
or:
select rpt_similarity(email) from your_table;
Also you can use above solution in select directly, without function, here is the example:
create table test(id, email) as (
select 101, '989824123#hotmail.com' from dual union all
select 102, 'hsimpson#gmail.com' from dual union all
select 103, 'msimpson#gmail.com' from dual union all
select 104, 'bsimpson121314#hotmail.com' from dual union all
select 105, 'abxabx#hotmail.com' from dual );
with
a(id, email) as (select id, substr(email, 1,instr(email, '#', 1)-1) from test),
l as (
select id, email, substr(email, level, 1) ltr from a
connect by level <= length(email)
and prior id = id and prior sys_guid() is not null)
select id, email, sum(case when cnt <> 1 then cnt end) / sum(cnt)
from (select id, email, ltr, count(1) cnt from l group by id, ltr, email)
group by id, email;
connect by queries tends to be slow for large sets of data. Maybe you can adapt your regexp functions and it will be faster. I tried to do it, but your regexp_replace changes 99 into $ and 999 also into one $.

In SQL sort by Alphabets first then by Numbers

In H2 Database when i have applied order by on varchar column Numbers are coming first then Alphabets. But need to come Alphabets first then Numbers.
I have tried with
ORDER BY IF(name RLIKE '^[a-z]', 1, 2), name
but getting error like If condition is not available in H2.
My Column Data is Like
A
1-A
3
M
2-B
5
B-2
it should come like
A
B-2
M
1-A
2-B
3
5
try this out
SELECT MYCOLUMN FROM MYTABLE ORDER BY REGEXP_REPLACE (MYCOLUMN,'(*)(\d)(*)','}\2') , MYCOLUMN
One thing can be done is by altering the ASCII in order by clause.
WITH tab
AS (SELECT 'A' col FROM DUAL
UNION ALL
SELECT '1-A' FROM DUAL
UNION ALL
SELECT '3' FROM DUAL
UNION ALL
SELECT 'M' FROM DUAL
UNION ALL
SELECT '2-B' FROM DUAL
UNION ALL
SELECT '5' FROM DUAL
UNION ALL
SELECT 'B-2' FROM DUAL)
SELECT col
FROM tab
ORDER BY CASE WHEN SUBSTR (col, 1, 1) < CHR (58) THEN CHR (177) || col ELSE col END;
I have Used CHR(58) as ASCII value of numbers end at 57. and CHR(177) is used as this is the maximum in the ASCII table.
FYR : ASCII table
Given the example dataset, I'm not sure if you need further logic than this- so I'll refrain from making further assumptions:
DECLARE #temp TABLE (myval char(3))
INSERT INTO #temp VALUES
('A'), ('1-A'), ('3'), ('M'), ('2-B'), ('5'), ('B-2')
SELECT myval
FROM #temp
ORDER BY CASE WHEN LEFT(myval, 1) LIKE '[a-Z]'
THEN 1
ELSE 2
END
,LEFT(myval, 1)
Gives output:
myval
A
B-2
M
1-A
2-B
3
5

Check palindrome without using string functions with condition

I have a table EmployeeTable.
If I want only that records where employeename have character of 1 to 5
will be palindrome and there also condition like total character is more then 10 then 4 to 8 if character less then 7 then 2 to 5 and if character less then 5 then all char will be checked and there that are palindrome then only display.
Examples :- neen will be display
neetan not selected
kiratitamara will be selected
I try this something on string function like FOR first case like name less then 5 character long
SELECT SUBSTRING(EmployeeName,1,5),* from EmaployeeTable where
REVERSE (SUBSTRING(EmployeeName,1,5))=SUBSTRING(EmployeeName,1,5)
I want to do that without string functions,
Can anyone help me on this?
You need at least SUBSTRING(), I have a solution like this:
(In SQL Server)
DECLARE #txt varchar(max) = 'abcba'
;WITH CTE (cNo, cChar) AS (
SELECT 1, SUBSTRING(#txt, 1, 1)
UNION ALL
SELECT cNo + 1, SUBSTRING(#txt, cNo + 1, 1)
FROM CTE
WHERE SUBSTRING(#txt, cNo + 1, 1) <> ''
)
SELECT COUNT(*)
FROM (
SELECT *, ROW_NUMBER() OVER (ORDER BY cNo DESC) as cRevNo
FROM CTE t1 CROSS JOIN
(SELECT Max(cNo) AS strLength FROM CTE) t2) dt
WHERE
dt.cNo <= dt.strLength / 2
AND
dt.cChar <> (SELECT dti.cChar FROM CTE dti WHERE dti.cNo = cRevNo)
The result will shows the count of differences and 0 means no differences.
Note :
Current solution is Non-Case-Sensitive for change it to a Case-Sensitive you need to check the strings in a case-sensitive collation like Latin1_General_BIN
You can use this solution as a SVF or something like that.
I dont realy understand why you dont want to use string functions in your query, but here is one solution. Compute everything beforehand:
Add Column:
ALTER TABLE EmployeeTable
ADD SubString AS
SUBSTRING(EmployeeName,
(
CASE WHEN LEN(EmployeeName)>10
THEN 4
WHEN LEN(EmployeeName)>7
THEN 2
ELSE 1 END
)
,
(
CASE WHEN LEN(EmployeeName)>10
THEN 8
WHEN LEN(EmployeeName)>7
THEN 5
ELSE 5 END
)
PERSISTED
GO
ALTER TABLE EmployeeTable
ADD Palindrome AS
REVERSE(SUBSTRING(EmployeeName,
(
CASE WHEN LEN(EmployeeName)>10
THEN 4
WHEN LEN(EmployeeName)>7
THEN 2
ELSE 1 END
)
,
(
CASE WHEN LEN(EmployeeName)>10
THEN 8
WHEN LEN(EmployeeName)>7
THEN 5
ELSE 5 END
)) PERSISTED
GO
Then your query will looks like:
SELECT * from EmaployeeTable
where Palindrome = SubString
BUT!
This is not a good idea. Please tell us, why you dont want to use string functios.
You could do it building a list of palindrome words using a recursive query that generates palindrome words till a length o n characters and then selects employees with the name matching a palindrome word. This may be a really inefficient way, but it does the trick
This is a sample query for Oracle, PostgreSQL should support this feature as well with little differences on syntax. I don't know about other RDBMS.
with EmployeeTable AS (
SELECT 'ADA' AS employeename
FROM DUAL
UNION ALL
SELECT 'IDA' AS employeename
FROM DUAL
UNION ALL
SELECT 'JACK' AS employeename
FROM DUAL
), letters as (
select chr(ascii('A') + rownum - 1) as letter
from dual
connect by ascii('A') + rownum - 1 <= ascii('Z')
), palindromes(word, len ) as (
SELECT WORD, LEN
FROM (
select CAST(NULL AS VARCHAR2(100)) as word, 0 as len
from DUAL
union all
select letter as word, 1 as len
from letters
)
union all
select l.letter||p.word||l.letter AS WORD, len + 1 AS LEN
from palindromes p
cross join letters l
where len <= 4
)
SEARCH BREADTH FIRST BY word SET order1
CYCLE word SET is_cycle TO 'Y' DEFAULT 'N'
select *
from EmployeeTable
WHERE employeename IN (
SELECT WORD
FROM palindromes
)
DECLARE #cPalindrome VARCHAR(100) = 'SUBI NO ONIBUS'
SET #cPalindrome = REPLACE(#cPalindrome, ' ', '')
;WITH tPalindromo (iNo) AS (
SELECT 1
WHERE SUBSTRING(#cPalindrome, 1, 1) = SUBSTRING(#cPalindrome, LEN(#cPalindrome), 1)
UNION ALL
SELECT iNo + 1
FROM tPalindromo
WHERE SUBSTRING(#cPalindrome, iNo + 1, 1) = SUBSTRING(#cPalindrome, LEN(#cPalindrome) - iNo, 1)
AND LEN(#cPalindrome) > iNo
)
SELECT IIF(MAX(iNo) = LEN(#cPalindrome), 'PALINDROME', 'NOT PALINDROME')
FROM tPalindromo

SQL order by included character and string

I have a table and i want to colum joint_no column. The column's values are like these
FW-1
FW-2
.
.
.
FW-13
FW-R1
FW-1A
When i ordered them i get this results
FW-1
FW-10
FW-11
FW-12
FW-13
FW-1A
.
.
FW-R1
I want to get this result after sql query
FW-1
FW-1A
FW-2
FW-3
..
FW-13
FW-R1
can anybody help me?
If you can do it, I'd advise you to renumber the values so that the 'logical' order sticks to the alphabetical order. F-1 will then be updated to F-01, or F-001.
If you cannot do it, add a field that will be populated with the 'ordered' form of your code. You 'll then be able to order by the F-001 column and still display the F-1 value
Otherwise ordering your records will rapidly become your nightmare.
Using Patindex to find the first numeric expression as first sort field, then extracting the numeric part as integer as second sortfield and using the whole string as third sort field you might get the desired result.
Declare #a Table (c varchar(50))
Insert Into #a
Select 'FW-1'
Union Select 'FW-10'
Union Select 'FW-11'
Union Select 'FW-12'
Union Select 'FW-13'
Union Select 'FW-1A'
Union Select 'FW-2'
Union Select 'FW-3'
Union Select 'FW-R1'
Union Select 'FW-A1'
;With CTE as
(Select 1 as ID
Union All
Select ID + 1 from CTE where ID < 100
)
Select * from
(
Select c
,PATINDEX('%[0-9]%',c) as s1
,(Select Cast(
(Select Case
When SUBSTRING(c, ID, 1) LIKE '[0-9]'
Then SUBSTRING(c, ID, 1)
Else ''
End
From (Select * from CTE) AS X(ID)
Where ID <= LEN(c)
For XML PATH(''))
as int)
)
as s2
from
#a
) x
order by
s1,s2,c
With the output:
FW-1 4 1 -1
FW-1A 4 1 -1A
FW-2 4 2 -2
FW-3 4 3 -3
FW-10 4 10 -10
FW-11 4 11 -11
FW-12 4 12 -12
FW-13 4 13 -13
FW-A1 5 1 A1
FW-R1 5 1 R1
If the leading part is not fixed (FW-) you might need to add one additional sort field
Declare #a Table (c varchar(50))
Insert Into #a
Select 'FW-1'
Union Select 'FW-10'
Union Select 'FW-11'
Union Select 'FW-12'
Union Select 'FW-13'
Union Select 'FW-1A'
Union Select 'FW-2'
Union Select 'FW-3'
Union Select 'FW-R1'
Union Select 'FW-A1'
Union Select 'AB-A1'
Union Select 'AB-11'
;With CTE as
(Select 1 as ID
Union All
Select ID + 1 from CTE where ID < 100
)
Select * from
(
Select c
,SubString(c,1,PATINDEX('%[0-9]%',c)-1) as S0
,PATINDEX('%[0-9]%',c) as s1
,(Select Cast(
(Select Case
When SUBSTRING(c, ID, 1) LIKE '[0-9]'
Then SUBSTRING(c, ID, 1)
Else ''
End
From (Select * from CTE) AS X(ID)
Where ID <= LEN(c)
For XML PATH(''))
as int)
)
as s2
from
#a
) x
order by
s0,s1,s2,c

SQL: how to get all the distinct characters in a column, across all rows

Is there an elegant way in SQL Server to find all the distinct characters in a single varchar(50) column, across all rows?
Bonus points if it can be done without cursors :)
For example, say my data contains 3 rows:
productname
-----------
product1
widget2
nicknack3
The distinct inventory of characters would be "productwigenka123"
Here's a query that returns each character as a separate row, along with the number of occurrences. Assuming your table is called 'Products'
WITH ProductChars(aChar, remain) AS (
SELECT LEFT(productName,1), RIGHT(productName, LEN(productName)-1)
FROM Products WHERE LEN(productName)>0
UNION ALL
SELECT LEFT(remain,1), RIGHT(remain, LEN(remain)-1) FROM ProductChars
WHERE LEN(remain)>0
)
SELECT aChar, COUNT(*) FROM ProductChars
GROUP BY aChar
To combine them all to a single row, (as stated in the question), change the final SELECT to
SELECT aChar AS [text()] FROM
(SELECT DISTINCT aChar FROM ProductChars) base
FOR XML PATH('')
The above uses a nice hack I found here, which emulates the GROUP_CONCAT from MySQL.
The first level of recursion is unrolled so that the query doesn't return empty strings in the output.
Use this (shall work on any CTE-capable RDBMS):
select x.v into prod from (values('product1'),('widget2'),('nicknack3')) as x(v);
Test Query:
with a as
(
select v, '' as x, 0 as n from prod
union all
select v, substring(v,n+1,1) as x, n+1 as n from a where n < len(v)
)
select v, x, n from a -- where n > 0
order by v, n
option (maxrecursion 0)
Final Query:
with a as
(
select v, '' as x, 0 as n from prod
union all
select v, substring(v,n+1,1) as x, n+1 as n from a where n < len(v)
)
select distinct x from a where n > 0
order by x
option (maxrecursion 0)
Oracle version:
with a(v,x,n) as
(
select v, '' as x, 0 as n from prod
union all
select v, substr(v,n+1,1) as x, n+1 as n from a where n < length(v)
)
select distinct x from a where n > 0
Given that your column is varchar, it means it can only store characters from codes 0 to 255, on whatever code page you have. If you only use the 32-128 ASCII code range, then you can simply see if you have any of the characters 32-128, one by one. The following query does that, looking in sys.objects.name:
with cteDigits as (
select 0 as Number
union all select 1 as Number
union all select 2 as Number
union all select 3 as Number
union all select 4 as Number
union all select 5 as Number
union all select 6 as Number
union all select 7 as Number
union all select 8 as Number
union all select 9 as Number)
, cteNumbers as (
select U.Number + T.Number*10 + H.Number*100 as Number
from cteDigits U
cross join cteDigits T
cross join cteDigits H)
, cteChars as (
select CHAR(Number) as Char
from cteNumbers
where Number between 32 and 128)
select cteChars.Char as [*]
from cteChars
cross apply (
select top(1) *
from sys.objects
where CHARINDEX(cteChars.Char, name, 0) > 0) as o
for xml path('');
If you have a Numbers or Tally table which contains a sequential list of integers you can do something like:
Select Distinct '' + Substring(Products.ProductName, N.Value, 1)
From dbo.Numbers As N
Cross Join dbo.Products
Where N.Value <= Len(Products.ProductName)
For Xml Path('')
If you are using SQL Server 2005 and beyond, you can generate your Numbers table on the fly using a CTE:
With Numbers As
(
Select Row_Number() Over ( Order By c1.object_id ) As Value
From sys.columns As c1
Cross Join sys.columns As c2
)
Select Distinct '' + Substring(Products.ProductName, N.Value, 1)
From Numbers As N
Cross Join dbo.Products
Where N.Value <= Len(Products.ProductName)
For Xml Path('')
Building on mdma's answer, this version gives you a single string, but decodes some of the changes that FOR XML will make, like & -> &.
WITH ProductChars(aChar, remain) AS (
SELECT LEFT(productName,1), RIGHT(productName, LEN(productName)-1)
FROM Products WHERE LEN(productName)>0
UNION ALL
SELECT LEFT(remain,1), RIGHT(remain, LEN(remain)-1) FROM ProductChars
WHERE LEN(remain)>0
)
SELECT STUFF((
SELECT N'' + aChar AS [text()]
FROM (SELECT DISTINCT aChar FROM Chars) base
ORDER BY aChar
FOR XML PATH, TYPE).value(N'.[1]', N'nvarchar(max)'),1, 1, N'')
-- Allow for a lot of recursion. Set to 0 for infinite recursion
OPTION (MAXRECURSION 365)