Let's say I have a table of persons with ids and names as follows
[Person]
ID NAME
================
1 Michael
2 Michelle
3 Emma
4 Evan
5 Ellen
6 Gary
I want to count the number of persons based on the first characters of their names.
Here's the output I expect
NUMBER_OF_PERSONS
=================
2 //M = Michael and Michelle
3 //E = Emma, Evan and Ellen
1 //G = Gary
How do I achieve this in Oracle?
And here's my query
select count(id) as number_of_person
from person
where substr(name) in (select distinct substr(name,1,1) from person);
You can acheive that purpose using below solution.
with Person (ID, NAME ) as (
select 1, 'Michael' from dual union all
select 2, 'Michelle' from dual union all
select 3, 'Emma' from dual union all
select 4, 'Evan' from dual union all
select 5, 'Ellen' from dual union all
select 6, 'Gary' from dual
)
select count(*) || ' //' || substr(NAME, 1, 1) || ' = ' ||
case
when regexp_count( listagg(NAME, ' and ') within group ( order by ID ), ' and ') > 1
then regexp_replace( listagg(NAME, ', ') within group ( order by ID ), ', ([^,]+)$', ' and \1 ', 1, 1 )
else listagg(NAME, ' and ') within group ( order by ID )
end NUMBER_OF_PERSONS
from Person
group by substr(NAME, 1, 1)
order by substr(NAME, 1, 1)
;
db<>fiddle
If you just want the count, you would use group by:
select substr(name, 1, 1) as first_letter,
count(*) as number_of_person
from person
group by substr(name, 1, 1) ;
If, in addition, you actually wanted the list of names, you could put that in another column, assuming there are not too many:
select substr(name, 1, 1) as first_letter,
count(*) as number_of_person,
listagg(name, ', ') within group (order by name) as names
from person
group by substr(name, 1, 1) ;
This is my solution to it:
WITH tbl AS (
SELECT 1 AS ID, 'Michael' AS NAME FROM dual UNION
SELECT 2, 'Michelle' FROM dual UNION
SELECT 3, 'Emma' FROM dual UNION
SELECT 4, 'Evan' FROM dual UNION
SELECT 5, 'Ellen' FROM dual UNION
SELECT 6, 'Gary' FROM dual
)
SELECT COUNT(1)
, SUBSTR(names.name,1,1)
, REGEXP_REPLACE((listagg(names.name,', ') WITHIN GROUP (ORDER BY names.name)), ',([^,]*)$', ' and \1')
FROM tbl names
GROUP BY SUBSTR(names.name,1,1);
Related
I have written a query to get values in comma separated format from both the table
Table 1 :
SELECT
regex_replace(xmlcast(Xmlagg(XMLELEMENT(empid, empid, ',')) AS clob), '\s*,\s*$', '') AS str1
FROM
(SELECT empid
FROM employee);
Table 2:
SELECT
regex_replace(xmlcast(Xmlagg(XMLELEMENT(depid, depid, ',')) AS clob), '\s*,\s*$', '') AS str2
FROM
(SELECT depid
FROM department);
Output of both queries:
str1 = 1,4,5,6,8
str2 = 1,5,6
How do I compare both the str1 and str2 and get ids which are in str1 but not in str2
Expected output: 4,8
You do not need to compare the delimited strings, you can simply use NOT IN (or NOT EXISTS) and compare the table values:
SELECT regexp_replace( xmlcast(Xmlagg(XMLELEMENT(empid,empid,',')) as clob),'\s*,\s*$','') AS str1
FROM employee
WHERE empid NOT IN (
SELECT depid
FROM department
);
However, you should consider whether it makes sense to compare the IDs for employees to the IDs for departments as that does not appear to make logical sense.
For the sample data:
CREATE TABLE employee (empid) AS
SELECT 1 FROM DUAL UNION ALL
SELECT 4 FROM DUAL UNION ALL
SELECT 5 FROM DUAL UNION ALL
SELECT 6 FROM DUAL UNION ALL
SELECT 8 FROM DUAL;
CREATE TABLE department (depid) AS
SELECT 1 FROM DUAL UNION ALL
SELECT 5 FROM DUAL UNION ALL
SELECT 6 FROM DUAL;
The query outputs:
STR1
4,8
db<>fiddle here
Another approach considering that both str1 and str2 are coming from different tables. Although I consider the comments more than right that this kind of comparison should not be done this way.
with x as
(
select regexp_substr(x.str1,'[^,]+',1,level) as str1_spit
from ( select '1,4,5,6,8' as str1 from dual ) x
CONNECT BY LEVEL <=REGEXP_COUNT(x.str1 ,'[,]') + 1
),
y as
( select regexp_substr(y.str2,'[^,]+',1,level) as str2_spit
from ( select '1,5,6' as str2 from dual ) y
CONNECT BY LEVEL <=REGEXP_COUNT(y.str2 ,'[,]') + 1
)
select LISTAGG(str1_spit, ',') WITHIN GROUP (order by str1_spit) as final_value
from
(
select x.str1_spit , y.str2_spit
from x left join y on x.str1_spit = y.str2_spit
where y.str2_spit is null
order by x.str1_spit
)
Demo
SQL> with x as
2 (
select regexp_substr(x.str1,'[^,]+',1,level) as str1_spit
from ( select '1,4,5,6,8' as str1 from dual ) x
CONNECT BY LEVEL <=REGEXP_COUNT(x.str1 ,'[,]') + 1
),
y as
( select regexp_substr(y.str2,'[^,]+',1,level) as str2_spit
from ( select '1,5,6' as str2 from dual ) y
CONNECT BY LEVEL <=REGEXP_COUNT(y.str2 ,'[,]') + 1
)
select LISTAGG(str1_spit, ',') WITHIN GROUP (order by str1_spit) as final_value
from
(
select x.str1_spit , y.str2_spit
from x left join y on x.str1_spit = y.str2_spit
where y.str2_spit is null
order by x.str1_spit
) 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 ;
FINAL_VALUE
--------------------------------------------------------------------------------
4,8
SQL>
If you must do it by comparing strings (don't, use NOT IN or NOT EXISTS and compare the tables) then you can do it by only splitting one of the two strings and using simple string functions (rather than regular expressions, which are an order-of-magnitude slower):
WITH data (str1, str2) AS (
SELECT TO_CLOB('1,4,5,6,8'),
TO_CLOB('1,5,6')
FROM DUAL
),
bounds (str1, str2, spos, epos) AS (
SELECT str1,
str2,
1,
INSTR(str1, ',', 1)
FROM data
UNION ALL
SELECT str1,
str2,
epos + 1,
INSTR(str1, ',', epos + 1)
FROM bounds
WHERE epos > 0
),
items (item, str2) AS (
SELECT CASE epos
WHEN 0
THEN SUBSTR(str1, spos)
ELSE SUBSTR(str1, spos, epos - spos)
END,
',' || str2 || ','
FROM bounds
ORDER BY spos
)
SELECT regexp_replace(
xmlcast(Xmlagg(XMLELEMENT(item,item,',')) as clob),
'\s*,\s*$'
) AS str3
FROM items
WHERE str2 NOT LIKE '%,' || item || ',%';
Which outputs:
STR3
4,8
db<>fiddle here
I have a table as the following
name
-----------
1#apple#1
2#apple#2
3#apple#4
4#box#4
5#box#5
and I want to get the result as:
name
--------------
apple 3
box 2
Thanks in advance for your help
This is what you need.
select
SUBSTRING(
name,
CHARINDEX('#', name) + 1,
LEN(name) - (
CHARINDEX('#', REVERSE(name)) + CHARINDEX('#', name)
)
),
count(1)
from
tbl
group by
SUBSTRING(
name,
CHARINDEX('#', name) + 1,
LEN(name) - (
CHARINDEX('#', REVERSE(name)) + CHARINDEX('#', name)
)
)
If your data does not contain any full stops (or periods depending on your vernacular), and the length of your string is less than 128 characters, then you can use PARSENAME to effectively split your string into parts, and extract the 2nd part:
DECLARE #T TABLE (Val VARCHAR(20));
INSERT #T (Val)
VALUES ('1#apple#1'), ('2#apple#2'), ('3#apple#4'),
('4#box#4'), ('5#box#5');
SELECT Val = PARSENAME(REPLACE(t.Val, '#', '.'), 2),
[Count] = COUNT(*)
FROM #T AS t
GROUP BY PARSENAME(REPLACE(t.Val, '#', '.'), 2);
Otherwise you will need to use CHARINDEX to find the first and last occurrence of # within your string (REVERSE is also needed to get the last position), then use SUBSTRING to extract the text between these positions:
DECLARE #T TABLE (Val VARCHAR(20));
INSERT #T (Val)
VALUES ('1#apple#1'), ('2#apple#2'), ('3#apple#4'),
('4#box#4'), ('5#box#5');
SELECT Val = SUBSTRING(t.Val, x.FirstPosition + 1, x.LastPosition - x.FirstPosition),
[Count] = COUNT(*)
FROM #T AS t
CROSS APPLY
( SELECT CHARINDEX('#', t.Val) ,
LEN(t.Val) - CHARINDEX('#', REVERSE(t.Val))
) AS x (FirstPosition, LastPosition)
GROUP BY SUBSTRING(t.Val, x.FirstPosition + 1, x.LastPosition - x.FirstPosition);
use case when
select case when name like '%apple%' then 'apple'
when name like '%box%' then 'box' end item_name,
count(*)
group by cas when name like '%apple%' then 'apple'
when name like '%box%' then 'box' end
No DBMS specified, so here is a postgres variant. The query does use regexps to simplify things a bit.
with t0 as (
select '1#apple#1' as value
union all select '2#apple#2'
union all select '3#apple#4'
union all select '4#box#4'
union all select '5#box#5'
),
trimmed as (
select regexp_replace(value,'[0-9]*#(.+?)#[0-9]*','\1') as name
from t0
)
select name, count(*)
from trimmed
group by name
order by name
DB Fiddle
Update
For Oracle DMBS, the query stays basically the same:
with t0 as (
select '1#apple#1' as value from dual
union all select '2#apple#2' from dual
union all select '3#apple#4' from dual
union all select '4#box#4' from dual
union all select '5#box#5' from dual
),
trimmed as (
select regexp_replace(value,'[0-9]*#(.+?)#[0-9]*','\1') as name
from t0
)
select name, count(*)
from trimmed
group by name
order by name
NAME | COUNT(*)
:---- | -------:
apple | 3
box | 2
db<>fiddle here
Update
MySQL 8.0
with t0 as (
select '1#apple#1' as value
union all select '2#apple#2'
union all select '3#apple#4'
union all select '4#box#4'
union all select '5#box#5'
),
trimmed as (
select regexp_replace(value,'[0-9]*#(.+?)#[0-9]*','$1') as name
from t0
)
select name, count(*)
from trimmed
group by name
order by name
name | count(*)
:---- | -------:
apple | 3
box | 2
db<>fiddle here
You can use case and group by to do the same.
select new_col , count(new_col)
from
(
select case when col_name like '%apple%' then 'apple'
when col_name like '%box%' then 'box'
else 'others' end new_col
from table_name
)
group by new_col
;
I'm looking at text sequences in BigQuery and trying to identify word completions over a number of rows (sharing an ID). The data looks like:
ID, Text
1, t
1, th
1, the
1, the
1, the c
1, the ca
1, the cat
1, the cat
1, the cat s
...
1, the cat sat on the mat
2, r
...
For each given ID and sequence i'm trying to find the next word boundary. So the ideal output would be:
ID, Text, Boundary
1, t, the
1, th, the
1, the c, the cat
1, the ca, the cat
1, the cat s, the cat sat
In the above the next subsequent row that both shares an ID and ends in a space gives the next (there can be multiple) word completion boundary.
Below is for BigQuery Standard SQL
Note: it is brute force approach so query is not that elegant as potentially can be - but hope this will give you good start
#standardSQL
SELECT id, item, boundary
FROM (
SELECT id, grp,
STRING_AGG(IF(boundary, text, ''), '') boundary,
ARRAY_AGG(IF(NOT boundary, text, NULL) IGNORE NULLS ORDER BY LENGTH(text)) items
FROM (
SELECT id, text,
LENGTH(text) - LENGTH(REPLACE(text, ' ', '')) - IF(SUBSTR(text, -1) = ' ', 1, 0) grp,
SUBSTR(text, -1) = ' ' boundary
FROM `project.dataset.table`
)
GROUP BY id, grp
), UNNEST(items) item WITH OFFSET pos
WHERE RTRIM(item) != RTRIM(boundary)
if to apply to dummy data in your question as below
#standardSQL
WITH `project.dataset.table` AS (
SELECT 1 id, 't' text UNION ALL
SELECT 1, 'th' UNION ALL
SELECT 1, 'the' UNION ALL
SELECT 1, 'the ' UNION ALL
SELECT 1, 'the c' UNION ALL
SELECT 1, 'the ca' UNION ALL
SELECT 1, 'the cat' UNION ALL
SELECT 1, 'the cat ' UNION ALL
SELECT 1, 'the cat s' UNION ALL
SELECT 1, 'the cat sat '
)
SELECT id, item, boundary
FROM (
SELECT id, grp,
STRING_AGG(IF(boundary, text, ''), '') boundary,
ARRAY_AGG(IF(NOT boundary, text, NULL) IGNORE NULLS ORDER BY LENGTH(text)) items
FROM (
SELECT id, text,
LENGTH(text) - LENGTH(REPLACE(text, ' ', '')) - IF(SUBSTR(text, -1) = ' ', 1, 0) grp,
SUBSTR(text, -1) = ' ' boundary
FROM `project.dataset.table`
)
GROUP BY id, grp
), UNNEST(items) item WITH OFFSET pos
WHERE RTRIM(item) != RTRIM(boundary)
ORDER BY id, grp, pos
result is
Row id item boundary
1 1 t the
2 1 th the
3 1 the c the cat
4 1 the ca the cat
5 1 the cat s the cat sat
BigQuery UDF's come in handy in these situations. Here is a working solution:
#standardSQL
/*boundary function*/
create temp function boundaryf (text string, sentence string) as (
array_to_string(array(
select q.w from unnest(
array(select struct(w as w, row_number() over () as i) from unnest(split(sentence, ' ')) w
)
) q
-- respect the ending space
where q.i <= array_length(split(text, ' ')) - (length(text) - length(rtrim(text)))
), ' ')
);
WITH items AS (
#--your data. assuming this is already ordered
SELECT 1 as id, 't' as text UNION ALL
SELECT 1, 'th' UNION ALL
SELECT 1, 'the' UNION ALL
SELECT 1, 'the ' UNION ALL
SELECT 1, 'the c' UNION ALL
SELECT 1, 'the ca' UNION ALL
SELECT 1, 'the cat' UNION ALL
SELECT 1, 'the cat ' UNION ALL
SELECT 1, 'the cat s' UNION ALL
SELECT 1, 'the cat sa' union all
SELECT 1, 'the cat sat' union all
SELECT 1, 'the cat sat ' union all
SELECT 1, 'the cat sat o' union all
SELECT 1, 'the cat sat on' union all
SELECT 1, 'the cat sat on ' union all
SELECT 1, 'the cat sat on a' union all
SELECT 1, 'the cat sat on a ' union all
SELECT 1, 'the cat sat on a m' union all
SELECT 1, 'the cat sat on a ma' union all
SELECT 1, 'the cat sat on a mat' union all
select 2, 'i' union all
select 2, 'i a' union all
select 2, 'i am' union all
select 2, 'i am f' union all
select 2, 'i am fr' union all
select 2, 'i am fre' union all
select 2, 'i am free'
),
sentences as (
select id, sentences[offset (array_length(sentences)-1)] as sentence from (
select id, array_agg(text) as sentences
from items group by 1
)
),
control as (
select i.id, i.text, boundaryf(i.text, s.sentence) as boundary
from items i
left join sentences s on s.id = i.id
)
select * from control
I need to convert the following
Column 1 Column 2
ABC, Company ABC Company
TA. Comp TA Comp
How can I get Column2 in sql where I am removing all ',' '.' to space.
How about:
with testdata as (
select 'ABC, Company Inc.' as col1 from dual
union all
select 'TA. Comp' as col1 from dual
)
select trim(regexp_replace(regexp_replace(col1, '[[:punct:]]',' '), ' {2,}', ' ')) as col2
from testdata;
Output:
ABC Company Inc
TA Comp
Assuming punctuation is what you're trying to blank out.
You can try to use:
SELECT REGEXP_REPLACE(column1, '[^a-zA-Z0-9 ]+', '')
FROM DUAL
with t (val) as
(
select 'ABC,. Cmpany' from dual union all
select 'A, VC' from dual union all
select 'A,, BC...com' from dual
)
select
val,
replace(replace(val, ',', ''), '.', '') x , -- one way
regexp_replace(val, '[,.]', '') y -- another way
from t
;
VAL X Y
--------------- ---------- ----------
ABC,. Cmpany ABC Cmpany ABC Cmpany
A, VC A VC A VC
A,, BC...com A BCcom A BCcom
In Oracle, I have columns called orderids
orderids
111,222,333
444,55,66
77,77
How can get the output as
Orderid
111
222
333
444
55
66
77
77
Try this:
WITH TT AS
(SELECT orderid COL1 FROM orders)
SELECT substr(str,
instr(str, ',', 1, LEVEL) + 1,
instr(str, ',', 1, LEVEL + 1) -
instr(str, ',', 1, LEVEL) - 1) COL1
FROM (SELECT rownum AS r,
','|| COL1||',' AS STR
FROM TT )
CONNECT BY PRIOR r = r
AND instr(str, ',', 1, LEVEL + 1) > 0
AND PRIOR dbms_random.STRING('p', 10) IS NOT NULL
;
See this SQLFiddle
This is one appraoch:
with order_table as (
select '111,222,333' as orderids from dual
union all select '444,55,66' from dual
union all select '77,77' from dual
)
select substr(orderids, instr(orderids, ',', 1, lvl) + 1, instr(orderids, ',', 1, lvl + 1) - instr(orderids, ',', 1, lvl) - 1) orderid
from
( select ',' || orderids || ',' as orderids from order_table ),
( select level as lvl from dual connect by level <= 100 )
where lvl <= length(orderids) - length(replace(orderids, ',')) - 1;
Just remove the WITH clause and replace the order_table with your real table.
This too might help you,
with t(orderid) as
(
SELECT '111,222,333' FROM dual
UNION
SELECT '444,55,66' FROM dual
UNION
SELECT '177,77' FROM dual
)
SELECT trim(x.COLUMN_VALUE.EXTRACT('e/text()')) cols
FROM t t, TABLE (xmlsequence(XMLTYPE('<e><e>' || REPLACE(t.orderid,',','</e><e>')|| '</e></e>').EXTRACT('e/e'))) x;
instr(','||NVL('972414AQ,972414AQ',I.CUSIP)||',', ','||I.CUSIP||',') > 0
This is the actual query I was looking for.