String Aggregation in ORACLE 10g with three columns - sql

This is a sample table data
Date | Fruit | Number
-----------------------
1 | Apple | 1
1 | Apple | 2
1 | Apple | 3
1 | Kiwi | 6
1 | Kiwi | 10
2 | Apple | 4
2 | Apple | 5
2 | Apple | 6
2 | Kiwi | 4
2 | Kiwi | 7
I try to concatenate the table column values to get the following:
Date | Fruit | Number
-----------------------
1 | Apple | 1-2-3
1 | Kiwi | 6-10
2 | Apple | 4-5-6
2 | Kiwi | 4-7
Code that I use:
-- Asker's attempt: walks rows in curr/prev order with CONNECT BY and keeps the
-- longest comma-separated path of "number" values for each group.
-- NOTE(review): DATE and NUMBER are Oracle reserved words; the real column names
-- are presumably different - confirm.
SELECT fruit,
LTRIM( MAX(SYS_CONNECT_BY_PATH(number,','))
KEEP (DENSE_RANK LAST ORDER BY curr), ',') AS fruits_agg
FROM( SELECT Date,
fruit,
number,
-- NOTE(review): rows are numbered per fruit only, while GROUP BY and CONNECT BY
-- below also use Date, so the curr/prev chain is not restarted for each date.
ROW_NUMBER() OVER (PARTITION BY fruit ORDER BY number) AS curr,
ROW_NUMBER() OVER (PARTITION BY fruit ORDER BY number) - 1 AS prev
FROM table_name)
GROUP BY Date,fruit
CONNECT BY prev = PRIOR curr AND fruit = PRIOR fruit AND Date = PRIOR Date
START WITH curr = 1;
It doesn't work the way I want it to. What did I do wrong?
PS: I'm on version 10g, so I can't use listagg.

For Oracle 10, using your approach, the issue is the partitioning in your inner query: the row numbers must be partitioned by both the date and the fruit, not by the fruit alone.
-- Oracle 10g string aggregation: one row per (fdate, fruit) with its num values
-- concatenated in ascending order.
WITH tab AS (
    -- Sample data. UNION ALL keeps every row as written; the rows are already
    -- distinct, so the duplicate-eliminating UNION was unnecessary.
    SELECT 1 AS fdate, 'Apple' AS fruit, 1 AS num FROM dual UNION ALL
    SELECT 1 AS fdate, 'Apple' AS fruit, 2 AS num FROM dual UNION ALL
    SELECT 1 AS fdate, 'Apple' AS fruit, 3 AS num FROM dual UNION ALL
    SELECT 1 AS fdate, 'Kiwi' AS fruit, 6 AS num FROM dual UNION ALL
    SELECT 1 AS fdate, 'Kiwi' AS fruit, 10 AS num FROM dual UNION ALL
    SELECT 2 AS fdate, 'Apple' AS fruit, 4 AS num FROM dual UNION ALL
    SELECT 2 AS fdate, 'Apple' AS fruit, 5 AS num FROM dual UNION ALL
    SELECT 2 AS fdate, 'Apple' AS fruit, 6 AS num FROM dual UNION ALL
    SELECT 2 AS fdate, 'Kiwi' AS fruit, 4 AS num FROM dual UNION ALL
    SELECT 2 AS fdate, 'Kiwi' AS fruit, 7 AS num FROM dual
)
SELECT fdate,
       fruit,
       -- The path starts with the separator, so LTRIM strips the leading ','.
       -- To use another separator, change it in both SYS_CONNECT_BY_PATH and LTRIM.
       LTRIM(MAX(SYS_CONNECT_BY_PATH(num, ','))
             KEEP (DENSE_RANK LAST ORDER BY curr), ',') AS fruits_agg
FROM (SELECT fdate,
             fruit,
             num,
             -- Number the rows within each (fdate, fruit) group; prev = curr - 1
             -- links every row to its predecessor in the same group.
             ROW_NUMBER() OVER (PARTITION BY fdate, fruit ORDER BY num) AS curr,
             ROW_NUMBER() OVER (PARTITION BY fdate, fruit ORDER BY num) - 1 AS prev
      FROM tab)
GROUP BY fdate, fruit
CONNECT BY prev = PRIOR curr AND fruit = PRIOR fruit AND fdate = PRIOR fdate
START WITH curr = 1;
Gives:
FDATE FRUIT FRUITS_AGG
1 "Kiwi" "6,10"
1 "Apple" "1,2,3"
2 "Kiwi" "4,7"
2 "Apple" "4,5,6"
The Oracle 11 solution is a whole lot easier:
-- Oracle 11g+: LISTAGG concatenates num per (fdate, fruit), ordered by num,
-- with '-' between the values.
WITH tab AS (
    -- Sample data. UNION ALL keeps every row as written; the rows are already
    -- distinct, so the duplicate-eliminating UNION was unnecessary.
    SELECT 1 AS fdate, 'Apple' AS fruit, 1 AS num FROM dual UNION ALL
    SELECT 1 AS fdate, 'Apple' AS fruit, 2 AS num FROM dual UNION ALL
    SELECT 1 AS fdate, 'Apple' AS fruit, 3 AS num FROM dual UNION ALL
    SELECT 1 AS fdate, 'Kiwi' AS fruit, 6 AS num FROM dual UNION ALL
    SELECT 1 AS fdate, 'Kiwi' AS fruit, 10 AS num FROM dual UNION ALL
    SELECT 2 AS fdate, 'Apple' AS fruit, 4 AS num FROM dual UNION ALL
    SELECT 2 AS fdate, 'Apple' AS fruit, 5 AS num FROM dual UNION ALL
    SELECT 2 AS fdate, 'Apple' AS fruit, 6 AS num FROM dual UNION ALL
    SELECT 2 AS fdate, 'Kiwi' AS fruit, 4 AS num FROM dual UNION ALL
    SELECT 2 AS fdate, 'Kiwi' AS fruit, 7 AS num FROM dual
)
SELECT fdate,
       fruit,
       LISTAGG(num, '-') WITHIN GROUP (ORDER BY num) AS fruit_agg
FROM tab
GROUP BY fdate, fruit
Returns:
FDATE FRUIT FRUIT_AGG
1 Kiwi 6-10
1 Apple 1-2-3
2 Kiwi 4-7
2 Apple 4-5-6

Related

How to group-by in Oracle

I have a table like [Original] in below.
I want to sum by group-by field like [result].
Does anyone have an idea to make this query?
Thank you in advance for your help.
-- Sample data for the question: nine (ID, FIELD, VAL) rows.
-- Column names come from the first branch of the UNION.
WITH t1 AS (
    SELECT 1 AS ID, 'A' AS FIELD, 1 AS VAL FROM dual UNION
    SELECT 2, 'A', 2 FROM dual UNION
    SELECT 3, 'A', 1 FROM dual UNION
    SELECT 4, 'B', 2 FROM dual UNION
    SELECT 5, 'B', 2 FROM dual UNION
    SELECT 6, 'B', 1 FROM dual UNION
    SELECT 7, 'A', 3 FROM dual UNION
    SELECT 8, 'A', 2 FROM dual UNION
    SELECT 9, 'A', 1 FROM dual
)
SELECT t1.*
  FROM t1
[Original Data]
ID FIELD VAL
1 A 1
2 A 2
3 A 1
4 B 2
5 B 2
6 B 1
7 A 3
8 A 2
9 A 1
[Result]
ID FIELD VAL
1 A 4
4 B 5
7 A 6
This is a gaps-and-islands problem, and you can solve it with analytic functions as follows:
SQL> WITH t1 as (
2 SELECT 1 AS ID, 'A' AS FIELD, 1 AS VAL FROM dual
3 UNION SELECT 2 AS ID, 'A' AS FIELD, 2 AS VAL FROM dual
4 UNION SELECT 3 AS ID, 'A' AS FIELD, 1 AS VAL FROM dual
5 UNION SELECT 4 AS ID, 'B' AS FIELD, 2 AS VAL FROM dual
6 UNION SELECT 5 AS ID, 'B' AS FIELD, 2 AS VAL FROM dual
7 UNION SELECT 6 AS ID, 'B' AS FIELD, 1 AS VAL FROM dual
8 UNION SELECT 7 AS ID, 'A' AS FIELD, 3 AS VAL FROM dual
9 UNION SELECT 8 AS ID, 'A' AS FIELD, 2 AS VAL FROM dual
10 UNION SELECT 9 AS ID, 'A' AS FIELD, 1 AS VAL FROM dual
11 )
12 SELECT MIN(ID) AS ID, FIELD, SUM(VAL) -- one output row per group: lowest ID, FIELD, total VAL
13 FROM (SELECT T1.*,
14 SUM(CASE WHEN LAG_FIELD = FIELD THEN 0 ELSE 1 END) -- 1 when FIELD differs from the previous row's, or there is no previous row
15 OVER (ORDER BY ID) AS SM -- running total in ID order; stays constant within a run of equal FIELD values
16 FROM (SELECT T1.*,
17 LAG(FIELD) OVER (ORDER BY ID) AS LAG_FIELD -- FIELD of the preceding row in ID order
18 FROM t1
19 ) T1
20 )
21 GROUP BY FIELD, SM -- rows sharing FIELD and SM are summed together
22 ORDER BY 1; -- position 1 is the first select-list column, MIN(ID)
ID F SUM(VAL)
---------- - ----------
1 A 4
4 B 5
7 A 6
SQL>
This is indeed a gaps-and-islands problem. I think the simplest approach here is to use the difference between row numbers to identify groups of adjacent rows:
-- Gaps-and-islands by row-number difference: the overall row number minus the
-- per-field row number stays constant across adjacent rows with the same field.
SELECT MIN(numbered.id) AS id,
       numbered.field,
       SUM(numbered.val) AS val
  FROM (SELECT src.*,
               ROW_NUMBER() OVER (ORDER BY id)
                 - ROW_NUMBER() OVER (PARTITION BY field ORDER BY id) AS island_key
          FROM t1 src) numbered
 GROUP BY numbered.field, numbered.island_key
 ORDER BY MIN(numbered.id)
If id is always incrementing without gaps, this is even simpler:
-- Variant that uses id itself in place of the overall row number:
-- id minus the per-field row number identifies each group of adjacent rows.
SELECT MIN(numbered.id) AS id,
       numbered.field,
       SUM(numbered.val) AS val
  FROM (SELECT src.*,
               id - ROW_NUMBER() OVER (PARTITION BY field ORDER BY id) AS island_key
          FROM t1 src) numbered
 GROUP BY numbered.field, numbered.island_key
 ORDER BY MIN(numbered.id)
From Oracle 12, you can do it quite simply using MATCH_RECOGNIZE:
-- Oracle 12c+: collapse each run of consecutive rows (in id order) that share
-- the same field into one row carrying the first id, the field and the summed val.
WITH t1 AS (
    -- Sample data; column names come from the first branch of the UNION.
    SELECT 1 AS ID, 'A' AS FIELD, 1 AS VAL FROM dual UNION
    SELECT 2, 'A', 2 FROM dual UNION
    SELECT 3, 'A', 1 FROM dual UNION
    SELECT 4, 'B', 2 FROM dual UNION
    SELECT 5, 'B', 2 FROM dual UNION
    SELECT 6, 'B', 1 FROM dual UNION
    SELECT 7, 'A', 3 FROM dual UNION
    SELECT 8, 'A', 2 FROM dual UNION
    SELECT 9, 'A', 1 FROM dual
)
SELECT id, field, total
  FROM t1
       MATCH_RECOGNIZE (
           ORDER BY id
           MEASURES FIRST(id)    AS id,
                    FIRST(field) AS field,
                    SUM(val)     AS total
           ONE ROW PER MATCH
           PATTERN (same_field+)
           -- a row extends the match while its field equals the field of the match's first row
           DEFINE same_field AS field = FIRST(field)
       )
Which outputs:
ID | FIELD | TOTAL
-: | :---- | ----:
1 | A | 4
4 | B | 5
7 | A | 6
db<>fiddle here

Select rows when a value appears multiple times

I have a table like this one:
+------+------+
| ID | Cust |
+------+------+
| 1 | A |
| 1 | A |
| 1 | B |
| 1 | B |
| 2 | A |
| 2 | A |
| 2 | A |
| 2 | B |
| 3 | A |
| 3 | B |
| 3 | B |
+------+------+
I would like to get the IDs that have at least two times A and two times B. So in my example, the query should return only the ID 1,
Thanks!
In MySQL:
-- MySQL: glue each id's cust values together in sorted order, then keep the ids
-- whose string contains "aa" followed later by "bb".
SELECT per_id.id
  FROM (SELECT id,
               GROUP_CONCAT(cust ORDER BY cust SEPARATOR '') AS custs
          FROM test
         GROUP BY id) AS per_id
 WHERE per_id.custs LIKE '%aa%bb%'
In Oracle
-- Oracle: concatenate each id's cust values in sorted order, then keep the ids
-- whose string contains two A's followed later by two B's.
WITH cte AS (
    SELECT id,
           LISTAGG(cust, '') WITHIN GROUP (ORDER BY cust) AS custs
      FROM test
     GROUP BY id
)
SELECT id
  FROM cte
 -- LIKE is case-sensitive in Oracle and the sample data stores 'A'/'B', so the
 -- comparison is made on the upper-cased string; lower-case data still matches.
 WHERE UPPER(custs) LIKE '%AA%BB%'
I would just use two levels of aggregation:
-- Step 1: count the rows per (id, cust) for customers A and B only.
WITH per_customer AS (
    SELECT id,
           cust,
           COUNT(*) AS occurrences
      FROM t
     WHERE cust = 'A' OR cust = 'B'
     GROUP BY id, cust
)
-- Step 2: keep the ids where both customers are present and the less frequent
-- one still occurs at least twice.
SELECT id
  FROM per_customer
 GROUP BY id
HAVING MIN(occurrences) >= 2
   AND COUNT(*) = 2
This is one option; lines #1 - 13 represent sample data. Query you might be interested in begins at line #14.
SQL> with test (id, cust) as
2 (select 1, 'a' from dual union all
3 select 1, 'a' from dual union all
4 select 1, 'b' from dual union all
5 select 1, 'b' from dual union all
6 select 2, 'a' from dual union all
7 select 2, 'a' from dual union all
8 select 2, 'a' from dual union all
9 select 2, 'b' from dual union all
10 select 3, 'a' from dual union all
11 select 3, 'b' from dual union all
12 select 3, 'b' from dual
13 )
14 select id
15 from (select
16 id,
17 sum(case when cust = 'a' then 1 else 0 end) suma, -- number of 'a' rows for this id
18 sum(case when cust = 'b' then 1 else 0 end) sumb -- number of 'b' rows for this id
19 from test
20 group by id
21 )
22 where suma >= 2 -- "at least two", so >= rather than =
23 and sumb >= 2;
ID
----------
1
SQL>
You can use group by and having for the relevant Cust ('A' , 'B')
And query twice (I chose to use with to avoid multiple selects and to cache it)
-- (Id, Cust) pairs for customers A and B that occur at least twice.
with more_than_2 as
(
select Id, Cust, count(*) c
from tab
where Cust in ('A', 'B')
group by Id, Cust
having count(*) >= 2
)
-- Return each qualifying Id once. SELECT * returned every row of tab for those
-- ids, i.e. the same Id repeated once per source row.
select distinct tab.Id
from tab
where exists ( select 1 from more_than_2 where more_than_2.Id = tab.Id and more_than_2.Cust = 'A')
and exists ( select 1 from more_than_2 where more_than_2.Id = tab.Id and more_than_2.Cust = 'B')
What you want is a perfect candidate for match_recognize. Here you go:
-- Ids that have at least two 'A' rows immediately followed (in cust order) by at
-- least two 'B' rows.
select id
from t
match_recognize
(
-- Partition by id so a match can never combine the A rows of one id with the
-- B rows of the next; without it all ids form one ordered stream.
partition by id
order by cust
measures count(*) as matched_rows
one row per match
pattern (A{2,} B{2,})
define A as cust = 'A',
B as cust = 'B'
)
Output:
Regards,
Ranagal

Count the number of times word appears in a single column

I'm attempting to count the number of times apples and oranges appear in my fruit column.
The table looks like this:
Fruit
-------
Apples
Apples Oranges
Apples Oranges
Apples
Oranges
Expected output:
Apples 4
Oranges 3
My code thus far. I'm not sure how to handle rows where both appear, or how to add them to the totals. I'm sure there is an easier way than this.
-- Count the rows mentioning each fruit; a row containing both words is counted
-- in both columns. COUNT ignores the NULL a CASE returns when there is no match.
SELECT
COUNT (CASE WHEN Fruit LIKE '%Apples%' THEN '1' END) AS Apples,
COUNT (CASE WHEN Fruit LIKE '%Oranges%' THEN '1' END) AS Oranges
FROM Fruits
Cheers
If those fruits are single-worded and separated by a space, then such a generic approach might be interesting for you.
Lines #1 - 8 represent sample data; you already have that so you don't type it. Code you might need starts at line #10.
SQL> with fruit (fruit) as
2 -- sample data; you have that in a table
3 (select 'Apples' from dual union all
4 select 'Apples Oranges' from dual union all
5 select 'Apples Oranges' from dual union all
6 select 'Apples Lemon' from dual union all
7 select 'Oranges Plums' from dual
8 ),
9 -- split fruits to rows
10 temp as
11 (select regexp_substr(fruit, '[^ ]+', 1, column_value) fruit -- the column_value-th run of non-space characters
12 from fruit cross join
13 table(cast(multiset(select level from dual -- generates the numbers 1, 2, ... for the current fruit row
14 connect by level <= regexp_count(fruit, ' ') + 1 -- upper bound: number of spaces plus one
15 ) as sys.odcinumberlist))
16 )
17 select fruit, count(*) -- number of occurrences of each split-out word
18 from temp
19 group by fruit
20 order by fruit;
FRUIT COUNT(*)
-------------------------------------------------------- ----------
Apples 4
Lemon 1
Oranges 3
Plums 1
SQL>
Either with sum or count, it works
Connected to:
Oracle Database 12c Enterprise Edition Release 12.2.0.1.0 - 64bit Production
SQL> with t as
-- Sample rows; a row containing both words is counted in both totals.
(
select 'Apples' as fruits from dual union all
select 'Apples Oranges' as fruits from dual union all
select 'Apples Oranges' as fruits from dual union all
select 'Apples' as fruits from dual union all
select 'Oranges' as fruits from dual
) select
-- Numeric 1/0 avoids an implicit string-to-number conversion, and ELSE 0 makes
-- the total 0 rather than NULL when no row matches.
SUM (CASE WHEN fruits LIKE '%Apples%' THEN 1 ELSE 0 END) AS Apples ,
SUM (CASE WHEN fruits LIKE '%Oranges%' THEN 1 ELSE 0 END) AS Oranges
FROM t
;
APPLES ORANGES
---------- ----------
4 3
SQL> WITH t AS (
    -- Sample rows; the column name comes from the first branch.
    SELECT 'Apples' AS fruits FROM dual UNION ALL
    SELECT 'Apples Oranges' FROM dual UNION ALL
    SELECT 'Apples Oranges' FROM dual UNION ALL
    SELECT 'Apples' FROM dual UNION ALL
    SELECT 'Oranges' FROM dual
)
-- COUNT skips the NULL that the CASE yields for rows without the word.
SELECT COUNT(CASE WHEN fruits LIKE '%Apples%' THEN '1' END) AS Apples,
       COUNT(CASE WHEN fruits LIKE '%Oranges%' THEN '1' END) AS Oranges
  FROM t;
APPLES ORANGES
---------- ----------
4 3
SQL>

Oracle - count records if number of token less than 2

I have records like...
ID | KEY
-------|---------
1 | 123_456_abc
1 | 123_xyz
1 | 456_abc
2 | 123_abc
2 | 122_73_zcc
3 | 123_wer
4 | 345_23_fhd
4 | 3453_abc
5 | ad1fr2h3_abcasd
5 | ers2g45bb_abc2rtd
5 | asf23g_abc1_sf45
I want count(ID) where count(tokanize(numeric(KEY),'_')) < 2
As count(ID) will be 6
You can try something like this
-- Count the rows whose key has fewer than two literal underscores. In LIKE, '_'
-- matches any single character, so it is escaped to be taken literally.
SELECT COUNT(ID) FROM xyz WHERE key NOT LIKE '%\_%\_%' ESCAPE '\';
This should keep only the rows whose key contains fewer than two underscores.
Try this :
-- Sample rows (id, key).
WITH abc (id, key) AS (
    SELECT '1', '123_456_abc' FROM dual UNION ALL
    SELECT '1', '123_xyz' FROM dual UNION ALL
    SELECT '1', '456_abc' FROM dual UNION ALL
    SELECT '2', '123_abc' FROM dual UNION ALL
    SELECT '2', '123_73_zcc' FROM dual UNION ALL
    SELECT '3', '123_wer' FROM dual UNION ALL
    SELECT '1', '345_23_fhd' FROM dual UNION ALL
    SELECT '1', '345_abc' FROM dual
),
-- Strip everything except underscores; the remaining length is the underscore count.
underscore_counts AS (
    SELECT key,
           LENGTH(REGEXP_REPLACE(key, '[^_]*', '')) AS cntr
      FROM abc
)
-- Count the keys that contain exactly one underscore.
SELECT COUNT(1)
  FROM underscore_counts
 WHERE cntr = 1
eliminate all records which has more than 1 underscores
then eliminate the ones which do not start with a number
then sum it up
-- Sum the underscore counts of the keys that have fewer than two underscores and
-- start with a digit.
select sum(cnt) from (
select key, cnt, id from (
-- cnt = number of underscores: everything that is not '_' is removed first.
-- NOTE: for a key without any underscore the remainder is empty, which Oracle
-- treats as NULL, so cnt is NULL and the row is dropped by "cnt < 2".
select key, length(regexp_replace(key,'[^_]*','')) cnt, id from table_name
) where cnt < 2
-- Anchored with ^ so only keys that START with a digit pass; the unanchored
-- '[1-9]+(.)*' accepted any key containing a digit 1-9 anywhere.
) where regexp_like(key,'^[0-9]')

How to group data according the condition and sequentially to number these groups?

I have the following data:
-- Input rows (id, amount, code); amount is text that holds either a number or a word.
-- Column names come from the first branch of the UNION ALL.
WITH t AS (
    SELECT 1 AS id, '1324345' AS amount, 7821 AS code FROM dual UNION ALL
    SELECT 2, 'current', 2210 FROM dual UNION ALL
    SELECT 3, 'link', 2210 FROM dual UNION ALL
    SELECT 4, '56236400', 6740 FROM dual UNION ALL
    SELECT 5, '45562330', 5578 FROM dual UNION ALL
    SELECT 6, '34875930', 5828 FROM dual UNION ALL
    SELECT 7, 'current', 8520 FROM dual UNION ALL
    SELECT 8, 'link', 8520 FROM dual UNION ALL
    SELECT 9, '6731347060', 4740 FROM dual UNION ALL
    SELECT 10, '346008600', 6575 FROM dual
)
SELECT t.*
  FROM t
and I want to get the following:
-- Desired result: the input rows plus group_id, which advances whenever amount
-- switches between numeric and text.
-- Column names come from the first branch of the UNION ALL.
WITH t AS (
    SELECT 1 AS id, '1324345' AS amount, 7821 AS code, 1 AS group_id FROM dual UNION ALL
    SELECT 2, 'current', 2210, 2 FROM dual UNION ALL
    SELECT 3, 'link', 2210, 2 FROM dual UNION ALL
    SELECT 4, '56236400', 6740, 3 FROM dual UNION ALL
    SELECT 5, '45562330', 5578, 3 FROM dual UNION ALL
    SELECT 6, '34875930', 5828, 3 FROM dual UNION ALL
    SELECT 7, 'current', 8520, 4 FROM dual UNION ALL
    SELECT 8, 'link', 8520, 4 FROM dual UNION ALL
    SELECT 9, '6731347060', 4740, 5 FROM dual UNION ALL
    SELECT 10, '346008600', 6575, 5 FROM dual
)
SELECT t.*
  FROM t
The condition is the value of "amount" field. It may be number or text.
UPD: Expected result:
id | amount | code | group_id
---------------------------------------------
1 | 1324345 | 7821 | 1
---------------------------------------------
2 | current | 2210 | 2
---------------------------------------------
3 | link | 2210 | 2
---------------------------------------------
4 | 56236400 | 6740 | 3
---------------------------------------------
5 | 45562330 | 5578 | 3
---------------------------------------------
6 | 34875930 | 5828 | 3
---------------------------------------------
7 | current | 8520 | 4
---------------------------------------------
8 | link | 8520 | 4
---------------------------------------------
9 | 6731347060 | 4740 | 5
---------------------------------------------
10 | 346008600 | 6575 | 5
---------------------------------------------
EDIT: best solution:
-- Number consecutive runs of rows (in id order) whose Amount is numeric / not numeric.
-- lg is NULL when isnumeric(Amount) matches the previous row's value and 1 otherwise;
-- DECODE also treats NULL and NULL as a match.
with tmain as
(select t.*,
decode(isnumeric(Amount),
lag(isnumeric(Amount)) over(order by id),
null,
1) lg
from t
order by id)
-- count(lg) skips NULLs, so the running count over id grows only on rows flagged with 1.
select id, amount, code, count(lg) over(order by id) group_id from tmain
Where the isnumeric function is defined as follows (based on @valexhome's answer):
CREATE OR REPLACE FUNCTION ISNUMERIC (Str IN CHAR) RETURN NUMBER AS
-- Tells whether Str can be converted with TO_NUMBER.
-- Returns 1 when the conversion succeeds, 0 when it fails, NULL when Str is NULL.
-- Unconstrained NUMBER: an INT target would make very large numeric strings
-- fail on assignment and be reported as non-numeric.
TMP NUMBER;
BEGIN
if Str is null then
return(null);
end if;
TMP:=TO_NUMBER(Str);
RETURN (1);
EXCEPTION
-- Only conversion failures mean "not numeric"; any other error propagates to
-- the caller instead of being silently turned into 0.
WHEN VALUE_ERROR OR INVALID_NUMBER THEN
RETURN (0);
END;
Here is function ISNUMERIC to define before a query run:
CREATE OR REPLACE FUNCTION ISNUMERIC (Str IN CHAR) RETURN NUMBER AS
-- Tells whether Str can be converted with TO_NUMBER.
-- Returns 1 when the conversion succeeds, 0 when it fails, NULL when Str is NULL.
-- Unconstrained NUMBER: an INT target would make very large numeric strings
-- fail on assignment and be reported as non-numeric.
TMP NUMBER;
BEGIN
if Str is null then
return(null);
end if;
--if input null return NULL
TMP:=TO_NUMBER(Str);
RETURN (1);
EXCEPTION
-- Only conversion failures mean "not numeric"; any other error propagates to
-- the caller instead of being silently turned into 0.
WHEN VALUE_ERROR OR INVALID_NUMBER THEN
RETURN (0);
END;
And here is the query:
-- Computes Group_id per row with a correlated subquery instead of analytic functions.
select id, amount, code,
(
-- Counts the rows up to and including the current one (tab.id <= t.id) at which
-- isnumeric(Amount) differs from the value for the row with the greatest smaller id.
-- NOTE(review): the unqualified Amount presumably resolves to Tab.Amount - confirm.
select count(id)
from t Tab
where tab.id<=t.id
and
-- When no earlier row exists the inner lookup yields NULL, so nvl substitutes
-- isnumeric(Amount)-1, a value that always differs from isnumeric(Amount).
isnumeric(Amount)<>nvl(isnumeric((select Amount from t d1 where d1.id=(select max(d.id) from t d where (d.id<Tab.id)))),isnumeric(Amount)-1)
) Group_id
from t order by id
I found another solution:
-- Number consecutive runs of rows (in id order) whose Amount is numeric / not numeric.
-- lg is NULL when isnumeric(Amount) matches the previous row's value and 1 otherwise;
-- DECODE also treats NULL and NULL as a match.
with tmain as
(select t.*,
decode(isnumeric(Amount),
lag(isnumeric(Amount)) over(order by id),
null,
1) lg
from t
order by id)
-- count(lg) skips NULLs, so the running count over id grows only on rows flagged with 1.
select id, amount, code, count(lg) over(order by id) group_id from tmain
It works fine on large amounts of data.