Finding sequence in data and grouping by it

Finding sequence in data and grouping by it - sql

Data in Phone_number column of my Temp_table looks like this
1234560200
1234560201
1234560202
2264540300
2264540301
2264540302
2264540303
2264540304
2264540305
2264540306
I want it to find sequence of last 4 digits and and find First and Last number of sequence of it. For eg.
There is sequence of first 3 rows as 0200, 0201, 0202, so First = 0200 and Last = 0202
Final Output of this query should be
First Last
0200 0202
0300 0306
I tried below query, but not sure about this approach.
WITH get_nxt_range AS
(
select substr(a.PHONE_NUMBER,7,4) range1,
LEAD(substr(a.PHONE_NUMBER,7,4)) OVER (ORDER BY a.PHONE_NUMBER ) nxt_range
from Temp_table a
)
SELECT range1,nxt_range FROM get_nxt_range
WHERE nxt_range = range1 +1
ORDER BY range1

One method to get sequences is to use the difference of row numbers approach. This works in your case as well:
select substr(phone_number, 1, 6),
min(substr(phone_number, 7, 4)), max(substr(phone_number, 7, 4))
from (select t.*,
(row_number() over (order by phone_number) -
row_number() over (partition by substr(phone_number, 1, 6) order by phone_number)
) as grp
from temp_table t
) t
group by substr(phone_number, 1, 6), grp;

I think something like this might work:
select
min (substr (phone_number, -4, 4)) as first,
max (substr (phone_number, -4, 4)) as last
from temp_table
group by
substr (phone_number, -4, 2)

SELECT DISTINCT
COALESCE(
first_in_sequence,
LAG( first_in_sequence ) IGNORE NULLS OVER ( ORDER BY phone_number )
) AS first_in_sequence,
COALESCE(
last_in_sequence,
LAG( last_in_sequence ) IGNORE NULLS OVER ( ORDER BY phone_number )
) AS last_in_sequence
FROM (
SELECT phone_number,
CASE phone_number
WHEN LAG( phone_number ) OVER ( ORDER BY phone_number ) + 1
THEN NULL
ELSE phone_number
END AS first_in_sequence,
CASE phone_number
WHEN LEAD( phone_number ) OVER ( ORDER BY phone_number ) - 1
THEN NULL
ELSE phone_number
END AS last_in_sequence
FROM temp_table
);
Update:
CREATE TABLE phone_numbers ( phone_number ) AS
select 1234560200 from dual union all
select 1234560201 from dual union all
select 1234560202 from dual union all
select 2264540300 from dual union all
select 2264540301 from dual union all
select 2264540302 from dual union all
select 2264540303 from dual union all
select 2264540304 from dual union all
select 2264540305 from dual union all
select 2264540306 from dual;
SELECT MIN( phone_number ) AS first_in_sequence,
MAX( phone_number ) AS last_in_sequence
FROM (
SELECT phone_number,
phone_number - ROW_NUMBER() OVER ( ORDER BY phone_number ) AS grp
FROM phone_numbers
)
GROUP BY grp;
Output:
FIRST_IN_SEQUENCE LAST_IN_SEQUENCE
----------------- ----------------
2264540300 2264540306
1234560200 1234560202

If 1234560201 1234560203 1234560204 are two instances then this should work:
with tt as (
select substr(PHONE_NUMBER,7,4) id from Temp_table
),
t as (
select
t1.id,
case when t3.id is null then 1 else 0 end start,
case when t2.id is null then 1 else 0 end "end"
from tt t1
-- no next adjacent element - we have an end of interval
left outer join tt t2 on t2.id - 1 = t1.id
-- not previous adjacent element - we have a start of interval
left outer join tt t3 on t3.id + 1 = t1.id
-- select starts and ends only
where t2.id is null or t3.id is null
)
-- find nearest end record for each start record (it may be the same record)
select t1.id, (select min(id) from t where id >= t1.id and "end" = 1)
from t t1
where t1.start = 1

I see guys already have answered for your question.
I just want to propose my variant how resolve this task:
with list_num (phone_number) as (
select 1234560200 from dual union all
select 1234560201 from dual union all
select 1234560202 from dual union all
select 2264540300 from dual union all
select 2264540301 from dual union all
select 2264540302 from dual union all
select 2264540303 from dual union all
select 2264540304 from dual union all
select 2264540305 from dual union all
select 2264540306 from dual)
select root as from_value,
max(phone_number) keep (dense_rank last order by lvl) as to_value
from
(select phone_number, level as lvl, CONNECT_BY_ROOT phone_number as root
from
(select phone_number,
decode(phone_number-lag (phone_number) over(order by phone_number),1,1,0) as start_value
from list_num) b
connect by nocycle phone_number = prior phone_number + 1
start with start_value = 0)
group by root
having count(1) > 1
If you need only last 4 numbers just substr it.
substr(root,7,4) as from_value,
substr(max(phone_number) keep (dense_rank last order by lvl),7,4) as to_value
Thanks.

Related

How to compare different values within the same column

I having two tables emp and type.
create table EMP(ID number(10), effective_date date);
EID Effective_date
--------------------
1 02/14/2023
2 02/15/2023
3 04/30/2023
4 03/24/2023
create table type(ID number(10),contract_type varchar2(2));
TID contract_type
------------------
1 P
1 S
1 P
2 S
2 S
3 P
3 S
4 S
I am looking EID which is having contract type is 'S' in type table. (or emp table with effective date is greater than sysdate and in the type table with only contract_type ='S')
Actual result :
2
4
My query is not giving the correct results.
select emp.EID
from emp,type
where EID = TID
contract_type ='S'
effective_date >= sysdate
group by TID
having count(TID) >= 1;

If you want to keep your idea with COUNT and GROUP BY, you should count other contract types than the 'S' ones and check this is 0:
SELECT e.eid
FROM emp e
JOIN type t ON e.eid = t.tid
WHERE
e.effective_date >= sysdate
GROUP BY e.eid
HAVING COUNT(CASE WHEN t.contract_type <> 'S' THEN 1 END) = 0;
This query will return 2 and 4 for your sample data.
Try out: db<>fiddle
Another option is as already said here using NOT EXISTS.
Take care of following difference to the NOT EXISTS approach: The query in Tim's answer will also fetch id's of table "emp" that don't appear at all in table "type". My query here will not fetch such id's.
It's up to you to decide whether this is possible at all and what to do in this case.
Changing JOIN to LEFT JOIN in above query will eliminate this difference.

I would use exists logic here:
SELECT EID
FROM EMP e
WHERE effective_date >= SYSDATE AND
NOT EXISTS (
SELECT 1
FROM "type" t
WHERE t.TID = e.EID AND
t.contract_type <> 'S'
);

You could use Count() Over() analytic function to check for type 'S' and number of different types per ID.
SELECT DISTINCT ID
FROM ( Select e.EID "ID",
Count(CASE t.CONTRACT_TYPE WHEN 'S' THEN 'S' END) Over(Partition By t.ID Order By t.ID) "NUM_OF_S",
Count(Distinct t.CONTRACT_TYPE) Over(Partition By t.ID) "NUM_OF_TYPES",
TRUNC(e.EFFECTIVE_DATE) - TRUNC(SYSDATE) "DAYS_AFTER_SYSDATE"
From emp_cte e
Inner Join type_cte t ON(t.ID = e.EID) )
WHERE NUM_OF_S > 0 And -- Type 'S' exists for ID AND
NUM_OF_TYPES = 1 And -- It is the only type AND
DAYS_AFTER_SYSDATE > 0 -- EFFECTIVE_DATE is after SYSDATE
With your sample data ...
WITH
emp_cte(EID, EFFECTIVE_DATE) AS
(
Select 1, To_Date('02/14/2023', 'mm/dd/yyyy') From Dual Union All
Select 2, To_Date('02/15/2023', 'mm/dd/yyyy') From Dual Union All
Select 3, To_Date('04/30/2023', 'mm/dd/yyyy') From Dual Union All
Select 4, To_Date('03/24/2023', 'mm/dd/yyyy') From Dual
),
type_cte(ID, CONTRACT_TYPE) AS
(
Select 1, 'P' From Dual Union All
Select 1, 'S' From Dual Union All
Select 1, 'P' From Dual Union All
Select 2, 'S' From Dual Union All
Select 2, 'S' From Dual Union All
Select 3, 'P' From Dual Union All
Select 3, 'S' From Dual Union All
Select 4, 'S' From Dual
)
... result would be ...
-- ID
-- ----------
-- 2
-- 4

How to get multiple row values in single row but in different line(with Line break) in Oracle Sql

here the above table is my input and the below table I need as output.

A simple option is to aggregate them using listagg and set delimiter to the newline character:
Sample data:
SQL> select * from temp;
WO TITLE
-- --------------------
W1 T1
W1 T2
W2 TT1
Query:
SQL> select workflow_id,
2 listagg(title, chr(10)) within group (order by title) as title
3 from temp
4 group by workflow_id;
WO TITLE
-- --------------------
W1 T1
T2
W2 TT1
SQL>

Use LISTAGG with a new line separator (assuming that you have already ordered your rows):
SELECT workflow_id,
LISTAGG(title, CHR(10)) WITHIN GROUP (ORDER BY ROWNUM) AS titles
FROM table_name
GROUP BY workflow_id;
Which, for the sample data:
CREATE TABLE table_name (workflow_id, title) AS
SELECT 'W1', 'd1' FROM DUAL UNION ALL
SELECT 'W1', 'd5' FROM DUAL UNION ALL
SELECT 'W1', 'd4' FROM DUAL UNION ALL
SELECT 'W2', 'd2' FROM DUAL UNION ALL
SELECT 'W2', 'd3' FROM DUAL;
Outputs:
WORKFLOW_ID
TITLES
W1
d1d5d4
W2
d2d3
db<>fiddle here

This works here, hopefully it could help...
WITH
tbl AS
(
Select 'w1' "WORKFLOW_NO", 'd1' "DISCIPLINE" From Dual UNION ALL
Select 'w1' "WORKFLOW_NO", 'd5' "DISCIPLINE" From Dual UNION ALL
Select 'w1' "WORKFLOW_NO", 'd4' "DISCIPLINE" From Dual UNION ALL
Select 'w2' "WORKFLOW_NO", 'd2' "DISCIPLINE" From Dual UNION ALL
Select 'w2' "WORKFLOW_NO", 'd3' "DISCIPLINE" From Dual
)
SELECT
SUBSTR(LISTAGG(' ' || Chr(10) || WORKFLOW_NO) WITHIN GROUP (ORDER BY WF), 3, 50) "WORKFLOW_NO",
LISTAGG(DISCIPLINE, Chr(10)) WITHIN GROUP (ORDER BY WF) "DISCIPLINE"
FROM
(
SELECT
c.WORKFLOW_NO "WF",
c.CNT "CNT",
CASE
WHEN Count(1) OVER(PARTITION BY t.WORKFLOW_NO ORDER BY t.WORKFLOW_NO ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) = c.CNT
THEN t.WORKFLOW_NO
ELSE ''
END "WORKFLOW_NO",
t.DISCIPLINE "DISCIPLINE"
FROM
tbl t
INNER JOIN
(
SELECT DISTINCT
WORKFLOW_NO "WORKFLOW_NO",
Count(WORKFLOW_NO) OVER(PARTITION BY WORKFLOW_NO ORDER BY WORKFLOW_NO) "CNT"
FROM
tbl
) c ON(c.WORKFLOW_NO = t.WORKFLOW_NO)
)
GROUP BY
WF, CNT
--
-- R e s u l t
-- WORKFLOW_NO DISCIPLINE
-- ------------ ----------
-- d1
-- d4
-- w1 d5
-- -----------------------------------------
-- d2
-- w2 d3

SQL Query for finding longest streak of wins

I have data like below -
Year,winning_country
2001,IND
2002,IND
2003,IND
2004,AUS
2005,AUS
2006,SA
2007,SA
2008,SA
2009,IND
2010,IND
2011,IND
2012,IND
2013,AUS
2014,AUS
2015,SA
2016,NZ
2017,SL
2018,IND
The question here is to find out the longest streak of wins for each country and desired output will be like below -
Country,no_of_wins
IND,4
AUS,2
SA,3
SL,1
NZ,1
Can someone help here.

This is a gaps and islands problem, but the simplest method is to subtract a sequence from the year. So, to get all the sequences:
select country, count(*) as streak,
min(year) as from_year, max(year) as to_year
from (select year, country,
row_number() over (partition by country order by year) as seqnum
from t
) t
group by country, (year - seqnum);
To get the longest per country, aggregate again or use window functions:
select country, streak
from (select country, count(*) as streak,
min(year) as from_year, max(year) as to_year,
row_number() over (partition by country order by count(*) desc) as seqnum_2
from (select year, country,
row_number() over (partition by country order by year) as seqnum
from t
) t
group by country, (year - seqnum)
) cy
where seqnum_2 = 1;
I prefer using row_number() to get the longest streak because it allows you to also get the years when it occurred.

Looks like an gaps-and-islands problem.
The SQL below calculates some ranking based on 2 row_number.
Then it's just a matter of grouping.
SELECT q2.Country, MAX(q2.no_of_wins) AS no_of_wins
FROM
(
SELECT q1.winning_country as Country,
COUNT(*) AS no_of_wins
FROM
(
SELECT t.Year, t.winning_country,
(ROW_NUMBER() OVER (ORDER BY t.Year ASC) -
ROW_NUMBER() OVER (PARTITION BY t.winning_country ORDER BY t.Year)) AS rnk
FROM yourtable t
) q1
GROUP BY q1.winning_country, q1.rnk
) q2
GROUP BY q2.Country
ORDER BY MAX(q2.no_of_wins) DESC

If Redshift supports analytic function, below would be the query.
with t1 as
(
select 2001 as year,'IND' as cntry from dual union
select 2002,'IND' from dual union
select 2003,'IND' from dual union
select 2004,'AUS' from dual union
select 2005,'AUS' from dual union
select 2006,'SA' from dual union
select 2007,'SA' from dual union
select 2008,'SA' from dual union
select 2009,'IND' from dual union
select 2010,'IND' from dual union
select 2011,'IND' from dual union
select 2012,'IND' from dual union
select 2013,'AUS' from dual union
select 2014,'AUS' from dual union
select 2015,'SA' from dual union
select 2016,'NZ' from dual union
select 2017,'SL' from dual union
select 2018,'IND' from dual) ,
t2 as (select year, cntry, year - row_number() over (partition by cntry order by year) as grpBy from t1 order by cntry),
t3 as (select cntry, count(grpBy) as consWins from t2 group by cntry, grpBy),
res as (select cntry, consWins, row_number() over (partition by cntry order by consWins desc) as rnk from t3)
select cntry, consWins from res where rnk=1;
Hope this helps.

Here is a solution that leverages the use of Redshift Python UDF's
There may be simpler ways to achieve the same but this is a good example of how to create a simple UDF.
create table temp_c (competition_year int ,winning_country varchar(4));
insert into temp_c (competition_year, winning_country)
values
(2001,'IND'),
(2002,'IND'),
(2003,'IND'),
(2004,'AUS'),
(2005,'AUS'),
(2006,'SA'),
(2007,'SA'),
(2008,'SA'),
(2009,'IND'),
(2010,'IND'),
(2011,'IND'),
(2012,'IND'),
(2013,'AUS'),
(2014,'AUS'),
(2015,'SA'),
(2016,'NZ'),
(2017,'SL'),
(2018,'IND')
;
create or replace function find_longest_streak(InputStr varChar)
returns integer
stable
as $$
MaxStreak=0
ThisStreak=0
ThisYearStr=''
LastYear=0
for ThisYearStr in InputStr.split(','):
if int(ThisYearStr) == LastYear + 1:
ThisStreak+=1
else:
if ThisStreak > MaxStreak:
MaxStreak=ThisStreak
ThisStreak=1
LastYear=int(ThisYearStr)
return max(MaxStreak,1)
$$ language plpythonu;
select winning_country,
find_longest_streak(listagg(competition_year,',') within group (order by competition_year))
from temp_c
group by winning_country
order by 2 desc
;

How about something like...
SELECT
winning_country,
COUNT(*)
GROUP BY winning_country
HAVING MAX(year) - MIN(year) = COUNT(year) - 1
This assumes no duplicate entries.

Creating a session abstraction do the trick:
WITH winning_changes AS (
SELECT *,
CASE WHEN LAG(winning_country) OVER (ORDER BY year) <> winning_country THEN 1 ELSE 0 END AS same_winner
FROM winners
),
sequences AS (
SELECT *,
SUM(same_winner) OVER (ORDER BY year) AS winning_session
FROM winning_changes
),
streaks AS (
SELECT winning_country AS country,
winning_session,
COUNT(*) streak
FROM sequences
GROUP BY 1,2
)
SELECT country,
MAX(streak) AS no_of_wins
FROM streaks
GROUP BY 1;

Oracle SQL Replace multiple characters in different positions

I'm using Oracle 11g and I'm having trouble replacing multiple characters based on positions mentioned in a different table. For example:
Table 1
PRSKEY POSITION CHARACTER
123 3 ć
123 9 ć
Table 2
PRSKEY NAME
123 Becirovic
I have to replace the NAME in Table 2 to Bećirović.
I've tried regexp_replace but this function doesn't provide replacing more then 1 position, is there an easy way to fix this?

Here's another way to do it.
with tab1 as (select 123 as prskey, 3 as position, 'ć' as character from dual
union select 123, 9, 'ć' from dual),
tab2 as (select 123 as prskey, 'Becirovic' as name from dual)
select listagg(nvl(tab1.character, namechar)) within group(order by lvl)
from
(select prskey, substr(name, level, 1) as namechar, level as lvl
from tab2
connect by level <= length(name)
) splitname
left join tab1 on position = lvl and tab1.prskey = splitname.prskey
;

Simple solution using cursor ...
create table t1 (
prskey int,
pos int,
character char(1)
);
create table t2
(
prskey int,
name varchar2(100)
);
insert into t1 values (1, 1, 'b');
insert into t1 values (1, 3, 'e');
insert into t2 values (1, 'dear');
begin
for t1rec in (select * from t1) loop
update t2
set name = substr(name, 1, t1rec.pos - 1) || t1rec.character || substr(name, t1rec.pos + 1, length(name) - t1rec.pos)
where t2.prskey = t1rec.prskey;
end loop;
end;
/

I would prefer approach via PL/SQL, but in your tag only 'sql', so I made this monster:
with t as (
select 123 as id, 3 as pos, 'q' as new_char from dual
union all
select 123 as id, 6 as pos, 'z' as new_char from dual
union all
select 123 as id, 9 as pos, '1' as new_char from dual
union all
select 456 as id, 1 as pos, 'A' as new_char from dual
union all
select 456 as id, 4 as pos, 'Z' as new_char from dual
),
t1 as (
select 123 as id, 'Becirovic' as str from dual
union all
select 456 as id, 'Test' as str from dual
)
select listagg(out_text) within group (order by pos)
from(
select id, pos, new_char, str, prev, substr(str,prev,pos-prev)||new_char as out_text
from(
select id, pos, new_char, str, nvl(lag(pos) over (partition by id order by pos)+1,1) as prev
from (
select t.id, pos, new_char, str
from t, t1
where t.id = t1.id
) q
) a
) w
group by id
Result:
Beqirzvi1
AesZ

How to get query to return rows where first three characters of one row match another row?

Here's my data:
with first_three as
(
select 'AAAA' as code from dual union all
select 'BBBA' as code from dual union all
select 'BBBB' as code from dual union all
select 'BBBC' as code from dual union all
select 'CCCC' as code from dual union all
select 'CCCD' as code from dual union all
select 'FFFF' as code from dual union all
select 'GFFF' as code from dual )
select substr(code,1,3) as r1
from first_three
group by substr(code,1,3)
having count(*) >1
This query returns the characters that meet the cirteria. Now, how do I select from this to get desired results? Or, is there another way?
Desired Results
BBBA
BBBB
BBBC
CCCC
CCCD

WITH code_frequency AS (
SELECT code,
COUNT(1) OVER ( PARTITION BY SUBSTR( code, 1, 3 ) ) AS frequency
FROM table_name
)
SELECT code
FROM code_frequency
WHERE frequency > 1

WITH first_three AS (
...
)
SELECT *
FROM first_three f1
WHERE EXISTS (
SELECT 1 FROM first_three f2
WHERE f1.code != f2.code
AND substr(f1.code, 1, 3) = substr(f2.code, 1, 3)
)

select res from (select res,count(*) over
(partition by substr(res,1,3) order by null) cn from table_name) where cn>1;

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Finding sequence in data and grouping by it - sql

I think something like this might work: select min (substr (phone_number, -4, 4)) as first, max (substr (phone_number, -4, 4)) as last from temp_table group by substr (phone_number, -4, 2)

Related

How to compare different values within the same column

How to get multiple row values in single row but in different line(with Line break) in Oracle Sql

SQL Query for finding longest streak of wins

Oracle SQL Replace multiple characters in different positions

How to get query to return rows where first three characters of one row match another row?

Categories

Resources