SQL Query for finding longest streak of wins - sql

I have data like below -
Year,winning_country
2001,IND
2002,IND
2003,IND
2004,AUS
2005,AUS
2006,SA
2007,SA
2008,SA
2009,IND
2010,IND
2011,IND
2012,IND
2013,AUS
2014,AUS
2015,SA
2016,NZ
2017,SL
2018,IND
The question here is to find out the longest streak of wins for each country and desired output will be like below -
Country,no_of_wins
IND,4
AUS,2
SA,3
SL,1
NZ,1
Can someone help here.

This is a gaps and islands problem, but the simplest method is to subtract a sequence from the year. So, to get all the sequences:
select country, count(*) as streak,
min(year) as from_year, max(year) as to_year
from (select year, country,
row_number() over (partition by country order by year) as seqnum
from t
) t
group by country, (year - seqnum);
To get the longest per country, aggregate again or use window functions:
select country, streak
from (select country, count(*) as streak,
min(year) as from_year, max(year) as to_year,
row_number() over (partition by country order by count(*) desc) as seqnum_2
from (select year, country,
row_number() over (partition by country order by year) as seqnum
from t
) t
group by country, (year - seqnum)
) cy
where seqnum_2 = 1;
I prefer using row_number() to get the longest streak because it allows you to also get the years when it occurred.

Looks like an gaps-and-islands problem.
The SQL below calculates some ranking based on 2 row_number.
Then it's just a matter of grouping.
SELECT q2.Country, MAX(q2.no_of_wins) AS no_of_wins
FROM
(
SELECT q1.winning_country as Country,
COUNT(*) AS no_of_wins
FROM
(
SELECT t.Year, t.winning_country,
(ROW_NUMBER() OVER (ORDER BY t.Year ASC) -
ROW_NUMBER() OVER (PARTITION BY t.winning_country ORDER BY t.Year)) AS rnk
FROM yourtable t
) q1
GROUP BY q1.winning_country, q1.rnk
) q2
GROUP BY q2.Country
ORDER BY MAX(q2.no_of_wins) DESC

If Redshift supports analytic function, below would be the query.
with t1 as
(
select 2001 as year,'IND' as cntry from dual union
select 2002,'IND' from dual union
select 2003,'IND' from dual union
select 2004,'AUS' from dual union
select 2005,'AUS' from dual union
select 2006,'SA' from dual union
select 2007,'SA' from dual union
select 2008,'SA' from dual union
select 2009,'IND' from dual union
select 2010,'IND' from dual union
select 2011,'IND' from dual union
select 2012,'IND' from dual union
select 2013,'AUS' from dual union
select 2014,'AUS' from dual union
select 2015,'SA' from dual union
select 2016,'NZ' from dual union
select 2017,'SL' from dual union
select 2018,'IND' from dual) ,
t2 as (select year, cntry, year - row_number() over (partition by cntry order by year) as grpBy from t1 order by cntry),
t3 as (select cntry, count(grpBy) as consWins from t2 group by cntry, grpBy),
res as (select cntry, consWins, row_number() over (partition by cntry order by consWins desc) as rnk from t3)
select cntry, consWins from res where rnk=1;
Hope this helps.

Here is a solution that leverages the use of Redshift Python UDF's
There may be simpler ways to achieve the same but this is a good example of how to create a simple UDF.
create table temp_c (competition_year int ,winning_country varchar(4));
insert into temp_c (competition_year, winning_country)
values
(2001,'IND'),
(2002,'IND'),
(2003,'IND'),
(2004,'AUS'),
(2005,'AUS'),
(2006,'SA'),
(2007,'SA'),
(2008,'SA'),
(2009,'IND'),
(2010,'IND'),
(2011,'IND'),
(2012,'IND'),
(2013,'AUS'),
(2014,'AUS'),
(2015,'SA'),
(2016,'NZ'),
(2017,'SL'),
(2018,'IND')
;
create or replace function find_longest_streak(InputStr varChar)
returns integer
stable
as $$
MaxStreak=0
ThisStreak=0
ThisYearStr=''
LastYear=0
for ThisYearStr in InputStr.split(','):
if int(ThisYearStr) == LastYear + 1:
ThisStreak+=1
else:
if ThisStreak > MaxStreak:
MaxStreak=ThisStreak
ThisStreak=1
LastYear=int(ThisYearStr)
return max(MaxStreak,1)
$$ language plpythonu;
select winning_country,
find_longest_streak(listagg(competition_year,',') within group (order by competition_year))
from temp_c
group by winning_country
order by 2 desc
;

How about something like...
SELECT
winning_country,
COUNT(*)
GROUP BY winning_country
HAVING MAX(year) - MIN(year) = COUNT(year) - 1
This assumes no duplicate entries.

Creating a session abstraction do the trick:
WITH winning_changes AS (
SELECT *,
CASE WHEN LAG(winning_country) OVER (ORDER BY year) <> winning_country THEN 1 ELSE 0 END AS same_winner
FROM winners
),
sequences AS (
SELECT *,
SUM(same_winner) OVER (ORDER BY year) AS winning_session
FROM winning_changes
),
streaks AS (
SELECT winning_country AS country,
winning_session,
COUNT(*) streak
FROM sequences
GROUP BY 1,2
)
SELECT country,
MAX(streak) AS no_of_wins
FROM streaks
GROUP BY 1;

Related

How do I use MIN on a union column SQL

I'm having problems with using the MIN function in sql. I want to get a list of all the rows with the minimum value from my count function.
Here is my code:
SELECT land, MIN(count) as lowest
FROM
(
SELECT temp.land, count(*)
FROM
(
SELECT grans.land FROM Grans
UNION ALL
SELECT grans.aland FROM Grans
) as temp
GROUP BY land
ORDER BY land
) as subQuery
GROUP BY land
ORDER BY land
At the moment I just get a table listing land and count, although count is renamed to lowest.
I would use window functions:
SELECT land, cnt
FROM (SELECT temp.land, count(*) as cnt,
MIN(count(*)) OVER () as min_cnt
FROM (SELECT grans.land FROM Grans
UNION ALL
SELECT grans.aland FROM Grans
) temp
GROUP BY land
) l
WHERE cnt = min_cnt;
remove group by if you just want min because if you put group by it will return all the land count that you got in your sub-query, as in count it already made group and that is distinct
SELECT *
FROM
(
SELECT temp.land, count(*) as cnt
FROM
(
SELECT grans.land FROM Grans
UNION ALL
SELECT grans.aland FROM Grans
) as temp
GROUP BY land
ORDER BY land
) as subQuery
order by cnt asc
Limit 1
another way is
SELECT temp.land, count(*) as cnt
FROM
(
SELECT grans.land FROM Grans
UNION ALL
SELECT grans.aland FROM Grans
) as temp
GROUP BY land
having cnt in(
SELECT min(cnt)
FROM
(
SELECT temp.land, count(*) as cnt
FROM
(
SELECT grans.land FROM Grans
UNION ALL
SELECT grans.aland FROM Grans
) as temp
GROUP BY land
ORDER BY land
) as subQuery
)
and it also work
select * from
(
SELECT * ,row_number() over(partition by land order by cnt) as rn
FROM
(
SELECT temp.land, count(*) as cnt
FROM
(
SELECT grans.land FROM Grans
UNION ALL
SELECT grans.aland FROM Grans
) as temp
GROUP BY land
ORDER BY land
) as subQuery
) t where t.rn=1

update statement is not working in my query

with t as (
select 'AA-00001152' itemid from dual union all
select 'AA-00001152' from dual union all
select 'AA-00001153' from dual union all
select 'AA-00001154' from dual union all
select 'AA-00001154' from dual union all
select 'CC-254565' from dual union all
select 'AA-00001156' from dual union all
select 'AA-00001156' from dual union all
select 'BB-00001200' from dual
)
select 14999 + dense_rank() over(order by itemid) as seq_no,
itemid
from t
order by seq_no
Here i have generated seq_no for multiple itemIds, but i am trying to update in a seq_no column which is throwing error saying subquery returns more than one row. Please help in update query. thanks.
my update query:-
update test
set seq_num =14999 + dense_rank() over(order by itemid)
where item_type='non_product')
In SQL Server, you can use an updatable CTE:
with toupdate as (
select dense_rank() over (order by itemid) as seqnum,
t.*
from t
)
update toupdate
set seq_num = 14999 + seqnum
where item_type = 'non_product';
However, I suspect that you are not really using SQL Server. This syntax would not work in most other databases.

Get the attributes of the most recent row in BigQuery?

I'm working in BigQuery. I have a table t1 which has address, postcode, price and date fields. I want to group this by address and postcode, an find the price of the most recent row for each address.
How can I do this in BigQuery? I know how to get the address, postcode and most recent date:
SELECT
ADDRESS, POSTCODE, MAX(DATE)
FROM
[mytable]
GROUP BY
ADDRESS,
POSTCODE
But I don't know how to get the price of these rows matching these fields. This is my best guess, which does produce results - will this be correct?
SELECT
t1.address, t1.postcode, t1.date, t2.price
FROM [mytable] t2
JOIN
(SELECT
ADDRESS, POSTCODE, MAX(DATE) AS date
FROM
[mytable]
GROUP BY
ADDRESS,
POSTCODE) t1
ON t1.address=t2.address
AND t1.postcode=t2.postcode
AND t1.date=t2.date
This seems to me like it should work, but some of the similar questions have solutions that are much more complex.
Just use row_number():
SELECT t.*
FROM (SELECT t.*,
ROW_NUMBER() OVER (PARTITION BY ADDRESS, POSTCODE
ORDER BY DATE DESC
) as seqnum
FROM [mytable] t
) t
WHERE seqnum = 1;
This is not an aggregation query. You want to filter the rows to get the most recent value.
Try below for BigQuery Standard SQL
#standardSQL
SELECT row.* FROM (
SELECT ARRAY_AGG(t ORDER BY date DESC LIMIT 1)[OFFSET(0)] AS row
FROM `yourTable` AS t
GROUP BY address, postcode
)
You can play/test it with dummy data as below
#standardSQL
WITH yourTable AS (
SELECT 'address_1' AS address, 'postcode_1' AS postcode, '2017-01-01' AS date, 1 AS price UNION ALL
SELECT 'address_1', 'postcode_1', '2017-01-02', 2 UNION ALL
SELECT 'address_1', 'postcode_1', '2017-01-03', 3 UNION ALL
SELECT 'address_1', 'postcode_1', '2017-01-04', 4 UNION ALL
SELECT 'address_2', 'postcode_2', '2017-01-01', 5 UNION ALL
SELECT 'address_3', 'postcode_1', '2017-01-01', 6 UNION ALL
SELECT 'address_3', 'postcode_1', '2017-01-02', 7 UNION ALL
SELECT 'address_3', 'postcode_1', '2017-01-03', 8
)
SELECT row.* FROM (
SELECT ARRAY_AGG(t ORDER BY date DESC LIMIT 1)[OFFSET(0)] AS row
FROM `yourTable` AS t
GROUP BY address, postcode
)

Find Top Most AND Lowest In a Table's Group Column

I have a table and there are 4 fields in it, ID, Price, QTY, Ratting and Optional [Position].
I have all the records Grouped By Columns [Qty,Ratting]
I have to define the position of groupwise and store that Position into Optional column.
For better understanding I have added an image with data in table:
On the basis of QTY in Each Rating I have to Mark Top3, Bottom3 and Rest of them as remaining.
I am not getting how to do it.
Can anybody suggest me how to do it?
So far what I've tried is:
Declare #RankTable TABLE
(
ID INT,
Price Decimal (10,2),
Qty INT,
Ratting INT
)
INSERT INTO #RankTable
SELECT 1,10,15,1
UNION ALL
SELECT 2,11,11,1
UNION ALL
SELECT 3,96,10,1
UNION ALL
SELECT 4,96,8,1
UNION ALL
SELECT 5,56,7,1
UNION ALL
SELECT 6,74,5,1
UNION ALL
SELECT 7,93,4,1
UNION ALL
SELECT 8,98,2,1
UNION ALL
SELECT 9,12,1,1
UNION ALL
SELECT 10,32,80,2
UNION ALL
SELECT 11,74,68,2
UNION ALL
SELECT 12,58,57,2
UNION ALL
SELECT 13,37,43,2
UNION ALL
SELECT 14,79,32,2
UNION ALL
SELECT 15,29,28,2
UNION ALL
SELECT 16,46,17,2
UNION ALL
SELECT 17,86,13,2
UNION ALL
SELECT 19,75,110,3
UNION ALL
SELECT 20,27,108,3
UNION ALL
SELECT 21,38,104,3
UNION ALL
SELECT 22,87,100,3
UNION ALL
SELECT 23,47,89,3
DECLARE #PositionGroup VARCHAR(1)
SELECT *,ISNULL(#PositionGroup,'') AS Position FROM #RankTable
You can try this:
SELECT ID
,Price
,Qty
,Ratting
,CASE WHEN RowID >= 1 AND RowID <= 3
THEN 0
ELSE CASE WHEN RowID > Total - 3 THEN 1 ELSE 2 END END AS Position
FROM (SELECT ID
,Price
,Qty
,Ratting
,COUNT(*) OVER(PARTITION BY Ratting) AS Total
,ROW_NUMBER() OVER(PARTITION BY Ratting ORDER BY Qty DESC) AS RowID
,ISNULL(#PositionGroup,'') AS Position
FROM #RankTable) AS T
Use Window Function. Try this.
;WITH cte
AS (SELECT *,
Row_number()OVER(partition BY rating ORDER BY id) rn,
count(id)OVER(partition BY rating) mx
FROM #RankTable)
SELECT ID,
Price,
Qty,
Rating,
mx - rn,
CASE WHEN rn IN ( 1, 2, 3 ) THEN 0
WHEN mx - rn IN( 0, 1, 2 ) THEN 1
ELSE 2
END position
FROM cte
try this as well.
;WITH cte AS
(
SELECT MAX(Row) [Max],
MIN(Row) [Min],
LU.Ratting
FROM (
SELECT *,
ROW_NUMBER() OVER(PARTITION BY Ratting ORDER BY Qty DESC) Row
FROM #RankTable)LU
GROUP BY LU.Ratting
)
SELECT ID,
R.Price,
R.Qty,
cte.Ratting,
CASE WHEN (Row - Min) <= 2 THEN 0 WHEN (Max - Row) <= 2 THEN 1 ELSE 2 END Position
FROM cte
JOIN (
SELECT Ratting,
ID,
Price,
Qty,
ROW_NUMBER() OVER(PARTITION BY Ratting ORDER BY Qty DESC) [Row]
FROM #RankTable
) R ON R.Ratting = cte.Ratting
Result:

Multiple row's coulmns in one row's multiple columns

Table Schema
ID Status Patient
1 critical Gabriel
1 moderate Frank
1 critical Dorin
2 low Peter
3 critical Noman
3 moderate Johnson
Expected OutPut
ID Patient1 Patient2
1 Gabriel Dorin
3 Noman Null
Here I have to show only those patient whose situation is critcal.
I found the similar question Multiple column values in a single row, but its in SQL also the columns are hard coded.
Thanks!
First step is to select the critical patients and order them:
select id, patient, row_number() over (partition by id order by patient) as rnk
from your_table
where status='critical';
After this you can select first two critical patients in this manner:
select id,
max(case when rnk=1 then patient end) as Patient1,
max(case when rnk=2 then patient end) as Patient2
from (
select id,
patient,
row_number() over (partition by id order by patient) as rnk
from your_table
where status='critical'
)
group by id;
If you want a more flexible solution you can try a query like below, but you should choose the number of ranks in before the runtime:
with your_table as
(select 1 as id, 'critical' as status, 'Gabriel' as patient from dual
union all
select 1, 'moderate', 'Frank' from dual union all
select 1, 'critical', 'Dorin' from dual union all
select 1, 'critical', 'Vasile' from dual union all
select 2, 'low', 'Peter' from dual union all
select 3, 'critical', 'Noman' from dual union all
select 3, 'moderate', 'Johnson' from dual )
select * from (
select id, patient, row_number() over (partition by id order by patient) as rnk
from your_table
where status='critical'
)
pivot (max(patient) for rnk in (1, 2, 3))
order by 1 ;
(This is for three patients.)
Try to build query and execute the result to a cursor.
SET SERVEROUTPUT ON
DECLARE
v_fact NUMBER := 1;
v_max_cnt number:=1;
V_query CLOB:='';
BEGIN
select max(RNum) into v_max_cnt from(
select row_number() over (partition by ID order by ID) RNum from PATIENTSTATUS where status='critical'
)x;
FOR v_counter IN 1..v_max_cnt LOOP
V_query := V_query||v_fact||' as Patient'||v_fact||(case when v_fact=v_max_cnt then '' else ',' end);
v_fact:=v_fact+1;
END LOOP;
DBMS_OUTPUT.PUT_LINE ('select * from (
select id, patient, row_number() over (partition by id order by patient) as rnk
from PATIENTSTATUS
where status=''critical'')
pivot (max(patient) for rnk in ('||V_query||'))
order by 1;');
END;
From a procedure, data can be inserted to a cursor by
OPEN CUR_Your_Cursor FOR V_query;