How to use where clause within window function in BigQuery - google-bigquery

-- Sample play-by-play events: poss is the possession, ord the event order
-- inside it, name the player (may be null).
with t1 as (
    select 'reb' as type, 1 as poss, 1 as ord, 'nick' as name
    union all select 'reb',  1, 2, null
    union all select 'shot', 1, 3, 'tom'
    union all select 'reb',  1, 4, null
    union all select 'shot', 1, 5, 'bil'
    union all select 'reb',  2, 1, null
    union all select 'reb',  2, 2, null
    union all select 'shot', 2, 3, 'joe'
    union all select 'reb',  2, 4, 'tim'
    union all select 'shot', 2, 4, 'tim'
)
-- Every row gets the first non-null name of its possession, whatever the
-- event type is (so possession 1 yields 'nick', not the first shooter).
select
    first_value(name ignore nulls) over whole_possession as firstname,
    *
from t1
window whole_possession as (
    partition by poss
    order by ord asc
    rows between unbounded preceding and unbounded following
)
This is close to the output needed, but not exactly correct. We are using a window function to get the first name that appears in each poss partition, ordering by the ord field.
What we actually need is the first name field where the type is shot. The correct output for firstname would be tom tom tom tom tom joe joe joe joe joe as tom is the first name in poss == 1, based on the order, where the type is shot.

-- Every row gets the name attached to the earliest (by ord) 'shot' event of
-- its possession: if() nulls out the names of all other event types and
-- IGNORE NULLS then skips them.
select
    first_value(if(type = 'shot', name, null) ignore nulls) over whole_possession as firstname,
    *
from t1
window whole_possession as (
    partition by poss
    order by ord asc
    rows between unbounded preceding and unbounded following
)
order by
    poss asc,
    ord asc
This seems like a valid solution, didn't initially realize if() statements could be used in the window function like this.

Related

Find last and first row for every id

I have this table:
id
RANK
111
1
111
2
111
3
222
1
222
2
I want to add two colums that will show if this is the first/last row for each id
id
first
last
111
YES
NO
111
NO
NO
111
NO
YES
222
YES
NO
222
NO
YES
Let's first point out that sorting without column to sort this is no good idea.
Usually, an id is unique and will be incremented, so it will already be sufficient to order by id.
If this is not the case, there should be at least be another column with a meaningful value (for example also an incrementing number or a datetime) which can be used to sort the result.
So you should fix your table design if possible and add such a column or make your already existing id column unique.
If this is not possible and you really have to order just by the row number, you could do following:
-- Marks one row per id as first and one as last when the table has no real
-- sort column. The numbering is ordered by the partition key itself, so
-- which row of an id receives which number is arbitrary.
WITH numbered AS (
    SELECT
        id,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY id) AS rn,
        COUNT(*) OVER (PARTITION BY id) AS rows_per_id
    FROM yourtable
)
SELECT
    id,
    CASE rn WHEN 1 THEN 'YES' ELSE 'NO' END AS first,
    -- the last row is the one whose number equals the size of its id group
    CASE rn WHEN rows_per_id THEN 'YES' ELSE 'NO' END AS last
FROM numbered;
If you have a column to sort (let's name it "rank"), this will be much safer:
-- Marks the first and last row of each id according to the rank column by
-- numbering every id group once from the start and once from the end.
WITH numbered AS (
    SELECT
        id,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY rank ASC) AS rn1,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY rank DESC) AS rn2
    FROM yourtable
)
SELECT
    id,
    CASE rn1 WHEN 1 THEN 'YES' ELSE 'NO' END AS first,
    CASE rn2 WHEN 1 THEN 'YES' ELSE 'NO' END AS last
FROM numbered;
Here's one option:
Sample data:
SQL> with
2 test (id, rank) as -- sample data: three rows for id 111, two for id 222
3 (select 111, 1 from dual union all
4 select 111, 2 from dual union all
5 select 111, 3 from dual union all
6 select 222, 1 from dual union all
7 select 222, 2 from dual
8 ),
Query begins here:
9 temp as -- attach the lowest and highest rank of the id to each of its rows
10 (select id,
11 rank,
12 min(rank) over (partition by id) rnk_min, -- MIN/MAX do not depend on row order, unlike FIRST_VALUE/LAST_VALUE without ORDER BY
13 max(rank) over (partition by id) rnk_max
14 from test
15 )
16 select id,
17 case when rank = rnk_min then 'Yes' else 'No' end first, -- row holds the lowest rank of its id
18 case when rank = rnk_max then 'Yes' else 'No' end last -- row holds the highest rank of its id
19 from temp
20 order by id, rank;
ID FIRST LAST
---------- ------- -------
111 Yes No
111 No No
111 No Yes
222 Yes No
222 No Yes
SQL>
If you don't have rows with the same rank per id, you may use lag/lead functions to mark first and last rows with a flag using default argument of these functions, which is used when the function leaves a window boundary.
-- Flags the first and last row of every id (ordered by rank) using the
-- default argument of LAG/LEAD, which is returned only when the offset
-- leaves the partition. Assumes rank is unique within an id.
with sample_tab (id, rank) as (
select 111, 1 from dual union all
select 111, 2 from dual union all
select 111, 3 from dual union all
select 222, 1 from dual union all
select 222, 2 from dual
)
select
id
-- no previous row exists only for the first row of an id -> default 'Yes'
, lag('No', 1, 'Yes') over(partition by id order by rank asc) as first
-- no next row exists only for the last row of an id -> default 'Yes'
, lead('No', 1, 'Yes') over(partition by id order by rank asc) as last
from sample_tab
ID
LAST
LAST
111
Yes
No
111
No
No
111
No
Yes
222
Yes
No
222
No
Yes
If the data may have the same rank for multiple rows per id, you may use the same technique (a case when function goes beyound window boundary) with coalesce.
-- Flags the first and last rows of every id when several rows may share the
-- same rank; all rows tied on the lowest/highest rank get the same flag.
with sample_tab (id, rank) as (
select 111, 1 from dual union all
select 111, 2 from dual union all
select 111, 2 from dual union all
select 222, 1 from dual union all
select 222, 2 from dual
)
select
id
-- The frame contains only rows whose rank equals the current rank - 1.
-- When it is empty MAX returns NULL and COALESCE turns that into 'Yes'.
-- Because the offset is exactly 1, a gap in the rank values (e.g. 1 then 3)
-- also produces an empty frame and therefore a 'Yes'.
, coalesce(max('No') over(
partition by id order by rank asc
/*RANGE for logical offset,
setting the same flag for a group of first/last rows*/
range between 1 preceding and 1 preceding
), 'Yes') as first
-- Same idea looking forward: the frame contains rows with rank = current rank + 1.
, coalesce(max('No') over(
partition by id order by rank asc
range between 1 following and 1 following
), 'Yes') as last
from sample_tab
ID
FIRST
LAST
111
Yes
No
111
No
Yes
111
No
Yes
222
Yes
No
222
No
Yes
fiddle

Hackerrank SQL problem to solve in Oracle's SQL version

Query the two cities in STATION with the shortest and longest CITY names, as well as their respective lengths (i.e.: number of characters in the name). If there is more than one smallest or largest city, choose the one that comes first when ordered alphabetically.
The STATION table is described as follows:
Sample Input
For example, CITY has four entries: DEF, ABC, PQRS and WXY.
Sample Output
ABC 3
PQRS 4
Explanation
When ordered alphabetically, the CITY names are listed as ABC, DEF, PQRS, and WXY, with lengths and . The longest name is PQRS, but there are options for shortest named city. Choose ABC, because it comes first alphabetically.
A little bit of analytic functions; sample data in lines #1 - 6; query begins at line #7.
SQL> with station (city) as
2 (select 'DEF' from dual union all
3 select 'ABC' from dual union all
4 select 'PQRS' from dual union all
5 select 'WXY' from dual
6 )
7 select city, len
8 from (select city,
9 length(city) len,
10 rank() over (partition by length(city) order by city) rn -- alphabetical rank among cities of the same length
11 from station
12 )
13 where rn = 1 -- keeps one city per DISTINCT length; equals shortest + longest only because the sample has exactly two lengths
14 order by city;
CITY LEN
---- ----------
ABC 3
PQRS 4
SQL>
Reading your comment, it seems you want something like this:
SQL> with station (city) as
2 (select 'DEF' from dual union all
3 select 'ABC' from dual union all
4 select 'PQRS' from dual union all
5 select 'WXY' from dual union all
6 select 'XX' from dual union all
7 select 'ABCDE' from dual
8 )
9 select city, len
10 from (select city,
11 length(city) len,
12 rank() over (order by length(city) , city) rna, -- 1 = shortest name, alphabetically first on ties
13 rank() over (order by length(city) desc, city) rnd -- 1 = longest name, alphabetically first on ties
14 from station
15 )
16 where rna = 1
17 or rnd = 1
18 order by len, city;
CITY LEN
----- ----------
XX 2
ABCDE 5
SQL>
Try this SQL statement with the fetch first row only clause:
-- Picks the shortest and the longest city name, each with its own top-1
-- query (ties broken alphabetically). UNION merges the two picks and
-- collapses them into a single row when both queries return the same city.
with station (city) as (
    select 'DEF' from dual union all
    select 'ABC' from dual union all
    select 'PQRS' from dual union all
    select 'WXY' from dual
)
(
    select
        city,
        length(city)
    from station
    order by length(city) asc, city asc
    fetch first 1 row only
)
union
(
    select
        city,
        length(city)
    from station
    order by length(city) desc, city asc
    fetch first 1 row only
);
I solved the question this way:
-- Keeps only the cities whose name length is the overall minimum or maximum,
-- then takes the alphabetically first city of each of those lengths.
-- The ordering is applied to the grouped result (shortest first); an ORDER BY
-- inside the inline view would not determine the order of the GROUP BY output.
select min(tt.city), tt.city_length
from (select s.city, length(s.city) city_length
      from station s
      where length(s.city) = (select max(length(t.city)) from station t)
         or length(s.city) = (select min(length(p.city)) from station p)) tt
group by tt.city_length
order by tt.city_length;
You can use the ROW_NUMBER analytic function in the ORDER BY clause and then FETCH FIRST ROW WITH TIES:
-- Returns the shortest and the longest city name (alphabetically first on
-- ties). Each row is numbered from the short end and from the long end; the
-- two wanted rows both have 1 as their smaller number, so they tie for the
-- first place of the ORDER BY and WITH TIES returns both.
SELECT
    city,
    LENGTH(city) AS length
FROM station
ORDER BY
    LEAST(
        ROW_NUMBER() OVER (ORDER BY LENGTH(city) DESC, city ASC),
        ROW_NUMBER() OVER (ORDER BY LENGTH(city) ASC, city ASC)
    ) ASC
FETCH FIRST 1 ROW WITH TIES;
Which, for the sample data:
-- Sample data: creates station with a single city column holding four
-- names, three of length 3 and one of length 4.
CREATE TABLE station ( city ) AS
SELECT 'ABC' FROM DUAL UNION ALL
SELECT 'DEF' FROM DUAL UNION ALL
SELECT 'PQRS' FROM DUAL UNION ALL
SELECT 'XYZ' FROM DUAL;
Outputs:
CITY | LENGTH
:--- | -----:
PQRS | 4
ABC | 3
db<>fiddle here
-- Returns "<city> <length>" for the shortest and for the longest city name.
-- KEEP (DENSE_RANK FIRST/LAST ORDER BY length(city)) restricts MIN(city) to
-- the cities of minimal/maximal length, so ties are resolved alphabetically.
-- (Plain min(city)/max(city) would pick the alphabetically first/last city
-- regardless of its length.)
select min(city) keep (dense_rank first order by length(city)) || ' ' || min(length(city)) from station
UNION
select min(city) keep (dense_rank last order by length(city)) || ' ' || max(length(city)) from station;

How to concatenate strings from two columns based on userid column and the result should contain distinct values?

TABLE OUTPUT TABLE
U_ID|PRODUCT|BRANCH U_ID RESULT
1 AL 8 1 8~AL-BL+1~HG-IK
1 BL 8
1 HJ 1
1 IK 1
2 IK 6
2 Po 8
3 UY 6
As shown in the snippet strings from two columns should be concatenated based on u_id column and in this requirement the final concatenated string should be distinct..
It is a Oracle database
I tried using LISTAGG function but not getting result for two columns.. any idea will help me a lot.. thanks
This is tricky. Oracle doesn't support distinct with listagg(), so you need to use subqueries:
-- Builds "<products>~<branches>" per u_id with distinct values in each list.
-- Products and branches are de-duplicated and aggregated separately (one row
-- per u_id each) and then joined, so no outer GROUP BY is needed; adding one
-- while selecting the non-aggregated result expression raises ORA-00979.
select pu.u_id,
       (pu.products || '~' || bu.branches) as result
from (select u_id, listagg(product, '-') within group (order by product) as products
      from (select distinct u_id, product from t) dp
      group by u_id
     ) pu join
     (select u_id, listagg(branch, '-') within group (order by branch) as branches
      from (select distinct u_id, branch from t) db
      group by u_id
     ) bu
     on pu.u_id = bu.u_id;
EDIT:
There is another way to do this without so many subqueries:
-- Builds "<products>~<branches>" per u_id with distinct values in each list.
-- Every (u_id, product) and (u_id, branch) combination is numbered; only the
-- occurrence numbered 1 is passed to LISTAGG, the others become NULL and are
-- ignored by the aggregate.
with numbered as (
    select
        t.*,
        row_number() over (partition by u_id, product order by product) as seqnum_p,
        row_number() over (partition by u_id, branch order by branch) as seqnum_b
    from t
)
select
    u_id,
    (
        listagg(case seqnum_p when 1 then product end, '-') within group (order by product)
        || '~' ||
        listagg(case seqnum_b when 1 then branch end, '-') within group (order by branch)
    ) as result
from numbered
group by u_id;
Here is an example in rextester.
This could be done, by eliminating duplicates from the listagg strings:
-- Builds "<products>~<branches>" per id: aggregate all values with LISTAGG,
-- then strip repeated values from the resulting strings with a regular
-- expression.
with data as (
select 1 as id, 'AL' as PRODUCT, 8 as BRANCH from dual union all
select 1 as id, 'BL' , 1 from dual union all
select 1 as id, 'HJ ', 1 from dual union all
select 2 as id, 'IK' , 5 from dual union all
select 2 as id, 'IK' , 6 from dual union all
select 2 as id, 'Po' , 8 from dual union all
select 3 as id, 'UY' , 6 from dual
)
select id, PRODUCT||'~'||BRANCH as RESULT from(
select id,
-- The pattern only collapses ADJACENT repeats, so each list must be sorted
-- by the aggregated value itself (ordering by id inside GROUP BY id gives
-- no ordering at all).
-- NOTE(review): the pattern is not anchored to the start of a value, so a
-- value that is a suffix of the preceding one (e.g. 'BA-A') is collapsed too.
REGEXP_REPLACE(
listagg(PRODUCT,'-') within group (order by PRODUCT),
'([^-]*)(-\1)+($|-)','\1\3') PRODUCT,
REGEXP_REPLACE(
(listagg( BRANCH,'-') within group (order by BRANCH)),
'([^-]*)(-\1)+($|-)','\1\3') BRANCH
from data
group by id
)
Credits for duplicate eliminatuion must go to jack douglass
Results:
ID Result
1 AL-BL-HJ~1-8
2 IK-Po~5-6-8
3 UY~6

Oracle select pivot query to put row adjacent to their counter part using generic column name

I have a table which each entry has a counter pair
Customer
Name Value
Bob 3
Bob 4
Sam 0
Sam 1
Joe 9
I want the following result
Customer
Name Value1 Value2
Bob 3 4
Sam 0 1
Joe 9
I have read this thread, Oracle query to put rows at odd number adjacent to even number, but I want to avoid using the MOD function instead possible using pivot instead.
You can't use the pivot statement here, if you have only two value for each name (it also works with dates, because we can use max and min for dates):
-- One row per name with its (at most two) values side by side.
-- value1 is the smaller value; value2 is the larger one and stays NULL only
-- when the name has a single value. Counting the values instead of comparing
-- max with min keeps value2 when both values of a name are equal.
select name,
       min(value) value1,
       case when count(value) > 1 then max(value) end value2
from customer_tables
group by name
If Bob, Sam and other have more that two value:
-- Pivots an arbitrary number of values per name into one XML document per
-- name. The values of a name are numbered in ascending order, and PIVOT XML
-- with ANY emits one item per number found.
with t as (
    select 'Bob' as name, 3 as value from dual union all
    select 'Bob', 4 from dual union all
    select 'Sam', 0 from dual union all
    select 'Sam', 1 from dual union all
    select 'Joe', 9 from dual
),
t1 as (
    select
        name,
        value,
        row_number() over (partition by name order by value) as rn
    from t
)
select *
from t1
pivot xml (
    max(value)
    for rn in (any)
)
SQL> l
1 with t (Name, Value) as (
2 select 'Bob',3 from dual union all
3 select 'Bob',4 from dual union all
4 select 'Sam',0 from dual union all
5 select 'Sam',1 from dual union all
6 select 'Joe',9 from dual
7 ), t1 (name, value, rn) as (
8 select name, value, ROW_NUMBER() OVER(partition by name order by value) from t
9 )
10 select * from t1
11 pivot XML (
12 max(value)
13 for rn in (ANY)
14* )
SQL> /
NAM RN_XML
--- --------------------------------------------------------------------------------
Bob <PivotSet><item><column name = "RN">1</column><column name = "MAX(VALUE)">3</col
umn></item><item><column name = "RN">2</column><column name = "MAX(VALUE)">4</co
lumn></item></PivotSet>
Joe <PivotSet><item><column name = "RN">1</column><column name = "MAX(VALUE)">9</col
umn></item></PivotSet>
Sam <PivotSet><item><column name = "RN">1</column><column name = "MAX(VALUE)">0</col
umn></item><item><column name = "RN">2</column><column name = "MAX(VALUE)">1</co
lumn></item></PivotSet>
Read more about pivot here

Need a query without using subquery

My table is
PROPOSAL_TABLE
PID QUOTE_ID PDF1
--- -------- ----
1 123 null
2 123 null
3 123 null
4 152 null
5 888 null
I need to select Quote ID with MAX PID.
I have a query:
-- Returns PROPOSAL_PDF of the row with the highest PID for quote '123'; the
-- scalar subquery looks that PID up first.
SELECT p.PROPOSAL_PDF
FROM PROPOSAL_TBL p
WHERE p.QUOTE_ID = '123'
  AND p.PID = (
      SELECT MAX(m.PID)
      FROM PROPOSAL_TBL m
      WHERE m.QUOTE_ID = '123'
  )
How can I get the value without using sub query?
Without using a sub-query you can use the KEEP clause:
-- Single-row summary for quote '123': the highest pid together with the
-- quote_id and pdf1 of the row(s) holding that pid. KEEP limits each
-- aggregate to the rows ranked last when sorted by ascending pid.
select
    max(pid) as pid,
    max(quote_id) keep (dense_rank last order by pid asc) as quote_id,
    max(pdf1) keep (dense_rank last order by pid asc) as pdf1
from proposal_table
where quote_id = '123'
This should be highly efficient but as always test. I would recommend reading Rob van Wijk's blog post on it.
The standard alternative would be to use a sub-query but not require another scan of the table by using an analytic function, e.g. ROW_NUMBER()
-- Returns the row with the highest pid for quote '123' in a single scan.
-- Rows are numbered by DESCENDING pid so that number 1 is the MAX pid; an
-- ascending order would return the lowest pid instead.
select pid, quote_id, pdf1
from ( select a.*, row_number() over ( order by pid desc ) as rn
from proposal_table a
where quote_id = '123' )
where rn = 1
You can use analytics:
SQL> WITH DATA AS (
2 SELECT 1 pid, 123 quote_id, 'A' pdf1 FROM DUAL
3 UNION ALL SELECT 2 pid, 123 quote_id, 'B' pdf1 FROM DUAL
4 UNION ALL SELECT 3 pid, 123 quote_id, 'C' pdf1 FROM DUAL
5 UNION ALL SELECT 4 pid, 152 quote_id, 'D' pdf1 FROM DUAL
6 UNION ALL SELECT 5 pid, 888 quote_id, 'E' pdf1 FROM DUAL
7 )
8 SELECT DISTINCT first_value(pid) over (ORDER BY pid DESC) pid, -- highest pid among the filtered rows
9 quote_id,
10 first_value(pdf1) over (ORDER BY pid DESC) pdf1 -- pdf1 of the row holding that highest pid
11 FROM DATA -- every filtered row ends up with identical values, so DISTINCT collapses them into one row
12 WHERE quote_id = 123;
PID QUOTE_ID PDF1
---------- ---------- ----
3 123 C
You can also use aggregates:
SQL> WITH DATA AS (
2 SELECT 1 pid, 123 quote_id, 'A' pdf1 FROM DUAL
3 UNION ALL SELECT 2 pid, 123 quote_id, 'B' pdf1 FROM DUAL
4 UNION ALL SELECT 3 pid, 123 quote_id, 'C' pdf1 FROM DUAL
5 UNION ALL SELECT 4 pid, 152 quote_id, 'D' pdf1 FROM DUAL
6 UNION ALL SELECT 5 pid, 888 quote_id, 'E' pdf1 FROM DUAL
7 )
8 SELECT MAX(pid), -- highest pid of the quote
9 quote_id,
10 MAX(pdf1) KEEP (DENSE_RANK FIRST ORDER BY pid DESC) pdf1 -- pdf1 taken only from the row(s) with that highest pid
11 FROM DATA
12 WHERE quote_id = 123
13 GROUP BY quote_id;
MAX(PID) QUOTE_ID PDF1
---------- ---------- ----
3 123 C
Try this ::
-- Sort the quote's rows by PID, highest first, and keep only the top row.
SELECT p.PROPOSAL_PDF
FROM PROPOSAL_TBL p
WHERE p.QUOTE_ID = '123'
ORDER BY p.PID DESC
LIMIT 1
Sql Server
-- Sort the quote's rows by PID, highest first, and keep only the top row.
SELECT TOP (1) p.PROPOSAL_PDF
FROM PROPOSAL_TBL p
WHERE p.QUOTE_ID = '123'
ORDER BY p.PID DESC
Oracle
-- ROWNUM is assigned before ORDER BY is applied, so the rows are sorted in an
-- inline view first and the outer query then keeps the first sorted row.
SELECT *
FROM (
    SELECT *
    FROM PROPOSAL_TBL
    WHERE QUOTE_ID = '123'
    ORDER BY PID DESC
)
WHERE ROWNUM <= 1