Select with limited join - sql

I have two tables: products and products_prices.
products table:
id
name
user_id
1
Headphones
1
2
Phone
1
products_prices table:
id
product_id
price
time
1
1
10
1
2
1
15
2
3
1
20
3
4
2
10
4
5
2
15
5
6
2
20
6
I have a simple query:
-- ORDER BY id makes the LIMIT/OFFSET page deterministic; without it the row returned is arbitrary.
SELECT * FROM products WHERE (user_id = 1) ORDER BY id LIMIT 1 OFFSET 1
So I need to get limited rows from products table with only two prices values from table product_prices ordered by time for each row in products.
(I need to get product with two latest prices).
This is example of what I want to get:
id
user_id
name
curr_price
prev_price
2
1
Phone
20
15
And example of my query:
-- One page of a user's products, each with its two most recent prices.
-- Prices are sorted by time DESC so OFFSET 0 is the latest price and OFFSET 1 the one before it.
select products.*,
    (select price
     from products_prices
     where product_id = products.id
     order by time desc
     limit 1 offset 0) as curr_price,
    (select price
     from products_prices
     where product_id = products.id
     order by time desc
     limit 1 offset 1) as prev_price
from "products"
where (products."user_id" = 1)
-- Stable ordering so LIMIT/OFFSET always selects the same product.
order by products.id
limit 1 offset 1
Is it possible to do it without subqueries?

Not sure I find any of these easier to read...
0th approach using window functions and a CTE Demo
-- Inline sample data, then pick each product's newest price row together with
-- the price that immediately precedes it in time.
WITH products AS (
    SELECT 1 AS ID, 'Headphones' AS name, 1 AS user_id
    UNION ALL SELECT 2, 'Phone', 1
),
products_prices AS (
    SELECT 1 AS ID, 1 AS Product_ID, 10 AS price, 1 AS time
    UNION ALL SELECT 2, 1, 15, 2
    UNION ALL SELECT 3, 1, 20, 3
    UNION ALL SELECT 4, 2, 33, 4
    UNION ALL SELECT 5, 2, 22, 5
    UNION ALL SELECT 6, 2, 11, 6
),
-- One row per (product, price); newest price first within each product.
ranked_prices AS (
    SELECT
        prod.ID,
        prod.Name,
        prod.user_ID,
        pp.price AS CurrentPrice,
        -- With time descending, the "next" row is the previous price in time.
        LEAD(pp.price) OVER (PARTITION BY prod.ID ORDER BY pp.time DESC) AS Prev_Price,
        pp.time,
        ROW_NUMBER() OVER (PARTITION BY prod.ID ORDER BY pp.time DESC) AS RN
    FROM products AS prod
    LEFT JOIN products_prices AS pp
        ON pp.Product_ID = prod.ID
)
SELECT
    ID,
    Name,
    User_ID,
    CurrentPrice,
    Prev_Price
FROM ranked_prices
WHERE RN = 1
Giving us:
+----+------------+---------+--------------+------------+
| id | name | user_id | currentprice | prev_price |
+----+------------+---------+--------------+------------+
| 1 | Headphones | 1 | 20 | 15 |
| 2 | Phone | 1 | 11 | 22 |
+----+------------+---------+--------------+------------+
1st approach using analytics and a CTE: note I changed price numbers to show variance.
DEMO
-- Inline sample data, then pivot each product's two newest prices into columns.
With products as (SELECT 1 ID, 'Headphones' name, 1 user_id UNION ALL
SELECT 2 ID, 'Phone' name, 1 user_id ),
products_Prices as (SELECT 1 ID, 1 Product_ID, 10 price, 1 time UNION ALL
SELECT 2 ID, 1 Product_ID, 15 price, 2 time UNION ALL
SELECT 3 ID, 1 Product_ID, 20 price, 3 time UNION ALL
SELECT 4 ID, 2 Product_ID, 33 price, 4 time UNION ALL
SELECT 5 ID, 2 Product_ID, 22 price, 5 time UNION ALL
SELECT 6 ID, 2 Product_ID, 11 price, 6 time),
-- Number each product's prices newest-first.
-- Partition by P.ID, not PP.product_ID: after the LEFT JOIN, PP.product_ID is NULL
-- for every product without prices, which would lump them into a single partition
-- and let the RN <= 2 filter discard all but two of them.
STEP1 as (SELECT P.ID, P.Name, P.user_ID, PP.price,
    row_number() over (partition by P.ID order by PP.time desc) RN
FROM Products P
LEFT JOIN products_prices PP
    on P.ID = PP.Product_ID)
SELECT ID, Name, User_ID,
    max(case when RN = 1 then Price end) as Current_price,
    max(case when RN = 2 then Price end) as Last_price
FROM STEP1
WHERE RN <= 2
GROUP BY ID, Name, User_ID
Giving us:
+----+------------+---------+---------------+------------+
| id | name | user_id | current_price | last_price |
+----+------------+---------+---------------+------------+
| 2 | Phone | 1 | 11 | 22 |
| 1 | Headphones | 1 | 20 | 15 |
+----+------------+---------+---------------+------------+
Option 2 using lateral.
demo
-- Inline sample data, then fetch (unpivoted) the two newest prices per product
-- with a lateral subquery.
WITH products AS (
    SELECT 1 AS ID, 'Headphones' AS name, 1 AS user_id
    UNION ALL SELECT 2, 'Phone', 1
),
products_prices AS (
    SELECT 1 AS ID, 1 AS Product_ID, 10 AS price, 1 AS time
    UNION ALL SELECT 2, 1, 15, 2
    UNION ALL SELECT 3, 1, 20, 3
    UNION ALL SELECT 4, 2, 33, 4
    UNION ALL SELECT 5, 2, 22, 5
    UNION ALL SELECT 6, 2, 11, 6
)
SELECT
    prod.ID,
    prod.Name,
    prod.user_ID,
    latest.price,
    latest.time
FROM products AS prod
-- The lateral subquery is evaluated per product and keeps its two newest prices.
LEFT JOIN LATERAL (
    SELECT
        src.Product_ID,
        src.Price,
        src.time
    FROM products_prices AS src
    WHERE src.Product_ID = prod.ID
    ORDER BY src.time DESC
    LIMIT 2
) AS latest
    ON TRUE
ORDER BY latest.time DESC;
Givng us : (unpivoted) and using the row number logic above we could pivot.
+----+------------+---------+-------+------+
| id | name | user_id | price | time |
+----+------------+---------+-------+------+
| 2 | Phone | 1 | 11 | 6 |
| 2 | Phone | 1 | 22 | 5 |
| 1 | Headphones | 1 | 20 | 3 |
| 1 | Headphones | 1 | 15 | 2 |
+----+------------+---------+-------+------+

Related

Grab one record per ID with multiple Lead values

I have a table like this:
ID | Val | Quantity
----------------------
1 | A | 11
1 | B | 15
1 | B | 19
1 | Z | 45
2 | D | 4
2 | E | 25
2 | F | 13
2 | Y | 2
3 | G | 10
3 | H | 15
3 | I | 19
I want to select the top record for each ID ordered by VAL, Quantity AND add the next 2 Val/Quantity within the sort as columns to that row. My expected output look like this:
ID | Val | Quantity | VAL2 | Quantity2 | VAL3 | Quantity3
-------------------------------------------------------------------
1 | A | 11 | B | 15 | B | 19
2 | B | 15 | D | 4 | E | 25
3 | C | 19 | G | 10 | H | 15
I've almost done it using lead, but I don't know how to get rid of the rest of the records in my data-set, as I only want the top.
-- Attempt: attach the next two Val/Quantity pairs to every row.
-- NOTE: the windows have no PARTITION BY, so LEAD looks across IDs, and every row is returned.
SELECT ID,
VAL,
Quantity,
lead(VAL,1) over (order by VAL, Quantity ASC) as Val2,
lead(Quantity,1) over (order by VAL, Quantity ASC) as Quantity2,
lead(VAL,2) over (order by VAL, Quantity ASC) as Val3,
lead(Quantity,2) over (order by VAL, Quantity ASC) as Quantity3
FROM MY_TABLE
order by VAL, Quantity ASC
How can I only select the top record for each ID, while maintaining the lead records? Or is there a more elegant/efficient way to do this?
From your question it seems the expected output should actually be:
ID VAL QUANTITY VAL2 QUANTITY2 VAL3 QUANTITY3
1 A 11 B 15 B 19
2 D 4 E 25 F 13
3 G 10 H 15 I 19
You can get this result with a CTE which generates the LEAD values, as well as a ROW_NUMBER for each set of values. You can then select the first row for each ID from the CTE:
-- For each ID, return its first row in (Val, Quantity) order together with the
-- Val/Quantity of the next two rows in that same order.
with ordered_rows as (
    select
        ID,
        Val,
        Quantity,
        lead(Val, 1)      over (partition by ID order by Val, Quantity) as Val2,
        lead(Quantity, 1) over (partition by ID order by Val, Quantity) as Quantity2,
        lead(Val, 2)      over (partition by ID order by Val, Quantity) as Val3,
        lead(Quantity, 2) over (partition by ID order by Val, Quantity) as Quantity3,
        row_number()      over (partition by ID order by Val, Quantity) as row_pos
    from MY_TABLE
)
select
    ID,
    Val,
    Quantity,
    Val2,
    Quantity2,
    Val3,
    Quantity3
from ordered_rows
-- Keep only the top row of each ID; its LEAD columns already hold rows two and three.
where row_pos = 1
Demo on SQLFiddle
You may use ROW_NUMBER to define the order of the rows within the same ID
-- Number the rows within each ID in (VAL, QUANTITY) order.
with t as (
select ID, VAL, QUANTITY,
row_number() over (partition by ID order by VAL, QUANTITY) as rn
from tab)
select *
from t
order by ID, rn
ID V QUANTITY RN
---------- - ---------- ----------
1 A 11 1
1 B 15 2
1 B 19 3
1 Z 45 4
2 D 4 1
...
In the next step use PIVOT to get the best three values in one row.
-- Number each ID's rows in (VAL, QUANTITY) order, then pivot the first three
-- rows into one output row per ID.
WITH t AS (
    SELECT
        ID,
        VAL,
        QUANTITY,
        ROW_NUMBER() OVER (PARTITION BY ID ORDER BY VAL, QUANTITY) AS rn
    FROM tab
)
SELECT *
FROM t
PIVOT (
    MAX(VAL) AS VAL,
    MAX(QUANTITY) AS QUANTITY
    -- Only row numbers 1..3 are pivoted; later rows of an ID are not represented.
    FOR RN IN (1 AS "COL1", 2 AS "COL2", 3 AS "COL3")
)
ID C COL1_QUANTITY C COL2_QUANTITY C COL3_QUANTITY
---------- - ------------- - ------------- - -------------
1 A 11 B 15 B 19
2 D 4 E 25 F 13
3 G 10 H 15 I 19
If the standar pivot column naming is not fine, simple add a next query and rename the columns.
Note that this query return the same result as apternative approach base on multiple LEAD columns, but you get a bit better flexibility if you plan to vary the number of traced columns.

MSsql select count case only if > 0

I have a query which select and display all values matching criteria
-- Number the matching cars by (make, model), then report per make how many
-- fall in positions 51..100 alongside the make's total.
WITH cte AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY c.make ASC, c.model ASC) AS rn,
        c.id,
        c.make
    FROM cars AS c
    LEFT JOIN transport AS t
        ON c.transportfrom = t.id
    WHERE c.make IN ('DAIHATSU', 'DODGE', 'JEEP', 'KIA', 'LANCIA')
)
SELECT
    make,
    -- CASE yields NULL outside the range, and COUNT skips NULLs.
    COUNT(CASE WHEN rn >= 51 AND rn <= 100 THEN 1 END) AS CountInResponse,
    COUNT(*) AS total
FROM cte
GROUP BY make
I got result
make | CountInResponse | total
DAIHATSU | 0 | 5
DODGE | 0 | 2
JEEP | 0 | 14
KIA | 10 | 39
LANCIA | 17 | 17
But how to get only result which > 0?
make | CountInResponse | total
KIA | 10 | 39
LANCIA | 17 | 17
-- Same per-make counts as the original query, keeping only makes that have at
-- least one car in positions 51..100.
WITH cte AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY c.make ASC, c.model ASC) AS rn,
        c.id,
        c.make
    FROM cars AS c
    LEFT JOIN transport AS t
        ON c.transportfrom = t.id
    WHERE c.make IN ('DAIHATSU', 'DODGE', 'JEEP', 'KIA', 'LANCIA')
)
SELECT
    make,
    COUNT(CASE WHEN rn >= 51 AND rn <= 100 THEN 1 END) AS CountInResponse,
    COUNT(*) AS total
FROM cte
GROUP BY make
-- HAVING filters groups after aggregation, so the aggregate is repeated here.
HAVING COUNT(CASE WHEN rn >= 51 AND rn <= 100 THEN 1 END) > 0
You should add a having clause at the end.

Oracle SQL: Transform rows to multiple columns

I'm using Oracle 11G and need a way to turn rows into new groups of columns in a select statement. We're transitioning to a 1:3 relationship for some of our data and need a way to get it into a view. Can you help us transform data that looks like this:
+---------+------------+
| User_Id | Station_Id |
+---------+------------+
| 1 | 203 |
| 1 | 204 |
| 2 | 203 |
| 3 | 487 |
| 3 | 3787 |
| 3 | 738 |
+---------+------------+
into this:
+---------+-------------+-------------+---------------+
| User_Id | Station_One | Station_Two | Station_Three |
+---------+-------------+-------------+---------------+
| 1 | 203 | 204 | Null |
| 2 | 203 | Null | Null |
| 3 | 487 | 3787 | 738 |
+---------+-------------+-------------+---------------+
Let me know what ever other specifics you would like and thank you for any help you can give!
You can use row_number and self joins:
-- Number each user's stations, then self-join to place stations 1..3 in columns.
with cte as
(
    select userid, stationid,
           row_number() over (partition by userid order by stationid) rn
    from tbl
)
-- c1.rn = 1 yields exactly one row per user and each left join matches at most
-- one row (rn is unique within a user), so no DISTINCT is needed.
select c1.userid,
       c1.stationid station_one,
       c2.stationid station_two,
       c3.stationid station_three
from cte c1
left join cte c2 on c1.userid = c2.userid and c2.rn = 2
left join cte c3 on c1.userid = c3.userid and c3.rn = 3
where c1.rn = 1
See the demo
You can also do it with row_number and subqueries:
-- Number each user's stations, then look up stations 1..3 with correlated
-- scalar subqueries.
WITH cte AS (
    SELECT
        userid,
        stationid,
        ROW_NUMBER() OVER (PARTITION BY userid ORDER BY stationid) AS rn
    FROM tbl
)
-- The outer query sees every station row, so DISTINCT collapses them to one row per user.
SELECT DISTINCT
    userid,
    (SELECT pick.stationid FROM cte pick WHERE pick.userid = cte.userid AND pick.rn = 1) AS station_one,
    (SELECT pick.stationid FROM cte pick WHERE pick.userid = cte.userid AND pick.rn = 2) AS station_two,
    (SELECT pick.stationid FROM cte pick WHERE pick.userid = cte.userid AND pick.rn = 3) AS station_three
FROM cte
See the demo
The easiest way to accomplish this in my experience is to use conditional aggregation:
-- Sample rows, numbered per user, then folded into three station columns with
-- conditional aggregation.
with mydata as (
    select 1 as user_id, 203 as station_id from dual
    union all select 1, 204 from dual
    union all select 2, 203 from dual
    union all select 3, 487 from dual
    union all select 3, 3787 from dual
    union all select 3, 738 from dual
),
numbered as (
    -- Rows are numbered per user in ROWNUM order.
    -- NOTE(review): ROWNUM order is presumably the order rows are read; confirm it is stable enough for your data.
    select user_id,
           station_id,
           row_number() over (partition by user_id order by rownum) as rn
    from mydata
)
select user_id,
       max(case when rn = 1 then station_id end) as station_one,
       max(case when rn = 2 then station_id end) as station_two,
       max(case when rn = 3 then station_id end) as station_three
from numbered
group by user_id;
Just replace the mydata CTE in the above query with whatever your table's name is:
-- Number each user's stations in ROWNUM order, then fold the first three into columns.
with numbered as (
    select user_id,
           station_id,
           row_number() over (partition by user_id order by rownum) as rn
    from mytable
)
select user_id,
       max(case when rn = 1 then station_id end) as station_one,
       max(case when rn = 2 then station_id end) as station_two,
       max(case when rn = 3 then station_id end) as station_three
from numbered
group by user_id;

Return only one result for each partition of data

I'd like to be able to do some calculations by partition in BigQuery and then only output 1 row for each partition (rather than a row for every partition). E.g., if I had something like this table:
Category | Location | Count
A | 'home' | 20
A | 'work' | 10
A | 'lab' | 6
B | 'home' | 5
C | 'lab' | 15
C | 'home' | 25
And I'd like to end up with this result
Category | TopLocation | TopCount | SecondLocation | SecondCount
A | 'home' | 20 | 'work' | 10
B | 'home' | 5 | NULL | NULL
C | 'home' | 25 | 'lab' | 15
I thought I could do this with partitions, but this ends up generating a row for every value, rather than the single row that I want, so I then group by category and use FIRST. Is there a better way to do this that avoids generating so many intermediary rows (and, hopefully, avoids the 'large results' issue with window functions).
-- Per category: the location/count with the highest count and the runner-up.
SELECT
  category,
  FIRST(TopLocation) TopLocation,
  FIRST(TopCount) TopCount,
  FIRST(SecondLocation) SecondLocation,
  FIRST(SecondCount) SecondCount
FROM
  (SELECT
    category,
    -- Order by count DESC so position 1 is the largest count and position 2 the runner-up.
    -- NOTE(review): with the default window frame, NTH_VALUE(x, 2) may be NULL on a
    -- partition's first row; confirm FIRST() picks up a populated row.
    NTH_VALUE(Location, 1) OVER (PARTITION BY category ORDER BY count DESC) TopLocation,
    NTH_VALUE(Count, 1) OVER (PARTITION BY category ORDER BY count DESC) TopCount,
    NTH_VALUE(Location, 2) OVER (PARTITION BY category ORDER BY count DESC) SecondLocation,
    NTH_VALUE(Count, 2) OVER (PARTITION BY category ORDER BY count DESC) SecondCount
  FROM
    mytable
  )
GROUP BY
  category
ORDER BY
  category DESC
this should do the work:
-- Rank each category's locations by count (highest first), then spread the top
-- three into columns. Each FROM subselect below supplies one sample row.
SELECT
  category,
  FIRST(IF(pos = 1, location, NULL)) AS location_1,
  FIRST(IF(pos = 1, count, NULL)) AS count_1,
  FIRST(IF(pos = 2, location, NULL)) AS location_2,
  FIRST(IF(pos = 2, count, NULL)) AS count_2,
  FIRST(IF(pos = 3, location, NULL)) AS location_3,
  FIRST(IF(pos = 3, count, NULL)) AS count_3
FROM (
  SELECT
    ROW_NUMBER() OVER (PARTITION BY category ORDER BY count DESC) AS pos,
    *
  FROM
    (SELECT 'A' AS category, 'home' AS location, 20 AS count),
    (SELECT 'A' AS category, 'work' AS location, 10 AS count),
    (SELECT 'A' AS category, 'lab' AS location, 6 AS count),
    (SELECT 'B' AS category, 'home' AS location, 5 AS count),
    (SELECT 'C' AS category, 'lab' AS location, 15 AS count),
    (SELECT 'C' AS category, 'home' AS location, 25 AS count)
)
GROUP BY category
ORDER BY category
result:
Row category location_1 count_1 location_2 count_2 location_3 count_3
1 A home 20 work 10 lab 6
3 B home 5 null null null null
2 C home 25 lab 15 null null
but probably won't solve the problem with 'large query result' on window function
Update: A better solution with #standardSQL
https://stackoverflow.com/a/45112050/132438
How about:
-- The two highest-ranked words (by word_count) per corpus, among words with word_count > 6.
SELECT
  word,
  word_count,
  corpus,
  rank
FROM (
  SELECT
    word,
    word_count,
    corpus,
    RANK() OVER (PARTITION BY corpus ORDER BY word_count DESC) AS rank
  FROM [publicdata:samples.shakespeare]
  WHERE word_count > 6
)
-- RANK() assigns tied rows the same rank, so a corpus may return more than two rows.
WHERE rank < 3

SQL query to Calculate allocation / netting

Here is my source data,
Group | Item | Capacity
-----------------------
1 | A | 100
1 | B | 80
1 | C | 20
2 | A | 90
2 | B | 40
2 | C | 20
The above data shows the capacity to consume "something" for each item.
Now suppose I have maximum 100 allocated to each group. I want to distribute this "100" to each group upto the item's maximum capacity. So my desired output is like this:
Group | Item | Capacity | consumption
-------------------------------------
1 | A | 100 | 100
1 | B | 80 | 0
1 | C | 20 | 0
2 | A | 90 | 90
2 | B | 40 | 10
2 | C | 20 | 0
My question is how do I do it in a single SQL query (preferably avoiding any subquery construct). Please note, number of items in each group is not fixed.
I was trying LAG() with running SUM(), but could not quite produce the desired output...
-- Running total of capacity per group, in item order.
-- GROUP is a reserved word, so the grouping column is referred to as group_id here.
select
    group_id, item, capacity,
    sum(capacity) over (partition by group_id order by item range between UNBOUNDED PRECEDING AND CURRENT ROW) run_tot
from table_name
Without a subquery using just the analytic SUM function:
SQL> create table mytable (group_id, item, capacity) as
  2  select 1, 'A', 100 from dual
  3  union all select 1, 'B', 80 from dual
  4  union all select 1, 'C', 20 from dual
  5  union all select 2, 'A', 90 from dual
  6  union all select 2, 'B', 40 from dual
  7  union all select 2, 'C', 20 from dual
  8  /
SQL> select group_id
  2       , item
  3       , capacity
  4       -- consumption = (running total capped at 100) - (running total before this row, capped at 100)
  5       -- both sums use explicit ROWS frames so rows that tie on item are not counted twice
  6       , least(sum(capacity) over (partition by group_id order by item rows between unbounded preceding and current row), 100)
  7         - least(nvl(sum(capacity) over (partition by group_id order by item rows between unbounded preceding and 1 preceding), 0), 100) consumption
  8    from mytable
  9  /
GROUP_ID I CAPACITY CONSUMPTION
---------- - ---------- -----------
1 A 100 100
1 B 80 0
1 C 20 0
2 A 90 90
2 B 40 10
2 C 20 0
6 rows selected.
Here's a solution using recursive subquery factoring. This clearly ignores your preference to avoid subqueries, but doing this in one pass might be impossible.
Probably the only way to do this in one pass is to use MODEL, which I'm not allowed to code after midnight. Maybe someone waking up in Europe can figure it out.
--Distribute 100 units per group across its items in item order, using
--recursive subquery factoring: each step hands the left-over to the next item.
with ranked_items as
(
--Rank the items within each group by item. row_number() breaks ties arbitrarily.
select group_id, item, capacity,
row_number() over (partition by group_id order by item) consumer_rank
from consumption
),
consumer(group_id, item, consumer_rank, capacity, consumption, left_over) as
(
--Anchor member: get the first item of each group and distribute as much of the 100 as possible.
select
group_id,
item,
consumer_rank,
capacity,
least(100, capacity) consumption,
100 - least(100, capacity) left_over
from ranked_items
where consumer_rank = 1
union all
--Recursive member: find the next row by GROUP_ID and the artificial CONSUMER_RANK.
--Distribute as much left-over from previous consumption as possible.
select
ranked_items.group_id,
ranked_items.item,
ranked_items.consumer_rank,
ranked_items.capacity,
least(left_over, ranked_items.capacity) consumption,
left_over - least(left_over, ranked_items.capacity) left_over
from ranked_items
join consumer
on ranked_items.group_id = consumer.group_id
and ranked_items.consumer_rank = consumer.consumer_rank + 1
)
--One output row per item with the amount allocated to it.
select group_id, item, capacity, consumption
from consumer
order by group_id, item;
Sample data:
-- Sample data: per-group item capacities.
create table consumption(group_id number, item varchar2(1), capacity number);
-- An explicit column list keeps the insert correct if the table's column order ever changes.
insert into consumption (group_id, item, capacity)
select 1, 'A' , 100 from dual union all
select 1, 'B' , 80 from dual union all
select 1, 'C' , 20 from dual union all
select 2, 'A' , 90 from dual union all
select 2, 'B' , 40 from dual union all
select 2, 'C' , 20 from dual;
commit;
Does this work as expected?
-- Allocate 100 units per group across its items in item order.
WITH t AS
(SELECT GROUP_ID, item, capacity,
    -- Running capacity total up to and including this item.
    SUM(capacity) OVER (PARTITION BY GROUP_ID ORDER BY item ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) sum_run,
    -- Units still unallocated after this item (never negative).
    GREATEST(100 - SUM(capacity) OVER (PARTITION BY GROUP_ID ORDER BY item ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), 0) AS remain
FROM table_name)
SELECT t.*,
    -- An item consumes at most its own capacity and at most what the previous item
    -- left over (100 for the first item in the group). Comparing against sum_run
    -- instead of capacity over-allocates, e.g. capacities 30 then 20 would give 50
    -- to the second item.
    LEAST(capacity, LAG(remain, 1, 100) OVER (PARTITION BY GROUP_ID ORDER BY item)) AS run_tot
FROM t
select group_id,item,capacity,(case when rn=1 then capacity else 0 end) consumption
from
(select group_id,item,capacity,
row_number() over (partition by group_id order by capacity desc) rn from mytable)