SQL Dictate number of joined rows based on column value - sql

I have a strange requirement.
Say I have two tables:
ORDER_TB
SEQ PRODUCT_ID ORDER_QTY
1 1 1
2 1 1
3 1 1
STOCK_TB
LOCATION STOCK_QTY
A1 2
B1 1
Desired Join Result:
PRODUCT_ID ORDER_QTY ASSIGNED_LOCATION
1 1 A1
1 1 A1
1 1 B1
In other words, I'd like to assign each product in the order table a location from stock_tb, based on the quantity of stock available at each location.
This doesn't look like a set operation to me. Is this possible with joins or are there any other clean alternatives in approaching this problem?

What you need to do is get cumulative sums for each of the columns -- this gives you a first and last order for each value. Then you can do a join on a range.
-- Assign each order row a stock location by matching running totals:
-- an order row joins to the location whose cumulative stock range
-- [cumeqty - stock_qty + 1, cumeqty] contains the order's cumulative quantity.
with o as (
    -- Running total of ordered quantity, accumulated in seq order.
    select o.*, ot.cumesum
    from ORDER_TB o
    outer apply (
        select sum(o2.order_qty) as cumesum
        from ORDER_TB o2
        where o2.seq <= o.seq
    ) ot
),
s as (
    -- Running total of stock quantity, accumulated in location order.
    select s.*, st.cumeqty
    from STOCK_TB s
    outer apply (
        select sum(s2.stock_qty) as cumeqty
        from STOCK_TB s2
        where s2.location <= s.location
    ) st
)
select o.*, s.location
from o
join s
    on o.cumesum between s.cumeqty - s.stock_qty + 1 and s.cumeqty;
Note: this works for the data you provided. However, if the quantities in the two tables don't align, then the logic would be more complicated.

Another approach is to use analytic functions:
-- Same assignment using window functions: number the order rows per product,
-- then pick the location with the smallest running stock total that is
-- greater than or equal to that number.
WITH cum AS (
    -- Running stock total in location order.
    SELECT s.*, SUM(stock_qty) OVER (ORDER BY location) AS cum_qty
    FROM stock_tb s
),
numbered_orders AS (
    -- Ordinal of each order row within its product, by seq.
    SELECT o.*, ROW_NUMBER() OVER (PARTITION BY product_id ORDER BY seq) AS curr_qty
    FROM order_tb o
)
SELECT x.product_id, x.order_qty, y.location AS assigned_location
FROM numbered_orders x
CROSS JOIN cum y
WHERE y.cum_qty = (SELECT MIN(z.cum_qty) FROM cum z WHERE z.cum_qty >= x.curr_qty)
   OR (y.cum_qty = x.curr_qty
       AND NOT EXISTS (SELECT 1 FROM cum z WHERE z.cum_qty > x.curr_qty))
ORDER BY x.seq
Fiddle: http://sqlfiddle.com/#!6/e41b0/1/0

Related

Find customers with at least 5 transactions in At most 3 consecutive days

I have a table in SQL Server that contains customers' transactions From 2022-02-10 to 2022-03-10.
I want to find customers that have at least 5 transactions within at most three consecutive days.
For example, output of below table should be CustomerId = 2 and customerid=3
Id
CustomerId
Transactiondate
1
1
2022-03-01
2
1
2022_03_01
3
1
2022_03_05
4
1
2022_03_07
5
1
2022_03_07
6
2
2022_03_05
7
2
2022_03_05
8
2
2022_03_06
9
2
2022_03_06
10
2
2022_03_07
1
3
2022-03-01
2
3
2022_03_01
3
3
2022_03_01
4
3
2022_03_03
5
3
2022_03_03
I tried this query but it doesn't have good performance for a large table:
-- Customers having some transaction that is accompanied by at least 4 other
-- transactions of their own within the 3-day window starting on its date.
select distinct p1.customerid
from trntbl p1
join trntbl p2
    on p2.id <> p1.id
   and p2.customerid = p1.customerid
   and p2.TransactionDate >= p1.TransactionDate
   -- Half-open window: [TransactionDate, TransactionDate + 3 days).
   and p2.TransactionDate < dateadd(day, 3, p1.TransactionDate)
group by p1.customerid, p1.id
having count(*) >= 4
If customers must have done transactions in three consecutive days (meaning that 5 transactions in a day then nothing in the next two days wouldn't count), then this can be done with two self joins:
-- Customers with transactions on three consecutive calendar days whose
-- combined transaction count over those three days is at least 5.
with cte as (
    -- One row per customer per day, with that day's transaction count.
    select CustomerId, Transactiondate, count(*) as ct
    from table_name
    group by CustomerId, Transactiondate
)
select distinct t1.CustomerId
from cte t1
inner join cte t2
    on t1.Transactiondate = dateadd(day, 1, t2.Transactiondate)
   and t1.CustomerId = t2.CustomerId
inner join cte t3
    on t2.Transactiondate = dateadd(day, 1, t3.Transactiondate)
   and t3.CustomerId = t2.CustomerId
-- Enforce the 5-transaction minimum across the three joined days.
where t1.ct + t2.ct + t3.ct >= 5
;
Fiddle
Although this is a gaps-and-islands problem, there are shortcuts you can take.
You can group it up by date, then get the row 2 previous, and filter by only rows where the 2 previous row is exactly two days apart.
-- Customers with activity on three consecutive calendar days: after collapsing
-- to one row per customer/day, a row whose second-previous day is exactly two
-- days earlier closes a three-day run.
WITH daily AS (
    -- One row per customer per calendar day.
    SELECT t.CustomerId, v.Date
    FROM YourTable t
    CROSS APPLY (VALUES (CAST(Transactiondate AS date))) v(Date)
    GROUP BY t.CustomerId, v.Date
),
lagged AS (
    -- Attach the day found two rows earlier for the same customer.
    SELECT
        CustomerId,
        Date,
        LAG(Date, 2) OVER (PARTITION BY CustomerId ORDER BY Date) AS Prev2
    FROM daily
)
SELECT DISTINCT CustomerId
FROM lagged
WHERE DATEDIFF(day, Prev2, Date) = 2
db<>fiddle
If the base table only has a maximum of one row per date then you can forgo the GROUP BY.
This is actually a gaps and islands problem, you can solve by using analytic window functions to subtract sequential row_number from consecutive days and then grouping, after first "plugging" any gaps with the help of a numbers table.
-- Customers with at least 5 transactions inside some window of at most
-- three consecutive calendar days.
with numbers as (
    -- Day offsets 0..30, enough to span the 2022-02-10 .. 2022-03-10 data range.
    -- NOTE(review): raise the TOP value if a customer's activity can span more days.
    select top (31) row_number() over (order by (select null)) - 1 as n
    from master.dbo.spt_values
),
dRanges as (
    -- First and last transaction date per customer.
    select CustomerId,
           min(Transactiondate) as CustStartDate,
           max(Transactiondate) as CustEndDate
    from t
    group by CustomerId
),
dates as (
    -- Every calendar day between a customer's first and last transaction,
    -- so days without transactions still appear as rows.
    select r.CustomerId, d.SeqDate
    from dRanges r
    outer apply (
        select dateadd(day, n.n, r.CustStartDate) as SeqDate
        from numbers n
        where dateadd(day, n.n, r.CustStartDate) <= r.CustEndDate
    ) d
),
q as (
    -- Transactions per customer per day.
    select CustomerId, Transactiondate, count(*) as qty
    from t
    group by CustomerId, Transactiondate
),
g as (
    -- Rolling total over the current day and the two days before it.
    -- "dates" has one row per calendar day, so three rows are three days.
    select d.CustomerId,
           d.SeqDate,
           sum(isnull(q.qty, 0)) over (
               partition by d.CustomerId
               order by d.SeqDate
               rows between 2 preceding and current row
           ) as Qty3Day
    from dates d
    left join q
        on q.Transactiondate = d.SeqDate
       and q.CustomerId = d.CustomerId
)
select distinct customerId
from g
where Qty3Day >= 5
DB<>Fiddle
You could make use of datediff function and verify if the sum of the date differences are between 3 and 5 (provided the max of the differences is just 1) since the dates might be unique (for example customerid 2 can have transaction dates as 5,6,7,8,9 of March 2022) and this should be taken into account too.
-- Sample data held in a table variable (table variables are named with @).
declare @tbl table (id int identity, customerid int, transactiondate date)
insert into @tbl (customerid, transactiondate)
values (1,'2022-03-01')
,(1,'2022-03-01')
,(1,'2022-03-05')
,(1,'2022-03-07')
,(1,'2022-03-07')
,(2,'2022-03-05')
,(2,'2022-03-05')
,(2,'2022-03-06')
,(2,'2022-03-06')
,(2,'2022-03-07')

-- Keep customers whose 5th transaction (in date order) is reached with a
-- running maximum day gap of 1 and a running sum of day gaps between 3 and 5.
select customerid
from (
    select *
        -- Running sum and running maximum of the day gaps, in date order.
        ,sum([datediff]) over (partition by customerid order by transactiondate) [sum]
        ,max([datediff]) over (partition by customerid order by transactiondate) [max]
    from (
        select customerid, transactiondate,
            -- Day gap from this row to the next row of the same customer;
            -- on the customer's last row (no next row) the gap from the
            -- previous row to this row is used instead.
            datediff(day,
                case when lead(transactiondate, 1) over (partition by customerid order by transactiondate) is null
                     then lag(transactiondate, 1, transactiondate) over (partition by customerid order by transactiondate)
                     else transactiondate
                end,
                case when lead(transactiondate, 1) over (partition by customerid order by transactiondate) is null
                     then transactiondate
                     else lead(transactiondate, 1, transactiondate) over (partition by customerid order by transactiondate)
                end
            ) as [datediff]
            ,row_number() over (partition by customerid order by transactiondate) rownum
        from @tbl
    ) t
) t1
where t1.rownum = 5
and t1.max = 1
and t1.sum between 3 and 5

Need Full Outer Join without having Cross Join

Need to join two table without having cross join between them.
The join condition needs to be made on Tab1.month = Tab2.month
Input
Table1 Table2
Month ID Month ID
1 a 1 a
1 b 1 b
1 c 2 g
2 d 3 i
2 e 3 j
3 f 3 k
Output:
Month_Tab1 ID_Tab1 Month_Tab2 ID_Tab2
1 a 1 a
1 b 1 b
1 c Null Null
2 d 2 g
2 e Null Null
3 f 3 i
Null Null 3 j
Null Null 3 k
The above output is required without a cross join. I have tried a full outer join, but a cross join effectively happens because the month value is duplicated in both tables. A left/right join also cannot be used, as either of the tables might have the larger set of IDs.
You want a full join, but with row_number() to identify the matches:
-- Pair rows of table1 and table2 positionally within each month: number the
-- rows per month by id on both sides, then full-join on (month, row number).
with t1 as (
    select t.*, row_number() over (partition by month order by id) as rn
    from table1 t
),
t2 as (
    select t.*, row_number() over (partition by month order by id) as rn
    from table2 t
)
select
    t1.month as month_tab1,
    t1.id    as id_tab1,
    t2.month as month_tab2,
    t2.id    as id_tab2
from t1
full join t2
    on t2.month = t1.month
   and t2.rn = t1.rn
You can use a full outer join:
-- Full outer join on (month, per-month row number), with the output ordered
-- by month and then by position within the month.
WITH a AS (
    SELECT month, id,
           ROW_NUMBER() OVER (PARTITION BY month ORDER BY id) AS n
    FROM table1
),
b AS (
    SELECT month, id,
           ROW_NUMBER() OVER (PARTITION BY month ORDER BY id) AS n
    FROM table2
)
SELECT a.month, a.id, b.month, b.id
FROM a
FULL OUTER JOIN b
    ON b.month = a.month
   AND b.n = a.n
ORDER BY COALESCE(a.month, b.month), COALESCE(a.n, b.n)

SQL - Finding Customer's largest Location by Order $

I have a table with customer IDs, location IDs, and their order values. I need to select the location ID for each customer with the largest spend
Customer | Location | Order $
1 | 1A | 100
1 | 1A | 20
1 | 1B | 100
2 | 2A | 50
2 | 2B | 20
2 | 2B | 50
So I would get
Customer | Location | Order $
1 | 1A | 120
2 | 2B | 70
I tried something like this:
-- Asker's attempt: the derived table keeps only the single highest
-- customer/location booking total (TOP 1), and every ORDER_TABLE row of that
-- one customer is then joined to it.
SELECT
    a.CUST,
    a.LOC,
    c.BOOKINGS
FROM ORDER_TABLE AS a
INNER JOIN (
    SELECT DISTINCT TOP 1
        b.CUST,
        b.LOC,
        SUM(b.ORDER_VAL) AS BOOKINGS
    FROM ORDER_TABLE AS b
    GROUP BY b.CUST, b.LOC
    ORDER BY BOOKINGS DESC
) AS c
    ON a.CUST = c.CUST
Just use user variables to emulate ROW_NUMBER():
DEMO
-- Rank each customer's locations by total order value using MySQL user
-- variables (@rn, @customer) as a per-customer counter, then keep rank 1.
-- NOTE(review): this relies on the variables being evaluated in ORDER BY
-- order, which MySQL does not guarantee; confirm against the target version.
SELECT *
FROM (
    SELECT `Customer`,
           `Location`,
           SUM(`Order`) AS `Order`,
           -- Increment while the customer is unchanged; otherwise remember
           -- the new customer and restart the counter at 1.
           @rn := IF(@customer = `Customer`,
                     @rn + 1,
                     IF(@customer := `Customer`, 1, 1)
                  ) AS rn
    FROM Table1
    CROSS JOIN (SELECT @rn := 0, @customer := '') AS par
    GROUP BY `Customer`, `Location`
    ORDER BY `Customer`, SUM(`Order`) DESC
) t
WHERE t.rn = 1
First you have to sum the values for each location:
-- Total order value per customer and location.
-- `Order` is a reserved word, so it is quoted with backticks.
select Customer, Location, Sum(`Order`) as tot_order
from order_table
group by Customer, Location
then you can get the maximum order with MAX, and the top location with a combination of group_concat that will return all locations, ordered by total desc, and substring_index in order to get only the top one:
-- Per customer: the location with the highest total (first element of the
-- locations concatenated in descending total order) and that highest total.
select
    Customer,
    substring_index(
        group_concat(Location order by tot_order desc),
        ',', 1
    ) as location,
    Max(tot_order) as max_order
from (
    -- Total order value per customer and location.
    -- `Order` is a reserved word, so it is quoted with backticks.
    select Customer, Location, Sum(`Order`) as tot_order
    from order_table
    group by Customer, Location
) s
group by Customer
(if there's a tie, two locations with the same top order, this query will return just one)
This seems like an order by using aggregate function problem. Here is my stab at it;
-- Per customer/location total, kept only when it equals the customer's
-- largest location total (computed by the correlated subquery).
SELECT
    c.customer,
    c.location,
    SUM(`order`) AS `order_total`,
    (
        -- Largest per-location total for this customer.
        SELECT SUM(cm.`order`) AS `location_total`
        FROM customer cm
        WHERE cm.customer = c.customer
        GROUP BY cm.location
        ORDER BY `location_total` DESC
        LIMIT 1
    ) AS max_order_amount
FROM customer c
-- Group by both selected columns so each customer/location pair is its own group.
GROUP BY c.customer, c.location
HAVING max_order_amount = order_total
Here is the SQL fiddle. http://sqlfiddle.com/#!9/2ac0d1/1
This is how I'd handle it (maybe not the best method?) - I wrote it using a CTE first, only to see that MySQL doesn't support CTEs, then switched to writing the same subquery twice:
-- Join each customer's per-location totals to that customer's maximum total;
-- tied locations all come back.
SELECT best.Customer, loc.Location, best.MaxOrderTotal
FROM (
    -- Total per customer and location.
    SELECT Customer, Location, SUM(`Order`) AS OrderTotal
    FROM Table1
    GROUP BY Customer, Location
) AS loc
INNER JOIN (
    -- Highest per-location total for each customer.
    SELECT totals.Customer, MAX(totals.OrderTotal) AS MaxOrderTotal
    FROM (
        SELECT Customer, Location, SUM(`Order`) AS OrderTotal
        FROM Table1
        GROUP BY Customer, Location
    ) AS totals
    GROUP BY totals.Customer
) AS best
    ON best.Customer = loc.Customer
   AND best.MaxOrderTotal = loc.OrderTotal;
Edit: used the table structure provided
This solution will provide multiple rows in the event of a tie.
SQL fiddle for this solution
How about:
-- Per-location totals that equal the customer's maximum total.
-- Both conditions are match conditions, so this is written as an inner join.
select a.*
from (
    -- Total per customer and location.
    select customer, location, SUM(val) as s
    from orders
    group by customer, location
) as a
inner join (
    -- Highest per-location total for each customer.
    select customer, MAX(b.tot) as t
    from (
        select customer, location, SUM(val) as tot
        from orders
        group by customer, location
    ) as b
    group by customer
) as c
    on a.customer = c.customer
   and a.s = c.t;
with
Q_1 as
(
    -- Total order value per customer and location.
    select customer, location, sum(order_$) as order_sum
    from cust_order
    group by customer, location
),
Q_2 as
(
    -- Highest per-location total for each customer.
    select customer, max(order_sum) as order_max
    from Q_1
    group by customer
),
Q_3 as
(
    -- Locations whose total equals the customer's maximum.
    select Q_1.customer, Q_1.location, Q_1.order_sum
    from Q_1
    inner join Q_2
        on Q_1.customer = Q_2.customer
       and Q_1.order_sum = Q_2.order_max
)
select *
from Q_3
-- Ordering belongs on the outermost query; an ORDER BY inside a CTE is not
-- guaranteed to carry through (and is rejected by some engines).
order by customer, order_sum desc
Q_1 - selects normal aggregate, Q_2 - selects max(aggregate) out of Q_1 and Q_3 selects customer,location, sum(order) from Q_1 which matches with Q_2

left join without duplicate values using MIN()

I have a table_1:
id custno
1 1
2 2
3 3
and a table_2:
id custno qty descr
1 1 10 a
2 1 7 b
3 2 4 c
4 3 7 d
5 1 5 e
6 1 5 f
When I run this query to show the minimum order quantities from every customer:
-- Asker's query: each customer joined to the table_2 rows whose qty equals
-- that customer's minimum qty.
SELECT DISTINCT
    t1.custno,
    t2.qty,
    t2.descr
FROM table_1 AS t1
LEFT JOIN table_2 AS t2
    ON t2.custno = t1.custno
   AND t2.qty = (
        SELECT MIN(m.qty)
        FROM table_2 AS m
        WHERE m.custno = t1.custno
   )
Then I get this result:
custno qty descr
1 5 e
1 5 f
2 4 c
3 7 d
Customer 1 appears twice each time with the same minimum qty (& a different description) but I only want to see customer 1 appear once. I don't care if that is the record with 'e' as a description or 'f' as a description.
First of all... I'm not sure why you need to include table_1 in the queries to begin with:
-- Smallest order quantity per customer, read from table_2 alone.
SELECT
    custno,
    MIN(qty) AS min_qty
FROM table_2
GROUP BY custno;
But just in case there is other information that you need that wasn't included in the question:
-- Smallest order quantity per customer, keeping customers without any
-- table_2 rows (reported as 0). COALESCE is used as the portable null default.
select table_1.custno, coalesce(min(table_2.qty), 0) as min_qty
from table_1
left outer join table_2
    on table_1.custno = table_2.custno
group by table_1.custno;
"Generic" SQL way:
-- For each customer, join to the one table_2 row chosen by TOP 1 when that
-- customer's rows are sorted by qty.
SELECT t1.custno, t2.qty, t2.descr
FROM table_1 AS t1
INNER JOIN table_2 AS t2
    ON t2.id = (
        SELECT TOP 1 pick.id
        FROM table_2 AS pick
        WHERE pick.custno = t1.custno
        ORDER BY pick.qty
    )
SQL 2008 way (probably faster):
-- Number each customer's table_2 rows by ascending qty and keep the first.
WITH ranked AS (
    SELECT
        custno,
        qty,
        descr,
        ROW_NUMBER() OVER (PARTITION BY custno ORDER BY qty) AS RowNum
    FROM table_2
)
SELECT custno, qty, descr
FROM ranked
WHERE RowNum = 1
If you use SQL-Server you could use ROW_NUMBER and a CTE:
-- Left-join customers to their orders, number each customer's rows by
-- ascending qty, and keep row 1 per customer.
SELECT ranked.custno, ranked.qty, ranked.descr
FROM (
    SELECT
        t1.custno,
        t2.qty,
        t2.descr,
        ROW_NUMBER() OVER (PARTITION BY t1.custno ORDER BY t2.qty ASC) AS RN
    FROM table_1 AS t1
    LEFT OUTER JOIN table_2 AS t2
        ON t1.custno = t2.custno
) AS ranked
WHERE ranked.RN = 1
Demolink

only using select in sql instead of group by

I have this table:
supplier | product | qty
--------------------------
s1 | p1 | 300
s1 | p2 | 90
s2 | p3 | 89
I want to find suppliers with more than 2 products.
But only with select and where, no group by. Any suggestion?
Why would you want not to use group by is beyond me, but this might work:
-- Suppliers with more than two product rows, using only SELECT and WHERE.
-- Aliases avoid the reserved words OUTER/INNER; DISTINCT returns each
-- qualifying supplier once instead of once per row.
SELECT DISTINCT o.supplier
FROM YourTable AS o
WHERE
(
    SELECT COUNT(i.product)
    FROM YourTable AS i
    WHERE i.supplier = o.supplier
) > 2
Please bear in mind, that group by is made for stuff like that and should be used.
-- Number each supplier's rows by product; a supplier that has a row
-- numbered 3 has at least three rows.
SELECT seq.supplier
FROM (
    SELECT
        supplier,
        ROW_NUMBER() OVER (PARTITION BY supplier ORDER BY product) AS supplier_product_ordinal
    FROM YourTable
) AS seq
WHERE seq.supplier_product_ordinal = 3
But I'd expect it to be slower than using GROUP BY.
-- Suppliers having a row for which the same supplier also has a smaller
-- product and a larger product.
SELECT DISTINCT cur.supplier
FROM yourTable AS cur
WHERE EXISTS (
          SELECT *
          FROM yourTable AS lower_row
          WHERE lower_row.supplier = cur.supplier
            AND lower_row.product < cur.product
      )
  AND EXISTS (
          SELECT *
          FROM yourTable AS higher_row
          WHERE higher_row.supplier = cur.supplier
            AND higher_row.product > cur.product
      );
In the usual parts and suppliers database, this relvar is named SP:
-- Suppliers for which three rows with pairwise different part numbers exist.
select distinct first_part.SNO
from SP as first_part
inner join SP as second_part
    on second_part.SNO = first_part.SNO
   and second_part.PNO <> first_part.PNO
inner join SP as third_part
    on third_part.SNO = first_part.SNO
   and third_part.PNO <> first_part.PNO
   and third_part.PNO <> second_part.PNO;
Noting that you can use HAVING without GROUP BY:
-- Suppliers with more than two SP rows; HAVING without GROUP BY treats the
-- correlated subquery's rows as a single group.
select distinct supplier_row.SNO
from SP as supplier_row
where exists (
    select 1
    from SP as same_supplier
    where same_supplier.SNO = supplier_row.SNO
    having count(*) > 2
);
-- Attach each row's per-S row count with a windowed COUNT, then list the
-- S values whose count exceeds 2.
SELECT DISTINCT counted.S
FROM (
    SELECT *,
           COUNT(*) OVER (PARTITION BY S) AS Cnt
    FROM YourTable
) AS counted
WHERE counted.Cnt > 2
with subquery:
-- Suppliers having a row for which the same supplier has more than one row
-- with a different product (i.e. at least three distinct products).
-- The placeholder table name avoids the reserved word TABLE.
select distinct a.supplier
from YourTable a
where (
    select count(*)
    from YourTable b
    where b.supplier = a.supplier
      and b.product <> a.product
) > 1