SELECT users with at least 2 identical products - sql

Users table:
user_id name
1 john
2 mark
3 scott
4 piter
user_products table:
user_id product_id
1 2
1 4
1 5
2 4
2 5
2 7
3 1
3 5
3 4
3 2
4 1
As we can see, users 1, 2 and 3 all have products 4 and 5. How can I select the users who have at least 2 identical products?

One option is to use a self join:
-- Users who share at least two distinct products with one and the same
-- other user.
-- The per-partner grouping inside EXISTS matters: counting shared products
-- across all other users together would also match a user who shares only
-- one product with each of two different users.
SELECT
    u.user_id,
    u.name
FROM Users u
WHERE EXISTS (
    SELECT 1
    FROM user_products up1
    INNER JOIN user_products up2
        ON up1.product_id = up2.product_id
        AND up1.user_id <> up2.user_id
    WHERE up1.user_id = u.user_id
    -- one group per "other user" that shares something with u
    GROUP BY up2.user_id
    HAVING COUNT(DISTINCT up1.product_id) > 1
);
Demo
The idea here is to try to match each of the records for a given user to records from a different user, but having the same product. The aggregation step then checks if a given user still has at least two products after the inner join, which implies that in fact he does have at least two products in common with some other user.
The matching users here are: john, mark, and scott

A self join is the right approach, but I think the right logic is:
-- One row per pair of users that have two or more distinct products in
-- common. The strict ordering on user_id lists each pair once and never
-- pairs a user with itself.
SELECT
    up1.user_id,
    up2.user_id
FROM user_products AS up1
INNER JOIN user_products AS up2
    ON up2.product_id = up1.product_id
    AND up2.user_id > up1.user_id
GROUP BY
    up1.user_id,
    up2.user_id
HAVING
    COUNT(DISTINCT up1.product_id) > 1;
If you want the list of products, you can include array_agg(distinct up1.product_id).

You can try this as well:
-- Users who have at least two distinct products in common with the same
-- other user. The inner query builds one row per ordered pair of users that
-- qualifies; the outer query reduces that to the distinct users.
-- No user or product counts are hard-coded, so it is not tied to one data set.
select distinct p.user_id
from (
    select a.user_id, b.user_id as other_user_id
    from #UserProduct a
    join #UserProduct b
        on b.product_id = a.product_id
        and b.user_id <> a.user_id
    group by a.user_id, b.user_id
    having count(distinct a.product_id) >= 2
) p

This time, I hope this can help you:
-- Sample users, loaded into the temp table #User.
-- Column names come from the first branch of the UNION ALL.
SELECT
    A.[user_id],
    A.[user_name]
INTO #User
FROM (
    SELECT '1' AS [user_id], 'john' AS [user_name]
    UNION ALL SELECT '2', 'mark'
    UNION ALL SELECT '3', 'scott'
    UNION ALL SELECT '4', 'piter'
) AS A;

-- Sample user/product ownership rows, loaded into the temp table #UserProduct.
SELECT
    A.[user_id],
    A.[product_id]
INTO #UserProduct
FROM (
    SELECT '1' AS [user_id], '2' AS [product_id]
    UNION ALL SELECT '1', '4'
    UNION ALL SELECT '1', '5'
    UNION ALL SELECT '2', '4'
    UNION ALL SELECT '2', '5'
    UNION ALL SELECT '2', '7'
    UNION ALL SELECT '3', '1'
    UNION ALL SELECT '3', '5'
    UNION ALL SELECT '3', '4'
    UNION ALL SELECT '3', '2'
    UNION ALL SELECT '4', '1'
) AS A;
-- Users who own at least one pair of products that some other user owns too.
-- Owning two products is not enough by itself: the same pair must have at
-- least two distinct holders, which is what SharedPair checks.
Select Holder.[user_id]
From (
    -- Every product pair held by each user; the "<" keeps one ordering per pair.
    Select
        U1.[user_id],
        U1.[product_id] As Product_id1,
        U2.[product_id] As Product_id2
    From #UserProduct As U1
    Inner Join #UserProduct As U2
        On U2.[user_id] = U1.[user_id]
        And U1.[product_id] < U2.[product_id]
) As Holder
Inner Join (
    -- Product pairs that are held by two or more different users.
    Select
        U1.[product_id] As Product_id1,
        U2.[product_id] As Product_id2
    From #UserProduct As U1
    Inner Join #UserProduct As U2
        On U2.[user_id] = U1.[user_id]
        And U1.[product_id] < U2.[product_id]
    Group By U1.[product_id], U2.[product_id]
    Having Count(Distinct U1.[user_id]) >= 2
) As SharedPair
    On SharedPair.Product_id1 = Holder.Product_id1
    And SharedPair.Product_id2 = Holder.Product_id2
Group By Holder.[user_id]
Result :

Hope this can help you:
-- Sample users, loaded into the temp table #User.
-- Column names come from the first branch of the UNION ALL.
SELECT
    A.[user_id],
    A.[user_name]
INTO #User
FROM (
    SELECT '1' AS [user_id], 'john' AS [user_name]
    UNION ALL SELECT '2', 'mark'
    UNION ALL SELECT '3', 'scott'
    UNION ALL SELECT '4', 'piter'
) AS A;

-- Sample user/product ownership rows, loaded into the temp table #UserProduct.
SELECT
    A.[user_id],
    A.[product_id]
INTO #UserProduct
FROM (
    SELECT '1' AS [user_id], '2' AS [product_id]
    UNION ALL SELECT '1', '4'
    UNION ALL SELECT '1', '5'
    UNION ALL SELECT '2', '4'
    UNION ALL SELECT '2', '5'
    UNION ALL SELECT '2', '7'
    UNION ALL SELECT '3', '1'
    UNION ALL SELECT '3', '5'
    UNION ALL SELECT '3', '4'
    UNION ALL SELECT '3', '2'
    UNION ALL SELECT '4', '1'
) AS A;
-- Every user together with the number of products they own.
-- The count is only filled in for users owning two or more products;
-- the LEFT JOIN keeps the remaining users with a NULL qty_product.
SELECT
    U.[user_id],
    U.[user_name],
    P.[qty_product]
FROM #User AS U
LEFT JOIN (
    SELECT
        UP.[user_id],
        COUNT(*) AS [qty_product]
    FROM #UserProduct AS UP
    GROUP BY UP.[user_id]
    HAVING COUNT(*) >= 2
) AS P
    ON U.[user_id] = P.[user_id];
Result :

Related

How can I group ids by frequency and then exposure type

I am trying to create a frequency table where I can see the number of times an app has been opened after it has been exposed to something (first_impression).
I also want this grouped by exposure type. When I add the CASE statement around exposure, my number of launches turns to one, why is that happening?
I think I'm making a mistake with the grouping?
-- Rebuild the per-id launch summary from scratch on every run.
DROP TABLE IF EXISTS launches_post_exposure;

CREATE TEMP TABLE launches_post_exposure (
    id                VARCHAR(36),
    app_launches      INTEGER,
    launches_category VARCHAR(50),
    exposure          VARCHAR(50)
);

-- One row per (id, exposure bucket) holding the number of launches in that
-- bucket. Each launch is classified first and only counted afterwards.
-- Grouping by op.open_date / ex.first_impression, which is what putting the
-- CASE next to the COUNT forces, gives one group per distinct launch
-- timestamp, and that is why the launch count collapses to 1.
INSERT INTO launches_post_exposure (id, app_launches, launches_category, exposure)
SELECT
    la.id,
    la.app_launches,
    -- 1..9 are reported as-is, everything from 10 upwards shares one bucket.
    CASE
        WHEN la.app_launches BETWEEN 1 AND 9 THEN CAST(la.app_launches AS VARCHAR(50))
        WHEN la.app_launches >= 10 THEN '>=10'
    END AS launches_category,
    la.exposure
FROM (
    SELECT
        classified.id,
        COUNT(classified.open_date) AS app_launches,
        classified.exposure
    FROM (
        -- A launch at exactly first_impression matches no branch and ends
        -- up with a NULL exposure.
        SELECT
            op.id,
            op.open_date,
            CASE
                WHEN op.open_date > ex.first_impression THEN 'post_exposure'
                WHEN op.open_date < ex.first_impression THEN 'pre_exposure'
                WHEN ex.first_impression IS NULL THEN 'Unexposed'
            END AS exposure
        FROM opened AS op
        LEFT JOIN exposed AS ex
            ON op.id = ex.id
    ) AS classified
    GROUP BY
        classified.id,
        classified.exposure
) AS la;

-- Frequency table: number of distinct ids per launch bucket and exposure type.
-- exposure must be part of the GROUP BY because it is returned un-aggregated.
SELECT
    COUNT(DISTINCT id),
    launches_category,
    exposure
FROM launches_post_exposure
GROUP BY
    launches_category,
    exposure
ORDER BY
    launches_category,
    exposure;

How to get max status as a column?

I want to create a column that shows whether it is the max order_status as TRUE or FALSE based on created_at.
Is there a way to achieve this without a subquery in Snowflake?
Here is my example data:
-- Sample order history for id 'A'; column names are declared on the CTE.
WITH t1 (id, status, created_at) AS (
    SELECT 'A', 'created',    '2021-05-18 18:30:00'::timestamp UNION ALL
    SELECT 'A', 'created',    '2021-05-19 11:30:00'::timestamp UNION ALL
    SELECT 'A', 'pending',    '2021-05-19 12:00:00'::timestamp UNION ALL
    SELECT 'A', 'successful', '2021-05-20 18:30:00'::timestamp
)
Using windowed MAX:
-- Flags the row(s) carrying the latest created_at of each id.
-- The windowed MAX is evaluated per id, so no subquery or self-join is needed.
WITH t1 AS (
    SELECT 'A' AS id, 'created' AS status, CAST('2021-05-18 18:30:00' AS timestamp) AS created_at
    UNION ALL
    SELECT 'A', 'created', CAST('2021-05-19 11:30:00' AS timestamp)
    UNION ALL
    SELECT 'A', 'pending', CAST('2021-05-19 12:00:00' AS timestamp)
    UNION ALL
    SELECT 'A', 'successful', CAST('2021-05-20 18:30:00' AS timestamp)
)
SELECT
    t1.*,
    MAX(created_at) OVER (PARTITION BY id) = created_at AS is_final_order_status
FROM t1;
Output:
A cased row_number could work
-- Marks the newest row of each id with the string 'TRUE', all others 'FALSE'.
-- Rows are numbered per id from the latest created_at downwards.
SELECT
    id,
    status,
    created_at,
    CASE ROW_NUMBER() OVER (PARTITION BY id ORDER BY created_at DESC)
        WHEN 1 THEN 'TRUE'
        ELSE 'FALSE'
    END AS is_final_order_status
FROM t1

How to flag id's by a condition?

I want to create a column that flags whether an id has a straight order process, i.e. ids which don't have order_status pending or info_required.
e.g. id a has pending, so is_straight will be false. b has no pending or info_required, so it should be true.
Here is the example data:
-- Sample order histories for ids a, b and c; column names are declared on the CTE.
WITH t1 (id, status, created_at) AS (
    SELECT 'a', 'created',       '2021-11-02 15:04:07'::timestamp UNION ALL
    SELECT 'a', 'created',       '2021-11-03 13:23:34'::timestamp UNION ALL
    SELECT 'a', 'pending',       '2021-11-07 04:04:46'::timestamp UNION ALL
    SELECT 'a', 'successful',    '2021-11-07 13:25:05'::timestamp UNION ALL
    SELECT 'b', 'created',       '2021-11-11 16:19:07'::timestamp UNION ALL
    SELECT 'b', 'successful',    '2021-11-13 17:57:55'::timestamp UNION ALL
    SELECT 'c', 'created',       '2021-11-15 01:09:23'::timestamp UNION ALL
    SELECT 'c', 'info_required', '2021-11-17 11:06:00'::timestamp UNION ALL
    SELECT 'c', 'successful',    '2021-11-21 23:35:46'::timestamp
)
Using windowed COUNT_IF:
-- Flags every row of an id as "straight" when none of that id's rows has the
-- status 'pending' or 'info_required'.
-- The column in t1 is named status (not order_status), so the predicate
-- must reference status.
SELECT
    *,
    COUNT_IF(status IN ('pending', 'info_required')) OVER (PARTITION BY id) = 0
        AS is_straight
FROM t1;
Output:

How to build a closed funnel of user steps in Big Query?

Please help me with the BigQuery query. I need to build a closed funnel of user steps events in a mobile app for a week.
The table looks like this:
It is necessary to collect all unique users who have passed from step 1 to step 2 and so on to step 6 during this period. Between these steps, they could do something else, be distracted by other events. But what is important is the passage of each unique user through these steps in a given period of time.
Please tell me how to create such a funnel?
There can be multiple ways of achieving this. Here is an approach using identical sample data, which is not the most optimal but is very self-explanatory and definite:
-- Closed funnel over steps 1 -> 4: a user counts for step N only if they
-- passed steps 1..N in that order (other events may happen in between).
with data as (
    -- sample events: one row per (user, timestamp, step)
    select 'a' as user_id, cast('2020-01-01 04:45:00' as timestamp) as event_timestamp, '1' as step_name
    union all
    select 'b' as user_id, cast('2020-01-01 04:50:00' as timestamp) as event_timestamp, '1' as step_name
    union all
    select 'a' as user_id, cast('2020-01-01 05:00:00' as timestamp) as event_timestamp, '2' as step_name
    union all
    select 'a' as user_id, cast('2020-01-01 05:15:00' as timestamp) as event_timestamp, '3' as step_name
    union all
    select 'b' as user_id, cast('2020-01-01 04:55:00' as timestamp) as event_timestamp, '2' as step_name
    union all
    select 'c' as user_id, cast('2020-01-01 04:58:00' as timestamp) as event_timestamp, '1' as step_name
    union all
    select 'a' as user_id, cast('2020-01-01 05:16:00' as timestamp) as event_timestamp, '4' as step_name
    union all
    select 'b' as user_id, cast('2020-01-01 05:16:00' as timestamp) as event_timestamp, '3' as step_name
),
data2 as (
    -- Start from every step-1 event and chain each later step onto the
    -- previous one; a step column stays NULL when the chain breaks there.
    select
        a.user_id,
        a.step_name as step_1,
        b.step_name as step_2,
        c.step_name as step_3,
        d.step_name as step_4
    from (
        select user_id, event_timestamp, step_name
        from data
        where step_name = '1'
    ) a
    left join data b
        on (a.user_id = b.user_id and a.event_timestamp < b.event_timestamp and b.step_name = '2')
    left join data c
        on (b.user_id = c.user_id and b.event_timestamp < c.event_timestamp and c.step_name = '3')
    left join data d
        on (c.user_id = d.user_id and c.event_timestamp < d.event_timestamp and d.step_name = '4')
)
-- One row per funnel step. There is deliberately no GROUP BY: an aggregate
-- without grouping always returns a row, so a step nobody reached is
-- reported with n_users = 0 instead of disappearing from the funnel.
select event_name, n_users
from (
    select 'step_1' as event_name, count(distinct user_id) as n_users
    from data2
    where step_1 is not null
    union all
    select 'step_2' as event_name, count(distinct user_id) as n_users
    from data2
    where (step_1 is not null and step_2 is not null)
    union all
    select 'step_3' as event_name, count(distinct user_id) as n_users
    from data2
    where (step_1 is not null and step_2 is not null and step_3 is not null)
    union all
    select 'step_4' as event_name, count(distinct user_id) as n_users
    from data2
    where (step_1 is not null and step_2 is not null and step_3 is not null and step_4 is not null)
)
order by event_name
You can further optimize this based on your specific filters, conditions, etc.

SQL: Trying to understand IF/ELSE

-- Sales volume and value per source label.
-- The label is derived once in the inner query and grouped on by name
-- outside, so the CASE expression does not have to be repeated in GROUP BY.
SELECT
    s.source,
    CAST(SUM(s.SalesVolume) AS DECIMAL(14, 4)) AS Volume,
    CAST(SUM(s.SalesVolume * s.CustomerPrice) AS DECIMAL(14, 4)) AS Value
FROM (
    SELECT
        CASE r.SourceId
            WHEN '1' THEN 'ITUNES'
            WHEN '2' THEN 'SFR'
            WHEN '3' THEN 'ORANGE'
            ELSE 'Others'
        END AS source,
        r.SalesVolume,
        r.CustomerPrice
    FROM Rawdata AS r
    INNER JOIN Product AS p
        ON p.ProductId = r.ProductId
    INNER JOIN Calendar AS c
        ON r.DayId = c.DayId
    WHERE c.WeekId BETWEEN 20145227 AND 20155230
        AND p.ContentFlavor IN ('SD', 'HD')
        AND p.VODEST IN ('VOD', 'EST')
        AND p.Distributor IN ('M6SND')
) AS s
GROUP BY s.source
The result of the above query is:
source Volume Value
ITUNES 48316.0000 506067.2600
This result is perfectly OK, since my source table RawData doesn't contain any values for SourceId 2 or 3.
But what I basically want is for the result to look like this:
source Volume Value
ITUNES 48316.0000 506067.2600
SFR 0 0
ORANGE 0 0
Others 0 0
If there is no value corresponding to any column parameter then I need it to be 0
I assume this could be done using IF/ELSE but not sure how?
With the help of a CTE, this is one way to do it (replace the first query with something more dynamic if you want):
with myChoices (choices)
as (
    -- the full list of source labels that must appear in the output
    select v.choices
    from (
        values ('ITUNES'), ('SFR'), ('ORANGE'), ('Others')
    ) as v (choices)
),
myQuery ([source], [Volume], [Value])
as (
    -- the aggregated sales; only labels that actually have rows show up here
    select
        case
            when r.SourceId = '1' then 'ITUNES'
            when r.SourceId = '2' then 'SFR'
            when r.SourceId = '3' then 'ORANGE'
            else 'Others'
        end,
        cast(sum(r.SalesVolume) as decimal(14, 4)),
        cast(sum(r.SalesVolume * r.CustomerPrice) as decimal(14, 4))
    from Rawdata as r
    inner join Product as p
        on p.ProductId = r.ProductId
    inner join Calendar as c
        on r.DayId = c.DayId
    where c.WeekId between 20145227 and 20155230
        and p.ContentFlavor in ('SD', 'HD')
        and p.VODEST in ('VOD', 'EST')
        and p.Distributor in ('M6SND')
    group by
        case
            when r.SourceId = '1' then 'ITUNES'
            when r.SourceId = '2' then 'SFR'
            when r.SourceId = '3' then 'ORANGE'
            else 'Others'
        end
)
-- every label is kept by the left join; labels without sales fall back to 0
select
    c.choices,
    ISNULL(q.Volume, 0) as Volume,
    ISNULL(q.Value, 0) as Value
from myChoices as c
left join myQuery as q
    on q.[source] = c.choices
Create an inline view called "Product_Inline_View", which is like
(
    -- lookup of source ids to display names; column names come from the first branch
    select 1 as SourceId, 'ITUNES' as source_name
    union all select 2, 'SFR'
    union all select 3, 'ORANGE'
)
Right join Product_Inline_View with the query you have, but without the CASE.
Then do the GROUP BY.