Outer join removing duplicated columns - sql

I'm trying to join multiple result sets, each obtained through a query like this:
select
TO_VARCHAR(CREATE_TIME, 'yyyy-MM') as YEAR_MONTH,
COUNT(1) as DESIRED_VALUE
from
MY_TABLE
where
FIELD = 'DESIRED_VALUE'
group by 1;
That results in data such as:
YEAR_MONTH DESIRED_VALUE1
2022-09 52
2022-10 117
2022-11 95
2023-01 73
YEAR_MONTH DESIRED_VALUE2
2022-11 35
2022-12 30
2023-01 29
I want to end up with a table such as:
YEAR_MONTH DESIRED_VALUE1 DESIRED_VALUE2
2022-09 52 NULL
2022-10 117 NULL
2022-11 95 35
2022-12 53 30
2023-01 73 29
I don't know in advance which dates each query will return, so I can't assume that, for instance, a left join would be enough. So I'm doing a full outer join:
with result_1 as
(
query1
),
result_2 as
(
query2
)
select *
from
result_1
full outer join result_2
on
result_1.YEAR_MONTH = result_2.YEAR_MONTH
Which gives me this:
YEAR_MONTH DESIRED_VALUE2 YEAR_MONTH_2 DESIRED_VALUE1
2023-01 29 2023-01 73
NULL NULL 2022-10 117
2022-11 35 2022-11 95
NULL NULL 2022-09 52
2022-12 30 NULL NULL
But I want to display a single YEAR_MONTH column that shows all existing values:
YEAR_MONTH DESIRED_VALUE2 DESIRED_VALUE1
2023-01 29 73
2022-10 NULL 117
2022-11 35 95
2022-09 NULL 52
2022-12 30 NULL
To resolve that, I use:
COALESCE(DESIRED_VALUE1.YEAR_MONTH, DESIRED_VALUE2.YEAR_MONTH) as YEAR_MONTH
However, if I add more data:
with result_1 as
(
select
TO_VARCHAR(CREATE_TIME, 'yyyy-MM') as YEAR_MONTH,
COUNT(1) as DESIRED_VALUE1
from
MY_TABLE
where
STATUS = 'DESIRED_VALUE1'
group by 1
),
result_2 as
(
select
TO_VARCHAR(CREATE_TIME, 'yyyy-MM') as YEAR_MONTH,
COUNT(1) as DESIRED_VALUE2
from
MY_TABLE
where
STATUS = 'DESIRED_VALUE2'
group by 1
),
result3 as
(
select
TO_VARCHAR(CREATE_TIME, 'yyyy-MM') as YEAR_MONTH,
COUNT(1) as DESIRED_VALUE3
from
MY_TABLE
where
STATUS = 'DESIRED_VALUE3'
and CONDITION = 'CONDITION'
group by 1
)
select
COALESCE(DESIRED_VALUE1.YEAR_MONTH, DESIRED_VALUE2.YEAR_MONTH, DESIRED_VALUE3.YEAR_MONTH) as YEAR_MONTH,
DESIRED_VALUE1,
DESIRED_VALUE2,
DESIRED_VALUE3
from
result_1
full outer join
result_2
on
result_1.YEAR_MONTH = result_2.YEAR_MONTH
full outer join
result_3
on
result_2.YEAR_MONTH = result_3.YEAR_MONTH
order by YEAR_MONTH desc;
I start getting repeated YEAR_MONTH
YEAR_MONTH DESIRED_VALUE1 DESIRED_VALUE2 DESIRED_VALUE3
2023-01 73 29 83
2022-12 53 30 57
2022-11 95 35 71
2022-10 NULL 39 NULL
2022-10 117 NULL NULL
2022-09 18 NULL NULL
2022-09 52 NULL NULL
I'm not sure what's the best way to approach this problem.

If I correctly understand what you're trying to accomplish, wouldn't it be a whole lot simpler (and a lot more performant) to just say
-- Single pass over MY_TABLE: conditional aggregation replaces the three CTEs
-- and the chained full outer joins, so each YEAR_MONTH appears exactly once.
-- Note: a month with no matching rows for a status shows 0 here, not NULL.
select
    to_varchar(CREATE_TIME, 'yyyy-MM') as YEAR_MONTH,
    sum(case when STATUS = 'DESIRED_VALUE1' then 1 else 0 end) as DESIRED_VALUE1,
    sum(case when STATUS = 'DESIRED_VALUE2' then 1 else 0 end) as DESIRED_VALUE2,
    -- DESIRED_VALUE3 keeps the extra CONDITION filter used by the question's third CTE
    sum(case when STATUS = 'DESIRED_VALUE3' and CONDITION = 'CONDITION' then 1 else 0 end) as DESIRED_VALUE3
from MY_TABLE
-- every status must be its own quoted literal
where STATUS in ('DESIRED_VALUE1', 'DESIRED_VALUE2', 'DESIRED_VALUE3')
group by 1
order by 1;

try this
-- Stack both result sets into one shape, padding the column each side lacks
-- with NULL, then collapse to one row per date. SUM skips NULLs, so a date
-- present in only one input keeps NULL for the other value.
with cte (date, value1, value2) as (
    select date, value1, null from query1
    -- UNION ALL: plain UNION would drop duplicate rows before they are summed
    union all
    select date, null, value2 from query2
)
select
    date,
    sum(value1) as value1,
    sum(value2) as value2
from cte
group by date;

Related

Calculate and find the second day of the week in SQL

Let's say I have data like this
CustomerID
Trans_date
C001
01-sep-22
C001
04-sep-22
C001
14-sep-22
C002
03-sep-22
C002
01-sep-22
C002
18-sep-22
C002
20-sep-22
C003
02-sep-22
C003
28-sep-22
C004
08-sep-22
C004
18-sep-22
But I'm unable to find the first and second transaction based on Trans_date.
I wish for the result to look like this:
CustomerID
Trans_week
first
second
C001
35
35
37
C001
35
35
37
C001
37
35
37
C002
35
35
37
C002
35
35
37
C002
37
35
37
C002
38
35
37
C003
35
35
39
C003
39
35
39
C004
36
36
37
C004
37
36
37
And for the last result will show like this:
CustomerID
first
second
C001
35
37
C002
35
37
C003
35
39
C004 isn't included because I only need the customer IDs whose first transaction falls in the 1st week.
You may use ROW_NUMBER() function -inside a subquery- to get the first and second transaction dates for a customer, then use conditional MAX window function on the results of that subquery.
-- Number each customer's transactions chronologically, then broadcast the week
-- of transaction #1 and transaction #2 onto every row of that customer.
WITH numbered AS (
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY CustomerID ORDER BY CustTrans) AS rn
    FROM trydata
)
SELECT
    CustomerID,
    DATEPART(week, CustTrans) AS Trans_week,
    DATEPART(week, MAX(CASE WHEN rn = 1 THEN CustTrans END) OVER (PARTITION BY CustomerID)) AS first,
    DATEPART(week, MAX(CASE WHEN rn = 2 THEN CustTrans END) OVER (PARTITION BY CustomerID)) AS second
FROM numbered
ORDER BY CustomerID, Trans_week
See a demo on SQL Server.
As you requested in the comments, if you want to select only one row per customer that showing the first and second weeks, use the following query:
-- One row per customer: keep only the two earliest transactions and pivot
-- their week numbers into the `first` and `second` columns.
WITH numbered AS (
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY CustomerID ORDER BY CustTrans) AS rn
    FROM trydata
)
SELECT
    CustomerID,
    DATEPART(week, MAX(CASE WHEN rn = 1 THEN CustTrans END)) AS first,
    DATEPART(week, MAX(CASE WHEN rn = 2 THEN CustTrans END)) AS second
FROM numbered
WHERE rn <= 2
GROUP BY CustomerID
ORDER BY CustomerID
See a demo.
-- One row per customer: the week number of the earliest transaction and the
-- next later week in which the customer transacted (0 when there is none).
-- The first week is aggregated once per customer instead of once per
-- transaction row, which removes the need for an arbitrary ROW_NUMBER pick.
WITH first_weeks AS (
    SELECT CustomerID,
           MIN(DATEPART(week, CustTrans)) AS FirstWeek
    FROM trydata
    GROUP BY CustomerID
)
SELECT
    fw.CustomerID,
    fw.FirstWeek,
    -- integer 0 (not the string '0') keeps the column type consistent
    COALESCE(
        (SELECT MIN(DATEPART(week, c.CustTrans))
         FROM trydata c
         WHERE c.CustomerID = fw.CustomerID
           AND DATEPART(week, c.CustTrans) > fw.FirstWeek),
        0) AS SecondWeek
FROM first_weeks fw
Output:
Example 2 :
WITH CTE (CustomerID, FIrstWeek, RN) AS (
    -- one row per customer and week, numbered starting from the earliest week
    SELECT CustomerID,
           MIN(DATEPART(week, CustTrans)) TransWeek,
           ROW_NUMBER() OVER (PARTITION BY CustomerID ORDER BY DATEPART(week, CustTrans) ASC)
    FROM TryData
    GROUP BY CustomerID, DATEPART(week, CustTrans)
)
SELECT CTE.CustomerID, CTE.FIrstWeek,
       -- MIN instead of TOP 1 without ORDER BY: TOP 1 may return any later
       -- week, whereas the second week must be the smallest one after the first
       (SELECT MIN(DATEPART(week, c.CustTrans))
        FROM trydata c
        WHERE c.CustomerID = CTE.CustomerID
          AND DATEPART(week, c.CustTrans) > CTE.FIrstWeek
       ) SecondWeek
FROM CTE
WHERE RN = 1  -- keep only the earliest-week row of each customer
FIddle Demo
Edit: This can be done in an easier and less complex way.

Snowflake SQL - Count Distinct Users within descending time interval

I want to count the distinct amount of users over the last 60 days, and then, count the distinct amount of users over the last 59 days, and so on and so forth.
Ideally, the output would look like this (TARGET OUTPUT)
Day Distinct Users
60 200
59 200
58 188
57 185
56 180
[...] [...]
where 60 days is the max total possible distinct users, and then 59 would have a little less and so on and so forth.
my query looks like this.
select
count(distinct (case when datediff(day,DATE,current_date) <= 60 then USER_ID end)) as day_60,
count(distinct (case when datediff(day,DATE,current_date) <= 59 then USER_ID end)) as day_59,
count(distinct (case when datediff(day,DATE,current_date) <= 58 then USER_ID end)) as day_58
FROM Table
The issue with my query is that it outputs the data by column instead of by row (as shown below) and, most importantly, I have to write out this logic 60 times, once for each of the 60 days.
Current Output:
Day_60 Day_59 Day_58
209 207 207
Is it possible to write the SQL in a way that creates the target as shown initially above?
Using below data in CTE format -
with data_cte(dates,userid) as
(select * from values
('2022-05-01'::date,'UID1'),
('2022-05-01'::date,'UID2'),
('2022-05-02'::date,'UID1'),
('2022-05-02'::date,'UID2'),
('2022-05-03'::date,'UID1'),
('2022-05-03'::date,'UID2'),
('2022-05-03'::date,'UID3'),
('2022-05-04'::date,'UID1'),
('2022-05-04'::date,'UID1'),
('2022-05-04'::date,'UID2'),
('2022-05-04'::date,'UID3'),
('2022-05-04'::date,'UID4'),
('2022-05-05'::date,'UID1'),
('2022-05-06'::date,'UID1'),
('2022-05-07'::date,'UID1'),
('2022-05-07'::date,'UID2'),
('2022-05-08'::date,'UID1')
)
Query to get all dates and count and distinct counts -
select dates,count(userid) cnt, count(distinct userid) cnt_d
from data_cte
group by dates;
DATES
CNT
CNT_D
2022-05-01
2
2
2022-05-02
2
2
2022-05-03
3
3
2022-05-04
5
4
2022-05-05
1
1
2022-05-06
1
1
2022-05-08
1
1
2022-05-07
2
2
Query to get difference of date from current date
select dates,datediff(day,dates,current_date()) ddiff,
count(userid) cnt,
count(distinct userid) cnt_d
from data_cte
group by dates;
DATES
DDIFF
CNT
CNT_D
2022-05-01
45
2
2
2022-05-02
44
2
2
2022-05-03
43
3
3
2022-05-04
42
5
4
2022-05-05
41
1
1
2022-05-06
40
1
1
2022-05-08
38
1
1
2022-05-07
39
2
2
Get records with a date difference within a certain range only -
include a HAVING clause
select datediff(day,dates,current_date()) ddiff,
count(userid) cnt,
count(distinct userid) cnt_d
from data_cte
group by dates
having ddiff<=43;
DDIFF
CNT
CNT_D
43
3
3
42
5
4
41
1
1
39
2
2
38
1
1
40
1
1
If you need to prefix 'day' to each date diff count, you can
add an outer query over the previously fetched data set and add the needed prefix to the date diff column as follows -
I am using CTE syntax, but you may use a sub-query, given that you will select from a table -
,cte_1 as (
select datediff(day,dates,current_date()) ddiff,
count(userid) cnt,
count(distinct userid) cnt_d
from data_cte
group by dates
having ddiff<=43)
select 'day_'||to_char(ddiff) days,
cnt,
cnt_d
from cte_1;
DAYS
CNT
CNT_D
day_43
3
3
day_42
5
4
day_41
1
1
day_39
2
2
day_38
1
1
day_40
1
1
Updated the answer to get distinct user count for number of days range.
A clause can be included in the final query to limit to number of days needed.
with data_cte(dates,userid) as
(select * from values
('2022-05-01'::date,'UID1'),
('2022-05-01'::date,'UID2'),
('2022-05-02'::date,'UID1'),
('2022-05-02'::date,'UID2'),
('2022-05-03'::date,'UID5'),
('2022-05-03'::date,'UID2'),
('2022-05-03'::date,'UID3'),
('2022-05-04'::date,'UID1'),
('2022-05-04'::date,'UID6'),
('2022-05-04'::date,'UID2'),
('2022-05-04'::date,'UID3'),
('2022-05-04'::date,'UID4'),
('2022-05-05'::date,'UID7'),
('2022-05-06'::date,'UID1'),
('2022-05-07'::date,'UID8'),
('2022-05-07'::date,'UID2'),
('2022-05-08'::date,'UID9')
),cte_1 as
(select datediff(day,dates,current_date()) ddiff,userid
from data_cte), cte_2 as
(select distinct ddiff from cte_1 )
select cte_2.ddiff,
(select count(distinct userid)
from cte_1 where cte_1.ddiff <= cte_2.ddiff) cnt
from cte_2
order by cte_2.ddiff desc
DDIFF
CNT
47
9
46
9
45
9
44
8
43
5
42
4
41
3
40
1
You can do unpivot after getting your current output.
sample one.
select
*
from (
select
209 Day_60,
207 Day_59,
207 Day_58
)unpivot ( cnt for days in (Day_60,Day_59,Day_58));

cumulative count returning more rows than expected

base_table
eom account_id closings checkouts
2018-11-01 1 21 147
2018-12-01 1 20 214
calendar_table
month account_id
2020-11-01 1
2014-04-01 1
Based on the two tables above, I would like to create month-by-month cumulative closings and checkouts.
The calendar_table contains the months the account id is active. Thus, it is used as the main table (in the from clause).
with
base_table as (
select eom, account_id, closings, checkouts
from base_table bt
where account_id in (3,30,122,152,161,179)
)
,calendar_table as (
select ct.month, c.external_id as account_id
from calendar_table ct
left join customers c
on c.id = ct.organization_id
where account_id in (3,30,122,152,161,179)
)
,cumulative_table as (
select ct."month"
,list.account_id
,coalesce(bt.closings,0) as closings
,coalesce(sum(closings) OVER (PARTITION BY list.account_id
ORDER BY ct."month"
rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),0)
as cum_closings
,coalesce(bt.checkouts,0) as checkouts
,coalesce(sum(checkouts) OVER (PARTITION BY list.account_id
ORDER BY ct."month"
rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),0)
AS cum_checkouts
from calendar_table ct
cross join (select distinct account_id from base_table) list
left join base_table bt
on bt.account_id = list.account_id and bt.eom = ct.month
)
select *
from cumulative_table
The query above returns a cumulative table that contains duplications, probably because of the cross join.
month account_id closings cum_closings checkouts cum_checkouts
01/11/17 1 20 20 282 282
01/11/17 1 20 40 282 564
01/11/17 1 20 60 282 846
01/12/17 1 17 77 346 1192
01/12/17 1 17 94 346 1538
01/12/17 1 17 111 346 1884
I expect the query to return one month per account id.
month account_id closings cum_closings checkouts cum_checkouts
01/11/17 1 20 20 282 282
01/12/17 1 17 37 346 628
You can do it more simply:
-- NOTE(review): like the original answer, this assumes calendar_table exposes
-- both account_id and organization_id -- confirm against the real schema.
WITH list AS (
    -- calendar_table drives the result so every active month of an account is
    -- kept; base_table is matched on account AND month, otherwise each base row
    -- would pair with every calendar month and produce duplicates.
    select ct.month, ct.account_id, ct.organization_id
         , bt.closings, bt.checkouts
    from calendar_table ct
    left join base_table bt
        on bt.account_id = ct.account_id
       and bt.eom = ct.month
    where ct.account_id in (3,30,122,152,161,179)
)
select l.month, c.external_id as account_id
     -- months without activity show 0 instead of NULL
     , coalesce(l.closings,0) as closings
     -- running total per account in month order
     , coalesce( sum(l.closings) OVER (PARTITION BY l.account_id
                                       ORDER BY l.month
                                       rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
               , 0
               ) as cum_closings
     , coalesce(l.checkouts,0) as checkouts
     , coalesce( sum(l.checkouts) OVER (PARTITION BY l.account_id
                                        ORDER BY l.month
                                        rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
               , 0
               ) AS cum_checkouts
from list as l
left join customers c
    on c.id = l.organization_id ;

SQL: Get date when cumulative sum reaches a mark

I have a table in the following format:
APP_iD| Date | Impressions
113 2015-01-01 10
113 2015-01-02 5
113 2015-01-03 50
113 2015-01-04 35
113 2015-01-05 30
113 2015-01-06 75
Now, I need to know the date when cumulative SUM of those impressions crossed 65/100/150 and so on.
I tried using CASE WHEN statement:
CASE WHEN SUM(impressions) >100
THEN date
but it doesn't sum the data across the column. It just does checks against the individual row.
My final result should look like:
APP_iD | Date_65 | Date_100 | Date_150
113 2015-01-03 2015-01-04 2015-01-06
Does anyone know how to do this?
Is this even possible?
Use sum() over() to get the running sum and check for the required values with a case expression. Finally aggregate the results to get one row per each app_id.
-- For each app, the first date on which the running total of impressions
-- reached each mark. MIN over "running total >= mark" picks the earliest such
-- date and still works when the total jumps straight past a mark.
select app_id
      ,min(case when running_total >= 65  then dt end) as dt_65
      ,min(case when running_total >= 100 then dt end) as dt_100
      ,min(case when running_total >= 150 then dt end) as dt_150
from (
    select app_id
          ,dt
          ,sum(impressions) over(partition by app_id order by dt) as running_total
    from t) x
group by app_id
-- c.c is the running total of impressions per app in date order
with c as (
    select
        app_id, date,
        sum(impressions) over (partition by app_id order by date) as c
    from t
)
select app_id, s65.date, s100.date, s150.date
from
    (
        -- earliest date with running total >= 65; no upper bound, so the mark
        -- is still found when the total jumps from below 65 to 100 or more
        select distinct on (app_id) app_id, date
        from c
        where c >= 65
        order by app_id, date
    ) s65
    left join
    (
        -- earliest date with running total >= 100
        select distinct on (app_id) app_id, date
        from c
        where c >= 100
        order by app_id, date
    ) s100 using (app_id)
    left join
    (
        -- earliest date with running total >= 150
        select distinct on (app_id) app_id, date
        from c
        where c >= 150
        order by app_id, date
    ) s150 using (app_id)
;
app_id | date | date | date
--------+------------+------------+------------
113 | 2015-01-03 | 2015-01-04 | 2015-01-06
Without the pivot:
-- One row per (app, mark): the earliest date on which the running total of
-- impressions reached that mark. Each running-total row is paired with every
-- mark it satisfies, so a mark is still reported when the total jumps past it.
select distinct on (app_id, break) app_id, break, date
from (
    select
        app_id, date,
        sum(impressions) over (partition by app_id order by date) as c
    from t
) t
cross join (values (65), (100), (150)) as marks (break)
where c >= break
-- DISTINCT ON keeps the first row per (app_id, break), i.e. the earliest date
order by app_id, break, date
;
app_id | break | date
--------+-------+------------
113 | 65 | 2015-01-03
113 | 100 | 2015-01-04
113 | 150 | 2015-01-06
You can try this for desired result.
-- s is the running total of Impressions per app in date order
with t as (
    select app_id, date,
           sum(Impressions) over (partition by app_id order by date) AS s
    from tbl
)
-- first date on which the running total reached each mark; there is no upper
-- bound per mark, so a mark is still found when the total jumps past it
select app_id,
    min(case when s >= 65  then date end) AS date_65,
    min(case when s >= 100 then date end) AS date_100,
    min(case when s >= 150 then date end) AS date_150
    -- more columns to observe other sum of Impressions
from t
group by app_id
If you want to observe more sums of Impressions, just add more conditions.

How to get data with a must different conditions?

If i have tables with the following structure :
USERS :
USERID,NAME
TIMETB :
CHECKTIME,Sysdate,modified,USERID
If i have sample data like this :
USERS :
USERID NAME
434 moh
77 john
66 yara
TIMETB :
CHECKTIME USERID modified
2015-12-21 07:20:00.000 434 0
2015-12-21 08:39:00.000 434 2
2015-12-22 07:31:00.000 434 0
2015-12-21 06:55:00.000 77 0
2015-12-21 07:39:00.000 77 0
2015-12-25 07:11:00.000 66 0
2015-12-25 07:22:00.000 66 0
2015-12-25 07:50:00.000 66 2
2015-12-26 07:40:00.000 66 2
2015-12-26 07:21:00.000 66 2
Now i want to get the users who have two or more different transactions(modified) at the same date :
The result i expect is :
CHECKTIME USERID modified NAME
2015-12-21 07:20:00.000 434 0 moh
2015-12-21 08:39:00.000 434 2 moh
2015-12-25 07:11:00.000 66 0 yara
2015-12-25 07:22:00.000 66 0 yara
2015-12-25 07:50:00.000 66 2 yara
I wrote the following query, but I get more rows than I expect: I also get users whose transactions all have the same (modified) value!
SELECT a.CHECKTIME,
a.Sysdate,
(CASE WHEN a.modified = 0 THEN 'ADD' ELSE 'DELETE' END) AS modified,
b.BADGENUMBER,
b.name,
a.Emp_num AS Creator
FROM TIMETB a
INNER JOIN Users b ON a.USERID = b.USERID
WHERE YEAR(checktime) = 2015
AND MONTH(checktime) = 12
AND (
SELECT COUNT(*)
FROM TIMETB cc
WHERE cc.USERID = a.USERID
AND CONVERT(DATE, cc.CHECKTIME) = CONVERT(DATE, a.CHECKTIME)
AND cc.modified IN (0, 2)
) >= 2
AND a.modified IS NOT NULL
AND a.Emp_num IS NOT NULL
You use window functions for this:
-- Tag every timetb row with the number of rows the same user has on the same
-- calendar day, then keep the rows where that number is at least 2.
with per_day as (
    select t.*,
           count(*) over (partition by userid, cast(checktime as date)) as cnt
    from timetb t
)
select per_day.*
from per_day
where cnt >= 2;
If you want the name, just join in the appropriate table.
EDIT:
If you want different values of a column, a simple way is to compare the min and max values:
-- A user/day holds at least two different `modified` values exactly when the
-- smallest and the largest value within that user/day differ.
with per_day as (
    select t.*,
           min(modified) over (partition by userid, cast(checktime as date)) as minm,
           max(modified) over (partition by userid, cast(checktime as date)) as maxm
    from timetb t
)
select per_day.*
from per_day
where minm <> maxm;