SQL: calculate MAU by window function - sql

I'm trying unsuccessfully to calculate a MAU- monthly distinct active users, by using window functions.
I need the calculation for each day during the month, for the preceding 30 days
This is what I have so far:
select
t.datee
, t.app,i.sourcee
, i.campaign
, t.mobile
, sum(count(distinct t.user_id)) over (
PARTITION BY
date_trunc('month',datee)
, t.app
, i.sourcee
, i.campaign
, t.mobile
ORDER BY datee asc
ROWS BETWEEN 30 PRECEDING AND CURRENT ROW
)
FROM dim_x i
JOIN agg_y t
ON i.app=t.app
AND i.mobile=t.mobile
WHERE t.datee>=CURRENT_DATE-30
AND t.datee<CURRENT_DATE
GROUP BY 1,2,3,4,5
order by 1 desc
But all I get is a sum of active users by all days instead of sum of distinct users. I'm using Vertica db.
Any suggestions?

I'm not getting, really, why you should need an OLAP expression for that.
Aren't you looking for the total number of distinct users per:
year-month combination out of datee
app
sourcee (whatever that might be)
campaign
mobile (probably mobile number)
?
A simple GROUP BY would do, as far as I'm concerned. If I disregard sourcee, campaign and mobile, selecting just from one table: input for argument's sake, with some sample data I just made up, this query:
SELECT
YEAR(datee) * 100 + MONTH(datee) AS yearmonth
, app
, COUNT(DISTINCT user_id) AS monthly_active_users
FROM input
GROUP BY 1,2
ORDER BY 1
;
... would return:
YEARMONTH|app |monthly_active_users
201,601|app-a| 2
201,601|app-b| 2
201,602|app-a| 2
201,602|app-b| 2
201,603|app-a| 2
201,603|app-b| 2
201,604|app-a| 2
201,604|app-b| 2
201,605|app-a| 2
201,605|app-b| 2
201,606|app-a| 1
201,606|app-b| 1
Just editing my previous answer. You seem to need the running COUNT DISTINCT of user id-s , partitioned by several expressions.
With the input from the WITH clause below, would you need a report like this (only showing the first 12 rows of 53, ordered by datee, app)?
datee |app |user_id |running_active_users
2016-01-01|app-a|arthur | 1
2016-01-04|app-b|ford | 1
2016-01-07|app-a|trillian| 2
2016-01-10|app-b|zaphod | 2
2016-01-13|app-a|arthur | 2
2016-01-16|app-b|ford | 2
2016-01-19|app-a|trillian| 2
2016-01-22|app-b|zaphod | 2
2016-01-25|app-a|arthur | 2
2016-01-28|app-b|ford | 2
2016-01-31|app-a|trillian| 2
2016-02-03|app-b|zaphod | 2
?
If that's the case, I don't see the reason for existence of your GROUP BY clause, though.
Below is the query with GROUP BY as above with test data returning the results above in a WITH clause. Regard that input as the join between your two tables.
WITH
input(datee,app,user_id) AS (
SELECT DATE '2016-01-01','app-a','arthur'
UNION ALL SELECT DATE '2016-01-04','app-b','ford'
UNION ALL SELECT DATE '2016-01-07','app-a','trillian'
UNION ALL SELECT DATE '2016-01-10','app-b','zaphod'
UNION ALL SELECT DATE '2016-01-25','app-a','arthur'
UNION ALL SELECT DATE '2016-01-28','app-b','ford'
UNION ALL SELECT DATE '2016-03-04','app-b','ford'
UNION ALL SELECT DATE '2016-03-25','app-a','arthur'
UNION ALL SELECT DATE '2016-04-09','app-b','ford'
UNION ALL SELECT DATE '2016-04-30','app-a','arthur'
UNION ALL SELECT DATE '2016-05-06','app-a','trillian'
UNION ALL SELECT DATE '2016-05-09','app-b','zaphod'
UNION ALL SELECT DATE '2016-05-15','app-b','ford'
UNION ALL SELECT DATE '2016-06-05','app-a','arthur'
UNION ALL SELECT DATE '2016-01-13','app-a','arthur'
UNION ALL SELECT DATE '2016-01-16','app-b','ford'
UNION ALL SELECT DATE '2016-01-31','app-a','trillian'
UNION ALL SELECT DATE '2016-02-03','app-b','zaphod'
UNION ALL SELECT DATE '2016-02-06','app-a','arthur'
UNION ALL SELECT DATE '2016-02-09','app-b','ford'
UNION ALL SELECT DATE '2016-02-12','app-a','trillian'
UNION ALL SELECT DATE '2016-02-15','app-b','zaphod'
UNION ALL SELECT DATE '2016-02-18','app-a','arthur'
UNION ALL SELECT DATE '2016-02-21','app-b','ford'
UNION ALL SELECT DATE '2016-02-24','app-a','trillian'
UNION ALL SELECT DATE '2016-02-27','app-b','zaphod'
UNION ALL SELECT DATE '2016-03-01','app-a','arthur'
UNION ALL SELECT DATE '2016-03-10','app-b','zaphod'
UNION ALL SELECT DATE '2016-03-13','app-a','arthur'
UNION ALL SELECT DATE '2016-03-16','app-b','ford'
UNION ALL SELECT DATE '2016-03-28','app-b','ford'
UNION ALL SELECT DATE '2016-03-31','app-a','trillian'
UNION ALL SELECT DATE '2016-04-06','app-a','arthur'
UNION ALL SELECT DATE '2016-04-12','app-a','trillian'
UNION ALL SELECT DATE '2016-04-15','app-b','zaphod'
UNION ALL SELECT DATE '2016-04-27','app-b','zaphod'
UNION ALL SELECT DATE '2016-05-03','app-b','ford'
UNION ALL SELECT DATE '2016-05-27','app-b','ford'
UNION ALL SELECT DATE '2016-05-30','app-a','trillian'
UNION ALL SELECT DATE '2016-01-19','app-a','trillian'
UNION ALL SELECT DATE '2016-01-22','app-b','zaphod'
UNION ALL SELECT DATE '2016-03-07','app-a','trillian'
UNION ALL SELECT DATE '2016-03-19','app-a','trillian'
UNION ALL SELECT DATE '2016-03-22','app-b','zaphod'
UNION ALL SELECT DATE '2016-04-03','app-b','zaphod'
UNION ALL SELECT DATE '2016-04-18','app-a','arthur'
UNION ALL SELECT DATE '2016-04-21','app-b','ford'
UNION ALL SELECT DATE '2016-04-24','app-a','trillian'
UNION ALL SELECT DATE '2016-05-12','app-a','arthur'
UNION ALL SELECT DATE '2016-05-18','app-a','trillian'
UNION ALL SELECT DATE '2016-05-21','app-b','zaphod'
UNION ALL SELECT DATE '2016-05-24','app-a','arthur'
UNION ALL SELECT DATE '2016-06-02','app-b','zaphod'
)
SELECT
YEAR(datee) * 100 + MONTH(datee) AS YEARMONTH
, app
, COUNT(DISTINCT user_id) AS monthly_active_users
FROM input
GROUP BY 1,2
ORDER BY 1
;

Related

How can I use the LAG FUNCTION to show revenue this year vs last year in Snowflake with?

I would like to show the current revenue vs last's year's revenue in the same row per region per type. Example:For 2022-04-01, US, Type 1 --> REVENUE: 2456, REVENUE_LAST_YEAR: 4000
2021-04-01, US, Type 1 --> REVENUE: 4000, REVENUE_LAST_YEAR: 0
For some reason, the Lag formula in Snowflake is showing wrong values. Could someone please help ?
WITH
indata(dt,region,type,revenue) AS (
SELECT DATE '2021-04-01','US','Type 1',4000 UNION ALL SELECT DATE '2021-05-01','Europe','Type 2',5777
UNION ALL SELECT DATE '2021-06-01','US','Type 1',45433 UNION ALL SELECT DATE '2021-07-01','Europe','Type 2',8955
UNION ALL SELECT DATE '2021-08-01','US','Type 1',45777 UNION ALL SELECT DATE '2021-09-01','Asia','Type 1',7533
UNION ALL SELECT DATE '2021-10-01','US','Type 1',8866 UNION ALL SELECT DATE '2021-11-01','Asia','Type 2',5534
UNION ALL SELECT DATE '2021-12-01','US','Type 2',4000 UNION ALL SELECT DATE '2022-01-01','Asia','Type 1',7244
UNION ALL SELECT DATE '2022-02-01','US','Type 1',6678 UNION ALL SELECT DATE '2022-03-01','Asia','Type 1',5654
UNION ALL SELECT DATE '2022-04-01','US','Type 1',2456 UNION ALL SELECT DATE '2022-05-01','Asia','Type 1',4525
UNION ALL SELECT DATE '2022-06-01','US','Type 1',6677 UNION ALL SELECT DATE '2022-07-01','Asia','Type 1',6654
UNION ALL SELECT DATE '2022-08-01','US','Type 1',6677 UNION ALL SELECT DATE '2022-09-01','Asia','Type 2',5754
UNION ALL SELECT DATE '2022-10-01','US','Type 1',7744 UNION ALL SELECT DATE '2022-11-01','Asia','Type 2',5644
UNION ALL SELECT DATE '2022-12-01','Europe','Type 2',6775 UNION ALL SELECT DATE '2023-01-01','Asia','Type 2',6777
UNION ALL SELECT DATE '2023-02-01','Europe','Type 2',7755
)
SELECT indata.*,
lag(REVENUE, 1, 0) over (partition by region,type,revenue order by year(dt)) REVENUE_last_year
FROM indata
order by year(dt)
Partitioning by region, type and month-day:
SELECT indata.*,
LAG(REVENUE, 1, 0) over (partition by region,type, TO_VARCHAR(dt, 'mmdd')
order by dt) AS REVENUE_last_year
FROM indata
ORDER BY dt;
Output:

Rolling sum previous 12 months grouped by 2 dimensions (SQL- Snowflake)

I have the following table structure available in the C:
I am struggling in Snowflake with a query that should show me the the sum of previous 12 months for every distinct month in the table split into three dimensions .
The way reporting date 01.08.2022 for region='US' and type=1 is calculated: it is the sum of the past 12 months' row of "revenue_12_months" = 4000+ 45433+45777+ 8866+ 4000+ 6678+ 2456+ 6677+ 6677+ 7744+ 6775 + 7755
WITH
indata(dt,region,type,revenue) AS (
SELECT DATE '2021-04-01','US','Type 1',4000 UNION ALL SELECT DATE '2021-05-01','Europe','Type 2',5777
UNION ALL SELECT DATE '2021-05-01','US','Type 1',45433 UNION ALL SELECT DATE '2021-07-01','Europe','Type 2',8955
UNION ALL SELECT DATE '2021-06-01','US','Type 1',45777 UNION ALL SELECT DATE '2021-09-01','Asia','Type 1',7533
UNION ALL SELECT DATE '2021-07-01','US','Type 1',8866 UNION ALL SELECT DATE '2021-11-01','Asia','Type 2',5534
UNION ALL SELECT DATE '2021-08-01','US','Type 1',4000 UNION ALL SELECT DATE '2022-01-01','Asia','Type 1',7244
UNION ALL SELECT DATE '2021-09-01','US','Type 1',6678 UNION ALL SELECT DATE '2022-03-01','Asia','Type 1',5654
UNION ALL SELECT DATE '2021-10-01','US','Type 1',2456 UNION ALL SELECT DATE '2022-05-01','Asia','Type 1',4525
UNION ALL SELECT DATE '2021-11-01','US','Type 1',6677 UNION ALL SELECT DATE '2022-07-01','Asia','Type 1',6654
UNION ALL SELECT DATE '2021-12-01','US','Type 1',6677 UNION ALL SELECT DATE '2022-09-01','Asia','Type 2',5754
UNION ALL SELECT DATE '2022-01-01','US','Type 1',7744 UNION ALL SELECT DATE '2022-11-01','Asia','Type 2',5644
UNION ALL SELECT DATE '2022-02-01','US','Type 1',6775 UNION ALL SELECT DATE '2023-01-01','Asia','Type 2',6777
UNION ALL SELECT DATE '2022-03-01','US','Type 1',7755
)
select dt,region,type, SUM(revenue) OVER (ORDER BY dt,region,type ROWS BETWEEN 11 PRECEDING AND CURRENT ROW) revenue_12_months
from indata
You want it per region and type, so that needs to part of the partition by clause
sum(revenue) over (partition by region, type order by dt rows between 11 preceding and current row)
could it be that you simply forgot the GROUP BY part in your window function?
WITH
indata(dt,region,type,revenue) AS (
SELECT DATE '2021-04-01','US','Type 1',4000 UNION ALL SELECT DATE '2021-05-01','Europe','Type 2',5777
UNION ALL SELECT DATE '2021-05-01','US','Type 1',45433 UNION ALL SELECT DATE '2021-07-01','Europe','Type 2',8955
UNION ALL SELECT DATE '2021-06-01','US','Type 1',45777 UNION ALL SELECT DATE '2021-09-01','Asia','Type 1',7533
UNION ALL SELECT DATE '2021-07-01','US','Type 1',8866 UNION ALL SELECT DATE '2021-11-01','Asia','Type 2',5534
UNION ALL SELECT DATE '2021-08-01','US','Type 1',4000 UNION ALL SELECT DATE '2022-01-01','Asia','Type 1',7244
UNION ALL SELECT DATE '2021-09-01','US','Type 1',6678 UNION ALL SELECT DATE '2022-03-01','Asia','Type 1',5654
UNION ALL SELECT DATE '2021-10-01','US','Type 1',2456 UNION ALL SELECT DATE '2022-05-01','Asia','Type 1',4525
UNION ALL SELECT DATE '2021-11-01','US','Type 1',6677 UNION ALL SELECT DATE '2022-07-01','Asia','Type 1',6654
UNION ALL SELECT DATE '2021-12-01','US','Type 1',6677 UNION ALL SELECT DATE '2022-09-01','Asia','Type 2',5754
UNION ALL SELECT DATE '2022-01-01','US','Type 1',7744 UNION ALL SELECT DATE '2022-11-01','Asia','Type 2',5644
UNION ALL SELECT DATE '2022-02-01','US','Type 1',6775 UNION ALL SELECT DATE '2023-01-01','Asia','Type 2',6777
UNION ALL SELECT DATE '2022-03-01','US','Type 1',7755
)
select dt,region,type, SUM(revenue) OVER (PARTITION BY region, type ORDER BY dt,region,type ROWS BETWEEN 11 PRECEDING AND CURRENT ROW) revenue_12_months
from indata
ORDER BY REGION, TYPE, DT;
Best regards,
TK

PL/SQL distinct date for loop

I want to use for loop for date in my table which only cares years and months, not days.
CURSOR ret_cur is SELECT orderdate FROM Orders WHERE status
= 'DELAYED';
ret_rec ret_cur%ROWTYPE;
I currently have
insert into Orders(OrderId, CustomerId, RetailerId, ProductId, Count,
UnitPrice, OrderDate, Status) values (2,2,1,10,45,60,
to_date('20180102','YYYYMMDD'),'DELIVERED');
this data type in my orders table. (its an example for format)
I want to use DISTINCT to iterate through orderdate based on YYYY-MM. (dont care Day)
I have tried select distinct to_char(orderdate, 'YYYY-MM') but I seems to not work.
for example, if i have 20180103, 20180104, 20180105 , it should be one iteration since they all have same years and months.
To select days without time you could tunc(sysdate). For months we have to group by a char-value:
select to_char(mydatecol,'yyyymm'), count(*) from
(
select sysdate mydatecol from dual UNION ALL -- Fake-Table with some dates
select sysdate - 1 mydatecol from dual UNION ALL
select sysdate - 2 mydatecol from dual UNION ALL
select sysdate - 3 mydatecol from dual UNION ALL
select sysdate - 4 mydatecol from dual UNION ALL
select sysdate - 30 mydatecol from dual UNION ALL
select sysdate - 31 mydatecol from dual UNION ALL
select sysdate - 32 mydatecol from dual UNION ALL
select sysdate - 33 mydatecol from dual
)
group by to_char(mydatecol,'yyyymm')
Result:
201809 3
201810 6
I think you'd like to have such a collation as below :
with Orders
(
OrderId, CustomerId, RetailerId, ProductId,
Count, UnitPrice, OrderDate, Status
) as
(
select 2,2,1,10,45,60, to_date('20180102','YYYYMMDD'),'DELIVERED' from dual
)
select o.*
from Orders o
where to_char(OrderDate,'yyyy-mm')
= to_char(to_date('&myDate','yyyymmdd'),'yyyy-mm');
-- for "myDate" substitution variable use 20180103 or 20180104 or 20180105 .. etc.
The best way todo this kind of query is to truncate the date value:
SELECT CustomerId, trunc(OrderDate,'MM') OrderMonth
, sum(Count) totalCount
, sum(Count*UnitPrice) totalPrice
FROM Orders
GROUP BY CustomerId, trunc(OrderDate,'MM')
for example...

SQL counting days with gap / overlapping

I am working on a "counting days" problem almost identical to this one. I have a list of date(s), and need to count how many days used excluding duplicate, and handling the gaps. Same input and output.
From: Markus Jarderot
Input
ID d1 d2
1 2011-08-01 2011-08-08
1 2011-08-02 2011-08-06
1 2011-08-03 2011-08-10
1 2011-08-12 2011-08-14
2 2011-08-01 2011-08-03
2 2011-08-02 2011-08-06
2 2011-08-05 2011-08-09
Output
ID hold_days
1 11
2 8
SQL to find time elapsed from multiple overlapping intervals
But for the life of me I couldn't understand Markus Jarderot's solution.
SELECT DISTINCT
t1.ID,
t1.d1 AS date,
-DATEDIFF(DAY, (SELECT MIN(d1) FROM Orders), t1.d1) AS n
FROM Orders t1
LEFT JOIN Orders t2 -- Join for any events occurring while this
ON t2.ID = t1.ID -- is starting. If this is a start point,
AND t2.d1 <> t1.d1 -- it won't match anything, which is what
AND t1.d1 BETWEEN t2.d1 AND t2.d2 -- we want.
GROUP BY t1.ID, t1.d1, t1.d2
HAVING COUNT(t2.ID) = 0
Why is DATEDIFF(DAY, (SELECT MIN(d1) FROM Orders), t1.d1) picking from the min(d1) from the entire list? Is that regardless of ID.
And what does t1.d1 BETWEEN t2.d1 AND t2.d2 do? Is that to ensure only overlapped interval are calculated?
Same thing with group by, I think because if in the event the same identical period will be discarded? I tried to trace the solution by hand but getting more confused.
This is mostly a duplicate of my answer here (including explanation) but with the inclusion of grouping on an id column. It should use a single table scan and does not require a recursive sub-query factoring clause (CTE) or self joins.
SQL Fiddle
Oracle 11g R2 Schema Setup:
CREATE TABLE your_table ( id, usr, start_date, end_date ) AS
SELECT 1, 'A', DATE '2017-06-01', DATE '2017-06-03' FROM DUAL UNION ALL
SELECT 1, 'B', DATE '2017-06-02', DATE '2017-06-04' FROM DUAL UNION ALL -- Overlaps previous
SELECT 1, 'C', DATE '2017-06-06', DATE '2017-06-06' FROM DUAL UNION ALL
SELECT 1, 'D', DATE '2017-06-07', DATE '2017-06-07' FROM DUAL UNION ALL -- Adjacent to previous
SELECT 1, 'E', DATE '2017-06-11', DATE '2017-06-20' FROM DUAL UNION ALL
SELECT 1, 'F', DATE '2017-06-14', DATE '2017-06-15' FROM DUAL UNION ALL -- Within previous
SELECT 1, 'G', DATE '2017-06-22', DATE '2017-06-25' FROM DUAL UNION ALL
SELECT 1, 'H', DATE '2017-06-24', DATE '2017-06-28' FROM DUAL UNION ALL -- Overlaps previous and next
SELECT 1, 'I', DATE '2017-06-27', DATE '2017-06-30' FROM DUAL UNION ALL
SELECT 1, 'J', DATE '2017-06-27', DATE '2017-06-28' FROM DUAL UNION ALL -- Within H and I
SELECT 2, 'K', DATE '2011-08-01', DATE '2011-08-08' FROM DUAL UNION ALL -- Your data below
SELECT 2, 'L', DATE '2011-08-02', DATE '2011-08-06' FROM DUAL UNION ALL
SELECT 2, 'M', DATE '2011-08-03', DATE '2011-08-10' FROM DUAL UNION ALL
SELECT 2, 'N', DATE '2011-08-12', DATE '2011-08-14' FROM DUAL UNION ALL
SELECT 3, 'O', DATE '2011-08-01', DATE '2011-08-03' FROM DUAL UNION ALL
SELECT 3, 'P', DATE '2011-08-02', DATE '2011-08-06' FROM DUAL UNION ALL
SELECT 3, 'Q', DATE '2011-08-05', DATE '2011-08-09' FROM DUAL;
Query 1:
SELECT id,
SUM( days ) AS total_days
FROM (
SELECT id,
dt - LAG( dt ) OVER ( PARTITION BY id
ORDER BY dt ) + 1 AS days,
start_end
FROM (
SELECT id,
dt,
CASE SUM( value ) OVER ( PARTITION BY id
ORDER BY dt ASC, value DESC, ROWNUM ) * value
WHEN 1 THEN 'start'
WHEN 0 THEN 'end'
END AS start_end
FROM your_table
UNPIVOT ( dt FOR value IN ( start_date AS 1, end_date AS -1 ) )
)
WHERE start_end IS NOT NULL
)
WHERE start_end = 'end'
GROUP BY id
Results:
| ID | TOTAL_DAYS |
|----|------------|
| 1 | 25 |
| 2 | 13 |
| 3 | 9 |
The brute force method is to create all days (in a recursive query) and then count:
with dates(id, day, d2) as
(
select id, d1 as day, d2 from mytable
union all
select id, day + 1, d2 from dates where day < d2
)
select id, count(distinct day)
from dates
group by id
order by id;
Unfortunately there is a bug in some Oracle versions and recursive queries with dates don't work there. So try this code and see whether it works in your system. (I have Oracle 11.2 and the bug still exists there; so I guess you need Oracle 12c.)
I guess Markus' idea is to find all starting points that are not within other ranges and all ending points that aren't. Then just take the first starting point till the first ending point, then the next starting point till the next ending point, etc. As Markus isn't using a window function to number starting and ending points, he must find a more complicated way to achieve this. Here is the query with ROW_NUMBER. Maybe this gives you a start what to look for in Markus' query.
select startpoint.id, sum(endpoint.day - startpoint.day)
from
(
select id, d1 as day, row_number() over (partition by id order by d1) as rn
from mytable m1
where not exists
(
select *
from mytable m2
where m1.id = m2.id
and m1.d1 > m2.d1 and m1.d1 <= m2.d2
)
) startpoint
join
(
select id, d2 as day, row_number() over (partition by id order by d1) as rn
from mytable m1
where not exists
(
select *
from mytable m2
where m1.id = m2.id
and m1.d2 >= m2.d1 and m1.d2 < m2.d2
)
) endpoint on endpoint.id = startpoint.id and endpoint.rn = startpoint.rn
group by startpoint.id
order by startpoint.id;
If all your intervals start at different dates, consider them in ascending order by d1 counting how many days are from d1 to the next interval.
You can discard an interval of it is contained in another one.
The last interval won't have a follower.
This query should give you how many days each interval give
select a.id, a.d1,nvl(min(b.d1), a.d2) - a.d1
from orders a
left join orders b
on a.id = b.id and a.d1 < b.d1 and a.d2 between b.d1 and b.d2
group by a.id, a.d1
Then group by id and sum days

SQL: Dynamic Date creation issue

Need Suggestion to make it dynamic On Dates.
Expected:
Date, Total Sellers, Sellers From Previous Date
Currently:
Data in table(active_seller_codes): date, seller_code
Queries:
-- Date Wise Sellers Count
select date,count(distinct seller_code) as Sellers_COunt
from active_seller_codes where date between '2016-12-15' AND '2016-12-15'
-- Sellers from previous Days
select date,count(distinct seller_code) as Last_Day_Seller
from active_seller_codes
where date between '2016-12-15' AND '2016-12-15'
and seller_code IN(
select seller_code from active_seller_codes
where date between '2016-12-14' AND '2016-12-14'
)
group by 1
Database Using: Vertica
Reading attentively, you seem to want one row in the report, with the data from the search date in the first two columns and the data of the day before the search date in the third and fourth column, like so:
sales_date|sellers_count|prev_date |prev_sellers_count
2016-12-15| 8|2016-12-14| 5
The solution could be something like this (without the first Common Table Expression, which, in my case, contains the data, but in your case, the data would be in your active_seller_codes table.
WITH
-- initial input
(sales_date,seller_code) AS (
SELECT DATE '2016-12-15',42
UNION ALL SELECT DATE '2016-12-15',43
UNION ALL SELECT DATE '2016-12-15',44
UNION ALL SELECT DATE '2016-12-15',45
UNION ALL SELECT DATE '2016-12-15',46
UNION ALL SELECT DATE '2016-12-15',47
UNION ALL SELECT DATE '2016-12-15',48
UNION ALL SELECT DATE '2016-12-15',49
UNION ALL SELECT DATE '2016-12-14',42
UNION ALL SELECT DATE '2016-12-14',44
UNION ALL SELECT DATE '2016-12-14',46
UNION ALL SELECT DATE '2016-12-14',48
UNION ALL SELECT DATE '2016-12-14',50
UNION ALL SELECT DATE '2016-12-13',42
UNION ALL SELECT DATE '2016-12-13',43
UNION ALL SELECT DATE '2016-12-13',44
UNION ALL SELECT DATE '2016-12-13',45
UNION ALL SELECT DATE '2016-12-13',46
UNION ALL SELECT DATE '2016-12-13',47
UNION ALL SELECT DATE '2016-12-13',48
UNION ALL SELECT DATE '2016-12-13',49
)
,
-- search argument this, in the real query, would come just after the WITH keyword
-- as the above would be the source table
search_dt(search_dt) AS (SELECT DATE '2016-12-15')
,
-- the two days we're interested in, de-duped
distinct_two_days AS (
SELECT DISTINCT
sales_date
, seller_code
FROM active_seller_codes
WHERE sales_date IN (
SELECT search_dt FROM search_dt -- the search date
UNION ALL SELECT search_dt - 1 FROM search_dt -- the day before
)
)
,
-- the two days we want one above the other,
-- with index for the final pivot
vertical AS (
SELECT
ROW_NUMBER() OVER (ORDER BY sales_date DESC) AS idx
, sales_date
, count(DISTINCT seller_code) AS seller_count
FROM distinct_two_days
GROUP BY 2
)
SELECT
MAX(CASE idx WHEN 1 THEN sales_date END) AS sales_date
, SUM(CASE idx WHEN 1 THEN seller_count END) AS sellers_count
, MAX(CASE idx WHEN 2 THEN sales_date END) AS prev_date
, SUM(CASE idx WHEN 2 THEN seller_count END) AS prev_sellers_count
FROM vertical
;
sales_date|sellers_count|prev_date |prev_sellers_count
2016-12-15| 8|2016-12-14| 5