Oracle SQL revenue YTD computation - sql

I want to write an Oracle SQL query to compute monthly YTD revenue (a cumulative sum) for all possible combinations of the given dimensions. Some months have no transactions and hence no revenue; in that case the previous month's YTD revenue must be displayed for that dimension combination. Given table:
| Month | site | channel | type | revenue |
| ----- | ---- | ------- | ---- | ------- |
| 2017-02 | abc | 1 | A | 50 |
| 2017-04 | abc | 2 | B | 100 |
| 2018-12 | xyz | 1 | A | 150 |
Sample Desired output:
| Month | site | channel | type | ytd revenue |
| ----- | ---- | ------- | ---- | ------- |
| 2017-01 | abc | 1 | A | 0 |
| 2017-02 | abc | 1 | A | 50 |
| 2017-03 | abc | 1 | A | 50 |
| 2017-04 | abc | 1 | A | 50 |
| ------ | --- | -- | -- | --- |
| 2018-12 | abc | 1 | A | 1000 |
| ----- | -- | -- | -- | --- |
| 2017-04 | abc | 2 | B | 100 |
| ---- | --- | - | - | -- |
| 2018-12 | abc | 2 | A | 10 |
| --- | -- | - | - | -- |
| 2018-12 | xyz | 1 | A | 150 |
the fiscal year starts in 1st month and ends in 12th month. So the cumulative sum or YTD revenue must be from 1st month to 12th month every year for all dimension combinations as illustrated in the sample output above.

Use a PARTITION OUTER JOIN:
-- For every (site, channel, type) combination present in a year, generate all
-- 12 months of that year via a partitioned outer join against a 12-row month
-- generator, then take a running (YTD) sum that restarts each calendar year.
SELECT ADD_MONTHS( t.year, c.month - 1 ) AS month,
       t.site,
       t.channel,
       t.type,
       SUM( COALESCE( t.revenue, 0 ) ) OVER (
         PARTITION BY t.site, t.channel, t.type, t.year
         ORDER BY c.month
         -- Explicit ROWS frame: equivalent to the default RANGE frame here
         -- (c.month is unique within each partition) but the safer idiom.
         ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
       ) AS ytd_revenue
FROM (
  -- month numbers 1 .. 12
  SELECT LEVEL AS month
  FROM DUAL
  CONNECT BY LEVEL <= 12
) c
LEFT OUTER JOIN (
  SELECT t.*,
         TRUNC( month, 'YY' ) AS year  -- first day of the row's calendar year
  FROM table_name t
) t
-- Partitioned outer join: the 12 generated months are outer-joined to EACH
-- (site, channel, type, year) combination, filling in the missing months.
PARTITION BY ( site, channel, type, year )
ON ( c.month = EXTRACT( MONTH FROM t.month ) )
-- Deterministic output order; window functions do not guarantee row order.
ORDER BY site, channel, type, month;
Which, for the sample data:
-- Test fixture: one row per (month, site, channel, type) with its revenue.
-- Oracle CTAS with an explicit column list; column types are inferred from
-- the SELECT (DATE, VARCHAR2, NUMBER, VARCHAR2, NUMBER).
CREATE TABLE table_name ( Month, site, channel, type, revenue ) AS
SELECT DATE '2017-02-01', 'abc', 1, 'A', 50 FROM DUAL UNION ALL
SELECT DATE '2017-04-01', 'abc', 2, 'B', 100 FROM DUAL UNION ALL
SELECT DATE '2018-12-01', 'xyz', 1, 'A', 150 FROM DUAL;
Outputs:
MONTH | SITE | CHANNEL | TYPE | YTD_REVENUE
:------------------ | :--- | ------: | :--- | ----------:
2017-01-01 00:00:00 | abc | 1 | A | 0
2017-02-01 00:00:00 | abc | 1 | A | 50
2017-03-01 00:00:00 | abc | 1 | A | 50
2017-04-01 00:00:00 | abc | 1 | A | 50
2017-05-01 00:00:00 | abc | 1 | A | 50
2017-06-01 00:00:00 | abc | 1 | A | 50
2017-07-01 00:00:00 | abc | 1 | A | 50
2017-08-01 00:00:00 | abc | 1 | A | 50
2017-09-01 00:00:00 | abc | 1 | A | 50
2017-10-01 00:00:00 | abc | 1 | A | 50
2017-11-01 00:00:00 | abc | 1 | A | 50
2017-12-01 00:00:00 | abc | 1 | A | 50
2017-01-01 00:00:00 | abc | 2 | B | 0
2017-02-01 00:00:00 | abc | 2 | B | 0
2017-03-01 00:00:00 | abc | 2 | B | 0
2017-04-01 00:00:00 | abc | 2 | B | 100
2017-05-01 00:00:00 | abc | 2 | B | 100
2017-06-01 00:00:00 | abc | 2 | B | 100
2017-07-01 00:00:00 | abc | 2 | B | 100
2017-08-01 00:00:00 | abc | 2 | B | 100
2017-09-01 00:00:00 | abc | 2 | B | 100
2017-10-01 00:00:00 | abc | 2 | B | 100
2017-11-01 00:00:00 | abc | 2 | B | 100
2017-12-01 00:00:00 | abc | 2 | B | 100
2018-01-01 00:00:00 | xyz | 1 | A | 0
2018-02-01 00:00:00 | xyz | 1 | A | 0
2018-03-01 00:00:00 | xyz | 1 | A | 0
2018-04-01 00:00:00 | xyz | 1 | A | 0
2018-05-01 00:00:00 | xyz | 1 | A | 0
2018-06-01 00:00:00 | xyz | 1 | A | 0
2018-07-01 00:00:00 | xyz | 1 | A | 0
2018-08-01 00:00:00 | xyz | 1 | A | 0
2018-09-01 00:00:00 | xyz | 1 | A | 0
2018-10-01 00:00:00 | xyz | 1 | A | 0
2018-11-01 00:00:00 | xyz | 1 | A | 0
2018-12-01 00:00:00 | xyz | 1 | A | 150
Or, if you want the complete date range rather than just each year:
WITH calendar ( month ) AS (
  -- One row per month, from January of the earliest data year through
  -- December of the latest data year.
  -- Fixed: the original bounds accidentally reused the fiscal-year
  -- ADD_MONTHS( ..., -3 ) shift from the April-March variant below, giving a
  -- 2016-04 .. 2019-03 range that contradicts the output shown here.
  SELECT ADD_MONTHS( start_month, LEVEL - 1 )
  FROM (
    SELECT TRUNC( MIN( month ), 'YY' ) AS start_month,                -- Jan of first year
           ADD_MONTHS( TRUNC( MAX( month ), 'YY' ), 11 ) AS end_month -- Dec of last year
    FROM table_name
  )
  CONNECT BY
    ADD_MONTHS( start_month, LEVEL - 1 ) <= end_month
)
SELECT TO_CHAR( c.month, 'YYYY-MM' ) AS month,
       t.site,
       t.channel,
       t.type,
       SUM( COALESCE( t.revenue, 0 ) ) OVER (
         -- Partition by calendar year so the running sum resets every January.
         PARTITION BY t.site, t.channel, t.type, TRUNC( c.month, 'YY' )
         ORDER BY c.month
       ) AS ytd_revenue
FROM calendar c
-- Partitioned outer join: every calendar month is outer-joined to each
-- (site, channel, type) combination.  (The inline view computing an unused
-- "year" column was removed; table_name can be joined directly.)
LEFT OUTER JOIN table_name t
PARTITION BY ( site, channel, type )
ON ( c.month = t.month )
ORDER BY
  site, channel, type, month;
Which outputs:
MONTH | SITE | CHANNEL | TYPE | YTD_REVENUE
:------------------ | :--- | ------: | :--- | ----------:
2017-01-01 00:00:00 | abc | 1 | A | 0
2017-02-01 00:00:00 | abc | 1 | A | 50
2017-03-01 00:00:00 | abc | 1 | A | 50
2017-04-01 00:00:00 | abc | 1 | A | 50
2017-05-01 00:00:00 | abc | 1 | A | 50
2017-06-01 00:00:00 | abc | 1 | A | 50
2017-07-01 00:00:00 | abc | 1 | A | 50
2017-08-01 00:00:00 | abc | 1 | A | 50
2017-09-01 00:00:00 | abc | 1 | A | 50
2017-10-01 00:00:00 | abc | 1 | A | 50
2017-11-01 00:00:00 | abc | 1 | A | 50
2017-12-01 00:00:00 | abc | 1 | A | 50
2018-01-01 00:00:00 | abc | 1 | A | 0
2018-02-01 00:00:00 | abc | 1 | A | 0
2018-03-01 00:00:00 | abc | 1 | A | 0
2018-04-01 00:00:00 | abc | 1 | A | 0
2018-05-01 00:00:00 | abc | 1 | A | 0
2018-06-01 00:00:00 | abc | 1 | A | 0
2018-07-01 00:00:00 | abc | 1 | A | 0
2018-08-01 00:00:00 | abc | 1 | A | 0
2018-09-01 00:00:00 | abc | 1 | A | 0
2018-10-01 00:00:00 | abc | 1 | A | 0
2018-11-01 00:00:00 | abc | 1 | A | 0
2018-12-01 00:00:00 | abc | 1 | A | 0
2017-01-01 00:00:00 | abc | 2 | B | 0
2017-02-01 00:00:00 | abc | 2 | B | 0
2017-03-01 00:00:00 | abc | 2 | B | 0
2017-04-01 00:00:00 | abc | 2 | B | 100
2017-05-01 00:00:00 | abc | 2 | B | 100
2017-06-01 00:00:00 | abc | 2 | B | 100
2017-07-01 00:00:00 | abc | 2 | B | 100
2017-08-01 00:00:00 | abc | 2 | B | 100
2017-09-01 00:00:00 | abc | 2 | B | 100
2017-10-01 00:00:00 | abc | 2 | B | 100
2017-11-01 00:00:00 | abc | 2 | B | 100
2017-12-01 00:00:00 | abc | 2 | B | 100
2018-01-01 00:00:00 | abc | 2 | B | 0
2018-02-01 00:00:00 | abc | 2 | B | 0
2018-03-01 00:00:00 | abc | 2 | B | 0
2018-04-01 00:00:00 | abc | 2 | B | 0
2018-05-01 00:00:00 | abc | 2 | B | 0
2018-06-01 00:00:00 | abc | 2 | B | 0
2018-07-01 00:00:00 | abc | 2 | B | 0
2018-08-01 00:00:00 | abc | 2 | B | 0
2018-09-01 00:00:00 | abc | 2 | B | 0
2018-10-01 00:00:00 | abc | 2 | B | 0
2018-11-01 00:00:00 | abc | 2 | B | 0
2018-12-01 00:00:00 | abc | 2 | B | 0
2017-01-01 00:00:00 | xyz | 1 | A | 0
2017-02-01 00:00:00 | xyz | 1 | A | 0
2017-03-01 00:00:00 | xyz | 1 | A | 0
2017-04-01 00:00:00 | xyz | 1 | A | 0
2017-05-01 00:00:00 | xyz | 1 | A | 0
2017-06-01 00:00:00 | xyz | 1 | A | 0
2017-07-01 00:00:00 | xyz | 1 | A | 0
2017-08-01 00:00:00 | xyz | 1 | A | 0
2017-09-01 00:00:00 | xyz | 1 | A | 0
2017-10-01 00:00:00 | xyz | 1 | A | 0
2017-11-01 00:00:00 | xyz | 1 | A | 0
2017-12-01 00:00:00 | xyz | 1 | A | 0
2018-01-01 00:00:00 | xyz | 1 | A | 0
2018-02-01 00:00:00 | xyz | 1 | A | 0
2018-03-01 00:00:00 | xyz | 1 | A | 0
2018-04-01 00:00:00 | xyz | 1 | A | 0
2018-05-01 00:00:00 | xyz | 1 | A | 0
2018-06-01 00:00:00 | xyz | 1 | A | 0
2018-07-01 00:00:00 | xyz | 1 | A | 0
2018-08-01 00:00:00 | xyz | 1 | A | 0
2018-09-01 00:00:00 | xyz | 1 | A | 0
2018-10-01 00:00:00 | xyz | 1 | A | 0
2018-11-01 00:00:00 | xyz | 1 | A | 0
2018-12-01 00:00:00 | xyz | 1 | A | 150
db<>fiddle here
Fiscal Years (April to March):
WITH calendar ( month ) AS (
-- One row per month in "shifted" space: each real month is moved back 3
-- months so that a fiscal year (Apr-Mar) lines up with a calendar year,
-- letting TRUNC( ..., 'YY' ) identify the fiscal year.
SELECT ADD_MONTHS( start_month, LEVEL - 1 )
FROM (
-- start: January (shifted) of the earliest fiscal year in the data;
-- end: December (shifted) of the latest fiscal year in the data.
SELECT MIN( TRUNC( ADD_MONTHS( month, -3 ), 'YY' ) ) AS start_month,
ADD_MONTHS( MAX( TRUNC( ADD_MONTHS( month, -3 ), 'YY' ) ), 11 ) AS end_month
FROM table_name
)
CONNECT BY
ADD_MONTHS( start_month, LEVEL - 1 ) <= end_month
)
-- Shift back (+3) for display so the real month is reported.
SELECT TO_CHAR( ADD_MONTHS( c.month, 3 ), 'YYYY-MM' ) AS month,
t.site,
t.channel,
t.type,
SUM( COALESCE( t.revenue, 0 ) ) OVER (
-- TRUNC of the shifted month = the fiscal year, so the running sum
-- resets every April.
PARTITION BY t.site, t.channel, t.type, TRUNC( c.month, 'YY' )
ORDER BY c.month
) AS ytd_revenue
FROM calendar c
LEFT OUTER JOIN (
-- Facts with their month shifted into the same -3 space as the calendar.
SELECT ADD_MONTHS( month, -3 ) AS month,
site,
channel,
type,
revenue,
TRUNC( ADD_MONTHS( month, -3 ), 'YY' ) AS year
FROM table_name t
) t
-- Partitioned outer join: every fiscal month per (site, channel, type).
PARTITION BY ( site, channel, type )
ON ( c.month = t.month )
ORDER BY
site, channel, type, month;
db<>fiddle here

If I understand correctly, you can use cross join to get all the rows and then left join and a cumulative sum to get the most recent value:
-- Build every month x (site, channel, type) combination with a cross join,
-- left-join the facts back in, and take a per-calendar-year running sum.
-- Fixed: the derived table is aliased "sct", but several column references
-- used a non-existent alias "sc", which would not parse.
select m.month, sct.site, sct.channel, sct.type,
       sum(t.revenue) over (partition by sct.site, sct.channel, sct.type,
                                         trunc(m.month, 'YYYY')
                            order by m.month) as ytd_revenue
from (select distinct month from t) m cross join
     (select distinct site, channel, type from t) sct left join
     t
     on t.month = m.month and t.site = sct.site and
        t.channel = sct.channel and t.type = sct.type;
This assumes that all months are available in the data. If not, you need to generate the months . . . either with an explicit list or using some sort of generator such as:
-- Recursive CTE generating the first day of every month in [2019-01, 2021-01].
-- Fixed: the upper-bound literal was DATE '2021-1-01'; ANSI date literals
-- require the two-digit DATE 'YYYY-MM-DD' form.
with months(month) as (
  select date '2019-01-01' as month
  from dual
  union all
  -- safe month arithmetic here because every value is a first-of-month date
  select month + interval '1' month
  from months
  where month < date '2021-01-01'
)

Related

cumulative amount to current_date

base_table
month id sales cumulative_sales
2021-01-01 33205 10 10
2021-02-01 33205 15 25
Based on the base table above, I would like to add more rows up to the current month,
even if there is no sales.
Expected table
month id sales cumulative_sales
2021-01-01 33205 10 10
2021-02-01 33205 15 25
2021-03-01 33205 0 25
2021-04-01 33205 0 25
2021-05-01 33205 0 25
.........
2021-11-01 33205 0 25
My query stops at
select month, id, sales,
sum(sales) over (partition by id
order by month
rows between unbounded preceding and current row) as cumulative_sales
from base_table
This works. Assumes the month column is constrained to hold only "first of the month" dates. Use the desired hard-coded start date, or use another CTE to get the earliest date from base_table:
-- Cross-join a generated month calendar with the distinct ids so every
-- (month, id) pair exists, then left-join the facts and take a running sum;
-- months with no sales row contribute nothing, so the previous cumulative
-- value carries forward.
with base_table as (
  select *
  from (values
         ('2021-01-01'::date,33205,10)
        ,('2021-02-01'       ,33205,15)
        ,('2021-01-01'       ,12345,99)
        ,('2021-04-01'       ,12345,88)
       ) dat("month",id,sales)
)
select cal.dt::date
      ,list.id
      ,coalesce(dat.sales,0) as sales
      -- coalesce turns the NULL a sum over zero matched rows produces into 0
      ,coalesce(sum(dat.sales) over (partition by list.id order by cal.dt),0) as cumulative_sales
from generate_series('2020-06-01' /* use desired start date here */,current_date,'1 month') cal(dt)
cross join (select distinct id from base_table) list
left join base_table dat on dat."month" = cal.dt and dat.id = list.id
-- deterministic output order (the original relied on incidental plan order)
order by list.id, cal.dt
;
Results:
| dt | id | sales | cumulative_sales |
+------------+-------+-------+------------------+
| 2020-06-01 | 12345 | 0 | 0 |
| 2020-07-01 | 12345 | 0 | 0 |
| 2020-08-01 | 12345 | 0 | 0 |
| 2020-09-01 | 12345 | 0 | 0 |
| 2020-10-01 | 12345 | 0 | 0 |
| 2020-11-01 | 12345 | 0 | 0 |
| 2020-12-01 | 12345 | 0 | 0 |
| 2021-01-01 | 12345 | 99 | 99 |
| 2021-02-01 | 12345 | 0 | 99 |
| 2021-03-01 | 12345 | 0 | 99 |
| 2021-04-01 | 12345 | 88 | 187 |
| 2021-05-01 | 12345 | 0 | 187 |
| 2021-06-01 | 12345 | 0 | 187 |
| 2021-07-01 | 12345 | 0 | 187 |
| 2021-08-01 | 12345 | 0 | 187 |
| 2021-09-01 | 12345 | 0 | 187 |
| 2021-10-01 | 12345 | 0 | 187 |
| 2021-11-01 | 12345 | 0 | 187 |
| 2020-06-01 | 33205 | 0 | 0 |
| 2020-07-01 | 33205 | 0 | 0 |
| 2020-08-01 | 33205 | 0 | 0 |
| 2020-09-01 | 33205 | 0 | 0 |
| 2020-10-01 | 33205 | 0 | 0 |
| 2020-11-01 | 33205 | 0 | 0 |
| 2020-12-01 | 33205 | 0 | 0 |
| 2021-01-01 | 33205 | 10 | 10 |
| 2021-02-01 | 33205 | 15 | 25 |
| 2021-03-01 | 33205 | 0 | 25 |
| 2021-04-01 | 33205 | 0 | 25 |
| 2021-05-01 | 33205 | 0 | 25 |
| 2021-06-01 | 33205 | 0 | 25 |
| 2021-07-01 | 33205 | 0 | 25 |
| 2021-08-01 | 33205 | 0 | 25 |
| 2021-09-01 | 33205 | 0 | 25 |
| 2021-10-01 | 33205 | 0 | 25 |
| 2021-11-01 | 33205 | 0 | 25 |
The cross join pairs every date output by generate_series() with every id value from base_table.
The left join ensures that no dt+id pairs get dropped from the output when no such record exists in base_table.
The coalesce() functions ensure that the sales and cumulative_sales show 0 instead of null for dt+id combinations that don't exist in base_table. Remove them if you don't mind seeing nulls in those columns.

Redshift SQL - Count Sequences of Repeating Values Within Groups

I have a table that looks like this:
| id | date_start | gap_7_days |
| -- | ------------------- | --------------- |
| 1 | 2021-06-10 00:00:00 | 0 |
| 1 | 2021-06-13 00:00:00 | 0 |
| 1 | 2021-06-19 00:00:00 | 0 |
| 1 | 2021-06-27 00:00:00 | 0 |
| 2 | 2021-07-04 00:00:00 | 1 |
| 2 | 2021-07-11 00:00:00 | 1 |
| 2 | 2021-07-18 00:00:00 | 1 |
| 2 | 2021-07-25 00:00:00 | 1 |
| 2 | 2021-08-01 00:00:00 | 1 |
| 2 | 2021-08-08 00:00:00 | 1 |
| 2 | 2021-08-09 00:00:00 | 0 |
| 2 | 2021-08-16 00:00:00 | 1 |
| 2 | 2021-08-23 00:00:00 | 1 |
| 2 | 2021-08-30 00:00:00 | 1 |
| 2 | 2021-08-31 00:00:00 | 0 |
| 2 | 2021-09-01 00:00:00 | 0 |
| 2 | 2021-08-08 00:00:00 | 1 |
| 2 | 2021-08-15 00:00:00 | 1 |
| 2 | 2021-08-22 00:00:00 | 1 |
| 2 | 2021-08-23 00:00:00 | 1 |
For each ID, I check whether consecutive date_start values are 7 days apart, and put a 1 or 0 in gap_7_days accordingly.
I want to do the following (using Redshift SQL only):
Get the length of each sequence of consecutive 1s in gap_7_days for each ID
Expected output:
| id | date_start | gap_7_days | sequence_length |
| -- | ------------------- | --------------- | --------------- |
| 1 | 2021-06-10 00:00:00 | 0 | |
| 1 | 2021-06-13 00:00:00 | 0 | |
| 1 | 2021-06-19 00:00:00 | 0 | |
| 1 | 2021-06-27 00:00:00 | 0 | |
| 2 | 2021-07-04 00:00:00 | 1 | 6 |
| 2 | 2021-07-11 00:00:00 | 1 | 6 |
| 2 | 2021-07-18 00:00:00 | 1 | 6 |
| 2 | 2021-07-25 00:00:00 | 1 | 6 |
| 2 | 2021-08-01 00:00:00 | 1 | 6 |
| 2 | 2021-08-08 00:00:00 | 1 | 6 |
| 2 | 2021-08-09 00:00:00 | 0 | |
| 2 | 2021-08-16 00:00:00 | 1 | 3 |
| 2 | 2021-08-23 00:00:00 | 1 | 3 |
| 2 | 2021-08-30 00:00:00 | 1 | 3 |
| 2 | 2021-08-31 00:00:00 | 0 | |
| 2 | 2021-09-01 00:00:00 | 0 | |
| 2 | 2021-08-08 00:00:00 | 1 | 4 |
| 2 | 2021-08-15 00:00:00 | 1 | 4 |
| 2 | 2021-08-22 00:00:00 | 1 | 4 |
| 2 | 2021-08-23 00:00:00 | 1 | 4 |
Get the number of sequences for each ID
Expected output:
| id | num_sequences |
| -- | ------------------- |
| 1 | 0 |
| 2 | 3 |
How can I achieve this?
If you want the number of sequences, just look at the previous value. When the current value is "1" and the previous is NULL or 0, then you have a new sequence.
So:
-- A new sequence starts where the current flag is 1 and the previous row's
-- flag (per id, ordered by date) is 0 or absent; counting those starts gives
-- the number of sequences per id.
-- Fixed: Redshift does not support casting a boolean expression to int with
-- ::int, so the start flag is computed with CASE instead.
select id,
       sum(case when gap_7_days = 1
                 and coalesce(prev_gap_7_days, 0) = 0
                then 1 else 0
           end) as num_sequences
from (select t.*,
             lag(gap_7_days) over (partition by id order by date_start) as prev_gap_7_days
      from t
     ) t
group by id;
If you actually want the lengths of the sequences, as in the intermediate results, then ask a new question. That information is not needed for this question.

Merge historical periods of a dimension entity into one

I have a Slowly Changing Dimension type 2 with rows that are identical (besides the Start and End dates). How do I write a clean SQL query to merge rows that are identical and have connected time periods?
Current data
+-------------+---------------------+--------------+------------+
| DimensionID | DimensionAttribute | RowStartDate | RowEndDate |
+-------------+---------------------+--------------+------------+
| 1 | SomeValue | 2019-01-01 | 2019-01-31 |
| 1 | SomeValue | 2019-02-01 | 2019-02-28 |
| 1 | AnotherValue | 2019-03-01 | 2019-03-31 |
| 1 | SomeValue | 2019-04-01 | 2019-04-30 |
| 1 | SomeValue | 2019-05-01 | 2019-05-31 |
| 2 | SomethingElse | 2019-01-01 | 2019-01-31 |
| 2 | SomethingElse | 2019-02-01 | 2019-02-28 |
| 2 | SomethingElse | 2019-03-01 | 2019-03-31 |
| 2 | CompletelyDifferent | 2019-04-01 | 2019-04-30 |
| 2 | SomethingElse | 2019-05-01 | 2019-05-31 |
+-------------+---------------------+--------------+------------+
Result
+-------------+---------------------+--------------+------------+
| DimensionID | DimensionAttribute | RowStartDate | RowEndDate |
+-------------+---------------------+--------------+------------+
| 1 | SomeValue | 2019-01-01 | 2019-02-28 |
| 1 | AnotherValue | 2019-03-01 | 2019-03-31 |
| 1 | SomeValue | 2019-04-01 | 2019-05-31 |
| 2 | SomethingElse | 2019-01-01 | 2019-03-31 |
| 2 | CompletelyDifferent | 2019-04-01 | 2019-04-30 |
| 2 | SomethingElse | 2019-05-01 | 2019-05-31 |
+-------------+---------------------+--------------+------------+
For this version of the problem, I would use lag() to determine where the groups start, then a cumulative sum and aggregation:
-- Gaps-and-islands: a new group starts whenever the previous row's end date
-- is not exactly one day before this row's start date; a running count of
-- those starts labels each island, which is then collapsed with MIN/MAX.
select dimensionid, DimensionAttribute,
       -- explicit aliases instead of the default "MIN(...)"/"MAX(...)" headers
       min(row_start_date) as row_start_date,
       max(row_end_date) as row_end_date
from (select t.*,
             -- 0 when this row continues the previous period, 1 when it
             -- starts a new one; the running sum is the island number
             sum(case when prev_red = dateadd(day, -1, row_start_date)
                      then 0 else 1
                 end) over (partition by dimensionid, DimensionAttribute
                            order by row_start_date) as grp
      from (select t.*,
                   lag(row_end_date) over (partition by dimensionid, DimensionAttribute
                                           order by row_start_date) as prev_red
            from t
           ) t
     ) t
group by dimensionid, DimensionAttribute, grp;
In particular, this will recognize gaps in the rows. It will only combine rows when they exactly fit together -- the previous end date is one day before the start date. This can be tweaked, of course, to allow a gap of 1 or 2 days or to allow overlaps.

Aggregating tsrange values into day buckets with a tie-breaker

So I've got a schema that lets people donate $ to a set of organizations, and that donation is tied to a certain arbitrary period of time. I'm working on a report that looks at each day, and for each organization shows the total number of donations and the total cumulative value of those donations for that organization's day.
For example, here's a mockup of 3 donors, Alpha (orange), Bravo (green), and Charlie (Blue) donating to 2 different organizations (Foo and Bar) over various time periods:
I've created a SQLFiddle that implements the above example in a schema that somewhat reflects what I'm working with in reality: http://sqlfiddle.com/#!17/88969/1
(The schema is broken out into more tables than what you'd come up with given the problem statement to better reflect the real-life version I'm working with)
So far, the query that I've managed to put together looks like this:
WITH report_dates AS (
SELECT '2018-01-01'::date + g AS date
FROM generate_series(0, 14) g
), organizations AS (
SELECT id AS organization_id FROM users
WHERE type = 'Organization'
)
SELECT * FROM report_dates rd
CROSS JOIN organizations o
LEFT JOIN LATERAL (
SELECT
COALESCE(sum(doa.amount_cents), 0) AS total_donations_cents,
COALESCE(count(doa.*), 0) AS total_donors
FROM users
LEFT JOIN donor_organization_amounts doa ON doa.organization_id = users.id
LEFT JOIN donor_amounts da ON da.id = doa.donor_amounts_id
LEFT JOIN donor_schedules ds ON ds.donor_amounts_id = da.id
WHERE (users.id = o.organization_id) AND (ds.period && tsrange(rd.date::timestamp, rd.date::timestamp + INTERVAL '1 day', '[)'))
) o2 ON true;
With the results looking like this:
| date | organization_id | total_donations_cents | total_donors |
|------------|-----------------|-----------------------|--------------|
| 2018-01-01 | 1 | 0 | 0 |
| 2018-01-02 | 1 | 250 | 1 |
| 2018-01-03 | 1 | 250 | 1 |
| 2018-01-04 | 1 | 1750 | 3 |
| 2018-01-05 | 1 | 1750 | 3 |
| 2018-01-06 | 1 | 1750 | 3 |
| 2018-01-07 | 1 | 750 | 2 |
| 2018-01-08 | 1 | 850 | 2 |
| 2018-01-09 | 1 | 850 | 2 |
| 2018-01-10 | 1 | 500 | 1 |
| 2018-01-11 | 1 | 500 | 1 |
| 2018-01-12 | 1 | 500 | 1 |
| 2018-01-13 | 1 | 1500 | 2 |
| 2018-01-14 | 1 | 1000 | 1 |
| 2018-01-15 | 1 | 0 | 0 |
| 2018-01-01 | 2 | 0 | 0 |
| 2018-01-02 | 2 | 250 | 1 |
| 2018-01-03 | 2 | 250 | 1 |
| 2018-01-04 | 2 | 1750 | 2 |
| 2018-01-05 | 2 | 1750 | 2 |
| 2018-01-06 | 2 | 1750 | 2 |
| 2018-01-07 | 2 | 1750 | 2 |
| 2018-01-08 | 2 | 2000 | 2 |
| 2018-01-09 | 2 | 2000 | 2 |
| 2018-01-10 | 2 | 1500 | 1 |
| 2018-01-11 | 2 | 1500 | 1 |
| 2018-01-12 | 2 | 0 | 0 |
| 2018-01-13 | 2 | 1000 | 2 |
| 2018-01-14 | 2 | 500 | 1 |
| 2018-01-15 | 2 | 0 | 0 |
That's pretty close, however the problem with this query is that on days where a donation ends and that same donor begins a new one, it should only count that donor's donation one time, using the higher amount donation as a tie-breaker for the cumulative $ count. An example of that is on 2018-01-13 for organization Foo: total_donors should be 1 and total_donations_cents 1000.
I tried to implement a tie-breaker for using DISTINCT ON but I got off into the weeds... any help would be appreciated!
Also, should I be worried about the performance implications of my implementation so far, given the CTEs and the CROSS JOIN?
Figured it out using DISTINCT ON: http://sqlfiddle.com/#!17/88969/4
-- One row per report day.
WITH report_dates AS (
SELECT '2018-01-01'::date + g AS date
FROM generate_series(0, 14) g
-- The organization ids we report on.
), organizations AS (
SELECT id AS organization_id FROM users
WHERE type = 'Organization'
-- For each (day, organization): the donations whose schedule period overlaps
-- that day, keeping at most ONE row per donor (the tie-breaker below).
), donors_by_date AS (
SELECT * FROM report_dates rd
CROSS JOIN organizations o
LEFT JOIN LATERAL (
-- DISTINCT ON + ORDER BY ... amount_cents DESC keeps, per donor, only the
-- highest-amount donation active that day (the required tie-breaker for
-- days where one donation ends and another begins).
SELECT DISTINCT ON (date, da.donor_id)
da.donor_id,
doa.id,
doa.donor_amounts_id,
doa.amount_cents
FROM users
LEFT JOIN donor_organization_amounts doa ON doa.organization_id = users.id
LEFT JOIN donor_amounts da ON da.id = doa.donor_amounts_id
LEFT JOIN donor_schedules ds ON ds.donor_amounts_id = da.id
-- half-open [day, day+1) range test against the donation's tsrange period
WHERE (users.id = o.organization_id) AND (ds.period && tsrange(rd.date::timestamp, rd.date::timestamp + INTERVAL '1 day', '[)'))
ORDER BY date, da.donor_id, doa.amount_cents DESC
) foo ON true
)
-- Aggregate the per-donor rows up to (day, organization) totals; FILTER
-- keeps days with no donors at 0 rather than counting the NULL lateral row.
SELECT
date,
organization_id,
COALESCE(SUM(amount_cents), 0) AS total_donations_cents,
COUNT(*) FILTER (WHERE donor_id IS NOT NULL) AS total_donors
FROM donors_by_date
GROUP BY date, organization_id
ORDER BY organization_id, date;
Result:
| date | organization_id | total_donations_cents | total_donors |
|------------|-----------------|-----------------------|--------------|
| 2018-01-01 | 1 | 0 | 0 |
| 2018-01-02 | 1 | 250 | 1 |
| 2018-01-03 | 1 | 250 | 1 |
| 2018-01-04 | 1 | 1750 | 3 |
| 2018-01-05 | 1 | 1750 | 3 |
| 2018-01-06 | 1 | 1750 | 3 |
| 2018-01-07 | 1 | 750 | 2 |
| 2018-01-08 | 1 | 850 | 2 |
| 2018-01-09 | 1 | 850 | 2 |
| 2018-01-10 | 1 | 500 | 1 |
| 2018-01-11 | 1 | 500 | 1 |
| 2018-01-12 | 1 | 500 | 1 |
| 2018-01-13 | 1 | 1000 | 1 |
| 2018-01-14 | 1 | 1000 | 1 |
| 2018-01-15 | 1 | 0 | 0 |
| 2018-01-01 | 2 | 0 | 0 |
| 2018-01-02 | 2 | 250 | 1 |
| 2018-01-03 | 2 | 250 | 1 |
| 2018-01-04 | 2 | 1750 | 2 |
| 2018-01-05 | 2 | 1750 | 2 |
| 2018-01-06 | 2 | 1750 | 2 |
| 2018-01-07 | 2 | 1750 | 2 |
| 2018-01-08 | 2 | 2000 | 2 |
| 2018-01-09 | 2 | 2000 | 2 |
| 2018-01-10 | 2 | 1500 | 1 |
| 2018-01-11 | 2 | 1500 | 1 |
| 2018-01-12 | 2 | 0 | 0 |
| 2018-01-13 | 2 | 1000 | 2 |
| 2018-01-14 | 2 | 500 | 1 |
| 2018-01-15 | 2 | 0 | 0 |

Get last value with delta from previous row

I have data
| account | type | position | created_date |
|---------|------|----------|------|
| 1 | 1 | 1 | 2016-08-01 00:00:00 |
| 2 | 1 | 2 | 2016-08-01 00:00:00 |
| 1 | 2 | 2 | 2016-08-01 00:00:00 |
| 2 | 2 | 1 | 2016-08-01 00:00:00 |
| 1 | 1 | 2 | 2016-08-02 00:00:00 |
| 2 | 1 | 1 | 2016-08-02 00:00:00 |
| 1 | 2 | 1 | 2016-08-03 00:00:00 |
| 2 | 2 | 2 | 2016-08-03 00:00:00 |
| 1 | 1 | 2 | 2016-08-04 00:00:00 |
| 2 | 1 | 1 | 2016-08-04 00:00:00 |
| 1 | 2 | 2 | 2016-08-07 00:00:00 |
| 2 | 2 | 1 | 2016-08-07 00:00:00 |
I need to get the last positions (account, type, position) and the delta from the previous position. I'm trying to use window functions, but I only get all rows and can't group them / get the last one.
SELECT
account,
type,
FIRST_VALUE(position) OVER w AS position,
FIRST_VALUE(position) OVER w - LEAD(position, 1, 0) OVER w AS delta,
created_date
FROM table
WINDOW w AS (PARTITION BY account ORDER BY created_date DESC)
I have result
| account | type | position | delta | created_date |
|---------|------|----------|-------|--------------|
| 1 | 1 | 1 | 1 | 2016-08-01 00:00:00 |
| 1 | 1 | 2 | 1 | 2016-08-02 00:00:00 |
| 1 | 1 | 2 | 0 | 2016-08-04 00:00:00 |
| 1 | 2 | 2 | 2 | 2016-08-01 00:00:00 |
| 1 | 2 | 1 | -1 | 2016-08-03 00:00:00 |
| 1 | 2 | 2 | 1 | 2016-08-07 00:00:00 |
| 2 | 1 | 2 | 2 | 2016-08-01 00:00:00 |
| 2 | 2 | 1 | 1 | 2016-08-01 00:00:00 |
| and so on |
but i need only last record for each account/type pair
| account | type | position | delta | created_date |
|---------|------|----------|-------|--------------|
| 1 | 1 | 2 | 0 | 2016-08-04 00:00:00 |
| 1 | 2 | 2 | 1 | 2016-08-07 00:00:00 |
| 2 | 1 | 1 | 0 | 2016-08-04 00:00:00 |
| and so on |
Sorry for my bad language and Thanks for any help.
My "best" try..
-- For every (account, type) pair: the latest position, the delta from the
-- previous position, and the date of the latest row.
-- Fixed: the window was partitioned by account only, which interleaves rows
-- of different types; it must be partitioned by (account, type).
WITH cte_delta AS (
    SELECT
        account,
        type,
        -- latest position within the partition (ordered newest first)
        FIRST_VALUE(position) OVER w AS position,
        -- latest position minus the previous one (0 when there is no previous)
        FIRST_VALUE(position) OVER w - LEAD(position, 1, 0) OVER w AS delta,
        created_date
    FROM table
    WINDOW w AS (PARTITION BY account, type ORDER BY created_date DESC)
),
-- latest created_date per (account, type), used to keep only the last row
cte_date AS (
    SELECT
        account,
        type,
        MAX(created_date) AS created_date
    FROM cte_delta
    GROUP BY account, type
)
SELECT cd.*
FROM cte_delta cd
-- ANSI join instead of the implicit comma join + WHERE filter
INNER JOIN cte_date ct
    ON cd.account = ct.account
   AND cd.type = ct.type
   AND cd.created_date = ct.created_date;