Calculating compound interest with deposits/withdrawals - sql

I'm trying to calculate the total on an interest-bearing account accounting for deposits/withdraws with BigQuery.
Example scenario:
Daily interest rate = 10%
Value added/removed on every day: [100, 0, 29, 0, -100] (negative means amount removed)
The totals for each day are:
Day 1: 0*1.1 + 100 = 100
Day 2: 100*1.1 + 0 = 110
Day 3: 110*1.1 + 29 = 150
Day 4: 150*1.1 + 0 = 165
Day 5: 165*1.1 - 100 = 81.5
This would be trivial to implement in a language like Python
daily_changes = [100, 0, 29, 0, -100]
interest_rate = 0.1
result = []
for day, change in enumerate(daily_changes):
if day == 0:
result.append(change)
else:
result.append(result[day-1]*(1+interest_rate) + change)
print(result)
# Result: [100, 110.00000000000001, 150.00000000000003, 165.00000000000006, 81.50000000000009]
My difficulty lies in calculating values for row N when they depend on row N-1 (the usual SUM(...) OVER (ORDER BY...) solution does not suffice here).
Here's a CTE to test with the mock data in this example.
-- Mock data for the example: one row per day; `change` is the amount
-- deposited (positive) or withdrawn (negative) on that day.
WITH raw_data AS (
  SELECT 1 AS day, NUMERIC '100' AS change UNION ALL
  SELECT 2, NUMERIC '0' UNION ALL
  SELECT 3, NUMERIC '29' UNION ALL
  SELECT 4, NUMERIC '0' UNION ALL
  SELECT 5, NUMERIC '-100'
)
SELECT day, change
FROM raw_data

You may try below:
-- Closed-form alternative to the recursive definition: the running total on a
-- given day equals the sum of every earlier change, each compounded once per
-- elapsed day. A change c made at array offset o (i.e. on day o+1) has accrued
-- interest for (day - o - 1) days, so it contributes c * 1.1^(day - o - 1).
SELECT day,
ROUND((SELECT SUM(c * POW(1.1, day - o - 1))
FROM t.changes c WITH OFFSET o), 2) AS totals
FROM (
-- Running ARRAY_AGG: for each row, the array of all changes up to and
-- including that day (window default is unbounded-preceding to current row).
SELECT *, ARRAY_AGG(change) OVER (ORDER BY day) changes
FROM raw_data
) t;
+-----+--------+
| day | totals |
+-----+--------+
| 1 | 100.0 |
| 2 | 110.0 |
| 3 | 150.0 |
| 4 | 165.0 |
| 5 | 81.5 |
+-----+--------+

Another option with use of recursive CTE
-- Row-by-row solution using a recursive CTE (supported in BigQuery), directly
-- mirroring the Python loop: total(n) = 1.1 * total(n-1) + change(n).
with recursive raw_data as (
select 1 as day, numeric '100' as change union all
select 2 as day, numeric '0' as change union all
select 3 as day, numeric '29' as change union all
select 4 as day, numeric '0' as change union all
select 5 as day, numeric '-100' as change
), iterations as (
-- Anchor: day 1's total is just its own deposit (no prior balance to compound).
select *, change as total
from raw_data where day = 1
union all
-- Recursive step: apply 10% interest to the previous day's total, then add
-- (or subtract) the current day's change. Joining on day = prev day + 1
-- walks the rows in order, one per iteration.
select r.day, r.change, 1.1 * i.total + r.change
from iterations i join raw_data r
on r.day = i.day + 1
)
select *
from iterations
with the same output as shown above.

Related

oracle SQL counter restarts when time difference is > x

I want to create a new column in my query whereby it takes into account the difference of the current rows datetime - previous datetime. This column could be a counter where if the difference is <-100, it stays as 1, but once there difference is > -100, the column is 0.
Ideally then I would want to only pull in the rows that come after the last 0 record.
My query:
-- Converts epoch seconds to an Oracle DATE (shifted +1 hour), keeps only the
-- last 2 days of rows, then uses LAG to pair each row with the previous one so
-- the time difference between consecutive rows can be computed.
with products as (
select * from (
select distinct
ID,
UnixDateTime,
OrderNumber,
-- epoch seconds -> DATE; the extra 1/24 shifts the result by one hour
to_date('1970-01-01','YYYY-MM-DD') + numtodsinterval(UnixDateTime,'SECOND')+ 1/24 as "Date_Time"
from DB
where
(date '1970-01-01' + UnixDateTime * interval '1' second) + interval '1' hour
> sysdate - interval '2' day
)
),
prod_prev AS (
-- Previous row's values when ordered chronologically (ASC gives the earlier row).
-- NOTE: the original post had a stray pasted fragment on the second LAG line
-- ('... "Date_Time") - "Date_Time" AS diff') which made the query a syntax
-- error; the differences are computed in run_sum below instead.
SELECT p.*,
lag("Date_Time")over(order by "Date_Time" ASC) as Previous_Time,
lag(UnixDateTime)over(order by "Date_Time" ASC) as UnixDateTime_Previous_Time
FROM products p
),
run_sum AS (
-- "Diff" is in fractional days; "UnixDateTime_Diff" is previous - current,
-- hence the negative values in the sample output.
SELECT p.*, "Date_Time"-Previous_Time as "Diff", UnixDateTime_Previous_Time-UnixDateTime AS "UnixDateTime_Diff"
FROM prod_prev p
)
SELECT * FROM run_sum
ORDER By UnixDateTime, "Date_Time" DESC
my query result from above query:
ID
UnixDateTime
OrderNumber
Date_Time
Previous_Time
diff
UnixDateTime_Diff
1
1662615688
100
08-SEP-2022 06:41:28
(null)
(null)
(null)
2
1662615752
100
08-SEP-2022 06:42:32
08-SEP-2022 06:41:28
0.00074
-64
3
1662615765
100
08-SEP-2022 06:42:45
08-SEP-2022 06:42:32
0.000150
-13
4
1662615859
100
08-SEP-2022 06:44:19
08-SEP-2022 06:42:45
0.001088
-128
5
1662615987
100
08-SEP-2022 06:46:27
08-SEP-2022 06:44:19
0.00148
-44
6
1662616031
100
08-SEP-2022 06:47:11
08-SEP-2022 06:46:27
0.00051
-36
the counter is the below example should be 1 if the UnixDateTime_Diff is < -100 and 0 if its >-100
then if I could only pull in records AFTER the most recent 0 record.
You use:
lag("Date_Time")over(order by "Date_Time" DESC)
And get the previous value when the values are ordered in DESCending order; this will get the previous higher value. If you want the previous lower value then either use:
lag("Date_Time") over (order by "Date_Time" ASC)
or
lead("Date_Time") over (order by "Date_Time" DESC)
If you want to perform row-by-row processing then, from Oracle 12, you can use MATCH_RECOGNIZE:
-- Row-pattern matching (Oracle 12+): classify each row by whether the NEXT
-- row's timestamp falls within 100 seconds of it.
SELECT id,
unixdatetime,
ordernumber,
date_time,
next_unixdatetime,
-- positive gap to the following row (NULL on the last row, which has no next)
next_unixdatetime - unixdatetime AS diff,
-- classifier() names the pattern variable each row matched as
CASE cls
WHEN 'WITHIN_100' THEN 1
ELSE 0
END AS within_100
from (
select distinct
ID,
UnixDateTime,
OrderNumber,
-- epoch seconds -> TIMESTAMP WITH TIME ZONE (UTC); avoids the DATE + 1/24 trick
TIMESTAMP '1970-01-01 00:00:00 UTC' + UnixDateTime * INTERVAL '1' SECOND
AS Date_Time
from DB
where TIMESTAMP '1970-01-01 00:00:00 UTC' + UnixDateTime * INTERVAL '1' SECOND
> SYSTIMESTAMP - INTERVAL '2' DAY
)
MATCH_RECOGNIZE(
ORDER BY unixdatetime
MEASURES
-- carry the following row's value onto the current row
NEXT(unixdatetime) AS next_unixdatetime,
classifier() AS cls
-- keep every input row in the output (not just one summary row per match)
ALL ROWS PER MATCH
-- a run of rows each within 100s of their successor, ended by one row that is not
PATTERN (within_100* any_row)
DEFINE
within_100 AS NEXT(unixdatetime) < unixdatetime + 100
) m
Which, for the sample data:
-- Sample data: six readings of the same order number, with epoch-second
-- timestamps between ~13 and ~128 seconds apart.
CREATE TABLE db (ID, UnixDateTime, OrderNumber) AS
SELECT 1, 1662615688, 100 FROM DUAL UNION ALL
SELECT 2, 1662615752, 100 FROM DUAL UNION ALL
SELECT 3, 1662615765, 100 FROM DUAL UNION ALL
SELECT 4, 1662615859, 100 FROM DUAL UNION ALL
SELECT 5, 1662615987, 100 FROM DUAL UNION ALL
SELECT 6, 1662616031, 100 FROM DUAL;
Outputs:
ID
UNIXDATETIME
ORDERNUMBER
DATE_TIME
NEXT_UNIXDATETIME
DIFF
WITHIN_100
1
1662615688
100
2022-09-08 05:41:28.000000000 UTC
1662615752
64
1
2
1662615752
100
2022-09-08 05:42:32.000000000 UTC
1662615765
13
1
3
1662615765
100
2022-09-08 05:42:45.000000000 UTC
1662615859
94
1
4
1662615859
100
2022-09-08 05:44:19.000000000 UTC
1662615987
128
0
5
1662615987
100
2022-09-08 05:46:27.000000000 UTC
1662616031
44
1
6
1662616031
100
2022-09-08 05:47:11.000000000 UTC
null
null
0
fiddle

SQL: How to Query by Reindexing Dates to Calculate Generic Daily Prognosis

I am attempting to understand the progression of my observations that are on time relative to when they were expected, regardless of the date they were expected. Therefore, I want to reindex each observation and generate a list that starts at day 0 (on the expected day) and then calculate forward for 10 more days (arbitrary).
I am testing this in BigQuery:
-- BigQuery setup script. Statements in a BigQuery script must be separated by
-- semicolons; the original was missing them after both statements.
CREATE TABLE `db.tbl` (
  id INTEGER,
  expected DATE,
  actual DATE
);

INSERT INTO `db.tbl` (id, expected, actual)
VALUES
  (1, '2022-01-01', '2022-01-02'),
  (2, '2022-01-11', '2022-01-20'),
  (3, '2022-01-21', '2022-01-20');
So, the first row represents an observation that was "missing"/"late"/"not on time" on day 0 (2022-01-01) and then "on time" from day 1 (2022-01-02) until the end of my window of interest (day 10).
The second row represents an observation that was "late" from day 0 (2022-01-11) to day 8 (2022-01-19) and "on time" after that.
The third row represents an observation that was observed early, so it should be "on time" from day 0 through day 10.
I would want the result to be:
day count fraction
0 1 0.33
1 2 0.67
2 2 0.67
3 2 0.67
4 2 0.67
5 2 0.67
6 2 0.67
7 2 0.67
8 2 0.67
9 3 1.00
10 3 1.00
Is this possible with a SELECT statement?
CREATE TEMP TABLE sample (
  id INTEGER,
  expected DATE,
  actual DATE
);

INSERT INTO sample (id, expected, actual)
VALUES
  (1, '2022-01-01', '2022-01-02'),
  (2, '2022-01-11', '2022-01-20'),
  (3, '2022-01-21', '2022-01-20');

-- For each observation build a per-day flag string over days 0..10:
-- '0' = still late on that day, '1' = on time. Splitting it with an offset
-- gives one (day, flag) row per day, which is then aggregated across rows.
WITH observations AS (
  SELECT day,
         COUNTIF(v = '1') AS count,
         (SELECT COUNT(id) FROM sample) AS total
  FROM sample,
       -- lateness in days, clamped into [0, 11]: early observations (negative
       -- diff) are on time from day 0; very late ones stay '0' for the window
       UNNEST([LEAST(GREATEST(DATE_DIFF(actual, expected, DAY), 0), 11)]) diff,
       -- 11 - diff (not 10 - diff as originally posted): days 0..10 inclusive
       -- is ELEVEN days, so the flag string must be 11 characters long —
       -- otherwise the day-10 row is missing from the output
       UNNEST(SPLIT(REPEAT('0', diff) || REPEAT('1', 11 - diff), '')) v WITH OFFSET day
  GROUP BY 1
)
SELECT day, count, ROUND(count / total, 2) AS fraction
FROM observations;
output:
Consider below
-- For each observation, materialize days 0..10 after `expected` and flag each
-- day late (0) or on time (1); then count/average the flags per day.
select day, sum(ontime) cnt, round(avg(ontime),2) fraction
from (
-- A day is late while its calendar date dt is still before `actual`.
-- When the LEFT JOIN finds no dt (the date array is empty for early
-- observations, or the day is past `actual`), dt is NULL, the comparison is
-- NULL, and IF falls through to 1 — i.e. the day counts as on time.
select day, if(dt < actual, 0, 1) ontime
from your_table,
unnest(generate_array(0,10)) day
left join unnest(generate_date_array(expected, actual)) dt with offset as day
using(day)
)
group by day
if applied to sample data in your question
-- Sample data matching the question; column names and types come from the
-- first UNION branch.
with your_table as (
  select 1 as id, date '2022-01-01' as expected, date '2022-01-02' as actual
  union all select 2, date '2022-01-11', date '2022-01-20'
  union all select 3, date '2022-01-21', date '2022-01-20'
)
output is

TSQL - dates overlapping - number of days

I have the following table on SQL Server:
ID
FROM
TO
OFFER NUMBER
1
2022.01.02
9999.12.31
1
1
2022.01.02
2022.02.10
2
2
2022.01.05
2022.02.15
1
3
2022.01.02
9999.12.31
1
3
2022.01.15
2022.02.20
2
3
2022.02.03
2022.02.25
3
4
2022.01.16
2022.02.05
1
5
2022.01.17
2022.02.13
1
5
2022.02.05
2022.02.13
2
The range includes the start date but excludes the end date.
The date 9999.12.31 is given (comes from another system), but we could use the last day of the current quarter instead.
I need to find a way to determine the number of days when the customer sees exactly one, two, or three offers. The following picture shows the method upon id 3:
The expected results should be like (without using the last day of the quarter):
ID
# of days when the customer sees only 1 offer
# of days when the customer sees 2 offers
# of days when the customer sees 3 offers
1
2913863
39
0
2
41
0
0
3
2913861
24
17
4
20
0
0
5
19
8
0
I've found this article but it did not enlighten me.
Also I have limited privileges that is I am not able to declare a variable for example so I need to use "basic" TSQL.
Please provide a detailed explanation besides the code.
Thanks in advance!
The following will (for each ID) extract all distinct dates, construct non-overlapping date ranges to test, and will count up the number of offers per range. The final step is to sum and format.
The fact that the start dates are inclusive and the end dates are exclusive while sometimes non-intuitive for the human, actually works well in algorithms like this.
-- T-SQL table variables are named with '@', not '#' ('#' denotes temp tables,
-- which are created with CREATE TABLE, not DECLARE). The original text's
-- 'DECLARE #Data TABLE' / 'INSERT #Data' / 'FROM #Data' would not compile.
DECLARE @Data TABLE (Id INT, FromDate DATETIME, ToDate DATETIME, OfferNumber INT)

INSERT @Data
VALUES
    (1, '2022-01-02', '9999-12-31', 1),
    (1, '2022-01-02', '2022-02-10', 2),
    (2, '2022-01-05', '2022-02-15', 1),
    (3, '2022-01-02', '9999-12-31', 1),
    (3, '2022-01-15', '2022-02-20', 2),
    (3, '2022-02-03', '2022-02-25', 3),
    (4, '2022-01-16', '2022-02-05', 1),
    (5, '2022-01-17', '2022-02-13', 1),
    (5, '2022-02-05', '2022-02-13', 2)
;
WITH Dates AS ( -- Gather distinct dates (UNION, not UNION ALL, dedupes)
    SELECT Id, Date = FromDate FROM @Data
    UNION --(distinct)
    SELECT Id, Date = ToDate FROM @Data
),
Ranges AS ( -- Construct non-overlapping ranges (The ToDate = NULL case will be ignored later)
    SELECT ID, FromDate = Date, ToDate = LEAD(Date) OVER(PARTITION BY Id ORDER BY Date)
    FROM Dates
),
Counts AS ( -- Calculate days and count offers per date range.
    -- An offer covers a sub-range when it fully encloses it; the half-open
    -- [From, To) convention makes the <= / >= tests line up exactly.
    SELECT R.Id, R.FromDate, R.ToDate,
        Days = DATEDIFF(DAY, R.FromDate, R.ToDate),
        Offers = COUNT(*)
    FROM Ranges R
    JOIN @Data D ON D.Id = R.Id
        AND D.FromDate <= R.FromDate
        AND D.ToDate >= R.ToDate
    GROUP BY R.Id, R.FromDate, R.ToDate
)
-- Final step: sum days per offer-count bucket and format as columns.
SELECT Id
    ,[Days with 1 Offer] = SUM(CASE WHEN Offers = 1 THEN Days ELSE 0 END)
    ,[Days with 2 Offers] = SUM(CASE WHEN Offers = 2 THEN Days ELSE 0 END)
    ,[Days with 3 Offers] = SUM(CASE WHEN Offers = 3 THEN Days ELSE 0 END)
FROM Counts
GROUP BY Id
The WITH clause introduces Common Table Expressions (CTEs) which progressively build up intermediate results until a final select can be made.
Results:
Id
Days with 1 Offer
Days with 2 Offers
Days with 3 Offers
1
2913863
39
0
2
41
0
0
3
2913861
24
17
4
20
0
0
5
19
8
0
Alternately, the final select could use a pivot. Something like:
-- Alternative final SELECT using PIVOT; this fragment replaces the last SELECT
-- of the preceding query and requires the Dates/Ranges/Counts CTEs in scope.
SELECT Id,
[Days with 1 Offer] = ISNULL([1], 0),
[Days with 2 Offers] = ISNULL([2], 0),
[Days with 3 Offers] = ISNULL([3], 0)
-- ISNULL fills 0 for offer counts that never occur for an Id
FROM (SELECT Id, Offers, Days FROM Counts) C
PIVOT (SUM(Days) FOR Offers IN ([1], [2], [3])) PVT
ORDER BY Id
See This db<>fiddle for a working example.
Find all date points for each ID. For each date point, find the number of overlapping.
Refer to comments within query
-- For each ID: collect every boundary date, cut the timeline into consecutive
-- ranges, count how many offers overlap each range, then sum days per count.
with
dates as
(
-- get all date points
select ID, theDate = FromDate from offers
union -- union to exclude any duplicate
select ID, theDate = ToDate from offers
),
cte as
(
select ID = d.ID,
Date_Start = d.theDate,
-- the range runs from this boundary to the next one (NULL for the last boundary)
Date_End = LEAD(d.theDate) OVER (PARTITION BY ID ORDER BY theDate),
TheCount = c.cnt
from dates d
cross apply
(
-- Count no of overlapping: an offer [FromDate, ToDate) covers the range
-- starting at theDate when it starts on/before it and ends strictly after it
select cnt = count(*)
from offers x
where x.ID = d.ID
and x.FromDate <= d.theDate
and x.ToDate > d.theDate
) c
)
-- Sum the lengths of all ranges sharing the same overlap count per ID.
select ID, TheCount, days = sum(datediff(day, Date_Start, Date_End))
from cte
where Date_End is not null
group by ID, TheCount
order by ID, TheCount
Result :
ID
TheCount
days
1
1
2913863
1
2
39
2
1
41
3
1
2913861
3
2
29
3
3
12
4
1
20
5
1
19
5
2
8
To get to the required format, use PIVOT
dbfiddle demo

Redshift: Grouping rows by range and adding to output columns

I have data like this:
Table 1: (lots of items denoted by 1, 2, 3 etc. and with sales date in epochs and the number of sales on the given date as Number. The data only covers the last 12 weeks of sales)
Item | Sales_Date | Number
1 1587633401000 2
1 1587374201000 3
1 1585732601000 4
1 1583054201000 1
1 1582190201000 2
1 1580548601000 3
What I was as the output is a single line per item with each column showing the total sales for each individual month:
Output:
Item | Month_1_Sales | Month_2_Sales | Month_3_Sales
1 3 3 9
As the only sale that occurred happened at 1580548601000 (sales = 3), while 1583054201000 (sales = 1) and 1582190201000 (sales = 2) both occur in Month 2 etc.
So I need to split the sales dates into groups by month, sum their sales numbers, and then these numbers in columns. I am very new to SQL so don't know where to start. Would anyone be able to help?
You can extract the months from the timestamp using:
select extract(month from (timestamp 'epoch' + sales_date / 1000 * interval '1 second'))
However, I am guessing that you really want 4-week periods, because 12 weeks of data is not 3 complete months. That would make more sense to me. For the calculation, use the difference from the earliest date and then use arithmetic and conditional aggregation:
-- Bucket sales into three consecutive 4-week (28-day) periods measured from
-- the earliest sale, and pivot each bucket into its own column via conditional
-- aggregation. Timestamps are epoch milliseconds, so one period is
-- 1000 ms * 60 s * 60 min * 24 h * 7 d * 4 wk.
-- Fixes vs. the original text: commas were missing between the three SUM
-- expressions, and the third column was aliased month_3_sales twice — the
-- period-0 bucket is month_1_sales.
select item,
       sum(case when floor((sales_date - min_sales_date) / (1000 * 60 * 60 * 24 * 4 * 7)) = 2
                then number
           end) as month_3_sales,
       sum(case when floor((sales_date - min_sales_date) / (1000 * 60 * 60 * 24 * 4 * 7)) = 1
                then number
           end) as month_2_sales,
       sum(case when floor((sales_date - min_sales_date) / (1000 * 60 * 60 * 24 * 4 * 7)) = 0
                then number
           end) as month_1_sales
from (select t1.*,
             -- earliest sale across the whole table anchors period 0
             min(sales_date) over () as min_sales_date
      from table1 t1
     ) t1
group by item;

Oracle first and last observation over multiple windows

I have a problem with a query in Oracle.
My table contains all of the loan applications from last year. Some of the customers have more than one application. I want to aggregate those applications as follows:
For each customer, I want to find his first application (let's call it A) in the last year and then I want to find out what was the last application in 30 days interval, counting from the first application (say B is the last one). Next, I need to find the application following B and again find for it the last one in 30 days interval, as in the previous step. What I want as the result is the table with the latest and earliest applications on each customer's interval. It is also possible that the first one is the same as the last one.
How could I do this in Oracle without plsql? Is this possible? Should I use cumulative sums of time intervals for it? (but then the starting point for each sum depends on the counted sum..)
Let's say the table has a following form:
application_id (unique) | customer_id (not unique) | create_date
1 1 2017-01-02 <- first
2 1 2017-01-10 <- middle
3 1 2017-01-30 <- last
4 1 2017-05-02 <- first and last
5 1 2017-06-02 <- first
6 1 2017-06-30 <- middle
7 1 2017-06-30 <- middle
8 1 2017-07-01 <- last
What I expect is:
application_id (unique) | customer_id (not unique) | create_date
1 1 2017-01-02 <- first
3 1 2017-01-30 <- last
4 1 2017-05-02 <- first and last
5 1 2017-06-02 <- first
8 1 2017-07-01 <- last
Thanks in advance for help.
SQL Fiddle
Oracle 11g R2 Schema Setup:
-- Sample data: one customer with eight applications; the inline comments mark
-- the expected first/middle/last classification within each 30-day window.
CREATE TABLE table_name ( application_id, customer_id, create_date ) AS
SELECT 1, 1, DATE '2017-01-02' FROM DUAL UNION ALL -- <- first
SELECT 2, 1, DATE '2017-01-10' FROM DUAL UNION ALL -- <- middle
SELECT 3, 1, DATE '2017-01-30' FROM DUAL UNION ALL -- <- last
SELECT 4, 1, DATE '2017-05-02' FROM DUAL UNION ALL -- <- first and last
SELECT 5, 1, DATE '2017-06-02' FROM DUAL UNION ALL -- <- first
SELECT 6, 1, DATE '2017-06-30' FROM DUAL UNION ALL -- <- middle
SELECT 7, 1, DATE '2017-06-30' FROM DUAL UNION ALL -- <- middle
SELECT 8, 1, DATE '2017-07-01' FROM DUAL -- <- last
Query 1:
-- Walk the applications in order with a recursive CTE, assigning a group
-- number (grp) that increments whenever a row falls outside the 30-day window
-- opened by its group's first application; then keep only the earliest and
-- latest row of each group.
-- NOTE(review): the anchor hard-codes application_id = 1 and the step joins on
-- application_id + 1, so this assumes consecutive ids and (for the anchor) a
-- single customer — matches the sample data, but verify before generalizing.
WITH data ( application_id, customer_id, create_date, first_date, grp ) AS (
SELECT t.application_id,
t.customer_id,
t.create_date,
t.create_date,
1
FROM table_name t
WHERE application_id = 1
UNION ALL
SELECT t.application_id,
t.customer_id,
t.create_date,
-- within 30 days of the group's first application: keep that first date;
-- otherwise this row opens a new group and becomes its first date
CASE WHEN t.create_date <= d.first_date + INTERVAL '30' DAY
THEN d.first_date
ELSE t.create_date
END,
CASE WHEN t.create_date <= d.first_date + INTERVAL '30' DAY
THEN grp
ELSE grp + 1
END
FROM data d
INNER JOIN table_name t
ON ( d.customer_id = t.customer_id
AND d.application_id + 1 = t.application_id )
)
SELECT application_id,
customer_id,
create_date,
grp
FROM (
-- rn_a = 1 marks the group's earliest row, rn_d = 1 its latest; a
-- single-row group satisfies both ("first and last")
SELECT d.*,
ROW_NUMBER() OVER ( PARTITION BY customer_id, grp ORDER BY create_date ASC ) AS rn_a,
ROW_NUMBER() OVER ( PARTITION BY customer_id, grp ORDER BY create_date DESC ) AS rn_d
FROM data d
)
WHERE rn_a = 1
OR rn_d = 1
Results:
| APPLICATION_ID | CUSTOMER_ID | CREATE_DATE | GRP |
|----------------|-------------|----------------------|-----|
| 1 | 1 | 2017-01-02T00:00:00Z | 1 |
| 3 | 1 | 2017-01-30T00:00:00Z | 1 |
| 4 | 1 | 2017-05-02T00:00:00Z | 2 |
| 5 | 1 | 2017-06-02T00:00:00Z | 3 |
| 8 | 1 | 2017-07-01T00:00:00Z | 3 |