I have a table load_ext, which is an external table for the below file structure:
customer,interval_type,data_count,Start_time,interval1,interval2,interval3,...,interval24
67891,60,5,06022022040000AM,0.07,0.767,0.65,0.69,0,0,...
12345,60,8,06022022120000PM,0.07,0.767,0.65,0.69,0.767,0.69,0,0
To explain the above columns: all columns are VARCHAR2. interval_type is in minutes, data_count gives the number of intervals to be posted starting from the start_time column, and interval1 is the value for 00:00:00 to 01:00:00 AM, and so on. The target table will have the same structure, but the intervals above should be moved to their respective columns. For example, the value of the interval1 column in the first row should be moved to column interval4, and likewise for all the other columns to their respective interval periods.
My target table should have the data like below:
customer,interval_type,data_count,Start_time,interval1,interval2,interval3,interval4,interval5,...,interval24
67891,60,5,06022022040000AM,0,0,0,0.07,0.767,0.65,0.81,0,0
12345,60,8,06022022120000PM,0,0,0,0,0,0,0,0,0.07,0.65,0.07,0.65,0,0,...
I am providing the table data with a ',' delimiter, as the table structure is too big to post as a formatted table. This has to be done in Oracle only; we are using Oracle 19.
Unpivot the columns to rows, add the hours from the start time to the interval and then pivot back to columns:
SELECT *
FROM (
SELECT customer,
interval_type,
data_count,
start_time,
MOD(
interval_name + TO_CHAR(TO_DATE(start_time, 'DDMMYYYYHH12MISSAM'), 'HH24'),
24
) AS interval_name,
value
FROM table_name
UNPIVOT (value FOR interval_name IN (
interval1 AS 00,
interval2 AS 01,
interval3 AS 02,
interval4 AS 03,
interval5 AS 04,
interval6 AS 05,
interval7 AS 06,
interval8 AS 07,
interval9 AS 08,
interval10 AS 09,
interval11 AS 10,
interval12 AS 11,
interval13 AS 12,
interval14 AS 13,
interval15 AS 14,
interval16 AS 15,
interval17 AS 16,
interval18 AS 17,
interval19 AS 18,
interval20 AS 19,
interval21 AS 20,
interval22 AS 21,
interval23 AS 22,
interval24 AS 23
))
)
PIVOT (
MAX(value) FOR interval_name IN (
00 AS interval24,
01 AS interval1,
02 AS interval2,
03 AS interval3,
04 AS interval4,
05 AS interval5,
06 AS interval6,
07 AS interval7,
08 AS interval8,
09 AS interval9,
10 AS interval10,
11 AS interval11,
12 AS interval12,
13 AS interval13,
14 AS interval14,
15 AS interval15,
16 AS interval16,
17 AS interval17,
18 AS interval18,
19 AS interval19,
20 AS interval20,
21 AS interval21,
22 AS interval22,
23 AS interval23
)
);
Which, for the sample data:
CREATE TABLE table_name (
customer,
interval_type,
data_count,
Start_time,
interval1,
interval2,
interval3,
interval4,
interval5,
interval6,
interval7,
interval8,
interval9,
interval10,
interval11,
interval12,
interval13,
interval14,
interval15,
interval16,
interval17,
interval18,
interval19,
interval20,
interval21,
interval22,
interval23,
interval24
) AS
SELECT 67891, 60, 5, '06022022040000AM', 0.07, 0.767, 0.65, 0.69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 FROM DUAL UNION ALL
SELECT 12345, 60, 8, '06022022120000PM', 0.07, 0.767, 0.65, 0.69, 0.767, 0.69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 FROM DUAL;
Outputs:
CUSTOMER,DATA_COUNT,START_TIME,INTERVAL24,INTERVAL1,INTERVAL2,INTERVAL3,INTERVAL4,INTERVAL5,INTERVAL6,INTERVAL7,INTERVAL8,INTERVAL9,INTERVAL10,INTERVAL11,INTERVAL12,INTERVAL13,INTERVAL14,INTERVAL15,INTERVAL16,INTERVAL17,INTERVAL18,INTERVAL19,INTERVAL20,INTERVAL21,INTERVAL22,INTERVAL23
12345,60,06022022120000PM,0,0,0,0,0,0,0,0,0,0,0,0,.07,.767,.65,.69,.767,.69,0,0,0,0,0,0
67891,60,06022022040000AM,0,0,0,0,.07,.767,.65,.69,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
db<>fiddle here
Here is one way to deal with it. First UNPIVOT the intervals, then index them (COLUMN_NO) and use the MODEL clause to shift the values according to DATA_COUNT, and finally PIVOT it back again. Here is a sample with 12 intervals:
WITH
tbl AS
(
SELECT '67891' "CUSTOMER",
'60' "INTERVAL_TYPE",
'5' "DATA_COUNT",
'06022022040000AM' "START_TIME",
'0.07' "INTERVAL1",
'0.767' "INTERVAL2",
'0.65' "INTERVAL3",
'0.69' "INTERVAL4",
'0.62' "INTERVAL5",
'0.61' "INTERVAL6",
'0.70' "INTERVAL7",
'0.68' "INTERVAL8",
'0.62' "INTERVAL9",
'0.59' "INTERVAL10",
'0.69' "INTERVAL11",
'0.60' "INTERVAL12" FROM DUAL UNION ALL
--
SELECT '12345' "CUSTOMER",
'60' "INTERVAL_TYPE",
'8' "DATA_COUNT",
'06022022120000PM' "START_TIME",
'0.07' "INTERVAL1",
'0.767' "INTERVAL2",
'0.65' "INTERVAL3",
'0.69' "INTERVAL4",
'0.767' "INTERVAL5",
'0.69' "INTERVAL6",
'0.70' "INTERVAL7",
'0.68' "INTERVAL8",
'0.62' "INTERVAL9",
'0.59' "INTERVAL10",
'0.69' "INTERVAL11",
'0.60' "INTERVAL12" FROM DUAL
),
datarows AS
(
SELECT CUSTOMER, INTERVAL_TYPE, DATA_COUNT, START_TIME,
VALUE_NAME, VALUE_OF
FROM tbl
UNPIVOT (
VALUE_OF FOR VALUE_NAME
IN (INTERVAL1, INTERVAL2, INTERVAL3, INTERVAL4, INTERVAL5, INTERVAL6,
INTERVAL7, INTERVAL8, INTERVAL9, INTERVAL10, INTERVAL11, INTERVAL12)
)
),
dataset AS
(
SELECT
CUSTOMER, INTERVAL_TYPE, DATA_COUNT, START_TIME,
REPLACE(VALUE_NAME, 'INTERVAL', '') "COLUMN_NO",
VALUE_NAME,
VALUE_OF,
VALUE_OF "ORIG_VALUE"
FROM
datarows
),
combined AS
(
SELECT
CUSTOMER,
INTERVAL_TYPE,
DATA_COUNT,
START_TIME,
COLUMN_NO,
VALUE_NAME,
Nvl(VALUE_OF, '0') "VALUE_OF",
ORIG_VALUE
FROM
dataset
MODEL
PARTITION BY (CUSTOMER)
DIMENSION BY (COLUMN_NO, DATA_COUNT)
MEASURES (VALUE_OF, VALUE_NAME, INTERVAL_TYPE, START_TIME, ORIG_VALUE)
RULES ITERATE(12)
(
VALUE_OF[ITERATION_NUMBER + 1, ANY] = CASE
WHEN CV(COLUMN_NO) - To_Number(CV(DATA_COUNT)) + 1 < 0 - To_Number(CV(DATA_COUNT)) - 1 --CV(COLUMN_NO)
THEN '0'
ELSE
ORIG_VALUE[To_Char(To_Number(CV(COLUMN_NO)) - To_Number(CV(DATA_COUNT)) + 2), CV(DATA_COUNT)]
END
)
)
SELECT * FROM
(
SELECT
CUSTOMER,
INTERVAL_TYPE,
DATA_COUNT,
START_TIME,
VALUE_NAME,
VALUE_OF
FROM
combined
WHERE CUSTOMER = '67891'
)
PIVOT(
MAX(VALUE_OF)
FOR VALUE_NAME
IN ('INTERVAL1', 'INTERVAL2', 'INTERVAL3', 'INTERVAL4', 'INTERVAL5', 'INTERVAL6',
'INTERVAL7', 'INTERVAL8', 'INTERVAL9', 'INTERVAL10', 'INTERVAL11', 'INTERVAL12')
)
UNION ALL
SELECT * FROM
(
SELECT
CUSTOMER,
INTERVAL_TYPE,
DATA_COUNT,
START_TIME,
VALUE_NAME,
VALUE_OF
FROM
combined
WHERE CUSTOMER = '12345'
)
PIVOT(
MAX(VALUE_OF)
FOR VALUE_NAME
IN ('INTERVAL1', 'INTERVAL2', 'INTERVAL3', 'INTERVAL4', 'INTERVAL5', 'INTERVAL6',
'INTERVAL7', 'INTERVAL8', 'INTERVAL9', 'INTERVAL10', 'INTERVAL11', 'INTERVAL12')
)
--
--
-- CUSTOMER INTERVAL_TYPE DATA_COUNT START_TIME 'INTERVAL1' 'INTERVAL2' 'INTERVAL3' 'INTERVAL4' 'INTERVAL5' 'INTERVAL6' 'INTERVAL7' 'INTERVAL8' 'INTERVAL9' 'INTERVAL10' 'INTERVAL11' 'INTERVAL12'
-- -------- ------------- ---------- ---------------- ------- ----------- ----------- ----------- ----------- ----------- ----------- ----------- ----------- ------------ ------------ ------------
-- 67891 60 5 06022022040000AM 0 0 0.07 0.767 0.65 0.69 0.62 0.61 0.70 0.68 0.62
-- 12345 60 8 06022022120000PM 0 0 0 0 0 0.07 0.767 0.65 0.69 0.767 0.69
Hopefully, you could use it to solve the problem. Regards...
Thank you for trying to help me. I figured out an alternate and faster way of doing it. Instead of using load_ext, which is an external table, I preferred using UTL_FILE to read the file directly line by line and write the same to a new file; in between, I used an existing table that already has rows created for each interval period.
My existing table for interval periods has a start and end timestamp for every 15 minutes and its respective period number. So I just took the period number based on the timestamp from the file for the respective line, generated as many spaces, and concatenated them to the last column of the file. The last column in the file contains all the intervals, but only as many as the number provided in the data_count field of the file.
My steps are like this:
1. Rename and open the source file in a loop.
2. Read one line at a time and retrieve the data field.
3. Based on the timestamp, get the period number from the existing table and add data_count to it.
4. Generate as many spaces as derived in step 3 and concatenate them to the data field.
5. Write this to a new file named as the actual source file.
My job is done in a faster way compared to the SQL query.
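For reference, a minimal PL/SQL sketch of that approach. It is only an illustration, not the exact code: the directory object DATA_DIR, the file names, the lookup table interval_periods(period_no, start_ts, end_ts) and the exact padding arithmetic are all assumed names used here for illustration.

DECLARE
  v_in     UTL_FILE.FILE_TYPE;
  v_out    UTL_FILE.FILE_TYPE;
  v_line   VARCHAR2(32767);
  v_start  VARCHAR2(20);
  v_period NUMBER;
BEGIN
  -- open the renamed source file for reading and a new file (named as the actual source file) for writing
  v_in  := UTL_FILE.FOPEN('DATA_DIR', 'load_src.dat', 'R', 32767);
  v_out := UTL_FILE.FOPEN('DATA_DIR', 'load.dat', 'W', 32767);
  LOOP
    BEGIN
      UTL_FILE.GET_LINE(v_in, v_line);                 -- read one line at a time
    EXCEPTION
      WHEN NO_DATA_FOUND THEN EXIT;                    -- end of file
    END;
    v_start := REGEXP_SUBSTR(v_line, '[^,]+', 1, 4);   -- 4th comma-separated field = start_time
    -- period number for this start time, from the existing 15-minute periods table (hypothetical)
    SELECT period_no
      INTO v_period
      FROM interval_periods
     WHERE TO_DATE(v_start, 'DDMMYYYYHH12MISSAM') BETWEEN start_ts AND end_ts;
    -- pad the data field with spaces derived from the period number
    -- (the exact arithmetic per steps 3-4 above is only sketched here)
    UTL_FILE.PUT_LINE(v_out,
        SUBSTR(v_line, 1, INSTR(v_line, ',', 1, 4))
        || RPAD(' ', v_period)
        || SUBSTR(v_line, INSTR(v_line, ',', 1, 4) + 1));
  END LOOP;
  UTL_FILE.FCLOSE(v_in);
  UTL_FILE.FCLOSE(v_out);
END;
/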
Thank You Again
Related
I have a table like
timestamp    type  value
----------   ----  -----
08.01.2023   1     5
07.01.2023   0     20
06.01.2023   1     1
05.01.2023   0     50
04.01.2023   0     50
03.01.2023   1     1
02.01.2023   1     1
01.01.2023   1     1
Type 1 means a deposit, type 0 means a withdrawal.
When the type is 1, the value is the exact amount the user deposited, so we can just sum it; but type 0 means a withdrawal given as a percentage.
What I'm looking for is to create another column with the current deposited amount. For the example above it would look like this:
timestamp    type  value  deposited
----------   ----  -----  ---------
08.01.2023   1     5      5.4
07.01.2023   0     20     1.4
06.01.2023   1     1      1.75
05.01.2023   0     50     0.75
04.01.2023   0     50     1.5
03.01.2023   1     1      3
02.01.2023   1     1      2
01.01.2023   1     1      1
I can't figure out how to build a running sum like this which would subtract a percentage of the previous total.
You are trying to carry state over time, so you either need to use a UDTF to do the carry work for you, or use a recursive CTE:
with data(transaction_date, type, value) as (
select to_date(column1, 'dd.mm.yyyy'), column2, column3
from values
('08.01.2023', 1, 5),
('07.01.2023', 0, 20),
('06.01.2023', 1, 1),
('05.01.2023', 0, 50),
('04.01.2023', 0, 50),
('03.01.2023', 1, 1),
('02.01.2023', 1, 1),
('01.01.2023', 1, 1)
), pre_process_data as (
select *
,iff(type = 0, 0, value)::number as add
,iff(type = 0, value, 0)::number as per
,row_number()over(order by transaction_date asc) as rn
from data
), rec_cte_block as (
with recursive rec_sub_cte as (
select
p.*,
p.add::number(20,4) as deposited
from pre_process_data as p
where p.rn = 1
union all
select
p.*,
round(div0((r.deposited + p.add)*(100-p.per), 100), 2) as deposited
from rec_sub_cte as r
left join pre_process_data as p
where p.rn = r.rn+1
)
select *
from rec_sub_cte
)
select * exclude(add, per, rn)
from rec_cte_block
order by 1;
I wrote the recursive CTE this way, as there is currently an open incident when IFF or CASE is used inside the recursive CTE.
TRANSACTION_DATE  TYPE  VALUE  DEPOSITED
----------------  ----  -----  ---------
2023-01-01        1     1      1
2023-01-02        1     1      2
2023-01-03        1     1      3
2023-01-04        0     50     1.5
2023-01-05        0     50     0.75
2023-01-06        1     1      1.75
2023-01-07        0     20     1.4
2023-01-08        1     5      6.4
Solution without recursion or a UDTF:
create table depo (timestamp date,type int, value float);
insert into depo values
(cast('01.01.2023' as date),1, 1.0)
,(cast('02.01.2023' as date),1, 1.0)
,(cast('03.01.2023' as date),1, 1.0)
,(cast('04.01.2023' as date),0, 50.0)
,(cast('05.01.2023' as date),0, 50.0)
,(cast('06.01.2023' as date),1, 1.0)
,(cast('07.01.2023' as date),0, 20.0)
,(cast('08.01.2023' as date),1, 5.0)
;
with t0 as(
select *
,sum(case when type=0 and value>=100 then 1 else 0 end)over(order by timestamp) gr
from depo
)
,t1 as (select timestamp as dt,type,gr
,case when type=1 then value else 0 end depo
,case when type=0 then ((100.0-value)/100.0) else 0.0 end pct
,sum(case when type=0 and value<100 then log((100.0-value)/100.0,2.0)
when type=0 and value>=100 then null
else 0.0
end)
over(partition by gr order by timestamp ROWS BETWEEN CURRENT ROW
AND UNBOUNDED FOLLOWING) totLog
from t0
)
,t2 as(
select *
,case when type=1 then
isnull(sum(depo*power(cast(2.0 as float),totLog))
over(partition by gr order by dt rows between unbounded preceding and 1 preceding)
,0)/power(cast(2.0 as float),totLog)
+depo
else
isnull(sum(depo*power(cast(2.0 as float),totLog))
over(partition by gr order by dt rows between unbounded preceding and 1 preceding)
,0)/power(cast(2.0 as float),totLog)*pct
end rest
from t1
)
select dt,type,depo,pct*100 pct
,rest-lag(rest,1,0)over(order by dt) movement
,rest
from t2
order by dt
dt          type  depo  pct  movement  rest
----------  ----  ----  ---  --------  ----
2023-01-01  1     1     0    1         1
2023-02-01  1     1     0    1         2
2023-03-01  1     1     0    1         3
2023-04-01  0     0     50   -1.5      1.5
2023-05-01  0     0     50   -0.75     0.75
2023-06-01  1     1     0    1         1.75
2023-07-01  0     0     80   -0.35     1.4
2023-08-01  1     5     0    5         6.4
I think it is better to perform this kind of calculation on the client side or in a middle tier.
Sequential calculations are difficult to implement in SQL. In some special cases you can use logarithmic expressions, but it is clearer and easier to implement through recursion, as #Simeon showed.
To expand on #ValNik's answer:
The first simple step is to change "deduct 20%, then deduct 50%, then deduct 30%" into a multiplication...
X - 20% - 50% - 30%
=>
x * 0.8 * 0.5 * 0.7
=>
x * 0.28
The second trick is to understand how to calculate cumulative PRODUCT() when you only have cumulative sum; SUM() OVER (), using the properties of logarithms...
a * b == exp( log(a) + log(b) )
0.8 * 0.5 * 0.7
=>
exp( log(0.8) + log(0.5) + log(0.7) )
=>
exp( -0.2231 + -0.6931 + -0.3567 )
=>
exp( -1.2730 )
=>
0.28
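As a minimal standalone sketch of that trick (my own illustration, assuming a table t with columns dt and multiplier, and the one-argument natural-log LOG() used later in this answer):

-- cumulative product of multiplier = EXP of the cumulative SUM of its logarithms
SELECT dt,
       multiplier,
       EXP(SUM(LOG(multiplier))
           OVER (ORDER BY dt
                 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)) AS cumulative_product
FROM t;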
The next trick is easier to explain with integers rather than percentages. That is to be able to break down the original problem in to one that can be solved using "cumulative sum" and "cumulative product"...
Current working:
row_id  type  value  equation                      result
------  ----  -----  ----------------------------  ------
1       +     10     0 + 10                        10
2       +     20     (0 + 10 + 20)                 30
3       *     2      (0 + 10 + 20) * 2             60
4       +     30     (0 + 10 + 20) * 2 + 30        90
5       *     3      ((0 + 10 + 20) * 2 + 30) * 3  270
Rearranged working:
row_id  type  value  CUMPROD  new equation              result
------  ----  -----  -------  ------------------------  ------
1       +     10     2*3=6    (10*6) / 6                10
2       +     20     2*3=6    (10*6 + 20*6) / 6         30
3       *     2      3=3      (10*6 + 20*6) / 3         60
4       +     30     3=3      (10*6 + 20*6 + 30*3) / 3  90
5       *     3      =1       (10*6 + 20*6 + 30*3) / 1  270
CUMPROD is the "cumulative product" of all future "multiplication values".
The equation is then the "cumulative sum" of value * CUMPROD divided by the current CUMPROD.
So...
row 1 : SUM(10*6 ) / 6 => SUM(10 )
row 2 : SUM(10*6, 20*6 ) / 6 => SUM(10, 20)
row 3 : SUM(10*6, 20*6 ) / 3 => SUM(10, 20) * 2
row 4 : SUM(10*6, 20*6, 30*3) / 3 => SUM(10, 20) * 2 + SUM(30)
row 5 : SUM(10*6, 20*6, 30*3) / 1 => SUM(10, 20) * 2*3 + SUM(30) * 3
The only things to be cautious of are:
LOG(0) is undefined (it tends to -Infinity), which would happen when deducting 100%
Deducting more than 100% makes no sense
So, I copied #ValNik's code that creates a new partition every time 100% or more is deducted (forcing everything in the next partition to start at zero again).
This gives the following SQL (a re-arranged version of #ValNik's code):
WITH
partition_when_deduct_everything AS
(
SELECT
*,
SUM(
CASE WHEN type = 0 AND value >= 100 THEN 1 ELSE 0 END
)
OVER (
ORDER BY timestamp
)
AS deduct_everything_id,
CASE WHEN type = 1 THEN value
ELSE 0
END
AS deposit,
CASE WHEN type = 1 THEN 1.0 -- Deposits == Deduct 0%
WHEN value >= 100 THEN 1.0 -- Treat "deduct everything" as a special case
ELSE (100.0-value)/100.0 -- Change "deduct 20%" to "multiply by 0.8"
END
AS multiplier
FROM
your_table
)
,
cumulative_product_of_multipliers as
(
SELECT
*,
EXP(
ISNULL(
SUM(
LOG(multiplier)
)
OVER (
PARTITION BY deduct_everything_id
ORDER BY timestamp
ROWS BETWEEN 1 FOLLOWING
AND UNBOUNDED FOLLOWING
)
, 0
)
)
AS future_multiplier
FROM
partition_when_deduct_everything
)
SELECT
*,
ISNULL(
SUM(
deposit * future_multiplier
)
OVER (
PARTITION BY deduct_everything_id
ORDER BY timestamp
ROWS BETWEEN UNBOUNDED PRECEDING
AND CURRENT ROW
),
0
)
/
future_multiplier
AS rest
FROM
cumulative_product_of_multipliers
Demo : https://dbfiddle.uk/mrioIMiB
So, how this really should be solved is with a UDTF, because it requires sorting the data once and traversing the data only once, and if you have different partitions (user_id etc.) they can be processed in parallel:
create or replace function carry_value_state(_TYPE float, _VALUE float)
returns table (DEPOSITED float)
language javascript
as
$$
{
initialize: function(argumentInfo, context) {
this.carried_value = 0.0;
},
processRow: function (row, rowWriter, context){
if(row._TYPE === 1) {
this.carried_value += row._VALUE;
} else {
let limited = Math.max(Math.min(row._VALUE, 100.0), 0.0);
this.carried_value -= (this.carried_value * limited) / 100;
}
rowWriter.writeRow({DEPOSITED: this.carried_value});
}
}
$$;
which then gets used like:
select d.*,
c.*
from data as d
,table(carry_value_state(d.type::float, d.value::float) over (order by transaction_date)) as c
order by 1;
so for the data we have been using in the example, that gives:
TRANSACTION_DATE  TYPE  VALUE  DEPOSITED
----------------  ----  -----  ---------
2023-01-01        1     1      1
2023-01-02        1     1      2
2023-01-03        1     1      3
2023-01-04        0     50     1.5
2023-01-05        0     50     0.75
2023-01-06        1     1      1.75
2023-01-07        0     20     1.4
2023-01-08        1     5      6.4
Yes, the results are now in floating point, so you should double-round to avoid FP representation problems, like:
round(round(c.deposited, 6) , 2) as deposited
An alternative approach using MATCH_RECOGNIZE(), POW() and SUM().
I would not recommend using MATCH_RECOGNIZE() unless you have to; it's fiddly and can waste time, however it does look elegant.
with data(transaction_date, type, value) as (
select
to_date(column1, 'dd.mm.yyyy'),
column2,
column3
from
values
('08.01.2023', 1, 5),
('07.01.2023', 0, 20),
('06.01.2023', 1, 1),
('05.01.2023', 0, 50),
('04.01.2023', 0, 50),
('03.01.2023', 1, 1),
('02.01.2023', 1, 1),
('01.01.2023', 1, 1)
)
select *
from data match_recognize(
    order by transaction_date
    measures
        sum(iff(CLASSIFIER() = 'ROW_WITH_DEPOSIT', value, 0)) DEPOSITS,
        pow(iff(CLASSIFIER() = 'ROW_WITH_WITHDRAWL', value / 100, 1), count(row_with_withdrawl.*)) DISCOUNT_FROM_WITHDRAWL,
        CLASSIFIER() TRANS_TYPE,
        first(transaction_date) as start_date,
        last(transaction_date) as end_date,
        count(*) as rows_in_sequence,
        count(row_with_deposit.*) as num_deposits,
        count(row_with_withdrawl.*) as num_withdrawls
    after match skip past last row
    pattern((row_with_deposit+ | row_with_withdrawl+))
    define
        row_with_deposit as type = 1,
        row_with_withdrawl as type = 0
);
I am trying to exclude the rows with zero values across all months shown in the below table.
This is sample data; there will be thousands of products with partial or full zeros in between the 12 months in the real scenario. There will also be more months (columns) in the real scenario.
The expected result would look like this.
In case the affected columns are always natural numbers, you can sum them and check whether they are > 0.
SELECT * FROM yourtable WHERE jan + feb + march + april > 0;
If they can be negative, but are always numbers, you can do the same based on their absolute values.
SELECT * FROM yourtable WHERE ABS(jan) + ABS(feb) + ABS(march) + ABS(april) > 0;
In a real scenario, you'll just have to put more columns into the WHERE clause.
Sample data:
SQL> with test (prod_nm, jan, feb, mar, apr) as
2 (select 'GRANULES' , 500, 200, 100, 500 from dual union all
3 select 'INJECTION', 0, 0, 300, 550 from dual union all
4 select 'VET-A' , 0, 0, 0, 300 from dual union all
5 select 'VET-B' , 0, 0, 0, 0 from dual
6 )
Query:
7 select *
8 from test
9 where jan <> 0 or feb <> 0 or mar <> 0 or apr <> 0;
PROD_NM JAN FEB MAR APR
--------- ---------- ---------- ---------- ----------
GRANULES 500 200 100 500
INJECTION 0 0 300 550
VET-A 0 0 0 300
SQL>
Use where + any:
with test (prod_nm, jan, feb, mar, apr) as
(select 'GRANULES' , 500, 200, 100, 500 from dual union all
select 'INJECTION', 0, 0, 300, 550 from dual union all
select 'VET-A' , 0, 0, 0, 300 from dual union all
select 'VET-B' , 0, 0, 0, 0 from dual
)
select *
from test
where 0 < any (jan, feb, mar, apr);
PROD_NM JAN FEB MAR APR
--------- ---------- ---------- ---------- ----------
GRANULES 500 200 100 500
INJECTION 0 0 300 550
VET-A 0 0 0 300
SQL>
I want to create a new column in my query that takes into account the difference between the current row's datetime and the previous datetime. This column could be a counter where, if the difference is < -100, it stays as 1, but once the difference is > -100, the column is 0.
Ideally I would then want to only pull in the rows that come after the last 0 record.
My query:
with products as (
select * from (
select distinct
ID,
UnixDateTime,
OrderNumber,
to_date('1970-01-01','YYYY-MM-DD') + numtodsinterval(UnixDateTime,'SECOND')+ 1/24 as "Date_Time"
from DB
where
(date '1970-01-01' + UnixDateTime * interval '1' second) + interval '1' hour
> sysdate - interval '2' day
)
),
prod_prev AS (
SELECT p.*,
lag("Date_Time")over(order by "Date_Time" ASC) as Previous_Time,
lag(UnixDateTime)over(order by "Date_Time" ASC) as UnixDateTime_Previous_Time
FROM products p
),
run_sum AS (
SELECT p.*, "Date_Time"-Previous_Time as "Diff", UnixDateTime_Previous_Time-UnixDateTime AS "UnixDateTime_Diff"
FROM prod_prev p
)
SELECT * FROM run_sum
ORDER By UnixDateTime, "Date_Time" DESC
My query result from the above query:
ID  UnixDateTime  OrderNumber  Date_Time             Previous_Time         diff      UnixDateTime_Diff
--  ------------  -----------  --------------------  --------------------  --------  -----------------
1   1662615688    100          08-SEP-2022 06:41:28  (null)                (null)    (null)
2   1662615752    100          08-SEP-2022 06:42:32  08-SEP-2022 06:41:28  0.00074   -64
3   1662615765    100          08-SEP-2022 06:42:45  08-SEP-2022 06:42:32  0.000150  -13
4   1662615859    100          08-SEP-2022 06:44:19  08-SEP-2022 06:42:45  0.001088  -128
5   1662615987    100          08-SEP-2022 06:46:27  08-SEP-2022 06:44:19  0.00148   -44
6   1662616031    100          08-SEP-2022 06:47:11  08-SEP-2022 06:46:27  0.00051   -36
The counter in the example above should be 1 if the UnixDateTime_Diff is < -100 and 0 if it is > -100.
Then, if possible, I would only pull in records AFTER the most recent 0 record.
You use:
lag("Date_Time")over(order by "Date_Time" DESC)
And get the previous value when the values are ordered in DESCending order; this will get the previous higher value. If you want the previous lower value then either use:
lag("Date_Time") over (order by "Date_Time" ASC)
or
lead("Date_Time") over (order by "Date_Time" DESC)
If you want to perform row-by-row processing then, from Oracle 12, you can use MATCH_RECOGNIZE:
SELECT id,
unixdatetime,
ordernumber,
date_time,
next_unixdatetime,
next_unixdatetime - unixdatetime AS diff,
CASE cls
WHEN 'WITHIN_100' THEN 1
ELSE 0
END AS within_100
from (
select distinct
ID,
UnixDateTime,
OrderNumber,
TIMESTAMP '1970-01-01 00:00:00 UTC' + UnixDateTime * INTERVAL '1' SECOND
AS Date_Time
from DB
where TIMESTAMP '1970-01-01 00:00:00 UTC' + UnixDateTime * INTERVAL '1' SECOND
> SYSTIMESTAMP - INTERVAL '2' DAY
)
MATCH_RECOGNIZE(
ORDER BY unixdatetime
MEASURES
NEXT(unixdatetime) AS next_unixdatetime,
classifier() AS cls
ALL ROWS PER MATCH
PATTERN (within_100* any_row)
DEFINE
within_100 AS NEXT(unixdatetime) < unixdatetime + 100
) m
Which, for the sample data:
CREATE TABLE db (ID, UnixDateTime, OrderNumber) AS
SELECT 1, 1662615688, 100 FROM DUAL UNION ALL
SELECT 2, 1662615752, 100 FROM DUAL UNION ALL
SELECT 3, 1662615765, 100 FROM DUAL UNION ALL
SELECT 4, 1662615859, 100 FROM DUAL UNION ALL
SELECT 5, 1662615987, 100 FROM DUAL UNION ALL
SELECT 6, 1662616031, 100 FROM DUAL;
Outputs:
ID  UNIXDATETIME  ORDERNUMBER  DATE_TIME                          NEXT_UNIXDATETIME  DIFF  WITHIN_100
--  ------------  -----------  ---------------------------------  -----------------  ----  ----------
1   1662615688    100          2022-09-08 05:41:28.000000000 UTC  1662615752         64    1
2   1662615752    100          2022-09-08 05:42:32.000000000 UTC  1662615765         13    1
3   1662615765    100          2022-09-08 05:42:45.000000000 UTC  1662615859         94    1
4   1662615859    100          2022-09-08 05:44:19.000000000 UTC  1662615987         128   0
5   1662615987    100          2022-09-08 05:46:27.000000000 UTC  1662616031         44    1
6   1662616031    100          2022-09-08 05:47:11.000000000 UTC  null               null  0
fiddle
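To also cover the second part of the question (only pulling in rows after the most recent 0 record), a possible follow-up filter, offered only as a hedged sketch: assume the output of the MATCH_RECOGNIZE query above has been wrapped or materialised as flagged_results (a hypothetical name).

-- keep only the rows strictly after the last row whose WITHIN_100 flag is 0
SELECT *
FROM   flagged_results
WHERE  unixdatetime > (SELECT MAX(unixdatetime)
                       FROM   flagged_results
                       WHERE  within_100 = 0);

If there might be no 0 rows at all, wrap the subquery in COALESCE(..., 0) so that every row is returned.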
I'm trying to calculate the total on an interest-bearing account, accounting for deposits/withdrawals, with BigQuery.
Example scenario:
Daily interest rate = 10%
Value added/removed on every day: [100, 0, 29, 0, -100] (negative means amount removed)
The totals for each day are:
Day 1: 0*1.1 + 100 = 100
Day 2: 100*1.1 + 0 = 110
Day 3: 110*1.1 + 29 = 150
Day 4: 150*1.1 + 0 = 165
Day 5: 165*1.1 - 100 = 81.5
This would be trivial to implement in a language like Python
daily_changes = [100, 0, 29, 0, -100]
interest_rate = 0.1
result = []
for day, change in enumerate(daily_changes):
if day == 0:
result.append(change)
else:
result.append(result[day-1]*(1+interest_rate) + change)
print(result)
# Result: [100, 110.00000000000001, 150.00000000000003, 165.00000000000006, 81.50000000000009]
My difficulty lies in calculating values for row N when they depend on row N-1 (the usual SUM(...) OVER (ORDER BY...) solution does not suffice here).
Here's a CTE to test with the mock data in this example.
with raw_data as (
select 1 as day, numeric '100' as change union all
select 2 as day, numeric '0' as change union all
select 3 as day, numeric '29' as change union all
select 4 as day, numeric '0' as change union all
select 5 as day, numeric '-100' as change
)
select * from raw_data
You may try below:
SELECT day,
ROUND((SELECT SUM(c * POW(1.1, day - o - 1))
FROM t.changes c WITH OFFSET o), 2) AS totals
FROM (
SELECT *, ARRAY_AGG(change) OVER (ORDER BY day) changes
FROM raw_data
) t;
+-----+--------+
| day | totals |
+-----+--------+
| 1 | 100.0 |
| 2 | 110.0 |
| 3 | 150.0 |
| 4 | 165.0 |
| 5 | 81.5 |
+-----+--------+
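As a sanity check (my own arithmetic, not part of the original answer), the POW-based expression for day 5 expands to the same figure as the hand calculation in the question:

100*1.1^4 + 0*1.1^3 + 29*1.1^2 + 0*1.1^1 + (-100)*1.1^0
= 146.41 + 0 + 35.09 + 0 - 100
= 81.5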
Another option, with the use of a recursive CTE:
with recursive raw_data as (
select 1 as day, numeric '100' as change union all
select 2 as day, numeric '0' as change union all
select 3 as day, numeric '29' as change union all
select 4 as day, numeric '0' as change union all
select 5 as day, numeric '-100' as change
), iterations as (
select *, change as total
from raw_data where day = 1
union all
select r.day, r.change, 1.1 * i.total + r.change
from iterations i join raw_data r
on r.day = i.day + 1
)
select *
from iterations
with output
I have a table like this:
#Row ID Status1 Status2 TimeStatusChange
------------------------------------------
1 24 0 0 2020-09-02 09:18:02.233
2 48 0 0 2020-09-02 09:18:58.540
3 24 1 0 2020-09-02 09:19:47.233
4 24 0 0 2020-09-02 09:19:47.587
5 48 0 1 2020-09-02 09:22:53.923
6 36 1 0 2020-09-02 09:24:14.343
7 48 0 0 2020-09-02 09:24:49.670
8 24 1 0 2020-09-02 09:38:37.820
and would like to know how to calculate the sum of timespans for all status (1 or 2) changes from 0 to 1 (or 1 to 0), grouped by ID.
In this example, for ID 24, Status1 from 0 to 1, it would be the difference in TimeStatusChange between #Row 3 and #Row 1 plus the difference between #Row 8 and #Row 4, roughly 21 minutes.
The perfect output would look like this:
ID Change TimeSpanInMinutes
----------------------------------------
24 Status1_from_0_1 20
36 .....
Although I have some experience with PL/SQL, I am not getting anywhere.
Sample data
I added a couple rows to have some more result data and validate the scenario where there are successive rows with the same status for a given ID.
declare #data table
(
ID int,
Status1 int,
Stamp datetime
)
insert into #data (ID, Status1, Stamp) values
(48, 1, '2020-09-02 09:00:00.000'), --added row
(24, 0, '2020-09-02 09:18:02.233'),
(48, 0, '2020-09-02 09:18:58.540'),
(24, 1, '2020-09-02 09:19:47.233'),
(24, 0, '2020-09-02 09:19:47.587'),
(48, 0, '2020-09-02 09:22:53.923'),
(36, 1, '2020-09-02 09:24:14.343'),
(48, 0, '2020-09-02 09:24:49.670'),
(24, 1, '2020-09-02 09:38:37.820'),
(48, 1, '2020-09-02 10:00:00.000'); --added row
Solution
Uses a common table expression (CTE, cte_data) to fetch the previous record for the same ID (regardless of its status value) with the help of the lag() function. Succeeding rows with the same value as the previous row are removed in the where clause outside the CTE.
with cte_data as
(
select d.ID,
d.Status1,
d.Stamp,
lag(d.Status1) over(partition by d.ID order by d.Stamp) as Status1Prev,
lag(d.Stamp) over(partition by d.ID order by d.Stamp) as StampPrev
from #data d
)
select d.ID,
d.Status1Prev as Status1From,
d.Status1 as Status1To,
sum(datediff(MI, d.StampPrev, d.Stamp)) as StampDiffSumM, --minutes
convert(time(3), dateadd(MS, sum(datediff(MS, d.StampPrev, d.Stamp)), '1900-01-01 00:00:00.000')) as StampDiffSumF --formatted
from cte_data d
where d.Status1 <> d.Status1Prev
and d.Status1Prev is not null
group by d.ID, d.Status1Prev, d.Status1
order by d.ID;
Result
ID Status1From Status1To StampDiffSumM StampDiffSumF
----------- ----------- ----------- ------------- ----------------
24 0 1 20 00:20:35.233
24 1 0 0 00:00:00.353
48 0 1 36 00:35:10.330
48 1 0 18 00:18:58.540