Oracle SQL Hierarchy Summation - sql

I have a table TRANS that contains the following records:
TRANS_ID TRANS_DT QTY
1 01-Aug-2020 5
1 01-Aug-2020 1
1 03-Aug-2020 2
2 02-Aug-2020 1
The expected output:
TRANS_ID TRANS_DT BEGBAL TOTAL END_BAL
1 01-Aug-2020 0 6 6
1 02-Aug-2020 6 0 6
1 03-Aug-2020 6 2 8
2 01-Aug-2020 0 0 0
2 02-Aug-2020 0 1 1
2 03-Aug-2020 1 0 1
Each trans_id starts with a beginning balance of 0 (01-Aug-2020). For succeeding days, the beginning balance is the ending balance of the previous day and so on.
I can create PL/SQL block to create the output. Is it possible to get the output in 1 SQL statement?
Thanks.

Try this following script using CTE-
Demo Here
WITH CTE
AS
(
SELECT DISTINCT A.TRANS_ID,B.TRANS_DT
FROM your_table A
CROSS JOIN (SELECT DISTINCT TRANS_DT FROM your_table) B
),
CTE2
AS
(
SELECT C.TRANS_ID,C.TRANS_DT,SUM(D.QTY) QTY
FROM CTE C
LEFT JOIN your_table D
ON C.TRANS_ID = D.TRANS_ID
AND C.TRANS_DT = D.TRANS_DT
GROUP BY C.TRANS_ID,C.TRANS_DT
ORDER BY C.TRANS_ID,C.TRANS_DT
)
SELECT F.TRANS_ID,F.TRANS_DT,
(
SELECT COALESCE (SUM(QTY), 0) FROM CTE2 E
WHERE E.TRANS_ID = F.TRANS_ID AND E.TRANS_DT < F.TRANS_DT
) BEGBAL,
(
SELECT COALESCE (SUM(QTY), 0) FROM CTE2 E
WHERE E.TRANS_ID = F.TRANS_ID AND E.TRANS_DT = F.TRANS_DT
) TOTAL ,
(
SELECT COALESCE (SUM(QTY), 0) FROM CTE2 E
WHERE E.TRANS_ID = F.TRANS_ID AND E.TRANS_DT <= F.TRANS_DT
) END_BAL
FROM CTE2 F

You can as well do like this (I would assume it's a bit faster): Demo
with
dt_between as (
select mindt + level - 1 as trans_dt
from (select min(trans_dt) as mindt, max(trans_dt) as maxdt from t)
connect by level <= maxdt - mindt + 1
),
dt_for_trans_id as (
select *
from dt_between, (select distinct trans_id from t)
),
qty_change as (
select distinct trans_id, trans_dt,
sum(qty) over (partition by trans_id, trans_dt) as total,
sum(qty) over (partition by trans_id order by trans_dt) as end_bal
from t
right outer join dt_for_trans_id using (trans_id, trans_dt)
)
select
trans_id,
to_char(trans_dt, 'DD-Mon-YYYY') as trans_dt,
nvl(lag(end_bal) over (partition by trans_id order by trans_dt), 0) as beg_bal,
nvl(total, 0) as total,
nvl(end_bal, 0) as end_bal
from qty_change q
order by trans_id, trans_dt
dt_between returns all the days between min(trans_dt) and max(trans_dt) in your data.
dt_for_trans_id returns all these days for each trans_id in your data.
qty_change finds difference for each day (which is TOTAL in your example) and cumulative sum over all the days (which is END_BAL in your example).
The main select takes END_BAL from previous day and calls it BEG_BAL, it also does some formatting of final output.

First of all, you need to generate dates, then you need to aggregate your values by TRANS_DT, and then left join your aggregated data to dates. The easiest way to get required sums is to use analitic window functions:
with dates(dt) as ( -- generating dates between min(TRANS_DT) and max(TRANS_DT) from TRANS
select min(trans_dt) from trans
union all
select dt+1 from dates
where dt+1<=(select max(trans_dt) from trans)
)
,trans_agg as ( -- aggregating QTY in TRANS
select TRANS_ID,TRANS_DT,sum(QTY) as QTY
from trans
group by TRANS_ID,TRANS_DT
)
select -- using left join partition by to get data on daily basis for each trans_id:
dt,
trans_id,
nvl(sum(qty) over(partition by trans_id order by dates.dt range between unbounded preceding and 1 preceding),0) as BEGBAL,
nvl(qty,0) as TOTAL,
nvl(sum(qty) over(partition by trans_id order by dates.dt),0) as END_BAL
from dates
left join trans_agg tr
partition by (trans_id)
on tr.trans_dt=dates.dt;
Full example with sample data:
alter session set nls_date_format='dd-mon-yyyy';
with trans(TRANS_ID,TRANS_DT,QTY) as (
select 1,to_date('01-Aug-2020'), 5 from dual union all
select 1,to_date('01-Aug-2020'), 1 from dual union all
select 1,to_date('03-Aug-2020'), 2 from dual union all
select 2,to_date('02-Aug-2020'), 1 from dual
)
,dates(dt) as ( -- generating dates between min(TRANS_DT) and max(TRANS_DT) from TRANS
select min(trans_dt) from trans
union all
select dt+1 from dates
where dt+1<=(select max(trans_dt) from trans)
)
,trans_agg as ( -- aggregating QTY in TRANS
select TRANS_ID,TRANS_DT,sum(QTY) as QTY
from trans
group by TRANS_ID,TRANS_DT
)
select
dt,
trans_id,
nvl(sum(qty) over(partition by trans_id order by dates.dt range between unbounded preceding and 1 preceding),0) as BEGBAL,
nvl(qty,0) as TOTAL,
nvl(sum(qty) over(partition by trans_id order by dates.dt),0) as END_BAL
from dates
left join trans_agg tr
partition by (trans_id)
on tr.trans_dt=dates.dt;

You can use a recursive query to generate the overall date range, cross join it with the list of distinct tran_id, then bring the table with a left join. The last step is aggregation and window functions:
with all_dates (trans_dt, max_dt) as (
select min(trans_dt), max(trans_dt) from trans group by trans_id
union all
select trans_dt + interval '1' day, max_dt from all_dates where trans_dt < max_dt
)
select
i.trans_id,
d.trans_dt,
coalesce(sum(sum(t.qty)) over(partition by i.trans_id order by d.trans_dt), 0) - coalesce(sum(t.qty), 0) begbal,
coalesce(sum(t.qty), 0) total,
coalesce(sum(sum(t.qty)) over(partition by i.trans_id order by d.trans_dt), 0) endbal
from all_dates d
cross join (select distinct trans_id from trans) i
left join trans t on t.trans_id = i.trans_id and t.trans_dt = d.trans_dt
group by i.trans_id, d.trans_dt
order by i.trans_id, d.trans_dt

Related

Identify date range and merge into max and min dates

I have data ( int, date , date types)
SELECT * FROM
(
VALUES
(1700171048,'2020-12-21','2021-01-03'),
(1700171048,'2021-01-05','2021-01-12'),
(1700171048,'2021-01-13','2021-01-17'),
(1700171048,'2021-01-18','2021-01-19'),
(1700171048,'2021-01-22','2021-01-27'),
(1700171048,'2021-01-28','2021-02-17')
(1700171049,'2020-12-21','2021-01-03'),
(1700171049,'2021-01-04','2021-01-05'),
(1700171049,'2021-01-06','2021-01-17'),
(1700171049,'2021-01-18','2021-01-19'),
(1700171049,'2021-01-20','2021-01-27'),
(1700171049,'2021-01-28','2021-02-17')
) AS c (id1, st, endt )
I need output( i.e. if start and end dates are continuous then make it part of group )
id1 st endt
1700171048 '2020-12-21' , '2021-01-03'
1700171048 '2021-01-05' , '2021-01-19'
1700171048 '2021-01-22' , '2021-02-17'
1700171049 '2020-12-21' to '2021-02-17'
I tried this, won't work.
select id, case when min(b.st) = max(b.endt) + 1 then min(b.st) end,
case when min(b.endt) = min(b.st) + 1 then max(b.st) end
from c a join c b
group by id
This is a type of gaps-and-islands problem. Use lag() to identify if there is an overlap. Then a cumulative sum of when there is no overlaps and aggregation:
select id1, min(st), max(endt)
from (select t.*,
sum(case when prev_endt >= st + interval '-1 day' then 0 else 1 end) over (partition by id1 order by st) as grp
from (select t.*,
lag(endt) over (partition by id1 order by st) as prev_endt
from t
) t
) t
group by id1, grp;
Here is a db<>fiddle.

How to get the validity date range of a price from individual daily prices in SQL

I have some prices for the month of January.
Date,Price
1,100
2,100
3,115
4,120
5,120
6,100
7,100
8,120
9,120
10,120
Now, the o/p I need is a non-overlapping date range for each price.
price,from,To
100,1,2
115,3,3
120,4,5
100,6,7
120,8,10
I need to do this using SQL only.
For now, if I simply group by and take min and max dates, I get the below, which is an overlapping range:
price,from,to
100,1,7
115,3,3
120,4,10
This is a gaps-and-islands problem. The simplest solution is the difference of row numbers:
select price, min(date), max(date)
from (select t.*,
row_number() over (order by date) as seqnum,
row_number() over (partition by price, order by date) as seqnum2
from t
) t
group by price, (seqnum - seqnum2)
order by min(date);
Why this works is a little hard to explain. But if you look at the results of the subquery, you will see how the adjacent rows are identified by the difference in the two values.
SELECT Lag.price,Lag.[date] AS [From], MIN(Lead.[date]-Lag.[date])+Lag.[date] AS [to]
FROM
(
SELECT [date],[Price]
FROM
(
SELECT [date],[Price],LAG(Price) OVER (ORDER BY DATE,Price) AS LagID FROM #table1 A
)B
WHERE CASE WHEN Price <> ISNULL(LagID,1) THEN 1 ELSE 0 END = 1
)Lag
JOIN
(
SELECT [date],[Price]
FROM
(
SELECT [date],Price,LEAD(Price) OVER (ORDER BY DATE,Price) AS LeadID FROM [#table1] A
)B
WHERE CASE WHEN Price <> ISNULL(LeadID,1) THEN 1 ELSE 0 END = 1
)Lead
ON Lag.[Price] = Lead.[Price]
WHERE Lead.[date]-Lag.[date] >= 0
GROUP BY Lag.[date],Lag.[price]
ORDER BY Lag.[date]
Another method using ROWS UNBOUNDED PRECEDING
SELECT price, MIN([date]) AS [from], [end_date] AS [To]
FROM
(
SELECT *, MIN([abc]) OVER (ORDER BY DATE DESC ROWS UNBOUNDED PRECEDING ) end_date
FROM
(
SELECT *, CASE WHEN price = next_price THEN NULL ELSE DATE END AS abc
FROM
(
SELECT a.* , b.[date] AS next_date, b.price AS next_price
FROM #table1 a
LEFT JOIN #table1 b
ON a.[date] = b.[date]-1
)AA
)BB
)CC
GROUP BY price, end_date

Group by in columns and rows, counts and percentages per day

I have a table that has data like following.
attr |time
----------------|--------------------------
abc |2018-08-06 10:17:25.282546
def |2018-08-06 10:17:25.325676
pqr |2018-08-05 10:17:25.366823
abc |2018-08-06 10:17:25.407941
def |2018-08-05 10:17:25.449249
I want to group them and count by attr column row wise and also create additional columns in to show their counts per day and percentages as shown below.
attr |day1_count| day1_%| day2_count| day2_%
----------------|----------|-------|-----------|-------
abc |2 |66.6% | 0 | 0.0%
def |1 |33.3% | 1 | 50.0%
pqr |0 |0.0% | 1 | 50.0%
I'm able to display one count by using group by but unable to find out how to even seperate them to multiple columns. I tried to generate day1 percentage with
SELECT attr, count(attr), count(attr) / sum(sub.day1_count) * 100 as percentage from (
SELECT attr, count(*) as day1_count FROM my_table WHERE DATEPART(week, time) = DATEPART(day, GETDate()) GROUP BY attr) as sub
GROUP BY attr;
But this also is not giving me correct answer, I'm getting all zeroes for percentage and count as 1. Any help is appreciated. I'm trying to do this in Redshift which follows postgresql syntax.
Let's nail the logic before presenting:
with CTE1 as
(
select attr, DATEPART(day, time) as theday, count(*) as thecount
from MyTable
)
, CTE2 as
(
select theday, sum(thecount) as daytotal
from CTE1
group by theday
)
select t1.attr, t1.theday, t1.thecount, t1.thecount/t2.daytotal as percentofday
from CTE1 t1
inner join CTE2 t2
on t1.theday = t2.theday
From here you can pivot to create a day by day if you feel the need
I am trying to enhance the query #johnHC btw if you needs for 7days then you have to those days in case when
with CTE1 as
(
select attr, time::date as theday, count(*) as thecount
from t group by attr,time::date
)
, CTE2 as
(
select theday, sum(thecount) as daytotal
from CTE1
group by theday
)
,
CTE3 as
(
select t1.attr, EXTRACT(DOW FROM t1.theday) as day_nmbr,t1.theday, t1.thecount, t1.thecount/t2.daytotal as percentofday
from CTE1 t1
inner join CTE2 t2
on t1.theday = t2.theday
)
select CTE3.attr,
max(case when day_nmbr=0 then CTE3.thecount end) as day1Cnt,
max(case when day_nmbr=0 then percentofday end) as day1,
max(case when day_nmbr=1 then CTE3.thecount end) as day2Cnt,
max( case when day_nmbr=1 then percentofday end) day2
from CTE3 group by CTE3.attr
http://sqlfiddle.com/#!17/54ace/20
In case that you have only 2 days:
http://sqlfiddle.com/#!17/3bdad/3 (days descending as in your example from left to right)
http://sqlfiddle.com/#!17/3bdad/5 (days ascending)
The main idea is already mentioned in the other answers. Instead of joining the CTEs for calculating the values I am using window functions which is a bit shorter and more readable I think. The pivot is done the same way.
SELECT
attr,
COALESCE(max(count) FILTER (WHERE day_number = 0), 0) as day1_count, -- D
COALESCE(max(percent) FILTER (WHERE day_number = 0), 0) as day1_percent,
COALESCE(max(count) FILTER (WHERE day_number = 1), 0) as day2_count,
COALESCE(max(percent) FILTER (WHERE day_number = 1), 0) as day2_percent
/*
Add more days here
*/
FROM(
SELECT *, (count::float/count_per_day)::decimal(5, 2) as percent -- C
FROM (
SELECT DISTINCT
attr,
MAX(time::date) OVER () - time::date as day_number, -- B
count(*) OVER (partition by time::date, attr) as count, -- A
count(*) OVER (partition by time::date) as count_per_day
FROM test_table
)s
)s
GROUP BY attr
ORDER BY attr
A counting the rows per day and counting the rows per day AND attr
B for more readability I convert the date into numbers. Here I take the difference between current date of the row and the maximum date available in the table. So I get a counter from 0 (first day) up to n - 1 (last day)
C calculating the percentage and rounding
D pivot by filter the day numbers. The COALESCE avoids the NULL values and switched them into 0. To add more days you can multiply these columns.
Edit: Made the day counter more flexible for more days; new SQL Fiddle
Basically, I see this as conditional aggregation. But you need to get an enumerator for the date for the pivoting. So:
SELECT attr,
COUNT(*) FILTER (WHERE day_number = 1) as day1_count,
COUNT(*) FILTER (WHERE day_number = 1) / cnt as day1_percent,
COUNT(*) FILTER (WHERE day_number = 2) as day2_count,
COUNT(*) FILTER (WHERE day_number = 2) / cnt as day2_percent
FROM (SELECT attr,
DENSE_RANK() OVER (ORDER BY time::date DESC) as day_number,
1.0 * COUNT(*) OVER (PARTITION BY attr) as cnt
FROM test_table
) s
GROUP BY attr, cnt
ORDER BY attr;
Here is a SQL Fiddle.

Retrieve records by continuation of days in oracle

I want to retrieve records where cash deposits are more than 4 totaling to 1000000 during a day and continues for more than 5 days.
I have came up with below query.
SELECT COUNT(a.txamt) AS "txcount"
, SUM(a.txamt) AS "txsum"
, b.custcd
, a.txdate
FROM tb_transactions a
INNER JOIN tb_accounts b
ON a.acctno = b.acctno
WHERE a.cashflowtype = 'CR'
GROUP BY b.custcd, a.txdate
HAVING COUNT(a.txamt)>4 and SUM(a.txamt)>='1000000'
ORDER BY a.txdate;
But I'm stuck on how to fetch the records if the pattern continues for 5 days.
How to achieve the desired result?
Something like:
SELECT *
FROM (
SELECT t.*,
COUNT( txdate ) OVER ( PARTITION BY custcd
ORDER BY txdate
RANGE BETWEEN INTERVAL '0' DAY PRECEDING
AND INTERVAL '4' DAY FOLLOWING ) AS
num_days
FROM (
select count(a.txamt) as "txcount",
sum(a.txamt) as "txsum",
b.custcd,
a.txdate
from tb_transactions a inner join tb_accounts b on a.acctno=b.acctno
where a.cashflowtype='CR'
group by b.custcd, a.txdate
having count(a.txamt)>4 and sum(a.txamt)>=1000000
) t
)
WHERE num_days = 5
order by a.txdate;

Running totals with initial value then adding the totals as stated by the date

Imagine we have a table:
SELECT SUM(A) AS TOTALS,DATE,STUFF FROM TABLE WHERE DATE BETWEEN 'DATESTART' AND 'DATEEND'
GROUP BY DATE,STUFF
Normally this gets the totals as:
totals stuff date
23 x 01.01.1900
3 x 02.01.1900
44 x 06.01.1900
But what if we have the previous the data before the startdate,and i want to add those initial data to my startdate value; for example; from the begining of time i already have a sum value of x lets say 100
so i want my table to start from 123 and add the previous data such as:
123
126
126+44 and so on...
totals stuff date
123 x 01.01.1900
126 x 02.01.1900
170 x 06.01.1900
How can i achieve that?
Source data:
WITH Stocks
AS (
SELECT
Dep.Dept_No ,
SUM(DSL.Metre) AS Metre ,
CONVERT(VARCHAR(10), Date, 112) AS Date
FROM
DS (NOLOCK) DSL
JOIN TBL_Depts (NOLOCK) Dep ON Dep.Dept_No = DSL.Dept
WHERE
1 = 1 AND
DSL.Sil = 0 AND
DSL.Depo IN ( 5000, 5001, 5002, 5003, 5004, 5014, 5018, 5021, 5101, 5109, 5303 ) AND
Dep.Dept_No NOT IN ( 6002 ) AND
Dep.Dept_No IN ( 6000, 6001, 6003, 6004, 6005, 6011, 6024, 6030 ) AND
DSL.Date BETWEEN '2013-06-19' AND '2013-06-20'
GROUP BY
Dep.Dept_No ,
CONVERT(VARCHAR(10), Date, 112)
)
SELECT
Stocks.Metre ,
Dep.Dept AS Dept ,
Stocks.Date
FROM
Stocks
LEFT JOIN TBL_Depts (NOLOCK) Dep ON Stocks.Dept = Dep.Dept
ORDER BY
Stocks.Metre DESC
Any RDBMS with window and analytic functions (SQL Server 2012, PostgreSQL but not MySQL)
SELECT
SumA + SUM(SumARange) OVER (ORDER BY aDate ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS TOTALS,
other, aDate
FROM
(
SELECT
SUM(a) AS SumARange,
other, aDate
FROM
SomeTable
WHERE
aDate BETWEEN '20130101' AND '20130106'
GROUP BY
other, aDate
) X
CROSS JOIN
(
SELECT
SUM(a) AS SumA
FROM
SomeTable
WHERE
aDate < '20130101'
) Y
ORDER BY
aDate;
or
SELECT
SUM(SumA) OVER () + SUM(SumARange) OVER (ORDER BY aDate ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS TOTALS,
other, aDate
FROM
(
SELECT
SUM(CASE WHEN aDate < '20130101' THEN a ELSE 0 END) AS SumA,
SUM(CASE WHEN aDate BETWEEN '20130101' AND '20130106' THEN a ELSE 0 END) AS SumARange,
other, aDate
FROM
SomeTable
WHERE
aDate <= '20130106'
GROUP BY
other, aDate
) X
ORDER BY
aDate;
SQLFiddle example and another
Use option with APPLY operator to calculate the totals. You need also add additional CASE expression in the GROUP BY clause
;WITH cte AS
(
SELECT SUM(a) AS sumA, [stuff], MAX([Date]) AS [Date]
FROM SomeTable
WHERE [Date] <= '20130106'
GROUP BY [stuff], CASE WHEN [Date] <= '20130101' THEN 1 ELSE [Date] END
)
SELECT o.total, [stuff], [Date]
FROM cte c CROSS APPLY (
SELECT SUM(c2.sumA) AS total
FROM cte c2
WHERE c.[Date] >= c2.[Date]
) o
See example on SQLFiddle