Accumulating in SQL - sql

I have a query with results like ID, Value. What I want is to get the values in order of their ids and also calculate the accumulated value in another column. take a look at my simplified code:
-- Sample data for the running-total example, held in a local temporary table.
CREATE TABLE #TempTable
(
    ID int,
    Value int
);

INSERT INTO #TempTable (ID, Value)
VALUES
    (1, 10),
    (2, -15),
    (3, 12),
    (4, 18),
    (5, 5);

-- Running total via a triangular self-join: every row t1 is paired with all
-- rows t2 whose ID is not greater than its own, and those values are summed.
-- The number of joined rows grows quadratically with the table size.
SELECT
    t1.ID,
    t1.Value,
    SUM(t2.Value) AS AccValue
FROM #TempTable AS t1
INNER JOIN #TempTable AS t2
    ON t1.ID >= t2.ID
GROUP BY t1.ID, t1.Value
ORDER BY t1.ID;
Result:
ID Value AccValue
1 10 10
2 -15 -5
3 12 7
4 18 25
5 5 30
What I have come up with, is to use inner join between the result and itself for that purpose. But for huge amount of data, it's clearly a low performance issue.
Is there any other alternative to do that?

In 2012 version, you can use:
-- Running total with a window aggregate (SQL Server 2012+).
-- The explicit ROWS frame sums from the first row up to the current row in ID order.
SELECT
    id,
    Value,
    SUM(Value) OVER (
        ORDER BY ID
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS AccValue
FROM #TempTable;
For previous versions of SQL-Server, see my answer in this similar question: Recursive SQL- How can I get this table with a running total?, with a cursor solution.
Even better, follow the link to the great article by @Aaron Bertrand, which thoroughly tests various methods of calculating a running total: Best approaches for running totals – updated for SQL Server 2012

You can use recursion:
-- Running total via a recursive CTE.
-- Anchor: the row with ID = 1, whose balance is its own Value.
-- Recursive step: pick the row whose ID is exactly one greater and add its Value.
;WITH running AS
(
    SELECT
        [ID],
        [Value],
        [Value] AS bal
    FROM Table1
    WHERE [ID] = 1

    UNION ALL

    SELECT
        nxt.[ID],
        nxt.[Value],
        running.bal + (nxt.[Value]) AS bal
    FROM running
    INNER JOIN Table1 AS nxt
        ON nxt.[ID] = running.[ID] + 1
)
SELECT
    [ID],
    [Value],
    bal AS AccValue
FROM running
ORDER BY ID
OPTION (MAXRECURSION 10000);
SQL FIDDLE

The generic SQL way to do this is with a correlated subquery (at least, I think that is the cleanest way):
-- Running total with a correlated scalar subquery: for each outer row,
-- sum the values of all rows whose ID is less than or equal to its ID.
SELECT
    cur.*,
    (
        SELECT SUM(prev.value)
        FROM #TempTable prev
        WHERE prev.ID <= cur.ID
    ) AccValue
FROM #TempTable cur
SQL Server 2012 has a cumulative sum function:
-- Cumulative sum with a window aggregate (SQL Server 2012+).
-- The frame is spelled out as ROWS: with only ORDER BY, the default frame is
-- RANGE ... CURRENT ROW, which adds all rows tied on id at once.
select t.*,
       sum(t.value) over (order by t.id
                          rows between unbounded preceding and current row) as AccValue
from #TempTable t

Related

SQL mimicking analytic LEAD/LAG function with some restrictions

There is a table named test, with one column named amount (number datatype).
There is no PK for this table, and amounts can be repeated.
The table's DDL is below: (created for testing purposes in Oracle 18c xe)
-- Test fixture: a single numeric column, no key, with repeated amounts
-- (10, 20 and 40 each occur twice).
create table test (amount number(20));
insert into test values (20);
insert into test values (10);
insert into test values (30);
insert into test values (20);
insert into test values (10);
insert into test values (40);
insert into test values (15);
insert into test values (40);
The goal is to mimic the results of the LEAD analytic function, ordered by amount, without using any analytic functions (including ranking and window functions). PSM (including MySQL stored features, PL/SQL, T-SQL, etc.) and any kind of identity table cannot be used either.
The desired output is shown in lead_rows_analytic_amount column:
-- Reference result: the next amount in ascending amount order, computed with LEAD.
SELECT
    amount,
    LEAD(amount) OVER (ORDER BY amount) AS lead_rows_analytic_amount
FROM test t1;
actual result:
amount lead_rows_analytic_amount
10 10
10 15
15 20
20 20
20 30
30 40
40 40
40
What are some elegant ways to achieve the result taking into account the restrictions set?
The DB is irrelevant here, if the restrictions apply.
I am attaching a stupidly clumsy and direct solution I came up with, but the goal is to get something more elegant (ignoring the performance).
-- Emulates LEAD(amount) OVER (ORDER BY amount) without analytic functions.
-- Step 1 (initial_rn): rn = number of rows whose amount is <= this row's amount,
-- so rows with equal amounts share the same rn.
with initial_rn as (
select
amount,t1.rowid,
( select count (*)
from test t2
where
t1.amount >= t2.amount
) as rn
from test t1
)
-- Step 2 (prep_table): repeating_rn_tag is 1 when the row's rn occurs more than once,
-- else 0; lag_rn is the largest rn strictly below this row's rn (0 when none exists).
,prep_table as (
select t1.*,nvl2(repeating_rn,1,0) as repeating_rn_tag,
nvl(( SELECT max(rn)
FROM initial_rn t2
where t2.rn < t1.rn
),0) AS lag_rn
from initial_rn t1
left join (select rn as repeating_rn
from initial_rn
group by rn
having count(*) > 1) t2 on t1.rn = t2.repeating_rn
)
-- Step 3 (final_rn): rows with a non-repeated rn keep it; rows with a repeated rn get
-- lag_rn plus the count of rows in the same rn group whose rowid is <= their own.
,final_rn as (
select t1.amount,case when repeating_rn_tag = 0 then rn else lag_rn +
( select count (*)
from prep_table t2
where
t1.rowid >= t2.rowid and t1.repeating_rn_tag = 1 and t2.repeating_rn_tag = 1 and t1.rn = t2.rn
)
end as final_rn
from prep_table t1
)
-- Final projection, side by side:
--   lead_rows_analytic_amount: the analytic LEAD, for comparison;
--   lead_range_amount: smallest amount strictly greater than the current amount;
--   lead_amount: smallest amount among rows with a greater final_rn.
select t1.*,
lead(amount) over (order by amount) as lead_rows_analytic_amount,
(select min(amount)
from test t2
where t2.amount > t1.amount
) as lead_range_amount,
(SELECT MIN(amount)
FROM final_rn t2
where t2.final_rn > t1.final_rn
) AS lead_amount
from final_rn t1
order by amount
;
In Oracle, you can use:
-- LEAD emulation with an Oracle hierarchical query.
-- The inner queries sort by amount and number the rows with ROWNUM (rn).
-- CONNECT BY links each row to the row whose rn is one greater; there is no
-- START WITH clause, so every row also appears as a root (LEVEL = 1).
-- LEVEL = 2 rows pair a row (PRIOR amount) with its successor (amount).
-- The LEVEL = 1 leaf is the row with no successor; it is emitted with a NULL lead.
SELECT CASE WHEN LEVEL = 1 THEN amount ELSE PRIOR amount END AS amount,
CASE WHEN LEVEL = 1 THEN NULL ELSE amount END AS lead_amount
FROM (
SELECT amount,
ROWNUM AS rn
FROM (
SELECT amount
FROM test
ORDER BY amount
)
)
WHERE LEVEL = 2
OR LEVEL = 1 AND CONNECT_BY_ISLEAF = 1
CONNECT BY PRIOR rn + 1 = rn
More generally, you can use:
-- LEAD emulation: number the amounts in ascending order, then left-join each
-- row to the row carrying the next index. The last row has no match, so its
-- lead_amount is NULL.
WITH sorted_amounts (amount) AS (
    SELECT amount
    FROM test
    ORDER BY amount
),
numbered_amounts (amount, idx) AS (
    SELECT
        amount,
        ROWNUM -- Or any function that gives sequentially increasing values
    FROM sorted_amounts
)
SELECT
    cur.amount,
    succ.amount AS lead_amount
FROM numbered_amounts cur
LEFT OUTER JOIN numbered_amounts succ
    ON (cur.idx + 1 = succ.idx)
Which, for the sample data, both output:
AMOUNT
LEAD_AMOUNT
10
10
10
15
15
20
20
20
20
30
30
40
40
40
40
null
db<>fiddle here
Ok so just throwing this out there as something you could do, using JSON functionality (support exists in most RDBMS)
This is SQL server syntax:
-- LEAD emulation via JSON (SQL Server syntax).
-- STRING_AGG ... WITHIN GROUP builds a JSON array of the amounts in ascending
-- order; OPENJSON turns it back into rows exposing [key] and value columns.
-- NOTE(review): [key] is taken to be the element's array position - confirm
-- against the OPENJSON documentation.
with v as (
select *
from OpenJson(
(select Concat('[',String_Agg(amount,',')
within group (order by amount),']')from test)
)
)
-- For each element, fetch the value stored under the next key; the last
-- element has no such row, so its lead is NULL.
select value, (
select value
from v v2
where v2.[key]=v.[key]+1
) as lead_rows_analytic_amount
from v
Example fiddle
To contribute to this wonderful collection of solutions how to avoid window functions, I feel it's worth mention Oracle model clause:
-- LEAD emulation with the Oracle MODEL clause.
-- The CTE named test supplies the sample amounts inline from a collection constructor.
with test as (
select column_value as amount
from table(sys.ku$_vcnt(20,10,30,20,10,40,15,40)) -- or your table, I'm just lazy to create fiddle
)
select amount, lead_amount
from (
select *
-- Source rows are sorted by amount; lead_amount starts as a placeholder 0.
from (select amount, 0 as lead_amount from test order by amount)
model
-- Rows are addressed by rn (rownum of the sorted source).
dimension by (rownum as rn)
measures (amount, lead_amount)
-- For every cell, lead_amount is set to the amount stored at rn + 1.
rules (amount[any] = amount[cv(rn)], lead_amount[any] = amount[cv(rn) + 1])
)
order by amount
(Not sure if it is helpful for you, compared with window functions.)
If you had a primary key (any table should have):
-- Next value per row when a primary key (id) is available.
-- A candidate row qualifies when it has a strictly greater amount, or the
-- same amount and a greater id; the smallest qualifying amount is returned.
SELECT
    cur.*,
    (
        SELECT MIN(cand.amount)
        FROM #test cand
        WHERE (
                (cand.id <> cur.id AND cand.amount > cur.amount)
                OR
                (cand.id > cur.id AND cand.amount = cur.amount)
              )
    ) AS NextVal
FROM #test cur
ORDER BY cur.amount, cur.id

How to minus current and previous value in SQL Server

Have one table, need to minus one column previous and current amount. Table value is below, need to write syntax for Cal-Amount column
Id Amount Cal-Amount
1 100 0
2 200 0
3 400 0
4 500 0
Cal-Amount calculation formula with sample value
Id Amount Cal-Amount
1 100 (0-100)=100
2 200 (100-200)=100
3 400 (200-400)=200
4 500 (400-500)=100
Need SQL syntax to minus column current and previous value
LAG is one option if you are using SQL Server 2012 or later:
-- Previous amount minus current amount, in Id order.
-- LAG's third argument makes the "previous" amount of the first row 0.
SELECT
    Id,
    Amount,
    [Cal-Amount] = LAG(Amount, 1, 0) OVER (ORDER BY Id) - Amount
FROM yourTable;
If you are using an earlier version of SQL Server, then we can use a self join:
-- Previous amount minus current amount via a self join (pre-2012 SQL Server).
-- t2 is the row whose Id is one less than t1's; COALESCE supplies 0 when no
-- such row exists. Id and Amount are qualified with t1 because both joined
-- tables expose those column names and an unqualified reference is ambiguous.
SELECT
    t1.Id,
    t1.Amount,
    COALESCE(t2.Amount, 0) - t1.Amount AS [Cal-Amount]
FROM yourTable t1
LEFT JOIN yourTable t2
    ON t1.Id = t2.Id + 1;
But note that the self join option might only work if the Id values are continuous. LAG is probably the most efficient way to do this, and is also robust to non sequential Id values, so long as the order is correct.
Well, Tim beat me to the lag(), so here's the old-school using join:
-- Current amount minus the amount of the row whose id is one less
-- (0 when there is no such row).
SELECT
    cur.Id,
    cur.Amount,
    cur.Amount - ISNULL(prev.Amount, 0) AS [Cal-Amount]
FROM yourtable cur
LEFT JOIN yourtable prev
    ON cur.id = prev.id + 1
SQL Server 2012 or newer:
-- Current amount minus previous amount (0 for the first row), in Id order.
-- yourTable is a placeholder for the real table name; the bare word TABLE is
-- a reserved keyword and cannot be used as an unquoted table name.
SELECT
    ID,
    Amount,
    [Cal-Amount] = Amount - LAG(Amount, 1, 0) OVER (ORDER BY Id)
FROM yourTable
or
-- Current amount minus the amount of the row whose id is one less
-- (0 when there is no such row).
-- yourTable is a placeholder for the real table name. The aliases cur/prev
-- replace CURRENT and PRIOR because CURRENT (like TABLE) is a reserved
-- keyword in T-SQL, and the computed column is now given a name.
SELECT
    cur.ID,
    cur.Amount,
    cur.Amount - ISNULL(prev.Amount, 0) AS [Cal-Amount]
FROM yourTable cur
LEFT JOIN yourTable prev
    ON cur.id - 1 = prev.id
You can use the LAG function if your SQL Server >= 2012
-- Self-contained LAG demo. Table variables must be named with a leading @.
DECLARE @t TABLE (id int, amount1 int);

INSERT INTO @t (id, amount1)
VALUES (1, 100), (2, 200), (3, 400), (4, 500);

-- Current amount minus previous amount in id order; the first row subtracts 0.
SELECT
    *,
    amount1 - LAG(amount1, 1, 0) OVER (ORDER BY id) AS CalAmount
FROM @t;
You can also use apply :
-- Current amount minus the amount of the closest preceding row by id.
-- OUTER APPLY keeps rows that have no predecessor; COALESCE turns the missing
-- amount into 0. Unlike the id - 1 joins, this tolerates gaps in the id values.
-- yourTable is a placeholder for the real table name (TABLE itself is a
-- reserved keyword). Only Amount is fetched from the predecessor because no
-- other column of it is used.
SELECT
    t.*,
    t.Amount - COALESCE(tt.Amount, 0) AS CalAmount
FROM yourTable t
OUTER APPLY (
    SELECT TOP (1) t1.Amount
    FROM yourTable t1
    WHERE t1.id < t.id
    ORDER BY t1.id DESC
) tt;

Next/previous record based on current

I have a table which is not sorted by any of column. Is there any way to select next/previous record if I know only Id of current? (I'm using mssql)
Id Label Date
---------------------
1 label1 2011-01-10
7 label2 2011-01-15 -- how to get previous?
5 label3 2011-01-12 -- I know id of this record
10 label10 2011-01-25 -- how to get next?
12 label8 2011-01-13
2 label5 2011-01-29
Thanks in advance!
try this:
-- Sample data. The column types are assumptions chosen to fit the values shown.
CREATE TABLE table007 (Id int, Label varchar(20), [Date] date);

INSERT INTO table007 (Id, Label, [Date])
VALUES (1, 'label1', '2011-01-10'), (7, 'label2', '2011-01-15'),
       (5, 'label3', '2011-01-12'), (10, 'label10', '2011-01-25'),
       (12, 'label8', '2011-01-13'), (2, 'label5', '2011-01-29');

select * from table007;

-- Id of the current record; variables must be named with a leading @.
DECLARE @inptID int = 12;

-- Number the rows, then return the rows numbered one below and one above the
-- current record.
-- NOTE: ORDER BY (SELECT 0) imposes no real ordering, so the numbering (and
-- therefore "previous"/"next") is whatever order the engine happens to
-- produce; use a real ordering column when one exists.
;WITH CTE
AS
(
    SELECT *, ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS rn
    FROM table007
)
SELECT *
FROM CTE
WHERE rn IN (SELECT rn - 1 FROM CTE WHERE id = @inptID)
UNION ALL
SELECT *
FROM CTE
WHERE rn IN (SELECT rn + 1 FROM CTE WHERE id = @inptID);
SQL Fiddle Demo
DEMO
If it is not sorted by any column, there is no definitive next or previous record. Data in SQL Server has no order, other than that specified by an ORDER BY clause.
If you really want the previous from the list you enclosed, here is a way.
-- Table variable (named with a leading @) with an identity column s that
-- numbers the rows as they are inserted.
-- NOTE(review): identity values are assumed to follow the order of the VALUES
-- list - confirm before relying on it.
DECLARE @t TABLE (Id int, Label varchar(10), Date date, s int IDENTITY(1,1));

INSERT @t (id, label, date)
VALUES (1,'label1','2011-01-10'),(7,'label2','2011-01-15'),
       (5,'label3','2011-01-12'),(10,'label10','2011-01-25'),
       (12,'label8','2011-01-13'),(2,'label5','2011-01-29');

--select the data with a self join
-- t2 is the current row, t1 the row with the preceding s, t3 the row with the
-- following s; the outer joins keep the first and last rows with NULL neighbours.
SELECT t1.id AS previous_id, t2.id, t2.Label, t2.Date, t3.id, t3.id AS next_id
FROM @t t1
RIGHT JOIN
    @t t2 ON t1.s + 1 = t2.s
LEFT JOIN
    @t t3 ON t2.s = t3.s - 1

sql finding gaps in a couple number/year

I have a table like:
id
number
year
I want to find "holes", or gaps, not considering the id but only the couple year/number.
There is a gap when, for the same year, there are two non-consecutive numbers, the result being the year and all the numbers between (excluding extremes) those two non-consecutive numbers. Also note that the lower end is always 1 so that if 1 is missing, it is a gap.
For example, having:
id n year
1 1 2012
2 2 2012
3 5 2012
4 2 2010
I want as a result:
3/2012
4/2012
1/2010
The trick to finding missing entries in sequences is to generate a cartesian product of all available combinations in the sequence, then use NOT EXISTS to eliminate those that exist. This is hard to do in a non-DBMS-specific way because each DBMS has a different optimal way to create a sequence on the fly. For Oracle I use:
-- Generates one row per integer from 1 to MaxRequiredValue.
-- MaxRequiredValue is a placeholder to be replaced with the actual upper bound.
SELECT RowNum AS r
FROM Dual
CONNECT BY Level <= MaxRequiredValue;
So, to generate a list of all available year/n pairs I would use:
-- All candidate (year, n) pairs: d holds each year with its largest n,
-- n holds the integers from 1 to the largest n in the whole table, and the
-- join keeps, per year, only the integers below that year's maximum.
SELECT d.Year, n.r
FROM ( SELECT year, MAX(n) AS MaxN
FROM T
GROUP BY Year
) d
INNER JOIN
( SELECT RowNum AS r
FROM Dual
CONNECT BY Level <= (SELECT MAX(n) FROM T)
) n
ON r < MaxN;
Here I get the maximum n for each year and join it to a list of integers from 1 to the highest n overall, keeping only the integers that are less than that year's maximum value.
Finally, use NOT EXISTS to eliminate the values that already exist:
-- Missing (year, n) pairs.
-- d: each year with its largest n. n: the integers from 1 to the largest n in
-- the table. The join keeps, per year, every integer BELOW that year's maximum
-- (r < MaxN); joining on r = MaxN would keep only the maximum itself, which
-- always exists and would be removed by NOT EXISTS, leaving no rows.
-- NOT EXISTS then drops the candidates that are actually present in T.
SELECT d.Year, n.r
FROM ( SELECT year, MAX(n) AS MaxN
       FROM T
       GROUP BY Year
     ) d
INNER JOIN
     ( SELECT RowNum AS r
       FROM Dual
       CONNECT BY Level <= (SELECT MAX(n) FROM T)
     ) n
     ON r < MaxN
WHERE NOT EXISTS
     ( SELECT 1
       FROM T
       WHERE d.Year = t.Year
       AND n.r = t.n
     );
Working example on SQL Fiddle
EDIT
Since I couldn't find a non-DBMS-specific solution, I thought I'd better do the decent thing and create some examples for other DBMSs.
SQL Server Example
Postgresql Example
My SQL Example
Another option is to use a temporary table like so:
-- Finds missing (year, n) pairs by building the full sequence 1..max(n) for
-- each year in a temp table and then removing the pairs that really exist.
create table #tempTable ([year] int, n int)

-- Seed: every year starts its sequence at 1.
insert
into #tempTable ([year], n)
select t.year, 1
from tableName t
group by t.year

-- Extend each year's sequence by one per iteration until it reaches the
-- largest n recorded for that year.
while exists(
    select *
    from tableName t1
    where t1.n > (select MAX(t2.n) from #tempTable t2 where t2.year = t1.year)
)
begin
    -- DISTINCT: several source rows of one year can lie beyond the current
    -- maximum; without it the same (year, n) would be inserted once per such
    -- row and the gap would be reported multiple times.
    insert
    into #tempTable ([year], n)
    select distinct t1.year,
        (select MAX(t2.n)+1 from #tempTable t2 where t2.year = t1.year)
    from tableName t1
    where t1.n > (select MAX(t2.n) from #tempTable t2 where t2.year = t1.year)
end

-- Remove the pairs that exist in the source; what remains are the gaps.
delete t2
from #tempTable t2
inner join tableName t1
    on t1.year = t2.year
    and t1.n = t2.n

select [year], n
from #tempTable

drop table #tempTable

SQL stored procedure to add up values and stop once the maximum has been reached

I would like to write a SQL query (SQL Server) that will return rows (in a given order) but only up to a given total. My client has paid me a given amount, and I want to return only those rows that are <= to that amount.
For example, if the client paid me $370, and the data in the table is
id amount
1 100
2 122
3 134
4 23
5 200
then I would like to return only rows 1, 2 and 3
This needs to be efficient, since there will be thousands of rows, so a for loop would not be ideal, I guess. Or is SQL Server efficient enough to optimise a stored proc with for loops?
Thanks in advance. Jim.
A couple of options are.
1) Triangular Join
-- Keep each row whose running total (sum of amounts for all ids up to and
-- including its own) does not exceed 370.
SELECT *
FROM YourTable cur
WHERE (
        SELECT SUM(prev.amount)
        FROM YourTable prev
        WHERE cur.id >= prev.id
      ) <= 370
2) Recursive CTE
-- Running total that stops once the next row would push it above 370.
WITH RecursiveCTE
AS (
-- Anchor: the row with the lowest id; Total is widened to BIGINT.
-- Note: the anchor row is returned without being checked against 370.
SELECT TOP 1 id, amount, CAST(amount AS BIGINT) AS Total
FROM YourTable
ORDER BY id
UNION ALL
-- Recursive step: among rows with a greater id than the previous row,
-- ROW_NUMBER picks the one with the smallest id (rn = 1); it is kept only
-- when the accumulated Total stays <= 370, otherwise the recursion ends.
SELECT R.id, R.amount, R.Total
FROM (
SELECT T.*,
T.amount + Total AS Total,
rn = ROW_NUMBER() OVER (ORDER BY T.id)
FROM YourTable T
JOIN RecursiveCTE R
ON R.id < T.id
) R
WHERE R.rn = 1 AND Total <= 370
)
SELECT id, amount, Total
FROM RecursiveCTE
-- MAXRECURSION 0 removes the recursion-depth limit.
OPTION (MAXRECURSION 0);
The 2nd one will likely perform better.
In SQL Server 2012 you will be able to do something like
-- Compute the running total in id order, then keep the rows whose running
-- total does not exceed 370.
;WITH Totals AS
(
    SELECT
        id,
        amount,
        RunningTotal = SUM(amount) OVER (
            ORDER BY id
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        )
    FROM YourTable
)
SELECT *
FROM Totals
WHERE RunningTotal <= 370
Though there will probably be a more efficient way (to stop the scan as soon as the total is reached)
Straight-forward approach :
-- Pair each row with every row up to and including its id, sum the paired
-- amounts per row, and keep the rows whose sum does not exceed 370.
SELECT
    cur.id,
    cur.amount
FROM table1 cur
INNER JOIN table1 prev
    ON (prev.id <= cur.id)
GROUP BY
    cur.id,
    cur.amount
HAVING SUM(prev.amount) <= 370
Unfortunately, it has N^2 performance issue.
something like this:
-- Ids of the rows whose running total (sum of amounts for all ids up to and
-- including their own) is within the 370 paid.
-- The derived table carries an alias, which SQL Server requires; the join is
-- written explicitly; and the bound is inclusive (<=) so a row that brings
-- the total to exactly 370 is still returned.
SELECT totals.id
FROM
(
    SELECT t1.id, t1.amount, SUM(t2.amount) AS s
    FROM tst AS t1
    INNER JOIN tst AS t2
        ON t2.id <= t1.id
    GROUP BY t1.id, t1.amount
) AS totals
WHERE totals.s <= 370