SQL - running total when data already grouped

SQL - running total when data already grouped - sql

I am trying to do a running total for some data, and have seen the easy way to do it. However, I have already grouped some data and this is throwing off my code. I currently have dates and payment types, and the totals that it relates to.
What I have at the moment is:
-- Sample data: one already-aggregated total per (date, payment type).
create table #testdata
(
    mdate       date,
    pmttype     varchar(64),
    totalpmtamt int
);

-- Dates are expressed relative to today so the sample always spans the last week.
insert into #testdata (mdate, pmttype, totalpmtamt)
values
    (dateadd(day, -7, getdate()), 'DD', 10),
    (dateadd(day, -7, getdate()), 'SO', 12),
    (dateadd(day, -6, getdate()), 'DD', 3),
    (dateadd(day, -5, getdate()), 'DD', 13),
    (dateadd(day, -5, getdate()), 'SO', 23),
    (dateadd(day, -5, getdate()), 'PO', 8);
What I want to have is:
mdate | paymenttype | totalpmtamt | incrtotal
2016-08-29 | DD | 10 | 10
2016-08-29 | SO | 12 | 22
2016-08-30 | DD | 3 | 25
2016-08-31 | DD | 13 | 38
2016-08-31 | PO | 8 | 46
2016-08-31 | SO | 23 | 69
I've tried adapting other code I've found here into:
-- Attempted running total via a self-join: every t1 row is summed over all
-- t2 rows that satisfy the join predicate.
select t1.mdate,
t1.pmttype,
t1.totalpmtamt,
SUM(t2.totalpmtamt) as runningsum
from #testdata t1
-- Both conditions must hold at the same time, so a t2 row from an earlier
-- date is still excluded when its pmttype sorts after t1.pmttype.
join #testdata t2 on t1.mdate >= t2.mdate and t1.pmttype >= t2.pmttype
group by t1.mdate, t1.pmttype, t1.totalpmtamt
order by t1.mdate
but all I get is
mdate | paymenttype | totalpmtamt | incrtotal
2016-08-29 | DD | 10 | 10
2016-08-29 | SO | 12 | 22
2016-08-30 | DD | 3 | 13
2016-08-31 | DD | 13 | 26
2016-08-31 | PO | 8 | 34
2016-08-31 | SO | 23 | 69
Can anyone help please?

The ANSI standard way of doing a cumulative sum is:
-- Running total with a window function.
-- pmttype breaks ties between rows that share an mdate, and the explicit
-- ROWS frame gives every row its own cumulative value; with the default
-- RANGE frame, all rows tied on the ORDER BY key receive the same total.
select t.*,
       sum(t.totalpmtamt) over (
           order by t.mdate, t.pmttype
           rows between unbounded preceding and current row
       ) as runningsum
from #testdata t
order by t.mdate, t.pmttype;
Not all databases support this functionality.
If your database doesn't support that functionality, I would go for a correlated subquery:
-- Running total via a correlated subquery, for engines without window functions.
-- For each outer row, sum every row that sorts at or before it by
-- (mdate, pmttype); comparing on mdate alone would give rows that share a
-- date the same total.
select t.*,
       (select sum(t2.totalpmtamt)
        from #testdata t2
        where t2.mdate < t.mdate
           or (t2.mdate = t.mdate and t2.pmttype <= t.pmttype)
       ) as runningsum
from #testdata t  -- the alias is required: the subquery and ORDER BY refer to t
order by t.mdate, t.pmttype;

Use the below query for the desired result (for SQL Server).
-- Number the rows in a fixed order, then sum every row whose number is at
-- or below the current one.
-- pmttype is a tie-breaker: cte_1 is referenced twice, and numbering by
-- mdate alone leaves the order of same-date rows undefined.
with cte_1 as
(
    select mdate,
           pmttype,
           totalpmtamt,
           row_number() over (order by mdate, pmttype) as RNO
    from #testdata
)
select c1.mdate,
       c1.pmttype,
       c1.totalpmtamt,
       (select sum(c2.totalpmtamt)
        from cte_1 c2
        where c2.RNO <= c1.RNO
       ) as incrtotal
from cte_1 c1
order by c1.RNO
Output :

Sounds like SQL Server.
-- Sample data in a table variable (T-SQL table variables take the @ prefix).
DECLARE @testdata TABLE
(
    mdate DATE ,
    pmttype VARCHAR(64) ,
    totalpmtamt INT
);
INSERT INTO @testdata
    ( mdate, pmttype, totalpmtamt )
VALUES ( GETDATE() - 7, 'DD', 10 ),
       ( GETDATE() - 7, 'SO', 12 ),
       ( GETDATE() - 6, 'DD', 3 ),
       ( GETDATE() - 5, 'DD', 13 ),
       ( GETDATE() - 5, 'SO', 23 ),
       ( GETDATE() - 5, 'PO', 8 );
-- Running total over a ROWS frame. pmttype is added to the ordering so that
-- rows sharing an mdate are accumulated in a repeatable order.
SELECT t.mdate ,
       t.pmttype ,
       t.totalpmtamt ,
       SUM(t.totalpmtamt) OVER ( ORDER BY t.mdate, t.pmttype ROWS UNBOUNDED PRECEDING )
           AS RunningTotal
FROM @testdata t
ORDER BY t.mdate, t.pmttype;

Related

Variable value as column name in Snowflake

can I obtain in a query variable value as column name in Snowflake?
SET "CURRENT_YEAR"=YEAR(CURRENT_DATE());
SELECT SUM("AMOUNT") AS "$CURRENT_YEAR" (here I want the value 2021)
FROM "DB"."SCHEMA"."TABLE"
WHERE YEAR("DATE") = $CURRENT_YEAR;

Please try below:
-- Sample table: one amount per date.
create or replace table test (
date date,
amount int
);
insert into test values
('2021-01-01', 100),
('2022-01-01', 56),
('2022-02-01', 67),
('2021-05-01', 38),
('2023-01-01', 150),
('2021-01-06', 400),
('2021-07-11', 120)
;
-- Session variable holding the current calendar year.
SET "CURRENT_YEAR"=YEAR(CURRENT_DATE());
-- Keep only the rows of that year, then pivot so that the year value itself
-- becomes the name of the result column.
with year_tbl as (
select year(date) as year, amount from test
where year = $CURRENT_YEAR
)
select *
from year_tbl
pivot(sum(amount) for year in ($CURRENT_YEAR)) as yr
;
+------+
| 2021 |
|------|
| 658 |
+------+
If you want different years:
-- Same pivot over an explicit list of years: one output column per listed
-- year, summed from all rows of the table.
with year_tbl as (
select year(date) as year, amount from test
)
select *
from year_tbl
pivot(sum(amount) for year in (2020, 2021, 2022, 2023)) as yr
;
+------+------+------+------+
| 2020 | 2021 | 2022 | 2023 |
|------+------+------+------|
| NULL | 658 | 123 | 150 |
+------+------+------+------+

did you mean something like this
-- Sample table; in a UNION ALL the column names come from the first branch,
-- so the later branches need no aliases.
create or replace table fld_year as
select current_date() as dt, 2021 as fld_year, 1 as amt
union all
select current_date(), 2021, 2
union all
select current_date() - 900, 2019, 3
union all
select current_date() - 400, 2020, 4;

-- Session variable holding the current calendar year.
set "CURRENT_YEAR" = year(current_date());

-- Total and detail rows for the current year.
select sum(amt)
from fld_year
where year(dt) = $CURRENT_YEAR;

select *
from fld_year
where year(dt) = $CURRENT_YEAR;

How to return same row multiple times with multiple conditions

My knowledge is pretty basic so your help would be highly appreciated.
I'm trying to return the same row multiple times when it meets the condition (I only have access to select query).
I have a table of more than 500000 records with Customer ID, Start Date and End Date, where end date could be null.
I am trying to add a new column called Week_No and list all rows accordingly. For example if the date range is more than one week, then the row must be returned multiple times with corresponding week number. Also I would like to count overlapping days, which will never be more than 7 (week) per row and then count unavailable days using second table.
Sample data below
t1
ID | Start_Date | End_Date
000001 | 12/12/2017 | 03/01/2018
000002 | 13/01/2018 |
000003 | 02/01/2018 | 11/01/2018
...
t2
ID | Unavailable
000002 | 14/01/2018
000003 | 03/01/2018
000003 | 04/01/2018
000003 | 08/01/2018
...
I cannot pass the stage of adding week no. I have tried using CASE and UNION ALL but keep getting errors.
-- Week boundaries held in T-SQL local variables (local variables take the @ prefix).
declare @week01start datetime = '2018-01-01 00:00:00'
declare @week01end datetime = '2018-01-07 00:00:00'
declare @week02start datetime = '2018-01-08 00:00:00'
declare @week02end datetime = '2018-01-14 00:00:00'
...
-- One SELECT per week, stacked with UNION ALL. A row is returned for a week
-- when its date range overlaps that week, or when it starts on or before the
-- week's end and has no End_Date.
SELECT
ID,
'01' as Week_No,
'2018' as YEAR,
Start_Date,
End_Date
FROM t1
WHERE (Start_Date <= @week01end and End_Date >= @week01start)
or (Start_Date <= @week01end and End_Date is null)
UNION ALL
SELECT
ID,
'02' as Week_No,
'2018' as YEAR,
Start_Date,
End_Date
FROM t1
WHERE (Start_Date <= @week02end and End_Date >= @week02start)
or (Start_Date <= @week02end and End_Date is null)
...
The new table should look like this
ID | Week_No | Year | Start_Date | End_Date | Overlap | Unavail_Days
000001 | 01 | 2018 | 12/12/2017 | 03/01/2018 | 3 |
000002 | 02 | 2018 | 13/01/2018 | | 2 | 1
000003 | 01 | 2018 | 02/01/2018 | 11/01/2018 | 6 | 2
000003 | 02 | 2018 | 02/01/2018 | 11/01/2018 | 4 | 1
...

From a business perspective I cannot tell what you are trying to achieve. You can, however, use the following code as a starting point for calculating your overlapping days etc. I did it the way you asked, but I would recommend a separate table, such as a time dimension, to produce a "cleaner" solution.
/*sample data set in temp table*/
-- The dates are written as string literals here; the date functions below
-- receive these string values.
select '000001' as id, '2017-12-12'as start_dt, ' 2018-01-03' as end_dt into #tmp union
select '000002' as id, '2018-01-13 'as start_dt, null as end_dt union
select '000003' as id, '2018-01-02' as start_dt, '2018-01-11' as end_dt
/*calculate week numbers and week diff according to dates*/
-- WeekDiff is the number of copies of the row the final query will emit.
-- When the end week number is not greater than the start week number, the
-- ELSE branch computes (52 - start week) + end week instead.
-- When end_dt is NULL, WeekDiff is NULL as well; those rows are picked up by
-- the "WeekDiff is null" branch of the final query.
select *,
DATEPART(WK,start_dt) as start_weekNumber,
DATEPART(WK,end_dt) as end_weekNumber,
case
when DATEPART(WK,end_dt) - DATEPART(WK,start_dt) > 0 then (DATEPART(WK,end_dt) - DATEPART(WK,start_dt)) +1
else (52 - DATEPART(WK,start_dt)) + DATEPART(WK,end_dt)
end as WeekDiff
into #tmp1
from
(
-- Week start/end dates for both endpoints, carried into #tmp1 by SELECT *.
-- NOTE(review): the 2 and 8 offsets presumably assume the default DATEFIRST
-- setting (Sunday = 1) - confirm.
SELECT *,DATEADD(DAY, 2 - DATEPART(WEEKDAY, start_dt), CAST(start_dt AS DATE)) [start_dt_Week_Start_Date],
DATEADD(DAY, 8 - DATEPART(WEEKDAY, start_dt), CAST(start_dt AS DATE)) [startdt_Week_End_Date],
DATEADD(DAY, 2 - DATEPART(WEEKDAY, end_dt), CAST(end_dt AS DATE)) [end_dt_Week_Start_Date],
DATEADD(DAY, 8 - DATEPART(WEEKDAY, end_dt), CAST(end_dt AS DATE)) [end_dt_Week_End_Date]
from #tmp
) s
/*cte used to create duplicates when week diff is over 1*/
-- x supplies the row numbers 1..10, taken from sys.all_columns.
;with x as
(
SELECT TOP (10) rn = ROW_NUMBER() --modify the max you want
OVER (ORDER BY [object_id])
FROM sys.all_columns
ORDER BY [object_id]
)
/*final query*/
-- The cross join repeats each #tmp1 row WeekDiff times (x.rn <= d.WeekDiff);
-- r numbers the copies per id, so start_weekNumber + (r-1) gives consecutive
-- week numbers counted from the start week.
select --*
ID,
start_weekNumber+ (r-1) as Week,
DATEPART(YY,start_dt) as [YEAR],
start_dt,
end_dt,
null as Overlap,
null as unavailable_days
from
(
select *,
ROW_NUMBER() over (partition by id order by id) r
from
(
select d.* from x
CROSS JOIN #tmp1 AS d
WHERE x.rn <= d.WeekDiff
union all
-- Rows without an end date (WeekDiff is NULL) are emitted exactly once.
select * from #tmp1
where WeekDiff is null
) a
)a_ext
order by id,start_weekNumber
--drop table #tmp1,#tmp
The above will produce the results you want except the overlap and unavailable columns. Instead of just counting weeks, i added the number of week in the year using start_dt, but you can change that if you don't like it:
ID Week YEAR start_dt end_dt Overlap unavailable_days
000001 50 2017 2017-12-12 2018-01-03 NULL NULL
000001 51 2017 2017-12-12 2018-01-03 NULL NULL
000001 52 2017 2017-12-12 2018-01-03 NULL NULL
000002 2 2018 2018-01-13 NULL NULL NULL
000003 1 2018 2018-01-02 2018-01-11 NULL NULL
000003 2 2018 2018-01-02 2018-01-11 NULL NULL

Display data for all date ranges including missing dates

I'm having an issue with dates. I have a table with given from and to dates for an employee. For an evaluation, I'd like to display each date of the month with the corresponding values from the second SQL table.
SQL Table:
EmpNr | datefrom | dateto | hours
0815 | 01.01.2019 | 03.01.2019 | 15
0815 | 05.01.2019 | 15.01.2019 | 15
0815 | 20.01.2019 | 31.12.9999 | 40
The given employee (0815) worked during 01.01.-15.01. 15 hours, and during 20.01.-31.01. 40 hours
I'd like to have the following result:
0815 | 01.01.2019 | 15
0815 | 02.01.2019 | 15
0815 | 03.01.2019 | 15
0815 | 04.01.2019 | NULL
0815 | 05.01.2019 | 15
...
0815 | 15.01.2019 | 15
0815 | 16.01.2019 | NULL
0815 | 17.01.2019 | NULL
0815 | 18.01.2019 | NULL
0815 | 19.01.2019 | NULL
0815 | 20.01.2019 | 40
0815 | 21.01.2019 | 40
...
0815 | 31.01.2019 | 40
as for the dates, we have:
-- Month to report on (T-SQL local variables take the @ prefix).
declare @year int = 2019, @month int = 1;
-- Recursive CTE producing the day numbers 1 .. last day of the chosen month.
WITH numbers
as
(
Select 1 as value
UNion ALL
Select value + 1 from numbers
where value + 1 <= Day(EOMONTH(datefromparts(@year,@month,1)))
)
-- Left join so that days without a matching employee row still appear.
-- NOTE(review): the trailing "case b.dateto )" looks truncated in the post
-- (a CASE expression needs WHEN ... END); it is left as posted - confirm the
-- intended upper-bound expression.
SELECT b.empnr, b.hours, datefromparts(@year,@month,numbers.value) Datum FROM numbers left outer join
emptbl b on b.empnr = '0815' and (datefromparts(@year,@month,numbers.value) >= b.datefrom and datefromparts(@year,@month,numbers.value) <= case b.dateto )
which is working quite well, yet I have the odd issue that this code only shows the dates between 01.01.2019 and 03.01.2019.
thank you very much in advance!

Did you check whether datefrom and dateto are in the correct range?
Minimum value of DateTime field is 1753-01-01 and maximum value is 9999-12-31.
Look at your source table to check initial values.

The recursive CTE needs to begin with MIN(datefrom) and MAX(dateto):
-- Sample data in a table variable (T-SQL table variables take the @ prefix).
DECLARE @t TABLE (empnr INT, datefrom DATE, dateto DATE, hours INT);
INSERT INTO @t VALUES
(815, '2019-01-01', '2019-01-03', 15),
(815, '2019-01-05', '2019-01-15', 15),
(815, '2019-01-20', '9999-01-01', 40),
-- another employee
(999, '2018-01-01', '2018-01-31', 15),
(999, '2018-03-01', '2018-03-31', 15),
(999, '2018-12-01', '9999-01-01', 40);
-- Recursive CTE: one row per employee and calendar day, from the employee's
-- earliest datefrom up to the (clamped) latest dateto.
WITH rcte AS (
    -- Anchor: first day and last day per employee.
    -- ISNULL returns the type of its first argument, so maxdate stays a DATE
    -- even though CURRENT_TIMESTAMP is a datetime.
    SELECT empnr
         , MIN(datefrom) AS refdate
         , ISNULL(NULLIF(MAX(dateto), '9999-01-01'), CURRENT_TIMESTAMP) AS maxdate -- clamp year 9999 to today
    FROM @t
    GROUP BY empnr
    UNION ALL
    -- Step: advance one day until maxdate is reached.
    SELECT empnr
         , DATEADD(DAY, 1, refdate)
         , maxdate
    FROM rcte
    WHERE refdate < maxdate
)
-- Left join back to the ranges so that days not covered by any range show
-- NULL hours.
SELECT rcte.empnr
     , rcte.refdate
     , t.hours
FROM rcte
LEFT JOIN @t AS t ON rcte.empnr = t.empnr AND rcte.refdate BETWEEN t.datefrom AND t.dateto
ORDER BY rcte.empnr, rcte.refdate
OPTION (MAXRECURSION 1000) -- approx 3 years
Demo on db<>fiddle

It could be in your select, try:
-- Each generated day is matched against the employee's date ranges; days
-- outside every range keep NULL in the b.* columns because of the LEFT JOIN.
-- (Local variables take the @ prefix in T-SQL.)
SELECT b.empnr, b.hours, datefromparts(@year,@month,numbers.value) Datum
FROM numbers
LEFT OUTER JOIN emptbl b ON b.empnr = '0815' AND
datefromparts(@year,@month,numbers.value) BETWEEN b.datefrom AND b.dateto

Your CTE produces only 31 numbers, and therefore it shows only January dates.
-- Month to inspect (T-SQL local variables take the @ prefix).
declare @year int = 2019, @month int = 1;
-- Recursive CTE producing the day numbers 1 .. last day of the chosen month.
WITH numbers
as
(
Select 1 as value
UNion ALL
Select value + 1 from numbers
where value + 1 <= Day(EOMONTH(datefromparts(@year,@month,1)))
)
-- Show the generated numbers on their own.
SELECT *
FROM numbers
https://dbfiddle.uk/?rdbms=sqlserver_2017&fiddle=a24e58ef4ce522d3ec914f90907a0a9e
You can try below code,
-- Stacked cross joins build a number sequence without recursion:
-- t0 has 3 rows, t1 = t0 x t0, t2 = t1 x t1, and t3 numbers the rows of t2 x t2.
with t0 (i) as (select 0 union all select 0 union all select 0),
t1 (i) as (select a.i from t0 a ,t0 b ),
t2 (i) as (select a.i from t1 a ,t1 b ),
t3 (srno) as (select row_number()over(order by a.i) from t2 a ,t2 b ),
-- One date per number, counting up from the literal start date.
tbldt(dt) as (select dateadd(day,t3.srno-1,'01/01/2019') from t3)
select tbldt.dt
from tbldt
-- NOTE(review): b is not defined in this snippet; this line is a placeholder
-- for the caller's own upper-bound condition.
where tbldt.dt <= b.dateto -- put your condition here
https://dbfiddle.uk/?rdbms=sqlserver_2017&fiddle=b16469908b323b8d1b98d77dd09bab3d

SQL breakout date range to rows

I am trying to take given date ranges found in a data set and divide them into unique rows for each day in the range (example below). Doing the opposite in SQL is pretty straight forward, but I am struggling to achieve the desired query output.
Beginning data:
ITEM START_DATE END_DATE
A 1/1/2015 1/5/2015
B 2/5/2015 2/7/2015
Desired query output:
ITEM DATE_COVERED
A 1/1/2015
A 1/2/2015
A 1/3/2015
A 1/4/2015
A 1/5/2015
B 2/5/2015
B 2/6/2015
B 2/7/2015

The fastest way will be some tally table:
-- Sample data in a table variable (T-SQL table variables take the @ prefix).
DECLARE @t TABLE
(
    ITEM CHAR(1) ,
    START_DATE DATE ,
    END_DATE DATE
)
-- YYYYMMDD literals are read the same way under every language/DATEFORMAT
-- setting, unlike '2/5/2015'.
INSERT INTO @t
VALUES ( 'A', '20150101', '20150105' ),
       ( 'B', '20150205', '20150207' )
-- Tally CTE: four 10-row sets cross-joined give the offsets 0..9999.
;WITH cte AS(SELECT -1 + ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) d FROM
    (VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t1(n) CROSS JOIN
    (VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t2(n) CROSS JOIN
    (VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t3(n) CROSS JOIN
    (VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t4(n))
-- For every item, keep the offsets whose resulting date falls inside the
-- item's range: one output row per covered day.
SELECT t.ITEM, ca.DATE_COVERED FROM @t t
CROSS APPLY(SELECT DATEADD(dd, d, t.START_DATE) AS DATE_COVERED
            FROM cte
            WHERE DATEADD(dd, d, t.START_DATE) BETWEEN t.START_DATE AND t.END_DATE) ca
ORDER BY t.ITEM, ca.DATE_COVERED

Query:
SQLFiddleExample
-- Pair every item with the numbers stored in master..spt_values (type 'P')
-- and keep the offsets that land inside the item's date range.
SELECT t.ITEM,
       DATEADD(day, n.number, t.START_DATE) AS DATE_COVERED
FROM Table1 AS t
CROSS JOIN (SELECT number
            FROM master..spt_values
            WHERE [type] = 'P') AS n
WHERE DATEADD(day, n.number, t.START_DATE) BETWEEN t.START_DATE AND t.END_DATE
Result:
| ITEM | DATE_COVERED |
|------|--------------|
| A | 2015-01-01 |
| A | 2015-01-02 |
| A | 2015-01-03 |
| A | 2015-01-04 |
| A | 2015-01-05 |
| B | 2015-02-05 |
| B | 2015-02-06 |
| B | 2015-02-07 |

NOTE: this only works if the difference between your startdate and enddate is a maximum of 2047 days (master..spt_values only allows 0..2047 range of values)
-- One row per day of each range: join to the type 'P' numbers from 0 up to
-- the length of the range in days.
select d.item,
       dateadd(day, v.number, d.start_date) as adate
from begindata as d
inner join master..spt_values as v
    on v.type = 'P'
   and v.number >= 0
   and v.number <= datediff(day, d.start_date, d.end_date)
order by adate;
I'd like to say I did this myself but I got the code from this
Here is a fiddle with your expected result

TRY THIS...
-- Sample table with one date range per item.
CREATE TABLE Table1
([ITEM] varchar(1), [START_DATE] date, [END_DATE] date)
;
INSERT INTO Table1
([ITEM], [START_DATE], [END_DATE])
VALUES ('A', '2015-01-01', '2015-01-05'), ('B', '2015-02-05', '2015-02-07');
-- Recursive CTE: start at each item's START_DATE and add one day per step
-- until END_DATE is reached. END_DATE is carried along in the CTE, so the
-- recursive step needs no join back to Table1.
WITH Days
AS ( SELECT ITEM, START_DATE AS [Date], END_DATE
     FROM Table1
     UNION ALL
     SELECT ITEM, DATEADD(DAY, 1, [Date]), END_DATE
     FROM Days
     WHERE [Date] < END_DATE )
-- Return the item together with each covered day, as the question asks.
SELECT ITEM, [Date] AS DATE_COVERED
FROM Days
ORDER BY ITEM, [Date]
-- The default recursion limit is 100 levels, which would fail on ranges
-- longer than 100 days; 0 removes the limit (the WHERE clause above still
-- ends the recursion).
OPTION (MAXRECURSION 0)
DEMO

List of years between two dates

I have a table with columns for a start- and enddate.
My goal is to get a list of each year in that timespan for each row, so
+-------------------------+
| startdate | enddate |
+------------+------------+
| 2004-08-01 | 2007-01-08 |
| 2005-06-02 | 2007-05-08 |
+------------+------------+
should output this:
+-------+
| years |
+-------+
| 2004 |
| 2005 |
| 2006 |
| 2007 |
| 2005 |
| 2006 |
| 2007 |
+-------+
I have problems now to create the years in between the two dates. My first approach was to use a UNION (order of dates is irrelevant), but the years in between are missing in this case...
Select
Extract(Year From startdate)
From
table1
Union
Select
Extract(Year From enddate)
From
table1
Thanks for any advice!

Row Generator technique
SQL> WITH DATA1 AS(
2 SELECT TO_DATE('2004-08-01','YYYY-MM-DD') STARTDATE, TO_DATE('2007-01-08','YYYY-MM-DD') ENDDATE FROM DUAL UNION ALL
3 SELECT TO_DATE('2005-06-02','YYYY-MM-DD') STARTDATE, TO_DATE('2007-05-08','YYYY-MM-DD') ENDDATE FROM DUAL
4 ),
5 DATA2 AS( -- first and last year of every range
6 SELECT EXTRACT(YEAR FROM STARTDATE) ST, EXTRACT(YEAR FROM ENDDATE) ED FROM DATA1
7 ),
8 data3 -- offsets 0 .. widest year span
9 AS
10 (SELECT level-1 line
11 FROM DUAL
12 CONNECT BY level <=
13 (SELECT MAX(ed-st) + 1 FROM data2 -- +1: a span of N years needs offsets 0..N, i.e. N+1 rows
14 )
15 )
16 SELECT ST+LINE FROM
17 DATA2, DATA3
18 WHERE LINE <= ED-ST
19 ORDER BY 1
20 /
ST+LINE
----------
2004
2005
2005
2006
2006
2007
2007
7 rows selected.
SQL>

Try this Query
-- Recursive CTE: start at the year of the first date and add one year per
-- step until the year of the second date is reached.
;with year_list (yr) as
(
    select year('2005-12-25')
    union all
    select yr + 1
    from year_list
    where yr < year('2013-11-14')
)
select yr
from year_list

Try this:
Create a table with years as follow:
CREATE TABLE tblyears(y int)
INSERT INTO tblyears VALUES (1900);
INSERT INTO tblyears VALUES (1901);
INSERT INTO tblyears VALUES (1902);
and so on until
INSERT INTO tblyears VALUES (2100)
So, you'll write this query:
-- For every row of table1, return each lookup year that lies between the
-- start year and the end year (inclusive).
SELECT y.y
FROM tblyears y
INNER JOIN table1 t
    ON y.y BETWEEN EXTRACT(year FROM t.startdate)
               AND EXTRACT(year FROM t.enddate)
ORDER BY y.y
Show SqlFiddle

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

SQL - running total when data already grouped - sql

Use the below query for the desired result (for SQL Server). with cte_1 as (SELECT *,ROW_NUMBER() OVER(order by mdate ) RNO FROM #testdata) SELECT mdate,pmttype,totalpmtamt,(select sum(c2.totalpmtamt) from cte_1 c2 where c2.RNO <= c1.RNO ) as incrtotal FROM cte_1 c1 Output :

Related

Variable value as column name in Snowflake

How to return same row multiple times with multiple conditions

Display data for all date ranges including missing dates

SQL breakout date range to rows

List of years between two dates

Categories

Resources