SQL Server - TSQL: cumulative count - sql

So I have the following query:
-- Counts non-NULL agent_login_id values for location 'syd', bucketed by the
-- year and month of `date` shifted forward by three months.
SELECT
    YEAR(DATEADD(MONTH, 3, date))  AS Year,
    MONTH(DATEADD(MONTH, 3, date)) AS Month,
    location,
    COUNT(agent_login_id)
FROM Agents
WHERE Location = 'syd'
GROUP BY
    Location,
    YEAR(DATEADD(MONTH, 3, date)),
    MONTH(DATEADD(MONTH, 3, date))
it outputs:
Year Month location Count
2013 1 SYD 1
2013 3 SYD 11
2013 4 SYD 2
2013 5 SYD 2
2013 8 SYD 3
2013 9 SYD 1
2013 10 SYD 4
2013 11 SYD 7
2013 12 SYD 7
2014 1 SYD 3
2014 2 SYD 1
But I need to do a cumulative count so for example this is what I need it to look like
Year Month location Count Cumulative count
2013 1 SYD 1 1
2013 3 SYD 11 12
2013 4 SYD 2 14
How can I do this?

I'll use #t to represent your current query
For what I'll call 'full accumulation' (based on original question)
-- 2012
-- "Full accumulation" variant built on LAG (presumably why it is labelled 2012).
-- CTE t: every row of #t plus rn, a row number per l ordered by y, m.
;WITH t AS
(
SELECT *, rn = ROW_NUMBER() OVER(PARTITION BY l ORDER BY y, m)
FROM #t
)
-- Outer query: adds the running total found two rows earlier (same l, ordered by y, m)
-- to the current running total; ISNULL falls back to the plain running total a
-- when that sum is NULL (e.g. there is no row two positions back).
SELECT y, m, c, ISNULL(a + LAG(a,2) OVER(PARTITION BY l ORDER BY y, m), a) a
FROM (
-- Running total a: sum of c over all rows of the same l whose rn is at or below the current rn.
SELECT t1.y, t1.l, t1.m, t1.c, SUM(t2.c) a
FROM t t1
JOIN t t2 ON t2.rn <= t1.rn AND t2.l = t1.l
GROUP BY t1.y, t1.l, t1.m, t1.c
-- The derived table is aliased t as well; in the outer query t means this derived table.
) t
-- 2005+
-- "Full accumulation" variant that uses only ROW_NUMBER and self-joins of the CTE.
-- CTE t: every row of #t plus rn, a row number per l ordered by y, m.
;WITH t AS
(
SELECT *, rn = ROW_NUMBER() OVER(PARTITION BY l ORDER BY y, m)
FROM #t
)
-- ISNULL falls back to MIN(t1.c) when SUM(t2.c)+SUM(t3.c) is NULL (e.g. no t3 row matched).
SELECT t1.y, t1.l, t1.m, t1.c, ISNULL(SUM(t2.c)+SUM(t3.c), MIN(t1.c)) a
FROM t t1
-- t2: rows of the same l positioned at or before the current row.
JOIN t t2 ON t2.rn <= t1.rn AND t2.l = t1.l
-- t3: rows whose rn is at least two below the t2 row's rn; LEFT JOIN keeps t2 rows without a match.
-- NOTE(review): unlike the t2 join, this join has no condition on l — confirm that is
-- intended when #t holds more than one l value.
LEFT JOIN t t3 ON t3.rn < t2.rn-1
GROUP BY t1.y, t1.l, t1.m, t1.c
Edit
For a normal running total (after question was corrected)
--2005+
-- Plain running total: for every row, sum c over all rows of the same l
-- that sit at or before it when ordered by y, m.
;WITH numbered AS
(
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY l ORDER BY y, m) AS rn
    FROM #t
)
SELECT
    cur.y,
    cur.l,
    cur.m,
    cur.c,
    SUM(upto.c) AS a
FROM numbered AS cur
INNER JOIN numbered AS upto
    ON upto.l = cur.l
   AND upto.rn <= cur.rn
GROUP BY cur.y, cur.l, cur.m, cur.c
Note: if your current query calculated the first day of the month instead of using YEAR() and MONTH() (leaving those calculations to the end, if they are required at all), you could avoid ROW_NUMBER() altogether, which may improve performance.

Related

SQL: Same date but different values

I have a table that looks like this
Station
year
month
day
number
A1
1990
1
1
50
A1
1990
1
1
60
A1
1990
1
2
55
A1
1990
1
3
10
A1
1990
1
4
40
For example, the query result should look like the table below,
i.e. only the rows that share the same station and date:
Station
year
month
day
number
A1
1990
1
1
50
A1
1990
1
1
60
How do I write a proper SQL query for this?
If I understand correctly, you want rows where the first four columns are duplicated. A simple method uses count(*):
-- Return every row whose (station, year, month, day) combination occurs more than once.
-- The table has no `date` column; the fourth key column is `day`.
select t.*
from (select t.*,
             -- number of rows sharing this row's station and calendar day
             count(*) over (partition by station, year, month, day) as cnt
      from t
     ) t
where cnt > 1;
Assuming your table has a primary key column called id, we can also use EXISTS logic here:
-- A row qualifies when some other row (different id) carries the same
-- Station, year, month and day.
SELECT cur.*
FROM yourTable AS cur
WHERE EXISTS (
    SELECT 1
    FROM yourTable AS other
    WHERE other.id <> cur.id
      AND other.Station = cur.Station
      AND other.year = cur.year
      AND other.month = cur.month
      AND other.day = cur.day
);
If you don't have such an id column, then we could also use aggregation here:
-- First find the (Station, year, month, day) keys that occur more than once,
-- then join back to fetch every row carrying one of those keys.
SELECT src.*
FROM yourTable AS src
INNER JOIN
(
    SELECT Station, year, month, day
    FROM yourTable
    GROUP BY Station, year, month, day
    HAVING COUNT(*) > 1
) AS dup
    ON  dup.Station = src.Station
    AND dup.year = src.year
    AND dup.month = src.month
    AND dup.day = src.day;
You can use an EXISTS clause to find rows with the same station and date but a different number, as follows:
-- Rows for which another row has the same station, year, month and day
-- but a different number. The key column is `day`; the table has no `date` column.
Select t.*
From your_table t
Where exists
(select 1 from your_table tt
Where tt.station = t.station
And tt.year = t.year
And tt.month = t.month
And tt.day = t.day
And tt.number <> t.number)

Subtract data from two columns and put the result into one column

I have data like below
id year marks id year marks
1 2017 80 1 2018 100
2 2017 60 2 2018 70
3 2017 500 3 2018 600
My result should be values as 20, 10, 100 in Difference column.
All this data should be in a single row.
Unless I'm missing something, you just want to take a difference of the two marks columns:
-- Pair each id's 2017 row with its 2018 row and subtract the marks.
SELECT
    y17.id,
    y17.year,
    y17.marks AS marks_2017,
    y18.id,
    y18.year,
    y18.marks AS marks_2018,
    y18.marks - y17.marks AS diff
FROM yourTable AS y17
INNER JOIN yourTable AS y18
    ON y18.id = y17.id
WHERE y17.year = 2017
  AND y18.year = 2018;
Somehow, I think you want:
-- Per id: 2018 marks count as positive and 2017 marks as negative,
-- so the sum equals the 2018-minus-2017 difference.
SELECT
    id,
    SUM(CASE year WHEN 2018 THEN marks ELSE -marks END) AS diff
FROM t
WHERE year IN (2017, 2018)
GROUP BY id;
-- Subtracts marks from new_marks within a single row, filtered to year '2018'.
-- NOTE(review): this presumes one table row holds both marks and new_marks — confirm
-- against the actual schema; id and year are also selected twice.
SELECT id, year, marks, id, year, new_marks, new_marks - marks AS diff
FROM yourTable where year = '2018'
-- Self-join of test on id: 2017 rows on the left, 2018 rows on the right,
-- with the marks difference appended.
SELECT
    prev.*,
    nxt.year,
    nxt.marks,
    nxt.marks - prev.marks AS diff
FROM test AS prev
INNER JOIN test AS nxt
    ON nxt.id = prev.id
   AND prev.year = 2017
   AND nxt.year = 2018
Demo

SQL Server Group By Query Select first row each group

I am trying to do this query. This is what I have.
My table is: Table
StudyID FacultyID Year Access1 Access2 Access3
1 1 2014 4 8 5
1 2 2014 8 4 7
1 1 2013 5 4 4
2 3 2014 4 6 5
2 5 2013 5 8 10
2 4 2014 5 5 7
3 7 2013 9 4 7
I want to group by StudyID and Year and get the minimum value of each field Access1 Access2 and Access3 and show only the last year, I mean for each group the first row.
Here is the Result.
StudyID Year Access1 Access2 Access3
1 2014 4 4 5
2 2014 4 5 5
3 2013 9 4 7
This is my Query:
-- Minimum of each Access column per (StudyID, Year), newest year first within a study.
-- Returns one row for every year of every study, not only the latest year.
SELECT DISTINCT T.StudyID, T.Year, MIN(T.Access1), MIN(T.Access2), MIN(T.Access3)
FROM T
GROUP BY T.StudyID, T.Year
ORDER BY T.StudyID, T.Year DESC
I also tried with this one.
-- Attempt with ROW_NUMBER: the window is partitioned by (StudyID, Year), the same keys
-- as the GROUP BY, so every grouped row is alone in its partition and gets rownumber = 1.
-- NOTE(review): SQL Server also appears to reject ORDER BY inside a CTE without TOP and
-- to require names for the MIN(...) columns — verify.
;WITH MyQuery AS ( SELECT DISTINCT T.StudyID, T.Year, MIN(T.Access1), MIN(T.Access2), MIN(T.Access3),ROW_NUMBER() OVER (PARTITION BY T.StudyID, T.Year ORDER BY T.StudyID, T.Year DESC) AS rownumber
FROM T GROUP BY T.StudyID, T.Year ORDER BY T.StudyID , T.Year DESC ) SELECT * FROM MyQuery WHERE rownumber = 1
No success. I know I am missing something, but I don't know what.
Thanks in advance!!!!
You can GROUP BY StudyID, Year and then in an outer query select the first row from each StudyID, Year group:
-- Step 1 (yearly_min): minimum Access values per (StudyID, Year), numbering each
--                      study's years from newest to oldest.
-- Step 2: keep only the newest year of every study.
WITH yearly_min AS (
    SELECT
        StudyID,
        Year,
        MIN(Access1) AS minAccess1,
        MIN(Access2) AS minAccess2,
        MIN(Access3) AS minAccess3,
        ROW_NUMBER() OVER (PARTITION BY StudyID ORDER BY Year DESC) AS rn
    FROM mytable
    GROUP BY StudyID, Year
)
SELECT StudyID, Year, minAccess1, minAccess2, minAccess3
FROM yearly_min
WHERE rn = 1
ROW_NUMBER is used to assign an ordering number to each StudyID group according to Year values. The row with the maximum Year value is assigned a rn = 1.
Try this:
-- Keep only each study's latest year, then take the minimum of every Access column.
-- T.Year is part of GROUP BY because SQL Server requires every non-aggregated
-- SELECT column to be grouped; DISTINCT is dropped since GROUP BY already yields
-- one row per (StudyID, Year).
SELECT T.StudyID, T.Year, MIN(T.Access1), MIN(T.Access2), MIN(T.Access3)
FROM myTable T
WHERE T.Year = (SELECT MAX(T2.Year) FROM myTable T2 WHERE T2.StudyID = T.StudyID)
GROUP BY T.StudyID, T.Year
It gives the result you wanted in SQLite, but in SQL Server it may need some alias; I'm not sure, as I can't test it right now.
This is giving the answer you want
-- Minimum Access values for the latest year of each study, ordered by StudyID.
-- The correlated column is qualified (T2.StudyID) so the comparison is unambiguous,
-- the sort names its column instead of using a position, and the redundant
-- DISTINCT is removed (GROUP BY already yields unique rows).
SELECT
    T.StudyID,
    T.Year,
    MIN(T.Access1) AS Access1,
    MIN(T.Access2) AS Access2,
    MIN(T.Access3) AS Access3
FROM T
WHERE T.Year = (SELECT MAX(T2.Year) FROM T T2 WHERE T2.StudyID = T.StudyID)
GROUP BY T.StudyID, T.Year
ORDER BY T.StudyID

SQL MIN Datetime based on first occurrence of a value in another column

This is what I have
ID Name DateTime Value Group
1 Mark 1/1/2010 0 1
2 Mark 1/2/2010 1 1
3 Mark 1/3/2010 0 1
4 Mark 1/4/2010 0 2
40 Mark 1/5/2010 1 2
5 Mark 1/9/2010 1 2
6 Mark 1/6/2010 1 2
7 Kelly 1/1/2010 0 3
8 Kelly 1/2/2010 1 3
9 Kelly 1/3/2010 1 3
10 Nancy 1/4/2010 0 4
11 Nancy 1/5/2010 0 4
12 Nancy 1/6/2010 1 5
13 Nancy 1/7/2010 0 5
What I want is to get the rows per "name" per "group" with minimum datetime after the value becomes 1. From the above example, I would need to get
3 Mark 1/3/2010 0 1
6 Mark 1/6/2010 1 2
9 Kelly 1/3/2010 1 3
13 Nancy 1/7/2010 0 5
Based on the description of your rules, I believe the output will actually be a bit different since 2010-01-05 was the first DateTime where the Value = 1 for Group 2 for Mark.
ID Name DateTime Value Group
3 Mark 2010-01-03 0 1
6 Mark 2010-01-06 1 2
9 Kelly 2010-01-03 1 3
13 Nancy 2010-01-07 0 5
The below code will work as demonstrated in this SQLFiddle.
-- first_hit:  earliest DateTime with Value = 1 for every (Name, Group).
-- later_rows: rows strictly after that moment, numbered by DateTime within (Name, Group).
-- Result:     the first such row per (Name, Group), ordered by ID.
WITH first_hit AS (
    SELECT
        Name,
        [Group],
        MIN([DateTime]) AS MinimumDateTime
    FROM Test
    WHERE Value = 1
    GROUP BY Name, [Group]
),
later_rows AS (
    SELECT
        src.ID,
        src.Name,
        src.[DateTime],
        src.Value,
        src.[Group],
        ROW_NUMBER() OVER (
            PARTITION BY src.Name, src.[Group]
            ORDER BY src.[DateTime]
        ) AS SequentialOrder
    FROM Test AS src
    INNER JOIN first_hit AS fh
        ON fh.Name = src.Name
       AND fh.[Group] = src.[Group]
    WHERE src.[DateTime] > fh.MinimumDateTime
)
SELECT ID, Name, [DateTime], Value, [Group]
FROM later_rows
WHERE SequentialOrder = 1
ORDER BY ID;
Below is my query; it assumes that records are received in order of their dates.
-- TABLE, NAME, GROUP, DATE and VALUE stand in for the real table and column names.
-- NOTE(review): the row-value form (a, b) IN (subquery) is not accepted by every
-- engine — confirm it is supported by yours.
WITH TBL_1 AS
(
-- Rows of every (NAME, GROUP) that contains at least one VALUE = 1, numbered by DATE.
-- The source table is aliased A so that A.* resolves.
SELECT A.*, ROW_NUMBER() OVER(PARTITION BY NAME, GROUP ORDER BY DATE) AS RN
FROM TABLE AS A
WHERE (NAME, GROUP) IN
(SELECT NAME, GROUP FROM TABLE WHERE VALUE = 1)
),
TBL_2 AS
(
-- The VALUE = 1 rows themselves.
SELECT * FROM TBL_1 WHERE VALUE = 1
),
TBL_3 AS
(
-- Rows positioned after some VALUE = 1 row of the same (NAME, GROUP).
SELECT A.*
FROM TBL_1 AS A
INNER JOIN TBL_2 AS B
ON B.NAME = A.NAME
AND B.GROUP = A.GROUP
AND A.RN > B.RN
)
-- Of those, keep the row with the earliest DATE per (NAME, GROUP).
SELECT *
FROM TBL_3
WHERE (NAME, GROUP, DATE) IN
(SELECT NAME, GROUP, MIN(DATE) FROM TBL_3 GROUP BY NAME, GROUP)
In SQL Server 2012 you can do this:
-- Three nested levels, read from the inside out:
--   innermost: F = 1 when the previous row (same Name, Gr; ordered by DateTime) has Value = 1
--   middle:    keeps the F = 1 rows and redefines F as the ID of the previous kept row
--   outermost: F IS NULL picks the kept rows that have no previous kept row
SELECT * FROM (
SELECT DISTINCT
ID,
Name,
DateTime,
Value,
Gr,
-- New F: ID of the preceding row among those that passed WHERE F = 1 below.
LAG(ID) OVER (PARTITION BY Name, Gr ORDER BY DateTime) F
FROM (
SELECT
ID,
Name,
DateTime,
Value,
Gr,
-- Inner F: flag for "the row just before this one has Value = 1".
CASE WHEN LAG(Value) OVER (PARTITION BY Name, Gr ORDER BY DateTime) = 1 THEN 1 ELSE 0 END F
FROM
T
) TT
-- This F is the inner flag (TT.F), not the LAG(ID) alias of the enclosing SELECT list.
WHERE F = 1
) TT WHERE F IS NULL
ORDER BY Gr, Name, DateTime
Fiddle: http://www.sqlfiddle.com/#!6/5a0fa2/19
using window functions:
-- cte: every row of Table1 with
--   rn  = position of the row within its ([Group], Name), ordered by [DateTime]
--   rnk = one number per distinct ([Group], Name) pair, used as a single join key
with cte as (
select
*,
row_number() over(partition by [Group], Name order by [DateTime]) as rn,
dense_rank() over(order by [Group], Name) as rnk
from Table1
)
select c1.*
from cte as c1
-- c2 is the row immediately before c1 in the same partition, and it must have Value = 1.
inner join cte as c2 on c2.rn = c1.rn - 1 and c2.rnk = c1.rnk and c2.Value = 1
where
-- No Value = 1 row two or more positions before c1, so c2 is the partition's first Value = 1.
not exists (select * from cte as c3 where c3.rn <= c1.rn - 2 and c3.rnk = c1.rnk and c3.Value = 1)
or apply:
-- For each row t1, fetch the row immediately before it (same [Group] and Name) and keep t1
-- only if that row has Value = 1 and no earlier row of the partition has Value = 1.
select t1.*
from Table1 as t1
cross apply (
-- Latest row strictly before t1; t1 rows without any earlier row are dropped by CROSS APPLY.
select top 1 t2.Value, t2.DateTime
from Table1 as t2
where
t2.[Group] = t1.[Group] and t2.Name = t1.Name and
t2.[DateTime] < t1.[DateTime]
order by t2.[Datetime] desc
) as t2
where
t2.Value = 1 and
-- No Value = 1 row before the preceding row.
not exists (
select *
from Table1 as t3
where
t3.[Group] = t1.[Group] and t3.Name = t1.Name and
t3.[DateTime] < t2.[DateTime] and t3.Value = 1
)
sql fiddle demo
Update: I forgot to mention that your expected output seems to be incorrect - there should be id = 6 instead of 5 in the second row (see the SQL Fiddle).

Accumulate a summarized column

I could use some help with a SQL statement. I have the table "cont", which looks like this:
cont_id name weight
----------- ---------- -----------
1 1 10
2 1 20
3 2 40
4 2 15
5 2 20
6 3 15
7 3 40
8 4 60
9 5 10
10 6 5
I then summed up the weight column and grouped it by the name:
name wsum
---------- -----------
2 75
4 60
3 55
1 30
5 10
6 5
And the result should have an accumulated column and should look like this:
name wsum acc_wsum
---------- ----------- ------------
2 75 75
4 60 135
3 55 190
1 30 220
5 10 230
6 5 235
But I didn't manage to get the last statement working..
edit: this Statement did it (thanks Gordon)
-- name_totals: total weight per name.
-- acc_wsum:    sum of all totals that rank at or before the current one when totals
--              are ordered from largest to smallest (equal totals ordered by name).
with name_totals as (
    select name, SUM(weight) as wsum
    from cont
    group by name
)
select cur.*,
       (select sum(prev.wsum)
        from name_totals prev
        where prev.wsum > cur.wsum
           or (prev.wsum = cur.wsum and prev.name <= cur.name)) as acc_wsum
from name_totals cur
order by wsum desc
So, the best way to do this is using cumulative sum:
-- Running total from the largest wsum downwards.
-- ROWS UNBOUNDED PRECEDING makes the frame row-based (the default RANGE frame gives
-- rows with equal wsum the same total), and name breaks ties deterministically.
select t.*,
       sum(wsum) over (order by wsum desc, name rows unbounded preceding) as acc_wsum
from (<your summarized query>) t
The order by clause makes this cumulative.
If your database lacks that capability (it is available in SQL Server 2012 and in Oracle), a correlated subquery is an easy way to do it, assuming the summed weights are distinct values:
-- Fallback without window functions: for each row, add up every wsum
-- that is at least as large as the row's own wsum.
SELECT
    t.*,
    (
        SELECT SUM(wsum)
        FROM (<your summarized query>) t2
        WHERE t2.wsum >= t.wsum
    ) AS acc_wsum
FROM (<your summarized query>) t
This should work in all dialects of SQL. To work with situations where the accumulated weights might have duplicates:
-- Tie-safe variant: a row's total includes every strictly larger wsum plus the rows
-- with an equal wsum whose name sorts at or before its own.
select t.*,
       (select sum(wsum)
        from (<your summarized query>) t2
        where t2.wsum > t.wsum
           or (t2.wsum = t.wsum and t2.name <= t.name)
       ) as acc_wsum  -- closing parenthesis of the scalar subquery was missing
from (<your summarized query>) t
try this
-- Accumulates wsum over #table1, which is read for the columns name and wsum
-- (presumably the summarized per-name totals — confirm).
;WITH CTE
AS
(
    SELECT *,
           -- Largest wsum first, matching the requested output order;
           -- name makes the numbering deterministic when wsum values tie.
           ROW_NUMBER() OVER (ORDER BY wsum DESC, name) rownum
    FROM #table1
)
SELECT
    c1.name,
    c1.wsum,
    -- Sum of every wsum ranked at or before the current row.
    acc_wsum = (SELECT SUM(c2.wsum)
                FROM CTE c2
                WHERE c2.rownum <= c1.rownum)
FROM CTE c1
ORDER BY c1.rownum;
or you can join instead of using subquery
-- Join-based variant of the running total over #table1 (columns name and wsum).
;WITH CTE
AS
(
    SELECT *,
           -- Ordered by wsum (the column actually present), largest first;
           -- name breaks ties so the numbering is deterministic.
           ROW_NUMBER() OVER (ORDER BY wsum DESC, name) rownum
    FROM #table1
)
SELECT
    c1.name,
    c1.wsum,
    -- c2 ranges over every row ranked at or before c1.
    acc_wsum = SUM(c2.wsum)
FROM CTE c1
INNER JOIN CTE c2 ON c2.rownum <= c1.rownum
-- rownum is grouped as well so the result can be sorted in accumulation order.
GROUP BY c1.name, c1.wsum, c1.rownum
ORDER BY c1.rownum;