SQL Cumulative Count - sql

I have table with departments. I need to count how many people are within which dept. This is easily done by
SELECT DEPT,
COUNT(*) as 'Total'
FROM SR
GROUP BY DEPT;
Now I need to also do cumulative count as below:
I have found some SQL to count running total, but not case like this one. Could you provide me some advice in this case, please?

Here's a way to do it with a CTE instead of a cursor:
WITH Base AS
(
SELECT ROW_NUMBER() OVER (ORDER BY [Count] DESC) RowNum,
[Dept],
[Count]
FROM SR
)
SELECT SR.Dept, SR.Count, SUM(SR2.[Count]) Total
FROM Base SR
INNER JOIN Base SR2
ON SR2.RowNum <= SR.RowNum
GROUP BY SR.Dept, SR.Count
ORDER BY SR.[Count] DESC
Note that this is ordering by descending Count like your sample result does. If there's some other column that's not shown that should be used for ordering just replace Count in each of the ORDER BY clauses.
SQL Fiddle Demo

I think you can use some temporary / variable table for this, and use solution from here:
declare #Temp table (rn int identity(1, 1) primary key, dept varchar(128), Total int)
insert into #Temp (dept, Total)
select
dept, count(*) as Total
from SR
group by dept
;with cte as (
select T.dept, T.Total, T.Total as Cumulative, T.rn
from #Temp as T
where T.rn = 1
union all
select T.dept, T.Total, T.Total + C.Cumulative as Cumulative, T.rn
from cte as C
inner join #Temp as T on T.rn = C.rn + 1
)
select C.dept, C.Total, C.Cumulative
from cte as C
option (maxrecursion 0)
sql fiddle demo
There're some other solutions, but this one is fastest for SQL Server 2008, I think.

If it is possible to add an identity column to the table - then the solution is easier;
create table #SQLCumulativeCount
(
id int identity(1,1),
Dept varchar(100),
Count int
)
insert into #SQLCumulativeCount (Dept,Count) values ('PMO',106)
insert into #SQLCumulativeCount (Dept,Count) values ('Finance',64)
insert into #SQLCumulativeCount (Dept,Count) values ('Operations',41)
insert into #SQLCumulativeCount (Dept,Count) values ('Infrastructure',22)
insert into #SQLCumulativeCount (Dept,Count) values ('HR',21)
select *,
sum(Count) over(order by id rows unbounded preceding) as Cumulative
from #SQLCumulativeCount

Related

Auto increment the column value by 1 conditionally in select query in sql

I am having input table like below:
and in a select query without alteration of table need an output like:
drop table if exists T;
create table T(id int, nm nvarchar(10))
GO
insert T(id, nm) values (1,'r'),(2,'r'),(3,null),(4,'r')
SELECT * FROM T
GO
-- solution:
select
id, nm,
CASE WHEN nm is not null then count(nm) over (order by id) ELSE NULL END
from T
GO
compare execution plan of all solutions (using SQL 2017) :-)
My solution 21%; LukStorms solution 38%; Ian-Fogelman solution 41%
Choose your solution after you test in your specific server!
You can calculate a row_number partitioned on whether "nm" is null, then only show the calculated row_number when "nm" is not null.
Example snippet:
declare #T table (id int identity(1,1) primary key, nm varchar(8));
insert into #T (nm) values ('R'),('R'),(null),('R');
select *,
iif(nm is null,null,row_number() over (partition by iif(nm is null,1,0) order by id)) as [Count]
from #T
order by id
;WITH CTE AS
(
SELECT ID,
ROW_NUMBER() OVER(PARTITION BY 1 ORDER BY ID) AS [COUNT]
FROM [TABLE] WHERE NM IS NOT NULL
)
SELECT S.ID,
S.NM,
CTE.[COUNT]
FROM [TABLE] AS S LEFT JOIN CTE AS CTE ON S.ID = CTE.ID
You can start with a CTE that adds a ROW_NUMBER() column and filters out rows WHERE 'nm' IS NULL.
Then SELECT from your table and OUTER JOIN to the CTE, using the ROW_NUMBER column to populate your Count column.

aggregation according to different conditions on same column

I have a table #tbl like below, i need to write a query like if there are more than 3 records availble
for particular cid then avg(val of particular cid ) for particular cid should be dispalyed against each id and if there are less than
3 records availble for particular cid then avg(val of all records availble).
Please suggest.
declare #tbl table(id int, cid int, val float )
insert into #tbl
values(1,100,20),(2,100,30),(3,100,25),(4,100,31),(5,100,50),
(6,200,30),(7,200,30),(8,300,90)
Your description is not clear, but I believe you need windowed functions:
WITH cte AS (
SELECT *, COUNT(*) OVER(PARTITION BY cid) AS cnt
FROM #tbl
)
SELECT id, (SELECT AVG(val) FROM cte) AS Av
FROM cte
WHERE cnt <=3
UNION ALL
SELECT id, AVG(val) OVER(PARTITION BY cid) AS Av
FROM cte
WHERE cnt > 3
ORDER BY id;
DBFiddle Demo
EDIT:
SELECT id,
CASE WHEN COUNT(*) OVER(PARTITION BY cid) <= 3 THEN AVG(val) OVER()
ELSE AVG(val) OVER(PARTITION BY cid)
END
FROM #tbl
ORDER BY id;
DBFiddle Demo2
You can try with the following. First calculate the average for each Cid depending in it's number of occurences, then join each Cid with the Id to display all table.
;WITH CidAverages AS
(
SELECT
T.cid,
Average = CASE
WHEN COUNT(1) >= 3 THEN AVG(T.val)
ELSE (SELECT AVG(Val) FROM #tbl) END
FROM
#tbl AS T
GROUP BY
T.cid
)
SELECT
T.*,
C.Average
FROM
#tbl AS T
INNER JOIN CidAverages AS C ON T.cid = C.cid
Given the clarifications in comments, I am thinking this is the intention
declare #tbl table(id int, cid int, val float )
insert into #tbl
values(1,100,20),(2,100,30),(3,100,25),(4,100,31),(5,100,50),
(6,200,30),(7,200,30),(8,300,90);
select distinct
cid
, case
when count(*) over (partition by cid) > 3 then avg(val) over (partition by cid)
else avg (val) over (partition by 1)
end as avg
from #tbl;
http://dbfiddle.uk/?rdbms=sqlserver_2017&fiddle=fdf4c4457220ec64132de7452a034976
cid avg
100 31.2
200 38.25
300 38.25
There are a number of aspects of a query like this that when run at scale though are going to be pretty bad on the query plan, I'd want to test this at a larger scale and tune before using.
The description was not clear on what happened if it was exactly 3, it mentions 'more than 3' and 'less than 3' - within this code the 'more than' was used to determine which category it was in, and less than interpreted to mean 'less than or equal to 3'

2 rows differences

I would like to get 2 consecutive rows from an SQL table.
One of the columns storing UNIX datestamp and between 2 rows the difference only this value.
For example:
id_int dt_int
1. row 8211721 509794233
2. row 8211722 509794233
I need only those rows where dt_int the same (edited)
Do you want both lines to be shown?
A solution could be this:
with foo as
(
select
*
from (values (8211721),(8211722),(8211728),(8211740),(8211741)) a(id_int)
)
select
id_int
from
(
select
id_int
,id_int-isnull(lag(id_int,1) over (order by id_int) ,id_int-6) prev
,isnull(lead(id_int,1) over (order by id_int) ,id_int+6)-id_int nxt
from foo
) a
where prev<=5 or nxt<=5
We use lead and lag, to find the differences between rows, and keep the rows where there is less than or equal to 5 for the row before or after.
If you use 2008r2, then lag and lead are not available. You could use rownumber in stead:
with foo as
(
select
*
from (values (8211721),(8211722),(8211728),(8211740),(8211741)) a(id_int)
)
, rownums as
(
select
id_int
,row_number() over (order by id_int) rn
from foo
)
select
id_int
from
(
select
cur.id_int
,cur.id_int-prev.id_int prev
,nxt.id_int-cur.id_int nxt
from rownums cur
left join rownums prev
on cur.rn-1=prev.rn
left join rownums nxt
on cur.rn+1=nxt.rn
) a
where isnull(prev,6)<=5 or isnull(nxt,6)<=5
Assuming:
lead() analytical function available.
ID_INT is what we need to sort by to determine table order...
you may need to partition by some value lead(ID_int) over(partition by SomeKeysuchasOrderNumber order by ID_int asc) so that orders and dates don't get mixed together.
.
WITH CTE AS (
SELECT A.*
, lead(ID_int) over ([missing partition info] ORDER BY id_Int asc) - id_int as ID_INT_DIFF
FROM Table A)
SELECT *
FROM CTE
WHERE ID_INT_DIFF < 5;
You can try it. This version works on SQL Server 2000 and above. Today I don not a more recent SQL Server to write on.
declare #t table (id_int int, dt_int int)
INSERT #T SELECT 8211721 , 509794233
INSERT #T SELECT 8211722 , 509794233
INSERT #T SELECT 8211723 , 509794235
INSERT #T SELECT 8211724 , 509794236
INSERT #T SELECT 8211729 , 509794237
INSERT #T SELECT 8211731 , 509794238
;with cte_t as
(SELECT
ROW_NUMBER() OVER (ORDER BY id_int) id
,id_int
,dt_int
FROM #t),
cte_diff as
( SELECT
id_int
,dt_int
,(SELECT TOP 1 dt_int FROM cte_t b WHERE a.id < b.id) dt_int1
,dt_int - (SELECT TOP 1 dt_int FROM cte_t b WHERE a.id < b.id) Difference
FROM cte_t a
)
SELECT DISTINCT id_int , dt_int FROM #t a
WHERE
EXISTS(SELECT 1 FROM cte_diff b where b.Difference =0 and a.dt_int = b.dt_int)

Getting top 2 rows in each group without row_number() in SQL Server

I am looking for a simple query to get result of 2 rows with latest invoice date in each group. Although this task can be accomplished by a row_number() that you can see in below code ,I need an alternative to this with minimum complexity.
Code :
create table #tt
(
id int,
invoiceDT datetime
)
insert into #tt
values(1,'01-01-2016 00:12'),(1,'01-02-2016 06:16'),(1,'01-01-2016 00:16')
,(2,'01-01-2016 01:12'),(2,'04-02-2016 06:16'),(2,'01-06-2016 00:16')
select *
from (
SELECT id,invoiceDT,row_number() over(partition by id order by invoiceDT desc) as rownum
FROM #tt
)tmp
where rownum <=2
I need same result that is returned by above query
Please suggest an alternative.
Strange request, but here you go:
WITH CTE as
(
SELECT distinct id FROM #tt t1
)
SELECT x.*
FROM CTE
CROSS APPLY
(
SELECT top 2 *
FROM #tt
WHERE CTE.id = id
ORDER BY invoiceDT desc
) x

Delete one row from same rows

I have a table T with (first, second) columns. I have two rows with first=1 and second=2. I would like to delete just one of the rows. How do I do that?
;WITH CTE AS
(
SELECT TOP 1 *
FROM YourTable
WHERE first=1 and second=2
)
DELETE FROM CTE;
Or if SQL Server 2000
DELETE T
FROM (
SELECT TOP 1 *
FROM YourTable
WHERE [first]=1 and [second]=2
) T;
Then add a primary key.
You can use ROW_NUMBER().
DECLARE #T as Table(First int , Second int )
INsert Into #T
Values (1,2),
(1,2)
SELECT * FROM #T
;WITH CTE as
(SELECT ROW_NUMBER() over (order by first,second) rn , * from #T)
DELETE FROM CTE where rn = 1
select * from #T
If you change rn to include Partition by
ROW_NUMBER() over (PARTITION BY first, second order by first,second)
and change the where to be WHERE RN <> 1
you could use this as a general solution to remove any dupes on First, Second