SQL windows function partition by null values - sql

I have a data-set:
year id
NULL 123
NULL 124
NULL 125
1932 126
1932 127
1933 128
1933 129
1934 130
I would like to create a running count, where I have the group of year with NULL values as one group and the other group with non-null values, namely.
year count
NULL 3
1932 2
1933 4
1934 5
I have tried to do this by union of two windows function data set, namely:
select distinct year,
count(id) over (order by year asc)
from data
where year is null
union
select distinct year,
count(id) over (order by year asc)
from data
where year is not null;
I was wondering if there is a cleaner way of doing this such as:
select distinct year,
count(id) over (partition by <whether year is null condition> order by year
asc)
from data;
My database is DB2.

try this:
-- Sample data: a T-SQL table variable holding the question's (year, id) rows.
DECLARE @tab TABLE (year INT, id INT);
INSERT INTO @tab (year, id)
VALUES (NULL, 123),
       (NULL, 124),
       (NULL, 125),
       (1932, 126),
       (1932, 127),
       (1933, 128),
       (1933, 129),
       (1934, 130);

-- Running count per year. The CASE expression splits the rows into two
-- partitions (NULL years vs. non-NULL years), so each partition gets its own
-- running count ordered by id. MAX(RN) then keeps the last running value
-- reached within each year.
SELECT D.year, MAX(D.RN) AS Count
FROM (
    SELECT year,
           SUM(1) OVER (PARTITION BY CASE WHEN year IS NULL THEN 1 ELSE 0 END
                        ORDER BY id) AS RN
    FROM @tab
) D
GROUP BY D.year
Output:
year Count
NULL 3
1932 2
1933 4
1934 5

UNION ALL will produce the required output; it is a roundabout way to achieve this, but it works:
-- Sample data: a T-SQL table variable holding the question's (year, id) rows.
declare @table table (year int, id int);
insert @table (year, id)
select NULL, 123 union all
select NULL, 124 union all
select NULL, 125 union all
select 1932, 126 union all
select 1932, 127 union all
select 1933, 128 union all
select 1933, 129 union all
select 1934, 130;

-- Running total of per-year row counts, computed separately for the
-- non-NULL years and for the NULL year, then stitched together.
select Runningtotal, year
from (
    -- non-NULL years: cumulative sum of the per-year counts, in year order
    select SUM(count) over (order by year) RunningTotal, year
    from (select count(*) count, year from @table group by year) x
    where year is not null
    union all
    -- NULL year: its own count, kept out of the running total above
    select SUM(count) over (order by year) Runningtotal, year
    from (select count(*) count, year from @table group by year) x
    where year is null
) y
order by year

Here is an option which does not use analytic functions:
-- Running count per year without analytic functions.
--   * NULL years: count of all rows whose year is NULL.
--   * other years: count of non-NULL-year rows up to the current row's id,
--     plus every row sharing the current row's year (so all rows of one year
--     report the same total and DISTINCT collapses them).
SELECT DISTINCT t1.year,
       CASE WHEN t1.year IS NULL
            THEN (SELECT COUNT(*)
                  FROM data t2
                  WHERE t1.year IS NULL AND t2.year IS NULL)
            ELSE (SELECT COUNT(t2.id)
                  FROM data t2
                  WHERE t1.year = t2.year
                     OR (t2.id <= t1.id AND t2.year IS NOT NULL))
       END cnt
FROM data t1;

Method 1 :
-- Running count per year via a lateral join: for each outer row, count the
-- rows that are either in the NULL group together with it, or have a year
-- less than or equal to its own.
SELECT DISTINCT year, f3.NB
FROM tmpxx f1
LEFT OUTER JOIN LATERAL
(
    SELECT COUNT(*) NB
    FROM tmpxx f2
    WHERE (f1.year IS NULL AND f2.year IS NULL)
       OR (f1.year >= f2.year)
) f3 ON 1=1

Method 2:
-- Running count per year via a correlated scalar subquery: NULL years are
-- counted among themselves, other years count every row with a year less
-- than or equal to their own.
SELECT DISTINCT year,
       (
           SELECT COUNT(*) NB
           FROM tmpxx f2
           WHERE (f1.year IS NULL AND f2.year IS NULL)
              OR (f1.year >= f2.year)
       ) nb
FROM tmpxx f1

Related

How can I select distinct by one column?

I have a table with the columns below. When a COD is duplicated, I need the row whose VALUE is not NULL; when a COD is not duplicated, the row may have a NULL VALUE. For example:
I'm using SQL SERVER.
This is what I get:
COD ID VALUE
28 1 NULL
28 2 Supermarket
29 1 NULL
29 2 School
29 3 NULL
30 1 NULL
This is what I want:
COD ID VALUE
28 2 Supermarket
29 2 School
30 1 NULL
What I'm tryin' to do:
;with A as (
(select DISTINCT COD,ID,VALUE from CodId where ID = 2)
UNION
(select DISTINCT COD,ID,NULL from CodId where ID != 2)
)select * from A order by COD
You can try this.
-- Sample data: a T-SQL table variable with the question's rows.
DECLARE @T TABLE (COD INT, ID INT, VALUE VARCHAR(20));
INSERT INTO @T (COD, ID, VALUE)
VALUES (28, 1, NULL),
       (28, 2, 'Supermarket'),
       (29, 1, NULL),
       (29, 2, 'School'),
       (29, 3, NULL),
       (30, 1, NULL);

-- Number the rows of each COD with non-NULL VALUEs first (NULL sorts lowest
-- in SQL Server, so DESC puts it last). ID is a tiebreak so the chosen row is
-- deterministic when several rows share the same VALUE (e.g. all NULL).
;WITH CTE AS (
    SELECT *,
           RN = ROW_NUMBER() OVER (PARTITION BY COD ORDER BY VALUE DESC, ID)
    FROM @T
)
SELECT COD, ID, VALUE
FROM CTE
WHERE RN = 1
Result:
COD ID VALUE
----------- ----------- --------------------
28 2 Supermarket
29 2 School
30 1 NULL
Another option is to use the WITH TIES clause in concert with Row_Number()
Example
Select top 1 with ties *
from YourTable
Order By Row_Number() over (Partition By [COD] order by Value Desc)
Returns
COD ID VALUE
28 2 Supermarket
29 2 School
30 1 NULL
I would use GROUP BY and JOIN. If there is no non-NULL value for a COD, then it is resolved by the OR in the JOIN clause.
SELECT your_table.*
FROM your_table
JOIN (
SELECT COD, MAX(value) value
FROM your_table
GROUP BY COD
) gt ON your_table.COD = gt.COD and (your_table.value = gt.value OR gt.value IS NULL)
If you may have more than one non null value for a COD this will work
drop table MyTable
CREATE TABLE MyTable
(
COD INT,
ID INT,
VALUE VARCHAR(20)
)
INSERT INTO MyTable
VALUES (28,1, NULL),
(28,2,'Supermarket'),
(28,3,'School'),
(29,1,NULL),
(29,2,'School'),
(29,3,NULL),
(30,1,NULL);
WITH Dups AS
(SELECT COD FROM MyTable GROUP BY COD HAVING count (*) > 1 )
SELECT MyTable.COD,MyTable.ID,MyTable.VALUE FROM MyTable
INNER JOIN dups ON MyTable.COD = Dups.COD
WHERE value IS NOT NULL
UNION
SELECT MyTable.COD,MyTable.ID,MyTable.VALUE FROM MyTable
LEFT JOIN dups ON MyTable.COD = Dups.COD
WHERE dups.cod IS NULL

Complex join in sql with top 10 row

Table1:
Id Word Frequency
1 A 1
2 B 5
Table2:
Id Word SecondWord SecondFrequency
1 A A1 1
2 A A2 5
3 A A3 10
4 A A4 9
5 A A5 20
6 B B1 5
7 B B2 8
8 B B3 50
9 B B4 40
10 B B5 68
Required output
Top 3 record from “Table2” with Order by SecondFrequency Desc
Ex.
Word Frequency SecondWord SecondFrequency
A 1 A5 20
A 1 A3 10
A 1 A4 9
B 5 B5 68
B 5 B3 50
B 5 B4 40
How can i get the desire output
Use the ROW_NUMBER function, ordered by SecondFrequency, to get your required result:
CREATE TABLE #Table1(Id TINYINT, Word VARCHAR(1),Frequency TINYINT)
CREATE TABLE #Table2(Id TINYINT, Word VARCHAR(1),SecondWord
VARCHAR(2),SecondFrequency TINYINT)
INSERT INTO #Table1(Id, Word ,Frequency)
SELECT 1,'A',1 UNION ALL
SELECT 2,'B',5
INSERT INTO #Table2(Id, Word ,SecondWord ,SecondFrequency)
SELECT 1,'A','A1',1 UNION ALL
SELECT 2,'A','A2',5 UNION ALL
SELECT 3,'A','A3',10 UNION ALL
SELECT 4,'A','A4',9 UNION ALL
SELECT 5,'A','A5',20 UNION ALL
SELECT 6,'B','B1',5 UNION ALL
SELECT 7,'B','B2',8 UNION ALL
SELECT 8,'B','B3',50 UNION ALL
SELECT 9,'B','B4',40 UNION ALL
SELECT 10,'B','B5',68
SELECT *
FROM
(
SELECT ROW_NUMBER() OVER(PARTITION BY #Table1.Word ORDER BY
SecondFrequency DESC ) RNo ,#Table1.Word ,#Table1.Frequency,
SecondWord ,SecondFrequency
FROM #Table1
JOIN #Table2 ON #Table1.Word = #Table2.Word
) A
WHERE RNo BETWEEN 1 AND 3
you can use Row Number. By using Row Number you can give each row with the same 'word' a number based on their SecondFrequency. those number will be reset if the 'word' is changed.
;with cte as
(
select *, ROW_NUMBER() OVER (PARTITION BY Word ORDER BY SecondFrequency DESC) AS RowNumber from table2
)
select A.Word, B.Frequency, A.SecondWord, A.SecondFrequency
from cte A left join table1 B
on A.Word = B.Word
where A.RowNumber < 4
Inner Join with Row_Number() will help in this case !!!
CREATE TABLE #Table1
(
Id INT
,Word VARCHAR(10)
,Frequency INT
)
INSERT INTO #Table1 SELECT 1,'A',1
UNION SELECT 2,'B',5
CREATE TABLE #Table2
(
Id INT
,Word VARCHAR(10)
,SecondWord VARCHAR(10)
,SecondFrequency INT
)
INSERT INTO #Table2 SELECT
1,'A','A1',1 UNION ALL SELECT
2,'A','A2',5 UNION ALL SELECT
3,'A','A3',10 UNION ALL SELECT
4,'A','A4',9 UNION ALL SELECT
5,'A','A5',20 UNION ALL SELECT
6,'B','B1',5 UNION ALL SELECT
7,'B','B2',8 UNION ALL SELECT
8,'B','B3',50 UNION ALL SELECT
9,'B','B4',40 UNION ALL SELECT
10,'B','B5',68
SELECT * FROM #Table1
SELECT * FROM #Table2
SELECT X.Word,X.Frequency,X.SecondWord,X.SecondFrequency
FROM
(SELECT T1.Word,T1.Frequency,T2.SecondWord,T2.SecondFrequency,ROW_NUMBER() OVER(PARTITION BY T1.WORD ORDER BY T2.SecondFrequency desc) as RN
FROM #Table1 T1
JOIN #Table2 T2
ON T1.Word = T2.Word
) AS X
WHERE X.RN<=3
get the top 3 rows from Table_2
join the Table_1
the syntax is : ROW_NUMBER() OVER(PARTITION BY COL1 ORDER BY COL2) AS num
COL1 is the column to group and COL2 is the column to sort , num is the sorted number to be used to limit the results
SELECT t2.Word,
t1.Frequency,
t2.SecondWord,
t2.SecondFrequency
FROM
(SELECT *
FROM
(SELECT Word,
SecondWord,
SecondFrequency,
ROW_NUMBER() over(PARTITION BY Word
ORDER BY SecondFrequency DESC) AS num
FROM Table_2) T
WHERE T.num <= 3 ) t2
JOIN Table_1 AS t1 ON t2.Word = t1.Word
ORDER BY t2.SecondFrequency DESC;

How to get running sum of a column in sql server

Hi I have a column with name Qty from table Bills
i want a column that show the running sum of Qty column like this :
Qty Run_Sum
1 1
2 3
3 6
4 10
5 15
Please suggest an appropriate method to compute the running sum. Thanks.
If your RDBMS supports window functions:
for SQL Server 2012
-- Running sum of Qty in ascending Qty order.
-- The frame is spelled out as ROWS: with only ORDER BY, the default frame is
-- RANGE, which gives every row with the same Qty the same (combined) total
-- instead of a row-by-row running sum.
SELECT Qty,
       SUM(Qty) OVER (ORDER BY Qty
                      ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS CumulativeTOTAL
FROM tableName
SQLFiddle Demo
for SQL Server 2008
SELECT a.Qty, (SELECT SUM(b.Qty)
FROM TableName b
WHERE b.Qty <= a.Qty)
FROM TableName a
ORDER BY a.Qty;
SQLFiddle Demo
SQLFiddle demo
SELECT Qty,
SUM(Qty) OVER (ORDER BY Qty) Run_Sum
FROM t ORDER BY Qty
For SQLServer prior to 2012:
select Qty,
(select sum(Qty) from t where Qty<=t1.Qty)
from t t1 order by Qty
SQLFiddle demo
Or also you can do it without subquery:
select t1.Qty, sum(t2.Qty)
from t t1
join t t2 on (t1.Qty>=t2.Qty)
group by t1.Qty
order by t1.Qty
SQLFiddle demo
Here's a sample using Oracle/analytical functions:
select id, qty, sum(qty) over(order by id asc) run_sum
from test;
http://www.sqlfiddle.com/#!4/3d149/1
Check this
-- Sample data: a table variable whose IDENTITY column records insertion order.
DECLARE @TEMP table
(
    ID int IDENTITY(1,1),
    QUANTITY int
);
INSERT INTO @TEMP (QUANTITY)
SELECT 1 UNION ALL
SELECT 2 UNION ALL
SELECT 3 UNION ALL
SELECT 4 UNION ALL
SELECT 8 UNION ALL
SELECT 7 UNION ALL
SELECT 5 UNION ALL
SELECT 1;

-- Running sum by ID: each row is joined to itself and every earlier row
-- (t1.ID <= t.ID) and the joined quantities are summed.
SELECT t.QUANTITY AS Qty, SUM(t1.QUANTITY) AS Run_Sum
FROM @TEMP t
INNER JOIN @TEMP t1
    ON t1.ID <= t.ID
GROUP BY t.ID, t.QUANTITY
ORDER BY t.ID
-- Running sum via a recursive CTE.
-- The anchor picks the smallest Qty. It sits in a derived table because
-- T-SQL does not accept ORDER BY directly in front of UNION ALL.
-- NOTE: the recursive step looks for a row with Qty exactly one greater than
-- the previous one, so this only walks the whole table when the Qty values
-- are consecutive integers.
;with cte as (
    select anchor.Qty, anchor.Qty as RunningSum
    from (select top 1 Qty from Bills order by Qty) as anchor
    union all
    select t.Qty, cte.RunningSum + t.Qty
    from cte
    inner join Bills t on cte.Qty + 1 = t.Qty
)
select * from cte
@mahmud:
See what this gives
-- Sample data with gaps, duplicates and a negative quantity, used to show
-- how the recursive-CTE approach behaves on non-consecutive values.
DECLARE @Bills table
(
    QUANTITY int
);
INSERT INTO @Bills (QUANTITY)
SELECT 2 UNION ALL
SELECT 6 UNION ALL
SELECT 7 UNION ALL
SELECT 1 UNION ALL
SELECT 3 UNION ALL
SELECT -5 UNION ALL
SELECT 5 UNION ALL
SELECT 1;

-- The anchor picks the smallest QUANTITY. It sits in a derived table because
-- T-SQL does not accept ORDER BY directly in front of UNION ALL.
-- The recursive step only follows rows whose QUANTITY is exactly one greater
-- than the previous row's, so the walk stops at the first gap in the values.
;with cte as (
    select anchor.QUANTITY, anchor.QUANTITY as RunningSum
    from (select top 1 QUANTITY from @Bills order by QUANTITY) as anchor
    union all
    select t.QUANTITY, cte.RunningSum + t.QUANTITY
    from cte
    inner join @Bills t on cte.QUANTITY + 1 = t.QUANTITY
)
select * from cte

Is it possible to write a sql query that is grouped based on a running total of a column?

It would be easier to explain with an example. Suppose I wanted to get at most 5 items per group.
My input would be a table looking like this:
Item Count
A 2
A 3
A 3
B 4
B 4
B 5
C 1
And my desired output would look like this:
Item Count
A 5
A>5 3
B 4
B>5 9
C 1
An alternative output that I could also work with would be
Item Count RunningTotal
A 2 2
A 3 5
A 3 8
B 4 4
B 4 8
B 5 13
C 1 1
I can use ROW_NUMBER() to get the top X records in each group, however my requirement is to get the top X items for each group, not X records. My mind is drawing a blank as to how to do this.
-- Sample data: a T-SQL table variable with the question's (item, count) rows.
declare @yourTable table (item char(1), [count] int);
insert into @yourTable (item, [count])
select 'A', 2 union all
select 'A', 3 union all
select 'A', 3 union all
select 'B', 4 union all
select 'B', 4 union all
select 'B', 5 union all
select 'C', 1;

-- Number the rows within each item, then self-join each row to itself and
-- all earlier rows of the same item to accumulate the running total.
;with cte(item, count, row) as (
    select *, row_number() over (partition by item order by item, [count])
    from @yourTable
)
select t1.Item, t1.Count, sum(t2.count) as RunningTotal
from cte t1
join cte t2 on t1.item = t2.item and t2.row <= t1.row
group by t1.item, t1.count, t1.row
-- explicit ordering so the output matches the listed result
order by t1.item, t1.row
Result:
Item Count RunningTotal
---- ----------- ------------
A 2 2
A 3 5
A 3 8
B 4 4
B 4 8
B 5 13
C 1 1
Considering the clarifications from your comment, you should be able to produce the second kind of output from your post by running this query:
-- Running total per item, ordered by creating_user_priority and then by
-- created_date. The date comparison is inclusive (<=) so that each row's own
-- Count is part of its running total; with a strict < the first row of every
-- item would get NULL.
-- NOTE(review): the date column is assumed to be named created_date on both
-- sides of the comparison; confirm against the real table definition.
select t.Item
     , t.Count
     , (select sum(tt.count)
        from mytable tt
        where t.item = tt.item
          and (tt.creating_user_priority < t.creating_user_priority
               or (tt.creating_user_priority = t.creating_user_priority
                   and tt.created_date <= t.created_date))
       ) as RunningTotal
from mytable t
-- Sample data: a T-SQL table variable with the question's (item, count) rows.
declare @yourTable table (item char(1), [count] int);
insert into @yourTable (item, [count])
select 'A', 2 union all
select 'A', 3 union all
select 'A', 3 union all
select 'B', 4 union all
select 'B', 4 union all
select 'B', 5 union all
select 'C', 1;

-- Step 1: per-item running totals, materialized into temp table #RunTotal.
;with cte(item, count, row) as (
    select *, row_number() over (partition by item order by item, [count])
    from @yourTable
)
select t1.row, t1.Item, t1.Count, sum(t2.count) as RunningTotal
into #RunTotal
from cte t1
join cte t2 on t1.item = t2.item and t2.row <= t1.row
group by t1.item, t1.count, t1.row

-- Step 2: attach each item's grand total to every one of its rows.
alter table #RunTotal
add GrandTotal int

update rt
set GrandTotal = gt.Total
from #RunTotal rt
left join (
    select Item, sum(Count) Total
    from #RunTotal src
    group by Item) gt
on rt.Item = gt.Item

-- Step 3: for each item, report the largest running total that still fits
-- within 5, and (for items whose grand total exceeds 5) the remainder under
-- the label '<Item>>5'.
select Item, max(RunningTotal)
from #RunTotal
where RunningTotal <= 5
group by Item
union
select a.Item + '>5', total - five
from (
    select Item, max(GrandTotal) total
    from #RunTotal
    where GrandTotal > 5
    group by Item
) a
left join (
    select Item, max(RunningTotal) five
    from #RunTotal
    where RunningTotal <= 5
    group by Item
) b
on a.Item = b.Item
I've updated the accepted answer and got your desired result.
-- Items whose total is at most 5: report the total as-is.
SELECT Item, SUM(Count) AS [Count]
FROM mytable t
GROUP BY Item
HAVING SUM(Count) <= 5
UNION
-- Items whose total exceeds 5: report the capped part (5) ...
SELECT Item, 5
FROM mytable t
GROUP BY Item
HAVING SUM(Count) > 5
UNION
-- ... and the overflow beyond 5 under the label '<Item>>5'.
SELECT t2.Item + '>5', SUM(t2.Count) - 5
FROM mytable t2
GROUP BY Item
HAVING SUM(Count) > 5
ORDER BY Item, [Count]
select 'A' as Name, 2 as Cnt
into #tmp
union all select 'A',3
union all select 'A',3
union all select 'B',4
union all select 'B',4
union all select 'B',5
union all select 'C',1
select Name, case when sum(cnt) > 5 then 5 else sum(cnt) end Cnt
from #tmp
group by Name
union
select Name+'>5', sum(cnt)-5 Cnt
from #tmp
group by Name
having sum(cnt) > 5
Here is what I have so far. I know it's not complete but... this should be a good starting point.
I can get your second output by using a temp table and an update pass:
-- Sample data: a table variable with an IDENTITY primary key and a Total
-- column that the UPDATE pass below fills in.
DECLARE @Data TABLE
(
    ID INT IDENTITY(1,1) PRIMARY KEY
    ,Value VARCHAR(5)
    ,Number INT
    ,Total INT
);
INSERT INTO @Data (Value, Number) VALUES ('A',2);
INSERT INTO @Data (Value, Number) VALUES ('A',3);
INSERT INTO @Data (Value, Number) VALUES ('A',3);
INSERT INTO @Data (Value, Number) VALUES ('B',4);
INSERT INTO @Data (Value, Number) VALUES ('B',4);
INSERT INTO @Data (Value, Number) VALUES ('B',5);
INSERT INTO @Data (Value, Number) VALUES ('C',1);

-- Carry the previous row's Value and running total from row to row.
-- Both start as NULL, so the first row takes the ELSE branch.
DECLARE
    @Value VARCHAR(5)
    ,@Count INT;

-- Update pass: while Value stays the same, add Number to the carried total;
-- when Value changes, restart from Number.
-- NOTE(review): this relies on the rows being updated in ID order, which the
-- statement itself does not request; confirm that is acceptable.
UPDATE D
SET
    @Count = Total = CASE WHEN Value = @Value THEN Number + @Count ELSE Number END
    ,@Value = Value
FROM @Data AS D;

SELECT
    Value
    ,Number
    ,Total
FROM @Data
There may be better ways, but this should work.

Coalesce over Rows in MSSQL 2008,

I'm trying to determine the best approach here in MSSQL 2008.
Here is my sample data
TransDate Id Active
-------------------------
1/18 1pm 5 1
1/18 2pm 5 0
1/18 3pm 5 Null
1/18 4pm 5 1
1/18 5pm 5 0
1/18 6pm 5 Null
If grouped by Id and ordered by the TransDate, I want the last Non Null Value for the Active Column, and the MAX of TransDate
SELECT MAX(TransDate) AS TransDate,
Id,
--LASTNonNull(Active) AS Active
Here would be the results:
TransDate Id Active
---------------------
1/18 6pm 5 0
It would be like a Coalesce but over the rows, instead of two values/columns.
There would be many other columns that would also have this similar method applied, so I really don't want to make a separate join for each of the columns.
Any ideas?
I'd probably use a correlated sub query.
SELECT MAX(TransDate) AS TransDate,
Id,
(SELECT TOP (1) Active
FROM T t2
WHERE t2.Id = t1.Id
AND Active IS NOT NULL
ORDER BY TransDate DESC) AS Active
FROM T t1
GROUP BY Id
A way without
SELECT
Id,
MAX(TransDate) AS TransDate,
CAST(RIGHT(MAX(CONVERT(CHAR(23),TransDate,121) + CAST(Active AS CHAR(1))),1) AS BIT) AS Active,
/*You can probably figure out a more efficient thing to
compare than the above depending on your data. e.g.*/
CAST(MAX(DATEDIFF(SECOND,'19500101',TransDate) * CAST(10 AS BIGINT) + Active)%10 AS BIT) AS Active2
FROM T
GROUP BY Id
Or following the comments would cross apply work better for you?
WITH T (TransDate, Id, Active, SomeOtherColumn) AS
(
select GETDATE(), 5, 1, 'A' UNION ALL
select 1+GETDATE(), 5, 0, 'B' UNION ALL
select 2+GETDATE(), 5, null, 'C' UNION ALL
select 3+GETDATE(), 5, 1, 'D' UNION ALL
select 4+GETDATE(), 5, 0, 'E' UNION ALL
select 5+GETDATE(), 5, null,'F'
),
T1 AS
(
SELECT MAX(TransDate) AS TransDate,
Id
FROM T
GROUP BY Id
)
SELECT T1.TransDate,
Id,
CA.Active AS Active,
CA.SomeOtherColumn AS SomeOtherColumn
FROM T1
CROSS APPLY (SELECT TOP (1) Active, SomeOtherColumn
FROM T t2
WHERE t2.Id = T1.Id
AND Active IS NOT NULL
ORDER BY TransDate DESC) CA
This example should help, using analytical functions Max() OVER and Row_Number() OVER
create table tww( transdate datetime, id int, active bit)
insert tww select GETDATE(), 5, 1
insert tww select 1+GETDATE(), 5, 0
insert tww select 2+GETDATE(), 5, null
insert tww select 3+GETDATE(), 5, 1
insert tww select 4+GETDATE(), 5, 0
insert tww select 5+GETDATE(), 5, null
select maxDate as Transdate, id, Active
from (
select *,
max(transdate) over (partition by id) maxDate,
ROW_NUMBER() over (partition by id
order by case when active is not null then 0 else 1 end, transdate desc) rn
from tww
) x
where rn=1
Another option, quite expensive, would be doing it through XML. For educational purposes only
-- Build one XML <node> per id (id carried as the attribute @id) whose <data>
-- children are that id's rows, newest first. NULL columns are omitted from
-- the generated XML, so the first <active> element found is the latest
-- non-NULL value.
select
    ID = n.c.value('@id', 'int'),
    trandate = n.c.value('(data/transdate)[1]', 'datetime'),
    active = n.c.value('(data/active)[1]', 'bit')
from
    (select xml = convert(xml,
        (select id [@id],
            (select *
             from tww t
             where t.id = tww.id
             order by transdate desc
             for xml path('data'), type)
         from tww
         group by id
         for xml path('node'), root('root'), elements)
    )) x
cross apply xml.nodes('root/node') n(c)
It works on the principle that the XML generated has each record as a child node of the ID. Null columns have been omitted, so the first column found using xpath (child/columnname) is the first non-null value similar to COALESCE.
You could use a subquery:
-- Per Id: the latest TransDate overall, plus the Active value of the latest
-- row whose Active is not NULL.
-- GROUP BY is required: without it, selecting Id next to MAX(TransDate) is
-- rejected as a non-aggregated column.
SELECT MAX(t1.TransDate) AS TransDate
     , t1.Id
     , (
         SELECT TOP 1 t2.Active
         FROM YourTable t2
         WHERE t1.Id = t2.Id
           AND t2.Active IS NOT NULL
         ORDER BY
             t2.TransDate DESC
       ) AS Active
FROM YourTable t1
GROUP BY t1.Id
I created a temp table named #temp to test my solution, and here is what I came up with:
transdate id active
1/1/2011 12:00:00 AM 5 1
1/2/2011 12:00:00 AM 5 0
1/3/2011 12:00:00 AM 5 null
1/4/2011 12:00:00 AM 5 1
1/5/2011 12:00:00 AM 5 0
1/6/2011 12:00:00 AM 5 null
1/1/2011 12:00:00 AM 6 2
1/2/2011 12:00:00 AM 6 3
1/3/2011 12:00:00 AM 6 null
1/4/2011 12:00:00 AM 6 2
1/5/2011 12:00:00 AM 6 null
This query...
select max(a.transdate) as transdate, a.id, (
select top (1) b.active
from #temp b
where b.active is not null
and b.id = a.id
order by b.transdate desc
) as active
from #temp a
group by a.id
Returns these results.
transdate id active
1/6/2011 12:00:00 AM 5 0
1/5/2011 12:00:00 AM 6 2
Assuming a table named "test1", how about using ROW_NUMBER, OVER and PARTITION BY?
-- Per id: the latest transdate over ALL rows, plus the active value of the
-- latest row whose active is not NULL.
-- The NULL rows must stay in the inner query so that MAX(transdate) sees
-- them; filtering them out first would report the date of the last non-NULL
-- row instead of the overall maximum. The CASE in the ordering pushes NULL
-- rows behind non-NULL ones, so rownumber = 1 is the latest non-NULL row
-- (or a NULL row only when the id has no non-NULL value at all).
SELECT a.max_transdate AS transdate, a.id, a.active
FROM (
    SELECT id,
           active,
           MAX(transdate) OVER (PARTITION BY id) AS max_transdate,
           ROW_NUMBER() OVER (PARTITION BY id
                              ORDER BY CASE WHEN active IS NULL THEN 1 ELSE 0 END,
                                       transdate DESC) AS rownumber
    FROM test1
) a
WHERE a.rownumber = 1