Coalesce over Rows in MSSQL 2008, - sql

I'm trying to determine the best approach here in MSSQL 2008.
Here is my sample data
TransDate Id Active
-------------------------
1/18 1pm 5 1
1/18 2pm 5 0
1/18 3pm 5 Null
1/18 4pm 5 1
1/18 5pm 5 0
1/18 6pm 5 Null
If grouped by Id and ordered by the TransDate, I want the last Non Null Value for the Active Column, and the MAX of TransDate
SELECT MAX(TransDate) AS TransDate,
Id,
--LASTNonNull(Active) AS Active
Here would be the results:
TransDate Id Active
---------------------
1/18 6pm 5 0
It would be like a Coalesce but over the rows, instead of two values/columns.
There would be many other columns that would also have this similiar method applied, so I really don't want to make a seperate join for each of the columns.
Any ideas?

I'd probably use a correlated sub query.
SELECT MAX(TransDate) AS TransDate,
Id,
(SELECT TOP (1) Active
FROM T t2
WHERE t2.Id = t1.Id
AND Active IS NOT NULL
ORDER BY TransDate DESC) AS Active
FROM T t1
GROUP BY Id
A way without
SELECT
Id,
MAX(TransDate) AS TransDate,
CAST(RIGHT(MAX(CONVERT(CHAR(23),TransDate,121) + CAST(Active AS CHAR(1))),1) AS BIT) AS Active,
/*You can probably figure out a more efficient thing to
compare than the above depending on your data. e.g.*/
CAST(MAX(DATEDIFF(SECOND,'19500101',TransDate) * CAST(10 AS BIGINT) + Active)%10 AS BIT) AS Active2
FROM T
GROUP BY Id
Or following the comments would cross apply work better for you?
WITH T (TransDate, Id, Active, SomeOtherColumn) AS
(
select GETDATE(), 5, 1, 'A' UNION ALL
select 1+GETDATE(), 5, 0, 'B' UNION ALL
select 2+GETDATE(), 5, null, 'C' UNION ALL
select 3+GETDATE(), 5, 1, 'D' UNION ALL
select 4+GETDATE(), 5, 0, 'E' UNION ALL
select 5+GETDATE(), 5, null,'F'
),
T1 AS
(
SELECT MAX(TransDate) AS TransDate,
Id
FROM T
GROUP BY Id
)
SELECT T1.TransDate,
Id,
CA.Active AS Active,
CA.SomeOtherColumn AS SomeOtherColumn
FROM T1
CROSS APPLY (SELECT TOP (1) Active, SomeOtherColumn
FROM T t2
WHERE t2.Id = T1.Id
AND Active IS NOT NULL
ORDER BY TransDate DESC) CA

This example should help, using analytical functions Max() OVER and Row_Number() OVER
create table tww( transdate datetime, id int, active bit)
insert tww select GETDATE(), 5, 1
insert tww select 1+GETDATE(), 5, 0
insert tww select 2+GETDATE(), 5, null
insert tww select 3+GETDATE(), 5, 1
insert tww select 4+GETDATE(), 5, 0
insert tww select 5+GETDATE(), 5, null
select maxDate as Transdate, id, Active
from (
select *,
max(transdate) over (partition by id) maxDate,
ROW_NUMBER() over (partition by id
order by case when active is not null then 0 else 1 end, transdate desc) rn
from tww
) x
where rn=1
Another option, quite expensive, would be doing it through XML. For educational purposes only
select
ID = n.c.value('#id', 'int'),
trandate = n.c.value('(data/transdate)[1]', 'datetime'),
active = n.c.value('(data/active)[1]', 'bit')
from
(select xml=convert(xml,
(select id [#id],
( select *
from tww t
where t.id=tww.id
order by transdate desc
for xml path('data'), type)
from tww
group by id
for xml path('node'), root('root'), elements)
)) x cross apply xml.nodes('root/node') n(c)
It works on the principle that the XML generated has each record as a child node of the ID. Null columns have been omitted, so the first column found using xpath (child/columnname) is the first non-null value similar to COALESCE.

You could use a subquery:
SELECT MAX(TransDate) AS TransDate
, Id
, (
SELECT TOP 1 t2.Active
FROM YourTable t2
WHERE t1.id = t2.id
and t2.Active is not null
ORDER BY
t2.TransDate desc
)
FROM YourTable t1

I created a temp table named #temp to test my solution, and here is what I came up with:
transdate id active
1/1/2011 12:00:00 AM 5 1
1/2/2011 12:00:00 AM 5 0
1/3/2011 12:00:00 AM 5 null
1/4/2011 12:00:00 AM 5 1
1/5/2011 12:00:00 AM 5 0
1/6/2011 12:00:00 AM 5 null
1/1/2011 12:00:00 AM 6 2
1/2/2011 12:00:00 AM 6 3
1/3/2011 12:00:00 AM 6 null
1/4/2011 12:00:00 AM 6 2
1/5/2011 12:00:00 AM 6 null
This query...
select max(a.transdate) as transdate, a.id, (
select top (1) b.active
from #temp b
where b.active is not null
and b.id = a.id
order by b.transdate desc
) as active
from #temp a
group by a.id
Returns these results.
transdate id active
1/6/2011 12:00:00 AM 5 0
1/5/2011 12:00:00 AM 6 2

Assuming a table named "test1", how about using ROW_NUMBER, OVER and PARTITION BY?
SELECT transdate, id, active FROM
(SELECT transdate, ROW_NUMBER() OVER(PARTITION BY id ORDER BY transdate desc) AS rownumber, id, active
FROM test1
WHERE active is not null) a
WHERE a.rownumber = 1

Related

SQL windows function partition by null values

I have a data-set:
year id
NULL 123
NULL 124
NULL 125
1932 126
1932 127
1933 128
1933 129
1934 130
I would like to create a running count, where I have the group of year with NULL values as one group and the other group with non-null values, namely.
year count
NULL 3
1932 2
1933 4
1934 5
I have tried to do this by union of two windows function data set, namely:
select distinct year,
count(id) over (order by year asc)
from data
where year is null
union
select distinct year,
count(id) over (order by year asc)
from data
where year is not null;
I was wondering if there is a cleaner way of doing this such as:
select distinct year,
count(id) over (partition by <whether year is null condition> order by year
asc)
from data;
MY sql version is db2.
try this:
DECLARE #tab TABLE(year INT, id INT)
INSERT INTO #tab VALUES( NULL,123)
INSERT INTO #tab VALUES(NULL,124)
INSERT INTO #tab VALUES(NULL,125)
INSERT INTO #tab VALUES(1932,126)
INSERT INTO #tab VALUES(1932,127)
INSERT INTO #tab VALUES(1933,128)
INSERT INTO #tab VALUES(1933,129)
INSERT INTO #tab VALUES(1934,130)
SELECT D.year, MAX(D.RN)Count
FROM(
SELECT year,SUM(1) OVER(PARTITION BY CASE WHEN year IS NULL THEN 1 ELSE 0 END ORDER BY id) RN FROM #tab
)D
GROUP BY D.year
Output:
year Count
NULL 3
1932 2
1933 4
1934 5
Union all will get your required output, its hard way to achieve this but yet would get output
declare #table table (year int, id int)
insert #table
(year,id)
select
NULL , 123 union all
select NULL , 124 union all
select NULL , 125 union all
select 1932 , 126 union all
select 1932 , 127 union all
select 1933 , 128 union all
select 1933 , 129 union all
select 1934 , 130
select Runningtotal, year from
(
select SUM(count) over (order by year) RunningTotal ,year from
(
select count(*) count,year from #table group by year ) x
where year is not null
union all
select SUM(count) over (order by year) Runningtotal ,year from
(
select count(*) count,year from #table group by year ) x
where year is null
) y order by year
Here is an option which does not use analytic functions:
SELECT DISTINCT t1.col,
CASE WHEN t1.col IS NULL
THEN
(SELECT COUNT(*) FROM data t2 WHERE t1.year IS NULL AND t2.year IS NULL)
ELSE
(SELECT COUNT(t2.id) FROM data t2
WHERE t1.year = t2.year OR (t2.id <= t1.id AND t2.year IS NOT NULL))
END cnt
FROM data t1;
Method 1 :
select distinct year, f3.NB
from tmpxx f1
left outer join lateral
(
select count(*) NB from tmpxx f2
where f1.year is null and f2.year is null or
f1.year>=f2.year
) f3 on 1=1
Method 2:
select distinct year,
( select count(*) NB from tmpxx f2
where f1.year is null and f2.year is null or f1.year>=f2.year
) nb
from tmpxx f1

SQL Grouping by first digit from sets of record

I need your help in SQL
I have a set of records of Cost center ID below.
what I want to do is to segregate/group them by inserting column to distinguish the category.
as you can see all digits start in 7 is belong to the bold digits.
my expected out is on below image also.
You can as the below:
DECLARE #Tbl TABLE (ID INT)
INSERT INTO #Tbl
VALUES
(735121201),
(735120001),
(5442244),
(735141094),
(735141097),
(4008060),
(735117603),
(40100000),
(735142902),
(735151199),
(4010070)
;WITH TableWithRowId
AS
(
SELECT
ROW_NUMBER() OVER (ORDER BY(SELECT NULL)) RowId,
ID
FROM
#Tbl
), TempTable
AS
(
SELECT T.RowId + 1 AS RowId FROM TableWithRowId T
WHERE
LEFT(T.ID, 1) != 7
), ResultTable
AS
(
SELECT
T.RowId ,
T.ID,
DENSE_RANK() OVER (ORDER BY (SELECT TOP 1 A.RowId FROM TempTable A WHERE A.RowId > T.RowId ORDER BY A.RowId)) AS Flag
FROM TableWithRowId T
)
SELECT * FROM ResultTable
Result:
RowId ID Flag
----------- ----------- ----------
1 735121201 1
2 735120001 1
3 5442244 1
4 735141094 2
5 735141097 2
6 4008060 2
7 735117603 3
8 40100000 3
9 735142902 4
10 735151199 4
11 4010070 4
The following query is similer with NEER's
;WITH test_table(CenterID)AS(
SELECT '735121201' UNION ALL
SELECT '735120001' UNION ALL
SELECT '5442244' UNION ALL
SELECT '735141094' UNION ALL
SELECT '735141097' UNION ALL
SELECT '4008060' UNION ALL
SELECT '735117603' UNION ALL
SELECT '40100000' UNION ALL
SELECT '735142902' UNION ALL
SELECT '735151199' UNION ALL
SELECT '4010070'
),t1 AS (
SELECT *,ROW_NUMBER()OVER(ORDER BY(SELECT 1)) AS rn,CASE WHEN LEFT(t.CenterID,1)='7' THEN 1 ELSE 0 END AS isSeven
FROM test_table AS t
),t2 AS(
SELECT t1.*,ROW_NUMBER()OVER(ORDER BY t1.rn) AS toFilter
FROM t1 LEFT JOIN t1 AS pt ON pt.rn=t1.rn-1
WHERE pt.CenterID IS NULL OR (t1.isSeven=1 AND pt.isSeven=0)
)
SELECT t1.CenterID,x.toFilter FROM t1
CROSS APPLY(SELECT TOP 1 t2.toFilter FROM t2 WHERE t2.rn<=t1.rn ORDER BY rn desc) x
CenterID toFilter
--------- --------------------
735121201 1
735120001 1
5442244 1
735141094 2
735141097 2
4008060 2
735117603 3
40100000 3
735142902 4
735151199 4
4010070 4

select distinct list of ids from table with earliest value in same table

I have the following table,
SDate Id Balance
2016-01-01 ABC 3
2016-01-01 DEF 7
2016-01-01 GHI 2
2016-02-01 ABC 6
2016-02-01 DEF 4
2016-02-01 GHI 8
2016-02-01 XYZ 12
I need to write a query that gives me a distinct list of Id's over a date range (so in this example SDate >= '2016-01-01' and SDate <= '2016-02-01') but also give me the earliest balance so the result from the table above I would like to see is,
Id Balance
ABC 3
DEF 7
GHI 2
XYZ 12
Is this possible?
UPDATE
Sorry I should have specified that for each date the Id is unique.
You can do this with a derived table that first works out the minimum SDate value for each Id value. Using this you then join back to your original table to find the Balance for the row that matches those values:
declare #t table(SDate date,Id nvarchar(3),Balance int);
insert into #t values ('2016-01-01','ABC',3),('2016-01-01','DEF',7),('2016-01-01','GHI',2),('2016-02-01','ABC',6),('2016-02-01','DEF',4),('2016-02-01','GHI',8),('2016-02-01','XYZ',12);
declare #StartDate date = '20160101';
declare #EndDate date = '20160201';
with d as
(
select Id
,min(SDate) as MinSDate
from #t
where SDate between #StartDate and #EndDate
group by id
)
select d.Id
,t.Balance
from d
inner join #t t
on(d.Id = t.Id
and d.MinSDate = t.SDate
);
Output:
Id | Balance
----+--------
ABC | 3
DEF | 7
GHI | 2
XYZ | 12
This should be possible with a window function - all you have to do is
partition by id
assign a row number, and
select the top row for each id
Example:
select id,
balance
from (
select id,
balance,
row_number() over( partition by id order by SDate ) as row_num
from table1
where SDate between '2016-01-01' and '2016-02-01'
) as a
where row_num = 1
Note: the advantage of this method is it is a lot more flexible. Say you wanted the 2 oldest records, you could just change to where row_num <= 2.
Analytic row_number() should be the fastest
select *
from (
select
t.*,
row_number() over (partition by Id order by SDate) rn
from your_table t
) t where rn = 1;
You can achieve this with a self join, which may not be the fastest or most elegant solution:
CREATE TABLE #SOPostSample
(
SDate DATE ,
Id NVARCHAR(5) ,
Balance INT
);
INSERT INTO #SOPostSample
( SDate, Id, Balance )
VALUES ( '2016-01-01', 'ABC', 3 ),
( '2016-01-01', 'DEF', 7 ),
( '2016-01-01', 'GHI', 2 ),
( '2016-02-01', 'ABC', 6 ),
( '2016-02-01', 'DEF', 4 ),
( '2016-02-01', 'GHI', 8 ),
( '2016-02-01', 'XYZ', 12 );
SELECT t1.Id ,
MIN(t2.Balance) Balance
FROM #SOPostSample t1
INNER JOIN #SOPostSample t2 ON t1.Id = t2.Id
GROUP BY t1.Id ,
t2.SDate
HAVING t2.SDate = MIN(t1.SDate);
DROP TABLE #SOPostSample;
Produces:
id Balance
============
ABC 3
DEF 7
GHI 2
XYZ 12
This works for the sample data, but please test with more data as I just wrote it quickly.
This should work, Top 1 just inserted for safety, should not be needed if SDate and Id are unique in combination
SELECT o.Id ,
( SELECT TOP 1
Balance
FROM tbl
WHERE Id = o.Id
AND SDate = MIN(o.SDate)
) Balance
FROM tbl o
GROUP BY Id
HAVING sDate BETWEEN '20160101' AND '20160201';
You can use sub-query
SELECT Id ,
( SELECT TOP 1
Balance
FROM [TableName] AS T1
WHERE T1.Id = [TableName].Id
ORDER BY SDate
) AS Balance
FROM [TableName]
GROUP BY Id;

Compare getdate() with two different fields

I have 2 tables:
T1 T2
id Effdate E_id DOB
-------------- ------------
1 20161212 2 1950-02-16 00:12:24
2 20130124 5 1978-01-16 10:14:30
I want to compare getdate() < Maxdate(effdate, DOB)?
I am getting datetime conversion error.
for example : getdate() < MAXDATE( 20161212 , 1950-02-16 00:12:24)
expected result should be from table T1:
id Effdate
--------------
1 20161212
If id in both tables are correspondent on id = E_id you can UNION them and GROUP BY id:
;WITH T1 AS (
SELECT 1 id,
CAST('20161212' as varchar(10)) Effdate
UNION ALL
SELECT 2,
'20130124'
), T2 AS (
SELECT 1 E_id,
CAST('1950-02-16 00:12:24' as datetime) DOB
UNION ALL
SELECT 2,
'1978-01-16 10:14:30'
)
SELECT id,
MAX(CAST(Effdate as datetime)) as MD
FROM (
SELECT *
FROM T1
UNION ALL
SELECT *
FROM T2
) t
GROUP BY id
HAVING MAX(CAST(Effdate as datetime)) >= GETDATE()
Will bring you expected result

Is it possible to write a sql query that is grouped based on a running total of a column?

It would be easier to explain with an example. Suppose I wanted to get at most 5 items per group.
My input would be a table looking like this:
Item Count
A 2
A 3
A 3
B 4
B 4
B 5
C 1
And my desired output would look like this:
Item Count
A 5
A>5 3
B 4
B>5 9
C 1
An alternative output that I could also work with would be
Item Count RunningTotal
A 2 2
A 3 5
A 3 8
B 4 4
B 4 8
B 5 13
C 1 1
I can use ROW_NUMBER() to get the top X records in each group, however my requirement is to get the top X items for each group, not X records. My mind is drawing a blank as to how to do this.
declare #yourTable table (item char(1), [count] int)
insert into #yourTable
select 'A', 2 union all
select 'A', 3 union all
select 'A', 3 union all
select 'B', 4 union all
select 'B', 4 union all
select 'B', 5 union all
select 'C', 1
;with cte(item, count, row) as (
select *, row_number() over ( partition by item order by item, [count])
from #yourTable
)
select t1.Item, t1.Count, sum(t2.count) as RunningTotal from cte t1
join cte t2 on t1.item = t2.item and t2.row <= t1.row
group by t1.item, t1.count, t1.row
Result:
Item Count RunningTotal
---- ----------- ------------
A 2 2
A 3 5
A 3 8
B 4 4
B 4 8
B 5 13
C 1 1
Considering the clarifications from your comment, you should be able to produce the second kid of output from your post by running this query:
select t.Item
, t.Count
, (select sum(tt.count)
from mytable tt
where t.item=tt.item and (tt.creating_user_priority < t.creating_user_priority or
( tt.creating_user_priority = t.creating_user_priority and tt.created_date < t.createdDate))
) as RunningTotal
from mytable t
declare #yourTable table (item char(1), [count] int)
insert into #yourTable
select 'A', 2 union all
select 'A', 3 union all
select 'A', 3 union all
select 'B', 4 union all
select 'B', 4 union all
select 'B', 5 union all
select 'C', 1
;with cte(item, count, row) as (
select *, row_number() over ( partition by item order by item, [count])
from #yourTable
)
select t1.row, t1.Item, t1.Count, sum(t2.count) as RunningTotal
into #RunTotal
from cte t1
join cte t2 on t1.item = t2.item and t2.row <= t1.row
group by t1.item, t1.count, t1.row
alter table #RunTotal
add GrandTotal int
update rt
set GrandTotal = gt.Total
from #RunTotal rt
left join (
select Item, sum(Count) Total
from #RunTotal rt
group by Item) gt
on rt.Item = gt.Item
select Item, max(RunningTotal)
from #RunTotal
where RunningTotal <= 5
group by Item
union
select a.Item + '>5', total - five
from (
select Item, max(GrandTotal) total
from #RunTotal
where GrandTotal > 5
group by Item
) a
left join (
select Item, max(RunningTotal) five
from #RunTotal
where RunningTotal <= 5
group by Item
) b
on a.Item = b.Item
I've updated the accepted answer and got your desired result.
SELECT Item, SUM(Count)
FROM mytable t
GROUP BY Item
HAVING SUM(Count) <=5
UNION
SELECT Item, 5
FROM mytable t
GROUP BY Item
HAVING SUM(Count) >5
UNION
SELECT t2.Item + '>5', Sum(t2.Count) - 5
FROM mytable t2
GOUP BY Item
HAVING SUM(Count) > 5
ORDER BY 1, 2
select 'A' as Name, 2 as Cnt
into #tmp
union all select 'A',3
union all select 'A',3
union all select 'B',4
union all select 'B',4
union all select 'B',5
union all select 'C',1
select Name, case when sum(cnt) > 5 then 5 else sum(cnt) end Cnt
from #tmp
group by Name
union
select Name+'>5', sum(cnt)-5 Cnt
from #tmp
group by Name
having sum(cnt) > 5
Here is what I have so far. I know it's not complete but... this should be a good starting point.
I can get your second output by using a temp table and an update pass:
DECLARE #Data TABLE
(
ID INT IDENTITY(1,1) PRIMARY KEY
,Value VARCHAR(5)
,Number INT
,Total INT
)
INSERT INTO #Data (Value, Number) VALUES ('A',2)
INSERT INTO #Data (Value, Number) VALUES ('A',3)
INSERT INTO #Data (Value, Number) VALUES ('A',3)
INSERT INTO #Data (Value, Number) VALUES ('B',4)
INSERT INTO #Data (Value, Number) VALUES ('B',4)
INSERT INTO #Data (Value, Number) VALUES ('B',5)
INSERT INTO #Data (Value, Number) VALUES ('C',1)
DECLARE
#Value VARCHAR(5)
,#Count INT
UPDATE #Data
SET
#Count = Total = CASE WHEN Value = #Value THEN Number + #Count ELSE Number END
,#Value = Value
FROM #Data AS D
SELECT
Value
,Number
,Total
FROM #Data
There may be better ways, but this should work.