Getting the minimum of column on the basis of other field - sql

I have a scenario wherein I have
Id|rank| date
1 | 7 |07/08/2015
1 | 7 |09/08/2015
1 | 8 |16/08/2015
1 | 8 |17/08/2015
1 | 7 |19/08/2015
1 | 7 |15/08/2015
2 | 7 |01/08/2015
2 | 7 |02/08/2015
2 | 8 |16/08/2015
2 | 8 |17/08/2015
2 | 7 |26/08/2015
2 | 7 |28/08/2015
My desired solution is
1 | 7 |07/08/2015
1 | 8 |16/08/2015
1 | 7 |15/08/2015
2 | 7 |01/08/2015
2 | 8 |16/08/2015
2 | 7 |26/08/2015
i.e for each block of id and rank I want the minimum of date.
I have tried using while loop as there are thousands of records it is taking 2 hours to load.Is there any other way to do please suggest.

For each row give unique row number using necessary order. (As I get Id is more important than date and date is more important than rank).
Join resulting table to itself using row numbers shifted by one row (d1.RowNum = d2.RowNum+1).
Select only rows that are joined to "other block" rows (d1.Id <> d2.Id or d1.Rank <> d2.rank).
Depending on shifting direction and selected table either maximal or minimal date will be selected.
Don't forget "edge case" - row that due to shifting can't be joined (that's why not inner join and d1.RowNum = 1 condition used).
;WITH dataWithRowNums as (
select Id, Rank, Date,
RowNum = ROW_NUMBER() OVER (ORDER BY Id,date,rank)
from YourTable
)
select d1.Id, d1.Rank, d1.Date
from dataWithRowNums d1
left join dataWithRowNums d2
on d1.RowNum = d2.RowNum+1 and (d1.Id <> d2.Id or d1.Rank <> d2.rank)
where not d2.Id is null or d1.RowNum = 1
This code returns result bit different from yours:
Id Rank Date
1 7 2015-08-07
1 8 2015-08-16
1 7 2015-08-19 <-- you've got here 2015-08-15
2 7 2015-08-01
2 8 2015-08-16
2 7 2015-08-26
As block (Rank 8 Id 1) have started at 16/08 so row 15/08 for rank 7 is related to first block (rank7 Id1).
If you still need your sorting (so 15/08 rank 7 is related to second block (rank7 id1)) then you should provide your own RowSorting data and then ask here about another solution for another task )

Here is the query using row_number()
;WITH cte_rec
as (SELECT Id,Rank,Date
,ROW_NUMBER()OVER (partition by Id,Rank ORDER BY date) as RNO
FROM YourTable)
SELECT Id,Rank,Date
FROM cte_rec
WHERE RNO =1

This is what I have tried and is running as expected
create table #temp
(
iden int identity(1,1),
ID int,
[rank] int,
[date] date,
dr_id int,
rownum_id int,
grouprecord int
)
Insert into #temp(id,rank,date)
select 1 , 7 ,'07/08/2015'
union all select 1 , 7 ,'09/08/2015'
union all select 1 , 8 ,'08/16/2015'
union all select 1 , 8 ,'08/17/2015'
union all select 1 , 7 ,'08/19/2015'
union all select 1 , 7 ,'08/15/2015'
union all select 2 , 7 ,'08/01/2015'
union all select 2 , 7 ,'08/02/2015'
union all select 2 , 8 ,'08/16/2015'
union all select 2 , 8 ,'08/17/2015'
union all select 2 , 7 ,'08/26/2015'
union all select 2 , 7 ,'08/28/2015'
update t1
set dr_id = t2.rn
from #temp t1 inner join
(select iden, dense_rank() over(order by id) as rn from #temp) t2
on t1.iden = t2.iden
update t1
set rownum_id = t2.rn
from #temp t1 inner join
(select iden, row_number() over(partition by dr_id order by id) as rn from #temp) t2
on t1.iden = t2.iden
select *,row_number() over(order by iden)rn into #temp1 from
(
select t2.*
from #temp t1 inner join #temp t2
on (t1.dr_id = t2.dr_id or t2.dr_id = (t1.dr_id +1) ) and ( t1.rank<>t2.rank or t2.dr_id = (t1.dr_id +1) )
and t2.iden = t1.iden + 1
)a
declare #id int,#miniden int,#maxiden int,#maxid int
set #id = 1
select #maxid = max(iden) from #temp
while exists(select 1 from #temp1 where rn = #id)
begin
Select #miniden = iden from #temp1
where rn = #id
Select #maxiden = iden from #temp1
where rn = #id+1
update #temp
set grouprecord = #id +1
where iden between #miniden and #maxiden
IF(#maxiden IS NULL)
BEGIN
Update #temp
set grouprecord = #id +1
where iden between #miniden and #maxid
END
set #id = #id + 1
SET #miniden =NULL
SET #maxiden = NULL
end
UPDATE #TEMP
SET GROUPRECORD = 1
WHERE GROUPRECORD IS NULL
select min(date) as mindate,grouprecord from #temp
group by grouprecord
Thanks everyone the help :)

Related

Rolling Average in SQL with Partition [duplicate]

declare #t table
(
id int,
SomeNumt int
)
insert into #t
select 1,10
union
select 2,12
union
select 3,3
union
select 4,15
union
select 5,23
select * from #t
the above select returns me the following.
id SomeNumt
1 10
2 12
3 3
4 15
5 23
How do I get the following:
id srome CumSrome
1 10 10
2 12 22
3 3 25
4 15 40
5 23 63
select t1.id, t1.SomeNumt, SUM(t2.SomeNumt) as sum
from #t t1
inner join #t t2 on t1.id >= t2.id
group by t1.id, t1.SomeNumt
order by t1.id
SQL Fiddle example
Output
| ID | SOMENUMT | SUM |
-----------------------
| 1 | 10 | 10 |
| 2 | 12 | 22 |
| 3 | 3 | 25 |
| 4 | 15 | 40 |
| 5 | 23 | 63 |
Edit: this is a generalized solution that will work across most db platforms. When there is a better solution available for your specific platform (e.g., gareth's), use it!
The latest version of SQL Server (2012) permits the following.
SELECT
RowID,
Col1,
SUM(Col1) OVER(ORDER BY RowId ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Col2
FROM tablehh
ORDER BY RowId
or
SELECT
GroupID,
RowID,
Col1,
SUM(Col1) OVER(PARTITION BY GroupID ORDER BY RowId ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Col2
FROM tablehh
ORDER BY RowId
This is even faster. Partitioned version completes in 34 seconds over 5 million rows for me.
Thanks to Peso, who commented on the SQL Team thread referred to in another answer.
For SQL Server 2012 onwards it could be easy:
SELECT id, SomeNumt, sum(SomeNumt) OVER (ORDER BY id) as CumSrome FROM #t
because ORDER BY clause for SUM by default means RANGE UNBOUNDED PRECEDING AND CURRENT ROW for window frame ("General Remarks" at https://msdn.microsoft.com/en-us/library/ms189461.aspx)
Let's first create a table with dummy data:
Create Table CUMULATIVESUM (id tinyint , SomeValue tinyint)
Now let's insert some data into the table;
Insert Into CUMULATIVESUM
Select 1, 10 union
Select 2, 2 union
Select 3, 6 union
Select 4, 10
Here I am joining same table (self joining)
Select c1.ID, c1.SomeValue, c2.SomeValue
From CumulativeSum c1, CumulativeSum c2
Where c1.id >= c2.ID
Order By c1.id Asc
Result:
ID SomeValue SomeValue
-------------------------
1 10 10
2 2 10
2 2 2
3 6 10
3 6 2
3 6 6
4 10 10
4 10 2
4 10 6
4 10 10
Here we go now just sum the Somevalue of t2 and we`ll get the answer:
Select c1.ID, c1.SomeValue, Sum(c2.SomeValue) CumulativeSumValue
From CumulativeSum c1, CumulativeSum c2
Where c1.id >= c2.ID
Group By c1.ID, c1.SomeValue
Order By c1.id Asc
For SQL Server 2012 and above (much better performance):
Select
c1.ID, c1.SomeValue,
Sum (SomeValue) Over (Order By c1.ID )
From CumulativeSum c1
Order By c1.id Asc
Desired result:
ID SomeValue CumlativeSumValue
---------------------------------
1 10 10
2 2 12
3 6 18
4 10 28
Drop Table CumulativeSum
A CTE version, just for fun:
;
WITH abcd
AS ( SELECT id
,SomeNumt
,SomeNumt AS MySum
FROM #t
WHERE id = 1
UNION ALL
SELECT t.id
,t.SomeNumt
,t.SomeNumt + a.MySum AS MySum
FROM #t AS t
JOIN abcd AS a ON a.id = t.id - 1
)
SELECT * FROM abcd
OPTION ( MAXRECURSION 1000 ) -- limit recursion here, or 0 for no limit.
Returns:
id SomeNumt MySum
----------- ----------- -----------
1 10 10
2 12 22
3 3 25
4 15 40
5 23 63
Late answer but showing one more possibility...
Cumulative Sum generation can be more optimized with the CROSS APPLY logic.
Works better than the INNER JOIN & OVER Clause when analyzed the actual query plan ...
/* Create table & populate data */
IF OBJECT_ID('tempdb..#TMP') IS NOT NULL
DROP TABLE #TMP
SELECT * INTO #TMP
FROM (
SELECT 1 AS id
UNION
SELECT 2 AS id
UNION
SELECT 3 AS id
UNION
SELECT 4 AS id
UNION
SELECT 5 AS id
) Tab
/* Using CROSS APPLY
Query cost relative to the batch 17%
*/
SELECT T1.id,
T2.CumSum
FROM #TMP T1
CROSS APPLY (
SELECT SUM(T2.id) AS CumSum
FROM #TMP T2
WHERE T1.id >= T2.id
) T2
/* Using INNER JOIN
Query cost relative to the batch 46%
*/
SELECT T1.id,
SUM(T2.id) CumSum
FROM #TMP T1
INNER JOIN #TMP T2
ON T1.id > = T2.id
GROUP BY T1.id
/* Using OVER clause
Query cost relative to the batch 37%
*/
SELECT T1.id,
SUM(T1.id) OVER( PARTITION BY id)
FROM #TMP T1
Output:-
id CumSum
------- -------
1 1
2 3
3 6
4 10
5 15
Select
*,
(Select Sum(SOMENUMT)
From #t S
Where S.id <= M.id)
From #t M
You can use this simple query for progressive calculation :
select
id
,SomeNumt
,sum(SomeNumt) over(order by id ROWS between UNBOUNDED PRECEDING and CURRENT ROW) as CumSrome
from #t
There is a much faster CTE implementation available in this excellent post:
http://weblogs.sqlteam.com/mladenp/archive/2009/07/28/SQL-Server-2005-Fast-Running-Totals.aspx
The problem in this thread can be expressed like this:
DECLARE #RT INT
SELECT #RT = 0
;
WITH abcd
AS ( SELECT TOP 100 percent
id
,SomeNumt
,MySum
order by id
)
update abcd
set #RT = MySum = #RT + SomeNumt
output inserted.*
For Ex: IF you have a table with two columns one is ID and second is number and wants to find out the cumulative sum.
SELECT ID,Number,SUM(Number)OVER(ORDER BY ID) FROM T
Once the table is created -
select
A.id, A.SomeNumt, SUM(B.SomeNumt) as sum
from #t A, #t B where A.id >= B.id
group by A.id, A.SomeNumt
order by A.id
The SQL solution wich combines "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW" and "SUM" did exactly what i wanted to achieve.
Thank you so much!
If it can help anyone, here was my case. I wanted to cumulate +1 in a column whenever a maker is found as "Some Maker" (example). If not, no increment but show previous increment result.
So this piece of SQL:
SUM( CASE [rmaker] WHEN 'Some Maker' THEN 1 ELSE 0 END)
OVER
(PARTITION BY UserID ORDER BY UserID,[rrank] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Cumul_CNT
Allowed me to get something like this:
User 1 Rank1 MakerA 0
User 1 Rank2 MakerB 0
User 1 Rank3 Some Maker 1
User 1 Rank4 Some Maker 2
User 1 Rank5 MakerC 2
User 1 Rank6 Some Maker 3
User 2 Rank1 MakerA 0
User 2 Rank2 SomeMaker 1
Explanation of above: It starts the count of "some maker" with 0, Some Maker is found and we do +1. For User 1, MakerC is found so we dont do +1 but instead vertical count of Some Maker is stuck to 2 until next row.
Partitioning is by User so when we change user, cumulative count is back to zero.
I am at work, I dont want any merit on this answer, just say thank you and show my example in case someone is in the same situation. I was trying to combine SUM and PARTITION but the amazing syntax "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW" completed the task.
Thanks!
Groaker
Above (Pre-SQL12) we see examples like this:-
SELECT
T1.id, SUM(T2.id) AS CumSum
FROM
#TMP T1
JOIN #TMP T2 ON T2.id < = T1.id
GROUP BY
T1.id
More efficient...
SELECT
T1.id, SUM(T2.id) + T1.id AS CumSum
FROM
#TMP T1
JOIN #TMP T2 ON T2.id < T1.id
GROUP BY
T1.id
Try this
select
t.id,
t.SomeNumt,
sum(t.SomeNumt) Over (Order by t.id asc Rows Between Unbounded Preceding and Current Row) as cum
from
#t t
group by
t.id,
t.SomeNumt
order by
t.id asc;
Try this:
CREATE TABLE #t(
[name] varchar NULL,
[val] [int] NULL,
[ID] [int] NULL
) ON [PRIMARY]
insert into #t (id,name,val) values
(1,'A',10), (2,'B',20), (3,'C',30)
select t1.id, t1.val, SUM(t2.val) as cumSum
from #t t1 inner join #t t2 on t1.id >= t2.id
group by t1.id, t1.val order by t1.id
Without using any type of JOIN cumulative salary for a person fetch by using follow query:
SELECT * , (
SELECT SUM( salary )
FROM `abc` AS table1
WHERE table1.ID <= `abc`.ID
AND table1.name = `abc`.Name
) AS cum
FROM `abc`
ORDER BY Name

Select non existing Numbers from Table each ID

I‘m new in learning TSQL and I‘m struggling getting the numbers that doesn‘t exist in my table each ID.
Example:
CustomerID Group
1 1
3 1
6 1
4 2
7 2
I wanna get the ID which does not exist and select them like this
CustomerID Group
2 1
4 1
5 1
5 2
6 2
....
..
The solution by usin a cte doesn‘t work well or inserting first the data and do a not exist where clause.
Any Ideas?
If you can live with ranges rather than a list with each one, then an efficient method uses lead():
select group_id, (customer_id + 1) as first_missing_customer_id,
(next_ci - 1) as last_missing_customer_id
from (select t.*,
lead(customer_id) over (partition by group_id order by customer_id) as next_ci
from t
) t
where next_ci <> customer_id + 1
Cross join 2 recursive CTEs to get all the possible combinations of [CustomerID] and [Group] and then LEFT join to the table:
declare #c int = (select max([CustomerID]) from tablename);
declare #g int = (select max([Group]) from tablename);
with
customers as (
select 1 as cust
union all
select cust + 1
from customers where cust < #c
),
groups as (
select 1 as gr
union all
select gr + 1
from groups where gr < #g
),
cte as (
select *
from customers cross join groups
)
select c.cust as [CustomerID], c.gr as [Group]
from cte c left join tablename t
on t.[CustomerID] = c.cust and t.[Group] = c.gr
where t.[CustomerID] is null
and c.cust > (select min([CustomerID]) from tablename where [Group] = c.gr)
and c.cust < (select max([CustomerID]) from tablename where [Group] = c.gr)
See the demo.
Results:
> CustomerID | Group
> ---------: | ----:
> 2 | 1
> 4 | 1
> 5 | 1
> 5 | 2
> 6 | 2

Update sort Column linked with group

How can i change the position of one row to change the order
Best to explain with example
I have following table with statuses
Id Name StatusOrder StatusGroup
1 Open 1 1
2 Start 2 1
3 Load 3 1
4 Close 4 1
5 Begin 1 2
6 Open 2 2
7 Close 3 2
I would like to Switch from group one only Status order 2 with 3.
The jump can be more than one row, ex. its also possible that within the same group the order from open moves to status order 3
Sow when i do following select
SELECT * FROM Status WHERE (StatusGroup =1)
Result Set:
Id Name StatusOrder StatusGroup
1 Open 1 1
3 Load 2 1
2 Start 3 1
4 Close 4 1
5 Begin 1 2
6 Open 2 2
7 Close 3 2
I already found example with following article but i do not succeed in it to intgrate that only for one group the order changes
Using a sort order column in a database table
How Can help me?
If correctly understood, here you go:
QUERY
create table #t
(
Id INT,
Name VARCHAR(20),
StatusOrder INT,
StatusGroup INT
)
insert into #t values
(1 ,'Open', 1 , 1),
(2 ,'Start', 2 , 1),
(3 ,'Load', 3 , 1),
(4 ,'Close', 4 , 1),
(5 ,'Begin', 1 , 2),
(6 ,'Open', 2 , 2),
(7 ,'Close', 3 , 2)
;with cte as (
select *, row_number() over(partition by StatusGroup order by Id) rn
from #t
)
select case when StatusOrder = 2 then 3 when StatusOrder = 3 then 2 else Id end as Id,
case when StatusOrder = 2 then 'Load' when StatusOrder = 3 then 'Start' else Name end as Name,
StatusOrder,
StatusGroup
from cte
where rn = id
union all
select Id, Name, StatusOrder, StatusGroup
from cte
where rn <> id
drop table #t
OUTPUT
Id Name StatusOrder StatusGroup
1 Open 1 1
3 Load 2 1
2 Start 3 1
4 Close 4 1
5 Begin 1 2
6 Open 2 2
7 Close 3 2
UPDATE
So if you have table where you need update records you can do something like:
;with cte as (
select *, row_number() over(partition by StatusGroup order by Id) rn
from #t
)
update t
set t.Id = (case when cte.StatusOrder = 2 then 3
when cte.StatusOrder = 3 then 2 else t.Id end),
t.Name = (case when cte.StatusOrder = 2 then 'Load'
when cte.StatusOrder = 3 then 'Start' else t.Name end)
from cte
join #t t on cte.id = t.id
where cte.rn = cte.id

SQL to get next not null value in column

How can I get next not null value in column? I have MSSQL 2012 and table with only one column. Like this:
rownum Orig
------ ----
1 NULL
2 NULL
3 9
4 NULL
5 7
6 4
7 NULL
8 9
and I need this data:
Rownum Orig New
------ ---- ----
1 NULL 9
2 NULL 9
3 9 9
4 NULL 7
5 7 7
6 4 4
7 NULL 5
8 9 5
Code to start:
declare #t table (rownum int, orig int);
insert into #t values (1,NULL),(2,NULL),(3,9),(4,NULL),(5,7),(6,4),(7,NULL),(8,9);
select rownum, orig from #t;
One method is to use outer apply:
select t.*, t2.orig as newval
from #t t outer apply
(select top 1 t2.*
from #t t2
where t2.id >= t.id and t2.orig is not null
order by t2.id
) t2;
One way you can do this with window functions (in SQL Server 2012+) is to use a cumulative max on id, in inverse order:
select t.*, max(orig) over (partition by nextid) as newval
from (select t.*,
min(case when orig is not null then id end) over (order by id desc) as nextid
from #t
) t;
The subquery gets the value of the next non-NULL id. The outer query then spreads the orig value over all the rows with the same id (remember, in a group of rows with the same nextid, only one will have a non-NULL value for orig).

SQL: Update a field looping on unique values of another field

I've a table with two fileds. I want to update ID_ELEM with a counter for each unique value of ID_TASS.
This should be the correct output table:
ID_TASS, ID_ELEM
1 , POL_1
1 , POL_2
1 , POL_3
2 , POL_1
2 , POL_2
3 , POL_1
4 , POL_1
4 , POL_2
4 , POL_3
4 , POL_4
Please, any help is welcome! Thanks
I'm at all sure if it will work:
UPDATE tbl
SET ID_ELEM = rn
FROM
( SELECT ctid AS id
, ROW_NUMBER() OVER (PARTITION BY ID_TASS) AS rn
FROM tbl
) AS tmp
WHERE tbl.ctid = tmp.ctid
or maybe this:
UPDATE tbl
SET ID_ELEM = ROW_NUMBER() OVER (PARTITION BY ID_TASS)
very ugly but works on mysql
SET #rank:=0;
update t1, (select id_tass, count(*) as c
from t1
group by id_tass) T
set
t1.t1.id_elem = case
when #rank >= T.c then
#rank:=0
else #rank
end,
t1.id_elem = #rank:=#rank+1
where t1.id_tass = T.id_tass
If you want;
ID_TASS ID_ELEM
1 3
1 3
2 2
2 2
3 1
4 4
4 4
4 4
4 4
1 3
You can (t-sql);
update tbl
set ID_ELEM = (select COUNT(ID_TASS) from tbl where tbl.ID_TASS = T.ID_TASS)
from tbl T