SQL Server: issue with adapting Rank statement in Select - sql

I have a dynamic stored procedure that starts as follows, with the declaration of a temp table followed by an insert statement.
Can someone here tell me how I need to adapt the following line so that it creates a rank based on the groupCount (desc) instead of by Count?
When I just say groupCount instead of Count then it returns:
Invalid column name 'groupCount'
The line in question:
RANK() OVER(ORDER BY COUNT(*) desc, <sel>) [Rank],
My procedure (first part):
SET #sql = N' DECLARE #temp AS TABLE
(
ranking int,
item nvarchar(100),
totalCount int,
matchCount int,
groupCount int,
groupName nvarchar(100)
)
INSERT INTO #temp
(
ranking,
item,
totalCount,
matchCount,
groupCount,
groupName
)
SELECT RANK() OVER(ORDER BY COUNT(*) desc, <sel>) [Rank],
<sel>,
COUNT(*) AS totalCount,
SUM(CASE WHEN suggestedAction = recommendation THEN 1 ELSE 0 END) AS matchCount,
ROUND(100 * AVG(CASE WHEN suggestedAction = recommendation THEN 1.0 ELSE 0.0 END), 0) AS groupCount,
''currentMonth'' AS groupName
FROM LogEsc
WHERE dateEsc LIKE ''' + #date0 + '%''
AND EID LIKE ''PE%''
GROUP BY <sel>
ORDER BY groupCount desc, <sel>
-- ...
Many thanks in advance for any help with this, Tim.

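You can't reference a column alias such as groupCount inside the OVER clause of the same SELECT, because the alias is not visible there.
Repeat the full expression instead: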
-- Rank by the groupCount expression itself (descending), then by <sel> to break ties.
RANK() OVER(ORDER BY ROUND(100 * AVG(CASE WHEN suggestedAction = recommendation THEN 1.0 ELSE 0.0 END), 0) desc, <sel>) [Rank],

Related

Is there a way to separate query results in SQL Server Management Studio (SSMS)?

I have a simple query which returns results from an OrderLine table. Is there a way to visually separate the query results to make them easier to read, like in the image shown here?
SELECT [OrderNo], [LineNo]
FROM [OrderLine]
Results:
drop table if exists #OrderLine;
select object_id as OrderNo, abs(checksum(newid())) as [LineNo]
into #OrderLine
from sys.columns;
-- ... results to text (ctrl+T)?
select OrderNo, [LineNo],
case when lead(OrderNo, 1) over(partition by OrderNo order by OrderNo) = OrderNo then '' else replicate('-', 11) + char(10) end
from #OrderLine;
--inject NULL
select case when [LineNo] is null and flag=2 then null else TheOrderNo end as OrderNo, [LineNo]
from
(
select OrderNo AS TheOrderNo, [LineNo], 1 as flag
from #OrderLine
union all
select distinct OrderNo, NULL, 2
from #OrderLine
) as src
order by TheOrderNo, flag;
You could execute multiple queries like so:
-- Run one SELECT per distinct order so that each order appears as its own result set.
DECLARE @i int = 1
DECLARE @OrderNo int
-- Work list of the distinct order numbers; Idx numbers them 1..N so the loop can walk them.
DECLARE @OrderNos TABLE (
    Idx int PRIMARY KEY IDENTITY(1,1)
    , OrderNo int
)
INSERT @OrderNos (OrderNo)
SELECT DISTINCT [OrderNo] FROM [OrderLine]
-- MAX(Idx) is NULL when there are no orders, so the loop body is simply skipped.
WHILE (@i <= (SELECT MAX(Idx) FROM @OrderNos))
BEGIN
    SET @OrderNo = (SELECT [OrderNo] FROM @OrderNos WHERE [Idx] = @i)
    SELECT [OrderNo], [LineNo]
    FROM [OrderLine]
    WHERE [OrderNo] = @OrderNo
    SET @i = @i + 1
END
You cannot do that directly. Perhaps add a new column that alternates between NULL and a value from one order to the next.
Something like this:
-- dense_rank() numbers the distinct OrderNo values 1, 2, 3, ... without gaps, so its parity flips on every new order.
select ..., case when dense_rank() over (order by OrderNo) % 2 = 1 then 'XXX' else null end
from....
The % 2 is a 'modulo' operator...

Selecting data from table where sum of values in a column equal to the value in another column

Sample data:
create table #temp (id int, qty int, checkvalue int)
insert into #temp values (1,1,3)
insert into #temp values (2,2,3)
insert into #temp values (3,1,3)
insert into #temp values (4,1,3)
According to data above, I would like to show exact number of lines from top to bottom where sum(qty) = checkvalue. Note that checkvalue is same for all the records all the time. Regarding the sample data above, the desired output is:
Id Qty checkValue
1 1 3
2 2 3
Because 1+2=3 and no more data is needed to show. If checkvalue was 4, we would show the third record: Id:3 Qty:1 checkValue:4 as well.
This is the code I am using to handle this problem. It works well.
declare #checkValue int = (select top 1 checkvalue from #temp);
declare #counter int = 0, #sumValue int = 0;
while #sumValue < #checkValue
begin
set #counter = #counter + 1;
set #sumValue = #sumValue + (
select t.qty from
(
SELECT * FROM (
SELECT
ROW_NUMBER() OVER (ORDER BY id ASC) AS rownumber,
id,qty,checkvalue
FROM #temp
) AS foo
WHERE rownumber = #counter
) t
)
end
declare #sql nvarchar(255) = 'select top '+cast(#counter as varchar(5))+' * from #temp'
EXECUTE sp_executesql #sql, N'#counter int', #counter = #counter;
However, I am not sure if this is the best way to deal with it and wonder if there is a better approach. There are many professionals here and I'd like to hear from them about what they think about my approach and how we can improve it. Any advice would be appreciated!
Try this:
select id, qty, checkvalue from (
select t1.*,
sum(t1.qty) over (partition by t2.id) [sum]
from #temp [t1] join #temp [t2] on t1.id <= t2.id
) a where checkvalue = [sum]
Smart self-join is all you need :)
For SQL Server 2012, and onwards, you can easily achieve this using ROWS BETWEEN in your OVER clause and the use of a CTE:
-- Return the rows, in id order, up to and including the row on which the
-- running total of qty first reaches checkvalue.
;WITH Running AS (
    SELECT id,
           qty,
           checkvalue,
           -- Explicit ROWS frame: a row-by-row running total in id order.
           SUM(qty) OVER (ORDER BY id
                          ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS RunningQty
    FROM #temp
)
SELECT id, qty, checkvalue
FROM Running
-- RunningQty - qty is the total accumulated before this row. Keeping the row
-- while that is still short of checkvalue also keeps the row that crosses the
-- threshold when the total never lands on checkvalue exactly.
WHERE RunningQty - qty < checkvalue
ORDER BY id;
One basic improvement is to try & reduce the no. of iterations. You're incrementing by 1, but if you repurpose the logic behind binary searching, you'd get something close to this:
DECLARE #RoughAverage int = 1 -- Some arbitrary value. The closer it is to the real average, the faster things should be.
DECLARE #CheckValue int = (SELECT TOP 1 checkvalue FROM #temp)
DECLARE #Sum int = 0
WHILE 1 = 1 -- Refer to BREAK below.
BEGIN
SELECT TOP (#RoughAverage) #Sum = SUM(qty) OVER(ORDER BY id)
FROM #temp
ORDER BY id
IF #Sum = #CheckValue
BREAK -- Indicating you reached your objective.
ELSE
SET #RoughAverage = #CheckValue - #Sum -- Most likely incomplete like this.
END
For SQL 2008 you can use recursive cte. Top 1 with ties limits result with first combination. Remove it to see all combinations
-- Find the first run of consecutive rows (in id order) whose qty values add up
-- to exactly checkvalue, and return the rows of that run.
-- NOTE: recursion depth follows the run length; SQL Server stops at 100 levels
-- unless OPTION (MAXRECURSION n) is added.
with cte as (
    -- Number the rows so "the next row" can be addressed as rn + 1.
    select
        id, qty, checkvalue, rn = row_number() over (order by id)
    from
        #temp
)
, rcte as (
    -- Anchor: every row starts its own run; i remembers the id the run started at.
    select
        i = id, id, qty, sumV = qty, checkvalue, rn
    from
        cte
    union all
    -- Step: extend a run with the following row while its total is still short.
    -- a.i (not a.id) keeps the run's starting id, so all rows of one run share the same i.
    select
        a.i, b.id, b.qty, a.sumV + b.qty, a.checkvalue, b.rn
    from
        rcte a
        join cte b on a.rn + 1 = b.rn
    where
        a.sumV < b.checkvalue
)
select
    top 1 with ties id, qty, checkvalue
from (
    -- needed = 1 for every row of a run that hits checkvalue exactly at some point.
    select
        i, id, qty, checkvalue, needed = max(case when sumV = checkvalue then 1 else 0 end) over (partition by i)
    from
        rcte
) t
where
    needed = 1
-- WITH TIES on the dense rank of i keeps all rows of the earliest qualifying run.
order by dense_rank() over (order by i)

Select empty if previous cell value and current cell value is same in SQL Server

I have a situation to remove the repeating values from table based on the user name in a SQL Server stored procedure. My table look like shown in this screenshot:
I am expecting the output to be like:
Any help will be appreciated.
Use the LAG function, which is supported in SQL Server 2012 and later, as shown below. I have added an extra ID column to your table to act as a primary key.
-- Blank out a value when it repeats the value on the previous row of the same user.
-- Requires SQL Server 2012+ (LAG). ID is an extra column that gives the rows a stable order.
DECLARE @tblTest AS TABLE
(
    ID INT,
    [Date] DATETIME,
    [User] VARCHAR(50),
    SourceIp VARCHAR(50),
    Destination VARCHAR(50),
    Port INT,
    Duration VARCHAR(50)
)
INSERT INTO @tblTest (ID, [Date], [User], SourceIp, Destination, Port, Duration)
VALUES
    (1,'12/27/2017 17:22','Jackson','192.168.1.1','192.168.2.1',1500,'0:38:30'),
    (2,'12/27/2017 23:19','Jackson','192.168.1.20','192.168.2.1',1500,'0:38:30'),
    (3,'12/27/2017 16:39','Manu','192.168.1.10','192.168.2.2',256,'1:00:36'),
    (4,'12/27/2017 21:22','Soma','192.168.5.20','192.168.2.3',888,'1:01:22'),
    (5,'12/27/2017 21:22','Soma','192.168.5.20','192.168.2.4',4120,'1:01:22'),
    (6,'12/27/2017 21:22','Soma','192.168.5.20','192.168.2.5',22,'1:01:22'),
    (7,'12/27/2017 8:58','Vick','192.168.1.27','192.168.2.50',22,'0:05:51'),
    (8,'12/27/2017 15:48','Vick','192.168.1.27','192.168.2.50',22,'0:05:51')
-- Every LAG is partitioned by [User]: the first row of a user has no previous row
-- (LAG returns NULL), so its values are always shown even when they happen to equal
-- the last row of the previous user (e.g. Port 22 on IDs 6 and 7).
SELECT
    CASE WHEN [Date] = LAG([Date]) OVER (PARTITION BY [User] ORDER BY ID) THEN NULL ELSE [Date] END AS [Date],
    CASE WHEN [User] = LAG([User]) OVER (PARTITION BY [User] ORDER BY ID) THEN '' ELSE [User] END AS [User],
    CASE WHEN SourceIp = LAG(SourceIp) OVER (PARTITION BY [User] ORDER BY ID) THEN '' ELSE SourceIp END AS SourceIp,
    CASE WHEN Destination = LAG(Destination) OVER (PARTITION BY [User] ORDER BY ID) THEN '' ELSE Destination END AS Destination,
    CASE WHEN Port = LAG(Port) OVER (PARTITION BY [User] ORDER BY ID) THEN NULL ELSE Port END AS Port,
    CASE WHEN Duration = LAG(Duration) OVER (PARTITION BY [User] ORDER BY ID) THEN '' ELSE Duration END AS Duration
FROM @tblTest
-- Keep each user's rows adjacent and in ID order so the blanks line up under the value they repeat.
ORDER BY [User], ID
Output:
You would typically handle this in your presentation layer, but you may try the following query:
-- Show a user's details only on that user's first row (earliest Date) and blank them on the rest.
SELECT
    [Date],
    CASE WHEN rn = 1 THEN [User] ELSE '' END AS [User],
    CASE WHEN rn = 1 THEN [Source IP] ELSE '' END AS [Source IP],
    CASE WHEN rn = 1 THEN Destination ELSE '' END AS Destination,
    -- Assumes Port is a numeric column (TODO confirm): without the cast, '' would be
    -- converted to the numeric type and shown as 0 instead of a blank.
    CASE WHEN rn = 1 THEN CAST(Port AS varchar(20)) ELSE '' END AS Port,
    CASE WHEN rn = 1 THEN Duration ELSE '' END AS Duration
FROM (
    -- Number each user's rows once here instead of repeating the window in every CASE.
    -- [User] and [Date] are bracketed because USER is a reserved word in T-SQL.
    SELECT [Date], [User], [Source IP], Destination, Port, Duration,
           ROW_NUMBER() OVER (PARTITION BY [User] ORDER BY [Date]) AS rn
    FROM yourTable
) AS numbered
-- rn follows Date order within a user, so the populated row always comes first.
ORDER BY [User], rn;
Note: It is not clear from your sample output whether you expect the Source IP for all records or not. If not, then just modify my query.
This might be a viable option if your version of SQL Server does not support LAG.
DECLARE #TAB TABLE(ID INT,NAME VARCHAR(20),DTE DATETIME)
INSERT INTO #TAB
SELECT 1,'RAJIV',GETDATE()
UNION ALL
SELECT 2,'RAJIV',GETDATE()
UNION ALL
SELECT 3,'RAVI',GETDATE()
UNION ALL
SELECT 4,'RAVI',GETDATE()
UNION ALL
SELECT 5,'AJAY',GETDATE()
Select statement:
SELECT
A.ID
,CASE WHEN A.R = 1 THEN A.NAME ELSE '' END AS NAME
,A.DTE
FROM (
SELECT ROW_NUMBER() OVER(PARTITION BY NAME ORDER BY ID) AS R,*
FROM #TAB) A
ORDER BY A.ID
Output:
ID NAME DTE
1 RAJIV 2018-02-06 11:04:13.257
2 2018-02-06 11:04:13.257
3 RAVI 2018-02-06 11:04:13.257
4 2018-02-06 11:04:13.257
5 AJAY 2018-02-06 11:04:13.257

Getting Status Count for an item with many statuses

Lets say I have the following table variable:
DECLARE #DevicesAndStatuses TABLE (Id BIGINT,[Status] INT);
DECLARE #myId BIGINT;
SET #myId = 1;
Inside above table I can have thousands of Ids(Id can be repeated) and statuses ranging between 1-50. What is the most efficient way of getting the count of all the statuses for a particular Id?
The traditional method which I have is as follows:
SELECT
(SELECT COUNT(*) FROM #DevicesAndStatuses WHERE Id = #myId AND [Status] = 1) AS Status1,
(SELECT COUNT(*) FROM #DevicesAndStatuses WHERE Id = #myId AND [Status] = 2) AS Status2,
(SELECT COUNT(*) FROM #DevicesAndStatuses WHERE Id = #myId AND [Status] = 3) AS Status3,
(SELECT COUNT(*) FROM #DevicesAndStatuses WHERE Id = #myId AND [Status] = 4) AS Status4,
...
(SELECT COUNT(*) FROM #DevicesAndStatuses WHERE Id = #myId AND [Status] = 50) AS Status50,
FROM #DevicesAndStatuses WHERE Id = #myId
Is there potentially a better solution for getting the count of all the statuses [1-50] for a particular id?
Final result should be a single row containing 50 columns showing the count() of every status as Status1,Status2,...,Status50.*
My first suggestion is to use a group by:
SELECT status, count(*)
FROM #DevicesAndStatuses
WHERE Id = #myId
GROUP BY status;
The simplest way to get the information you want, but in multiple rows.
If you want multiple columns, then use conditional aggregation:
SELECT SUM(CASE WHEN [Status] = 1 THEN 1 ELSE 0 END) AS Status1,
SUM(CASE WHEN [Status] = 2 THEN 1 ELSE 0 END) AS Status2,
SUM(CASE WHEN [Status] = 3 THEN 1 ELSE 0 END) AS Status3,
SUM(CASE WHEN [Status] = 4 THEN 1 ELSE 0 END) AS Status4,
. . .
FROM #DevicesAndStatuses
WHERE Id = #myId
Sure:
SELECT Status, COUNT(*)
FROM #DevicesAndStatuses
WHERE Id = #myId
GROUP BY Status
This returns all Status values for Id = #myId, and their count - in one simple statement
You'll need to use a Dynamic Pivot Query to achieve this:
I've done it using a generic example but poke me if you need a more specific version. You'll need to use a Temp Table instead of a Table Variable though.
The STUFF command is there to remove the , from the beginning of the strings.
-- Dynamic pivot: builds and runs a query that returns one row with a
-- Status_<n> count column for every distinct Status found in #Items.
-- NOTE(review): #StatusList, #SumSelector and #StatusPivotQuery are used as scalar
-- variables; T-SQL variables are written with '@', so the '#' looks like an
-- extraction artefact — confirm before running. #Items is a genuine temp table.
-- Sample data: Item is an auto-numbered key, Status is the value being counted.
CREATE TABLE #Items
(
Item INT IDENTITY(1,1),
[Status] INT
)
INSERT #Items
(Status)
VALUES
(1),(1),(1),(1),(1),(1),(1),(2),(2),(2),(2),(3),(3),(4),(4),(4),(4),(4),(4),(4),(4),(5);
DECLARE #StatusList NVARCHAR(MAX) = N'',
#SumSelector NVARCHAR(MAX) = N''
-- Accumulate one entry per distinct Status into each list:
--   #StatusList  -> ,[1],[2],...                              (the pivot's IN list)
--   #SumSelector -> ,SUM([1]) AS Status_1,SUM([2]) AS ...     (the outer select list)
SELECT #StatusList = CONCAT(#StatusList, N',', QUOTENAME(s.Status)),
#SumSelector = CONCAT(#SumSelector, N',', N'SUM(', QUOTENAME(s.Status), N') AS Status_', s.Status)
FROM (SELECT DISTINCT [Status] FROM #Items) AS s
-- Strip the leading comma that the accumulation above leaves on both lists.
SELECT #StatusList = STUFF(#StatusList, 1, 1, N''),
#SumSelector = STUFF(#SumSelector, 1, 1, N'')
-- Assemble the statement text: PIVOT counts Status per listed value, and the
-- SUM(...) columns in the select list add those counts up.
DECLARE #StatusPivotQuery NVARCHAR(MAX) = CONCAT(N'
SELECT ', #SumSelector, N'
FROM #Items AS s
PIVOT
(
COUNT(s.[Status])
FOR s.[Status] IN(', #StatusList, N')
) AS pvt ')
-- Run the generated statement, then clean up the sample table.
EXEC sys.sp_executesql #StatusPivotQuery
DROP TABLE #Items
Here you go
SELECT MAX(id) AS Id, status, COUNT(*)
FROM #DevicesAndStatuses
WHERE Id = #myId
GROUP BY status;
or
SELECT id AS Id, status, COUNT(*)
FROM #DevicesAndStatuses
WHERE Id = #myId
GROUP BY id,status;

Should I use APPLY in this context

Code included is a simplified version of our situation; the production table equivalent to #MyExample has 20 fields all of which need medians calculating therefore the second part of the script becomes very long - not a huge hard-ship but is there a more compact solution?
I've no experience with APPLY or custom FUNCTIONs, but is this a situation where we should create a FUNCTION for the median and then use APPLY? I'm guessing not, as APPLY is applied to each row.
/*
DROP TABLE #MyExample
DROP TABLE #mediantable
*/
CREATE TABLE #MyExample
(
customer char(5),
amountPeriodA numeric(36,8),
amountPeriodB numeric(36,8),
amountPeriodC numeric(36,8)
)
INSERT INTO #MyExample
values
('a',10,20,30),
('b',5,10,15),
('c',500,100,150),
('d',5,1,1),
('e',5,1,15),
('f',5,10,150),
('g',5,100,1500)
SELECT
[Period] = 'amountPeriodA',
[Median] = AVG(x.amountPeriodA)
INTO #mediantable
FROM (
SELECT
r.customer,
r.amountPeriodA,
[RowASC] = ROW_NUMBER() OVER(ORDER BY r.amountPeriodA ASC, customer ASC),
[RowDESC] = ROW_NUMBER() OVER(ORDER BY r.amountPeriodA DESC, customer DESC)
FROM #MyExample r
) x
WHERE RowASC IN (RowDESC, ROWDESC-1, ROWDESC+1)
union
SELECT
[Period] = 'amountPeriodB',
[Median] = AVG(x.amountPeriodB)
FROM (
SELECT
r.customer,
r.amountPeriodB,
[RowASC] = ROW_NUMBER() OVER(ORDER BY r.amountPeriodB ASC, customer ASC),
[RowDESC] = ROW_NUMBER() OVER(ORDER BY r.amountPeriodB DESC, customer DESC)
FROM #MyExample r
) x
WHERE RowASC IN (RowDESC, ROWDESC-1, ROWDESC+1)
union
SELECT
[Period] = 'amountPeriodC',
[Median] = AVG(x.amountPeriodC)
FROM (
SELECT
r.customer,
r.amountPeriodC,
[RowASC] = ROW_NUMBER() OVER(ORDER BY r.amountPeriodC ASC, customer ASC),
[RowDESC] = ROW_NUMBER() OVER(ORDER BY r.amountPeriodC DESC, customer DESC)
FROM #MyExample r
) x
WHERE RowASC IN (RowDESC, ROWDESC-1, ROWDESC+1)
SELECT *
FROM #mediantable
Building on my previous reply I arrived on this which is a lot easier (and shorter) to expand for the number of columns and even runs a bit faster (probably a lot faster in case of 20+ columns!). However, it returns the results horizontally instead of vertically. This can be 'solved' again using UNPIVOT.
I've done the operation in 2 parts using an intermediate #result table; but you could easily do it in a single statement using a subquery or CTE.
DECLARE #rowcount int
DECLARE #first int
DECLARE #last int
DECLARE #divider numeric(36,8)
SELECT #rowcount = COUNT(*) FROM #MyExample
SELECT #first = (CASE WHEN #rowcount % 2 = 1 THEN (#rowcount + 1) / 2 ELSE (#rowcount / 2) END),
#last = (CASE WHEN #rowcount % 2 = 1 THEN (#rowcount + 1) / 2 ELSE (#rowcount / 2) + 1 END),
#divider = (CASE WHEN #rowcount % 2 = 1 THEN 1 ELSE 2 END)
SELECT amountPeriodA = SUM(amountPeriodA) / #divider,
amountPeriodB = SUM(amountPeriodB) / #divider,
amountPeriodC = SUM(amountPeriodC) / #divider
INTO #result
FROM
(
SELECT amountPeriodA = ((CASE WHEN ROW_NUMBER() OVER(ORDER BY amountPeriodA ASC, customer ASC) IN (#first, #last) THEN amountPeriodA ELSE 0.00 END)),
amountPeriodB = ((CASE WHEN ROW_NUMBER() OVER(ORDER BY amountPeriodB ASC, customer ASC) IN (#first, #last) THEN amountPeriodB ELSE 0.00 END)),
amountPeriodC = ((CASE WHEN ROW_NUMBER() OVER(ORDER BY amountPeriodC ASC, customer ASC) IN (#first, #last) THEN amountPeriodC ELSE 0.00 END))
FROM #MyExample
)t
and then
SELECT [Period], [Amount]
FROM #result as x
UNPIVOT ( [Amount] FOR Period IN (amountPeriodA, amountPeriodB, amountPeriodC)) As unpvt
I was thinking along the lines of :
-- Median per period column: number the rows of #MyExample once per column,
-- keep only the middle row(s), and average them.
-- NOTE(review): #rowcount, #first and #last are used as scalar variables; T-SQL
-- variables are written with '@', so the '#' looks like an extraction artefact —
-- confirm before running. #MyExample is a genuine temp table.
DECLARE #rowcount int
DECLARE #first int
DECLARE #last int
SELECT #rowcount = COUNT(*) FROM #MyExample
-- Positions of the middle row(s): for an odd row count both point at the single
-- middle row, (n + 1) / 2; for an even count they are the two rows n / 2 and n / 2 + 1.
SELECT #first = (CASE WHEN #rowcount % 2 = 1 THEN (#rowcount + 1) / 2 ELSE (#rowcount / 2) END),
#last = (CASE WHEN #rowcount % 2 = 1 THEN (#rowcount + 1) / 2 ELSE (#rowcount / 2) + 1 END)
-- One branch per amount column; each tags its rows with the column name as [Period]
-- and numbers them by ascending amount (customer breaks ties).
SELECT [Period],
[Median] = AVG(Amount)
FROM (SELECT [Period] = 'amountPeriodA',
Amount = amountPeriodA,
rownbr = ROW_NUMBER() OVER(ORDER BY amountPeriodA ASC, customer ASC)
FROM #MyExample
UNION ALL
SELECT [Period] = 'amountPeriodB',
Amount = amountPeriodB,
rownbr = ROW_NUMBER() OVER(ORDER BY amountPeriodB ASC, customer ASC)
FROM #MyExample
UNION ALL
SELECT [Period] = 'amountPeriodC',
Amount = amountPeriodC,
rownbr = ROW_NUMBER() OVER(ORDER BY amountPeriodC ASC, customer ASC)
FROM #MyExample
) r
-- Keep only the middle row(s) of each period; AVG over one or two rows gives the median.
WHERE rownbr IN (#first, #last)
GROUP BY [Period]
Which seems to work well, is a bit less typing and turns out to be a bit faster too.... but it's still 'big'.
PS: Use UNION ALL rather than UNION as otherwise the server will try to make the end-result into 'distinct' records which in this case is not needed. (Period makes it unique anyway !)