Using a custom aggregate function in a GROUP BY?

Using a custom aggregate function in a GROUP BY? - sql

I have a simple MEDIAN calculation function:
IF OBJECT_ID(N'COMPUTEMEDIAN', N'FN') IS NOT NULL
DROP FUNCTION dbo.COMPUTEMEDIAN;
GO
CREATE FUNCTION dbo.COMPUTEMEDIAN(#VALUES NVARCHAR(MAX))
RETURNS DECIMAL
WITH EXECUTE AS CALLER
AS
BEGIN
DECLARE #SQL NVARCHAR(MAX)
DECLARE #MEDIAN DECIMAL
SET #MEDIAN = 0.0;
DECLARE #MEDIAN_TEMP TABLE (RawValue DECIMAL);
-- This is the Killer!
INSERT INTO #MEDIAN_TEMP
SELECT s FROM master.dbo.Split(',', #VALUES) OPTION(MAXRECURSION 0)
SELECT #MEDIAN =
(
(SELECT MAX(RawValue) FROM
(SELECT TOP 50 PERCENT RawValue FROM #MEDIAN_TEMP ORDER BY RawValue) AS BottomHalf)
+
(SELECT MIN(RawValue) FROM
(SELECT TOP 50 PERCENT RawValue FROM #MEDIAN_TEMP ORDER BY RawValue DESC) AS TopHalf)
) / 2
--PRINT #SQL
RETURN #MEDIAN;
END;
GO
However, my table is of the following form:
CREATE TABLE #TEMP (GroupName VARCHAR(MAX), Value DECIMAL)
INSERT INTO #TEMP VALUES ('A', 1.0)
INSERT INTO #TEMP VALUES ('A', 2.0)
INSERT INTO #TEMP VALUES ('A', 3.0)
INSERT INTO #TEMP VALUES ('A', 4.0)
INSERT INTO #TEMP VALUES ('B', 10.0)
INSERT INTO #TEMP VALUES ('B', 11.0)
INSERT INTO #TEMP VALUES ('B', 12.0)
SELECT * FROM #TEMP
DROP TABLE #TEMP
What is the best way to invoke the MEDIAN function on this table using a GROUP BY on the id column? So, I am looking for something like this:
SELECT id, COMPUTEMEDIAN(Values)
FROM #TEMP
GROUP BY id
My current approach involves using XMLPATH to combine all values resulting from a GROUP BY operation into a large string and then passing it to the function but this involves the String splitting operation and for large strings this just slows down everything. Any suggestions?

Since you're using SQL Server 2008, I would suggest writing the aggregate function as a CLR function.
http://msdn.microsoft.com/en-us/library/91e6taax(v=vs.80).aspx
Also, people have asked this question before. Perhaps their answers would be helpful
Function to Calculate Median in Sql Server

EDIT: I can confirm this works very very well on a large database (30,000 values)
Hmm... Just came across this so the following works perfectly fine but am not sure how expensive it can turn out to be:
SELECT
GroupName,
AVG(Value)
FROM
(
SELECT
GroupName,
cast(Value as decimal(5,2)) Value,
ROW_NUMBER() OVER (
PARTITION BY GroupName
ORDER BY Value ASC) AS RowAsc,
ROW_NUMBER() OVER (
PARTITION BY GroupName
ORDER BY Value DESC) AS RowDesc
FROM #TEMP SOH
) x
WHERE
RowAsc IN (RowDesc, RowDesc - 1, RowDesc + 1)
GROUP BY GroupName
ORDER BY GroupName;

No need to use a user defined function! Here's how I would do it:
CREATE TABLE #TEMP (id VARCHAR(MAX), Value DECIMAL)
INSERT INTO #TEMP VALUES('A', 1.0)
INSERT INTO #TEMP VALUES('A', 2.0)
INSERT INTO #TEMP VALUES('A', 3.0)
INSERT INTO #TEMP VALUES('A', 4.0)
INSERT INTO #TEMP VALUES('B', 10.0)
INSERT INTO #TEMP VALUES('B', 11.0)
INSERT INTO #TEMP VALUES('B', 12.0)
SELECT
(SELECT TOP 1 Value
FROM (SELECT TOP(calcs.medianIndex) Value
FROM #temp
WHERE #temp.ID = calcs.ID ORDER BY Value ASC) AS subSet
ORDER BY subSet.Value DESC), ID
FROM
(SELECT
CASE WHEN count(*) % 2 = 1 THEN count(*)/2 + 1
ELSE count(*)/2
END AS medianIndex,
ID
FROM #TEMP
GROUP BY ID) AS calcs
DROP TABLE #TEMP
Might want to double check the behavior when there is an even number of records.
EDIT: After reviewing your work in your Median function, I realize that my answer basically just moved your work out of the function and into your regular query. So... why does your median calculation have to be inside of the user-defined function? It seems alot
more difficult that way.

Related

Sql Server While Loop with Changing Condition

I have a User Table in my database that contains two fields
user_id
manager_id
I am trying to construct a query to list all of the manager_ids that are associated with a user_id in a hierarchical structure.
So if i give a user_id, i will get that users manager, followed by that persons manager all the way to the very top.
So far i have tried but it doesnt give what i need:
WITH cte(user_id, manager_id) as (
SELECT user_id, manager_id
FROM user
WHERE manager_id=#userid
UNION ALL
SELECT u.user_id, u.manager_id,
FROM user u
INNER JOIN cte c on e.manager_id = c.employee_id
)
INSERT INTO #tbl (manager_id)
select user_id, manager_id from cte;
If anyone can point me in the right direction that would be great.
I thought about a While loop but this may not be very efficient and im not too sure how to implement that.

OP asked for a while loop, and while (ha, pun) this may not be the best way... Ask and you shall receive. (:
Here is sample data I created (in the future, please provide this):
CREATE TABLE #temp (userID int, managerID int)
INSERT INTO #temp VALUES (1, 3)
INSERT INTO #temp VALUES (2, 3)
INSERT INTO #temp VALUES (3, 7)
INSERT INTO #temp VALUES (4, 6)
INSERT INTO #temp VALUES (5, 7)
INSERT INTO #temp VALUES (6, 9)
INSERT INTO #temp VALUES (7, 10)
INSERT INTO #temp VALUES (8, 10)
INSERT INTO #temp VALUES (9, 10)
INSERT INTO #temp VALUES (10, 12)
INSERT INTO #temp VALUES (11, 12)
INSERT INTO #temp VALUES (12, NULL)
While Loop:
CREATE TABLE #results (userID INT, managerID INT)
DECLARE #currentUser INT = 1 -- Would be your parameter!
DECLARE #maxUser INT
DECLARE #userManager INT
SELECT #maxUser = MAX(userID) FROM #temp
WHILE #currentUser <= #maxUser
BEGIN
SELECT #userManager = managerID FROM #temp WHERE userID = #currentUser
INSERT INTO #results VALUES (#currentUser, #userManager)
SET #currentUser = #userManager
END
SELECT * FROM #results
DROP TABLE #temp
DROP TABLE #results

Get rid of this column list in your CTE declaration that has nothing to do with the columns you are actually selecting in the CTE:
WITH cte(employee_id, name, reports_to_emp_no, job_number) as (
Just make it this:
WITH cte as (

I recommend recursive solution:
WITH Parent AS
(
SELECT * FROM user WHERE user_id=#userId
UNION ALL
SELECT T.* FROM user T
JOIN Parent P ON P.manager_id=T.user_id
)
SELECT * FROM Parent
To see demo, run following:
SELECT * INTO #t FROM (VALUES (1,NULL),(2,1),(3,2),(4,1)) T(user_id,manager_id);
DECLARE #userId int = 3;
WITH Parent AS
(
SELECT * FROM #t WHERE user_id=#userId
UNION ALL
SELECT T.* FROM #t T
JOIN Parent P ON P.manager_id=T.user_id
)
SELECT * FROM Parent

Deleting records that are similar with previous one SQL Server

I am looking for a query which fetches me the data that is different compared to the previous row,
A sample code (with table creation and data)
create table #temp
(id int, eid int, name char(10),estid int, ecid int, epid int, etc char(5) )
insert into #temp values (1,1,'a',1,1,1,'a')
insert into #temp values (2,1,'a',1,1,1,'a')
insert into #temp values (3,1,'a',2,1,1,'a')
insert into #temp values (4,1,'a',1,1,1,'a')
insert into #temp values (5,1,'a',1,1,1,'a')
insert into #temp values (6,1,'a',1,2,1,'a')
insert into #temp values (7,1,'a',1,1,1,'a')
insert into #temp values (8,1,'a',2,1,1,'a')
insert into #temp values (9,1,'a',1,1,1,'a')
insert into #temp values (10,1,'a',1,1,1,'a')
insert into #temp values (11,2,'a',1,1,1,'a')
insert into #temp values (12,2,'a',1,1,1,'a')
insert into #temp values (13,2,'a',2,1,1,'a')
insert into #temp values (14,2,'a',1,1,1,'a')
insert into #temp values (15,2,'a',1,1,1,'a')
insert into #temp values (16,2,'a',1,2,1,'a')
insert into #temp values (17,2,'a',1,1,1,'a')
insert into #temp values (18,2,'a',2,1,1,'a')
insert into #temp values (19,2,'a',1,1,1,'a')
insert into #temp values (20,2,'a',1,1,1,'a')
I tried with some ways of getting the data as the way that i expected
SELECT * INTo #Temp_Final
FROM #temp
WHERE #temp.%%physloc%%
NOT IN (SELECT Min(b.%%physloc%%)
FROM #temp b
GROUP BY eid,name,estid,ecid,epid,etc)
ORDER BY id
SELECT * FROM #temp WHERE id not in (SELECT id FROM #Temp_Final) ORDER BY id
But i wasn't getting the result as i expected...
This is how the result needs to be
select * from #temp where id in (1,3,4,6,7,8,9,11,13,14,16,17,18,19)

You can do this with a simple self-join and appropriate comparison:
select t.*
from #temp t left outer join
#temp tprev
on t.id = tprev.id + 1
where tprev.id is null or
t.name <> tprev.name or
t.estid <> tprev.estid or
t.ecid <> tprev.ecid or
t.epid <> tprev.epid or
t.etc <> tprev.etc;
This assumes that the ids are sequential with no gaps. If the ids are not, you can get the previous id using a correlated subquery or the lag() function.
Your title says "delete" but the question seems to just want the list of such rows. You can phrase this as a delete query if you need to.

For SQL Server 2012 (SQL Fiddle)
WITH CTE
AS (SELECT *,
LAG(eid) OVER (ORDER BY id) AS prev_eid,
LAG(name) OVER (ORDER BY id) AS prev_name,
LAG(estid) OVER (ORDER BY id) AS prev_estid,
LAG(ecid) OVER (ORDER BY id) AS prev_ecid,
LAG(epid) OVER (ORDER BY id) AS prev_epid,
LAG(etc) OVER (ORDER BY id) AS prev_etc
FROM #temp)
DELETE FROM CTE
WHERE EXISTS (SELECT eid,
name,
estid,
ecid,
epid,
etc
INTERSECT
SELECT prev_eid,
prev_name,
prev_estid,
prev_ecid,
prev_epid,
prev_etc)

select
t.id,
t.eid,
t.name,
t.estid,
t.ecid,
t.epid,
t.etc
from #temp t
left join #temp d
on d.id = t.id-1
and d.eid = t.eid
and d.name = t.name
and d.estid = t.estid
and d.ecid = t.ecid
and d.epid = t.epid
and d.etc = t.etc
where d.id is null

Any standard SQL expression to sort results base on IN expression [duplicate]

This question already has answers here:
Closed 11 years ago.
Possible Duplicate:
SQL - order by list order
Is there any standard SQL expression to sort results exactly base on IN expression. For example to return the results of the following query so that 2, 4, 6, 8 are returned serially?
SELECT * FROM SOMETABLE WHERE ID IN (2, 4, 6, 8)

The closest thing I can think of is doing a JOIN instead of an IN to a table with the original order with their ordinal rank
SELECT * FROM SOMETABLE INNER JOIN SOMETABLE2 ... etc
ORDER BY SOMETABLE2.original

If you have full controlled over your SQL rendering, then use a CASE expression:
ORDER BY CASE ID
-- render WHEN clauses in the desired order
WHEN 2 THEN 1
WHEN 4 THEN 2
WHEN 6 THEN 3
WHEN 8 THEN 4
END

Assuming you can pass the ids as a fixed delimited string, you can do the following :
-- Populate a number table
DECLARE #Temp Table(Number int)
INSERT INTO #Temp VALUES(1)
INSERT INTO #Temp VALUES(2)
INSERT INTO #Temp VALUES(3)
INSERT INTO #Temp VALUES(4)
INSERT INTO #Temp VALUES(5)
INSERT INTO #Temp VALUES(6)
INSERT INTO #Temp VALUES(7)
INSERT INTO #Temp VALUES(8)
INSERT INTO #Temp VALUES(9)
INSERT INTO #Temp VALUES(10)
SELECT TOP 8000 Number = IDENTITY(int,1,1) INTO [dbo].Numberos FROM #TEMP t1, #TEMP t2, #TEMP t3, #TEMP t4
ALTER TABLE [dbo].[Numbers] ADD CONSTRAINT [PK_Numbers] PRIMARY KEY CLUSTERED ([Number])
-- This function splits a fixed delimited string into a table object
CREATE FUNCTION [dbo].[fixstring_single](#str text, #itemlen tinyint)
RETURNS TABLE
AS
RETURN (SELECT listpos = n.Number,
str = substring(#str, (#itemlen + 1) * (n.Number - 1) + 1, #itemlen)
FROM Numbers n
WHERE n.Number <= (datalength(#str)+1) / (#itemlen+1))
DECLARE #ids varchar(100);
SET #ids = '00002 00001 00004';
-- Join to the number table ordered by the posisiton in the ids string
SELECT * FROM TestT INNER JOIN fixstring_single(#ids, 5) S ON TestT.ID = S.Str ORDER BY S.listpos

SQL Server Simple Group by query

I have a simple problem , Although i believe its simple , am not able to figure out the same.
Consider i have the below table with exactly same data as given below :
CREATE TABLE #temp
(
link varchar(255),
number INT,
fname varchar(255)
)
insert into #temp VALUES ('abc',1,'f1')
insert into #temp VALUES ('abc',2,'f2')
insert into #temp VALUES ('abc',3,'f3')
insert into #temp VALUES ('abc',4,'f6')
insert into #temp VALUES ('abc',10,'f100')
insert into #temp VALUES ('abe',-1,'f0')
insert into #temp VALUES ('abe',1,'f1')
insert into #temp VALUES ('abe',2,'f2')
insert into #temp VALUES ('abe',3,'f3')
insert into #temp VALUES ('abe',4,'f6')
insert into #temp VALUES ('abe',20,'f200')
insert into #temp VALUES ('cbe',-1,'f0')
insert into #temp VALUES ('cbe',1,'f1')
insert into #temp VALUES ('cbe',2,'f2')
insert into #temp VALUES ('cbe',3,'f3')
Now for a given link , i need to get the max 'number' and the corresponding 'fname' which has the max 'number' for the given 'link'.
1)Ex : if link is 'abc' , output should be
abc, 10, f100
2)Ex : if link if 'abe' , Output should be
abe, 20, f200
3)Now link can be also given as a pattern , like (link like 'ab%') , so output should be
abc, 10, f100
abe, 20, f200
4)if (link like 'cb%') , so output should be
cbe, 3, f3
Any help in writing this group by query. I have a solution using CAST and string concat like below , but that seems to be in-efficient.
select link,number,fname from #temp
where link like 'ab%' and link+'_'+CAST(number AS varchar(255))
in (select link+'_'+CAST(MAX(number) AS varchar(255)) from #temp
group by link)
Thanks..

Using a self join:
SELECT x.link,
x.number,
x.fname
FROM #temp x
JOIN (SELECT t.link,
MAX(t.number) AS max_number
FROM #temp t
GROUP BY t.link) y ON y.link = x.link
AND y.max_number = x.number
Using a CTE and ROW_NUMBER (SQL Server 2005+):
WITH cte AS (
SELECT x.link,
x.number,
x.fname,
ROW_NUMBER() OVER(PARTITION BY x.link
ORDER BY x.number DESC) rank
FROM #temp x)
SELECT c.link,
c.number,
c.fname
FROM cte c
WHERE c.rank = 1

Select records with order of IN clause

I have
SELECT * FROM Table1 WHERE Col1 IN(4,2,6)
I want to select and return the records with the specified order which i indicate in the IN clause
(first display record with Col1=4, Col1=2, ...)
I can use
SELECT * FROM Table1 WHERE Col1 = 4
UNION ALL
SELECT * FROM Table1 WHERE Col1 = 6 , .....
but I don't want to use that, cause I want to use it as a stored procedure and not auto generated.

I know it's a bit late but the best way would be
SELECT *
FROM Table1
WHERE Col1 IN( 4, 2, 6 )
ORDER BY CHARINDEX(CAST(Col1 AS VARCHAR), '4,2,67')
Or
SELECT CHARINDEX(CAST(Col1 AS VARCHAR), '4,2,67')s_order,
*
FROM Table1
WHERE Col1 IN( 4, 2, 6 )
ORDER BY s_order

You have a couple of options. Simplest may be to put the IN parameters (they are parameters, right) in a separate table in the order you receive them, and ORDER BY that table.

The solution is along this line:
SELECT * FROM Table1
WHERE Col1 IN(4,2,6)
ORDER BY
CASE Col1
WHEN 4 THEN 1
WHEN 2 THEN 2
WHEN 6 THEN 3
END

select top 0 0 'in', 0 'order' into #i
insert into #i values(4,1)
insert into #i values(2,2)
insert into #i values(6,3)
select t.* from Table1 t inner join #i i on t.[in]=t.[col1] order by i.[order]

Replace the IN values with a table, including a column for sort order to used in the query (and be sure to expose the sort order to the calling application):
WITH OtherTable (Col1, sort_seq)
AS
(
SELECT Col1, sort_seq
FROM (
VALUES (4, 1),
(2, 2),
(6, 3)
) AS OtherTable (Col1, sort_seq)
)
SELECT T1.Col1, O1.sort_seq
FROM Table1 AS T1
INNER JOIN OtherTable AS O1
ON T1.Col1 = O1.Col1
ORDER
BY sort_seq;
In your stored proc, rather than a CTE, split the values into table (a scratch base table, temp table, function that returns a table, etc) with the sort column populated as appropriate.

I have found another solution. It's similar to the answer from onedaywhen, but it's a little shorter.
SELECT sort.n, Table1.Col1
FROM (VALUES (4), (2), (6)) AS sort(n)
JOIN Table1
ON Table1.Col1 = sort.n

I am thinking about this problem two different ways because I can't decide if this is a programming problem or a data architecture problem. Check out the code below incorporating "famous" TV animals. Let's say that we are tracking dolphins, horses, bears, dogs and orangutans. We want to return only the horses, bears, and dogs in our query and we want bears to sort ahead of horses to sort ahead of dogs. I have a personal preference to look at this as an architecture problem, but can wrap my head around looking at it as a programming problem. Let me know if you have questions.
CREATE TABLE #AnimalType (
AnimalTypeId INT NOT NULL PRIMARY KEY
, AnimalType VARCHAR(50) NOT NULL
, SortOrder INT NOT NULL)
INSERT INTO #AnimalType VALUES (1,'Dolphin',5)
INSERT INTO #AnimalType VALUES (2,'Horse',2)
INSERT INTO #AnimalType VALUES (3,'Bear',1)
INSERT INTO #AnimalType VALUES (4,'Dog',4)
INSERT INTO #AnimalType VALUES (5,'Orangutan',3)
CREATE TABLE #Actor (
ActorId INT NOT NULL PRIMARY KEY
, ActorName VARCHAR(50) NOT NULL
, AnimalTypeId INT NOT NULL)
INSERT INTO #Actor VALUES (1,'Benji',4)
INSERT INTO #Actor VALUES (2,'Lassie',4)
INSERT INTO #Actor VALUES (3,'Rin Tin Tin',4)
INSERT INTO #Actor VALUES (4,'Gentle Ben',3)
INSERT INTO #Actor VALUES (5,'Trigger',2)
INSERT INTO #Actor VALUES (6,'Flipper',1)
INSERT INTO #Actor VALUES (7,'CJ',5)
INSERT INTO #Actor VALUES (8,'Mr. Ed',2)
INSERT INTO #Actor VALUES (9,'Tiger',4)
/* If you believe this is a programming problem then this code works */
SELECT *
FROM #Actor a
WHERE a.AnimalTypeId IN (2,3,4)
ORDER BY case when a.AnimalTypeId = 3 then 1
when a.AnimalTypeId = 2 then 2
when a.AnimalTypeId = 4 then 3 end
/* If you believe that this is a data architecture problem then this code works */
SELECT *
FROM #Actor a
JOIN #AnimalType at ON a.AnimalTypeId = at.AnimalTypeId
WHERE a.AnimalTypeId IN (2,3,4)
ORDER BY at.SortOrder
DROP TABLE #Actor
DROP TABLE #AnimalType

ORDER BY CHARINDEX(','+convert(varchar,status)+',' ,
',rejected,active,submitted,approved,')
Just put a comma before and after a string in which you are finding the substring index or you can say that second parameter.
And first parameter of CHARINDEX is also surrounded by , (comma).

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Using a custom aggregate function in a GROUP BY? - sql

Since you're using SQL Server 2008, I would suggest writing the aggregate function as a CLR function. http://msdn.microsoft.com/en-us/library/91e6taax(v=vs.80).aspx Also, people have asked this question before. Perhaps their answers would be helpful Function to Calculate Median in Sql Server

Related

Sql Server While Loop with Changing Condition

Deleting records that are similar with previous one SQL Server

Any standard SQL expression to sort results base on IN expression [duplicate]

SQL Server Simple Group by query

Select records with order of IN clause

Categories

Resources