Percentile_Disc on SQL Server Group By error [duplicate] - sql

I am trying to use PERCENTILE_DISC with a number of other built in aggregate functions. The code I am attempting reads like this, but it fails:
-- Failing attempt: GROUP BY aggregates combined with a windowed PERCENTILE_DISC.
-- SQL Server rejects it with "Column 'Value' is invalid in the select list because
-- it is not contained in either an aggregate function or the GROUP BY clause."
SELECT
    [DataPoint],
    MIN([Value]) AS [Value MIN],
    MAX([Value]) AS [Value MAX],
    AVG([Value]) AS [Value AVG],
    PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY [Value])
        OVER (PARTITION BY [DataPoint]) AS MedianCont
FROM [Table]
WHERE [DataPoint] IN (
...
)
GROUP BY [DataPoint]
So this works...
-- Discrete median per DataPoint; DISTINCT collapses the per-row window results
-- down to one row per DataPoint.
SELECT DISTINCT
    [DataPoint],
    PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY [Value])
        OVER (PARTITION BY [DataPoint]) AS MedianCont
FROM [Table]
WHERE [DataPoint] IN (
...
)
And this works...
-- Plain grouped aggregates: one row per DataPoint.
SELECT
    [DataPoint],
    MIN([Value]) AS [Value MIN],
    MAX([Value]) AS [Value MAX],
    AVG([Value]) AS [Value AVG]
FROM [Table]
WHERE [DataPoint] IN (
...
)
GROUP BY [DataPoint]
But when I try to combine them, it wants me to declare Value in the GROUP BY clause, which I do not want, because I want a distinct list of DataPoints (one row per DataPoint), not one row per Value within each DataPoint.
Column 'Value' is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause.

It would appear that using the over clause, thus creating a 'window function', separates the aggregate function from the select statement.
-- Every aggregate is expressed as a window function over the same partition,
-- so no GROUP BY is needed; DISTINCT reduces the output to one row per DataPoint.
SELECT DISTINCT
    [DataPoint],
    MIN([Value]) OVER (PARTITION BY [DataPoint]) AS [Value MIN],
    MAX([Value]) OVER (PARTITION BY [DataPoint]) AS [Value MAX],
    AVG([Value]) OVER (PARTITION BY [DataPoint]) AS [Value AVG],
    PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY [Value])
        OVER (PARTITION BY [DataPoint]) AS MedianCont
FROM [Table]
WHERE [DataPoint] IN (
...
)

Related

Selecting records that appear several times in a row

I would like to select records that appear several times in a row (consecutively), together with the length of each run.
For example we have table like this:
x
x
x
y
y
x
x
y
Query should give answer like this:
x 3
y 2
x 2
y 1
SQL tables represent unordered sets. Your question only makes sense if there is a column that specifies the ordering. If so, you can use the difference-of-row-numbers to determine the groups and then aggregate:
-- Counts each run of consecutive equal col1 values.
-- seqnum numbers all rows; seqnum_2 numbers rows within each col1 value.
-- Their difference is constant inside one consecutive run, so it identifies the run.
-- Replace <ordering col> with the column that defines the row order.
select col1, count(*)
from (select t.*,
             row_number() over (order by <ordering col>) as seqnum,
             row_number() over (partition by col1 order by <ordering col>) as seqnum_2
      from t
     ) t
group by col1, (seqnum - seqnum_2)
-- Without a final ORDER BY the runs may come back in any order.
order by min(<ordering col>)
I made a SQL Fiddle
http://sqlfiddle.com/#!18/f8900/5
-- Demo table: [id] is an identity column and is used as the row order below.
CREATE TABLE [dbo].[SomeTable] (
    [data] [nchar](1) NULL,
    [id]   [int] IDENTITY(1,1) NOT NULL
);

-- Sample rows in the order given in the question.
INSERT INTO SomeTable ([data])
VALUES ('x'), ('x'), ('x'), ('y'), ('y'), ('x'), ('x'), ('y');

SELECT * FROM SomeTable;
-- Recursive CTE that walks each run of equal Data values row by row.
-- Relies on Id + 1 to reach the next row, i.e. it follows rows whose Id values
-- are consecutive.
WITH SomeTable_CTE (Data, total, BaseId, NextId) AS
(
    -- Anchor: rows that start a run (no row at Id - 1 with the same Data).
    SELECT
        cur.Data,
        1 AS total,
        cur.Id AS BaseId,
        cur.Id + 1 AS NextId
    FROM SomeTable AS cur
    WHERE NOT EXISTS (
        SELECT *
        FROM SomeTable AS prev
        WHERE prev.Id + 1 = cur.Id
          AND prev.Data = cur.Data
    )

    UNION ALL

    -- Recursive step: extend the run while the next Id carries the same Data.
    SELECT
        run.Data,
        run.total + 1,
        run.BaseId AS BaseId,
        nxt.Id + 1 AS NextId
    FROM SomeTable_CTE AS run
    INNER JOIN SomeTable AS nxt
        ON nxt.Data = run.Data
       AND nxt.Id = run.NextId
)
-- The largest total reached from each starting row is the run length.
SELECT Data, MAX(total) AS total
FROM SomeTable_CTE
GROUP BY Data, BaseId
ORDER BY BaseId
The elephant in the room is the missing column(s) to establish the order of rows.
-- Count rows per run of consecutive equal col1 values.
-- grp = overall row number minus per-col1 row number; it is constant within a run.
WITH numbered AS (
    SELECT
        col1,
        order_column,
        row_number() OVER (ORDER BY order_column)
          - row_number() OVER (PARTITION BY col1 ORDER BY order_column) AS grp
    FROM tbl
)
SELECT col1, count(*)
FROM numbered
GROUP BY col1, grp
ORDER BY min(order_column);
To exclude partitions with only a single row, add a HAVING clause:
-- Same run detection as before, keeping only runs of more than one row.
WITH numbered AS (
    SELECT
        col1,
        order_column,
        row_number() OVER (ORDER BY order_column)
          - row_number() OVER (PARTITION BY col1 ORDER BY order_column) AS grp
    FROM tbl
)
SELECT col1, count(*)
FROM numbered
GROUP BY col1, grp
HAVING count(*) > 1
ORDER BY min(order_column);
db<>fiddle here
Add a final ORDER BY to maintain original order (and a meaningful result). You may want to add a column like min(order_column) as well.
Related:
Find the longest streak of perfect scores per player
Select longest continuous sequence
Group by repeating attribute

SQL Server : find duplicate record by column value

I have a dataset like the one below. How can I find the duplicate DataSetIDs? For example, 201 and 401 are duplicate records.
Use PIVOT and ROW_Number
For Non Duplicates
FIDDLE DEMO
-- Keeps the rows of the first DateSetID (lowest id) for every distinct
-- (Name, Age, Gender) combination.
SELECT *
FROM Tbl
WHERE DateSetID IN (
    SELECT Tmp.DateSetID
    FROM (
        -- Pivot the ColumnA/ColumnB pairs into one row per DateSetID, then number
        -- the rows that share the same attribute values.
        SELECT
            DateSetID,
            [Name],
            [Age],
            [Gender],
            ROW_NUMBER() OVER (
                PARTITION BY [Name], [Age], [Gender]
                ORDER BY DateSetID
            ) AS RN
        FROM (SELECT * FROM Tbl) AS SourceTable
        PIVOT (
            MAX(ColumnB) FOR ColumnA IN ([Name], [Age], [Gender])
        ) AS PivotTable
    ) AS Tmp
    WHERE Tmp.RN = 1
);
For Duplicates alone
FIDDLE DEMO
-- Pivots Tbl to one row per DatasetID, numbers rows sharing the same
-- (Name, Age, Gender), keeps the repeats (RN > 1), unpivots them back to
-- ColumnA/ColumnB pairs and joins those pairs to Tbl.
-- NOTE(review): the join uses only ColumnA/ColumnB, not DatasetID, so any Tbl row
-- sharing a single attribute value with a repeat will match - confirm this is intended.
SELECT T.* FROM Tbl T JOIN (
SELECT DatasetID, ColumnA, ColumnB
FROM
(
-- RN = 1 is the first dataset with a given attribute combination; RN > 1 are repeats.
SELECT DatasetID, [Name], [Age], [Gender], ROW_NUMBER() OVER (PARTITION BY [Name], [Age], [Gender] ORDER BY DatasetID) RN
FROM (SELECT * FROM Tbl) AS SourceTable
PIVOT(MAX(ColumnB) FOR ColumnA IN ([Name], [Age], [Gender])) AS PivotTable
)Tmp
-- Turn the pivoted attribute columns back into (ColumnA, ColumnB) rows.
UNPIVOT
(
ColumnB
FOR ColumnA in ([Name], [Age], [Gender])
) AS UnpivotOp
WHERE RN > 1
)X ON T.ColumnA = X.ColumnA AND T.ColumnB = X.ColumnB;
You need to count the pivoted rows using an unbounded window (the default when the OVER clause has no ORDER BY):
-- Returns every row of each DatasetID whose (Name, Age, Gender) combination
-- is shared by more than one DatasetID.
SELECT *
FROM Tbl
WHERE DatasetID IN (
    SELECT Tmp.DatasetID
    FROM (
        SELECT
            DatasetID,
            [Name],
            [Age],
            [Gender],
            -- Number of pivoted rows carrying the same attribute values.
            COUNT(*) OVER (PARTITION BY [Name], [Age], [Gender]) AS cnt
        FROM Tbl
        PIVOT (
            MAX(ColumnB) FOR ColumnA IN ([Name], [Age], [Gender])
        ) AS PivotTable
    ) AS Tmp
    WHERE Tmp.cnt > 1
);
Fiddle
concat the columns, and apply group by.
-- Lists the Datesetid values that own a (columnA, columnB) pair occurring more
-- than once in tableA.
-- The pair is grouped and joined column by column instead of via
-- concat(columnA, columnB): concatenation without a delimiter treats distinct
-- pairs such as ('ab', 'c') and ('a', 'bc') as equal.
-- NOTE(review): rows with NULL in columnA/columnB no longer match (concat treated
-- NULL as an empty string) - confirm no such rows need to be reported.
select distinct a.Datesetid
from tableA as a
inner join (
    select columnA, columnB
    from tableA
    group by columnA, columnB
    having count(*) > 1
) as dup
    on dup.columnA = a.columnA
   and dup.columnB = a.columnB
see dbfiddle.
Use the query below to find duplicate values in the table:
-- Lists each (FirstName, LastName, MobileNo) combination that occurs more than once.
-- SELECT DISTINCT would only collapse repeated rows; it cannot show which
-- combinations are actually duplicated.
SELECT FirstName, LastName, MobileNo
FROM CUSTOMER
GROUP BY FirstName, LastName, MobileNo
HAVING COUNT(*) > 1;

What is the best way to sort below result

For example:
Name Date
A 2018-06-02
B 2018-06-03
B 2018-06-01
C 2018-06-01
What is the best way to get:
B 2018-06-03
B 2018-06-01
A 2018-06-02
C 2018-06-01
The sort order is by Date DESC first, but each row should then be followed by all the other records for the same Name.
Try forcing the max date by each name.
-- Recreate the demo temp table.
IF OBJECT_ID('tempdb..#Data') IS NOT NULL
    DROP TABLE #Data;

CREATE TABLE #Data (
    Name VARCHAR(10),
    Date DATE
);

INSERT INTO #Data (Name, Date)
VALUES
    ('A', '2018-06-02'),
    ('B', '2018-06-03'),
    ('B', '2018-06-01'),
    ('C', '2018-06-01');

-- Sort names by their most recent date, then list each name's rows newest first.
SELECT
    src.Name,
    src.Date
FROM #Data AS src
ORDER BY
    MAX(src.Date) OVER (PARTITION BY src.Name ORDER BY src.Date DESC) DESC,
    src.Date DESC,
    src.Name;
Use window function :
ORDER BY
    COUNT(*) OVER (PARTITION BY name) DESC, -- names with the most rows first
    [date] DESC,
    name
To order based on the date instead, use the max() function:
ORDER BY
    MAX([date]) OVER (PARTITION BY name) DESC, -- names with the latest date first
    [date] DESC,
    name
This should get you what you want:
-- VTE: inline sample rows, with the yyyymmdd strings converted to date.
WITH VTE AS (
    SELECT
        [name],
        CONVERT(date, [date]) AS [date] -- column and type share the name "date"
    FROM (
        VALUES
            ('A', '20180602'),
            ('B', '20180603'),
            ('B', '20180601'),
            ('C', '20180601')
    ) AS V([Name], [date])
),
-- MaxDate: every row plus the latest date seen for its name.
MaxDate AS (
    SELECT
        *,
        MAX([date]) OVER (PARTITION BY [name]) AS MaxDate
    FROM VTE
)
SELECT
    [name],
    [date]
FROM MaxDate
ORDER BY
    MaxDate DESC,
    [date] DESC,
    [name] ASC;
EDIT
I recommend you use ExLo's answer. What I posted was headed in the right direction with window functionality but his seems to match exactly what you need.
You should post expected results and possibly more test data to get a better answer.
Without testing I believe this will set you down a good path.
You can use Dense_Rank () Over (Order By Date Desc) As DateRank and Row_Number() Over (Partition By Name Order By Date Desc) As NameDateOrder
If the above is a subquery or cte you can select from that and order by DateRank, NameDateOrder
Use row_number:
-- Table variables use the @ prefix; "declare #t as table" is not valid T-SQL
-- (# names are temp tables created with CREATE TABLE).
declare @t table ([Name] char(1), [Date] date);

insert into @t ([Name], [Date]) values
  ('A', '2018-06-02')
, ('B', '2018-06-03')
, ('B', '2018-06-01')
, ('C', '2018-06-01');

-- x holds the latest date per name (md). Rows are numbered by that date, newest
-- first; the extra sort keys keep a name's rows together and make r deterministic.
select t.*,
       row_number() over (order by x.md desc, t.[Name], t.[Date] desc) as r
from
(
    select [Name], max([Date]) md
    from @t
    group by [Name]
) x
inner join @t t on t.[Name] = x.[Name]
-- Without a final ORDER BY the result order is not guaranteed.
order by r;

Select distinct on 2 columns, but return all columns from SQL Server

Is there a way to select distinct on 2 columns but return all columns?
For example
-- TABLE is a reserved keyword in T-SQL, so the table name must be bracketed.
select distinct [name], [type]
from [dbo].[Table]
but return all columns from dbo.Table?
I found this solution that works for a single column,
-- Keeps one row per type: the one with the most recent dateAdded.
WITH ranked AS (
    SELECT
        [name],
        [type],
        [col1],
        [col2],
        [col3],
        [etc],
        [dateAdded],
        [ID],
        ROW_NUMBER() OVER (PARTITION BY type ORDER BY dateAdded DESC) AS rownumber
    FROM [dbo].[Table]
)
SELECT *
FROM ranked
WHERE rownumber = 1;
is it possible to do this for multiple columns?
You can use partition by name and type as below
-- Keeps one row per (name, type) pair: the one with the most recent dateAdded.
SELECT *
FROM (
    SELECT
        [name],
        [type],
        [col1],
        [col2],
        [col3],
        [etc],
        [dateAdded],
        [ID],
        ROW_NUMBER() OVER (PARTITION BY name, type ORDER BY dateAdded DESC) AS rownumber
    FROM [dbo].[Table]
) AS a
WHERE rownumber = 1;

Select In PIVOT

Is there any way to have a CASE SELECT Statement in a PIVOTED Column. My Code is as follows
-- Pivots Visits into one column per Discipline; "Query" stands for the
-- asker's source query.
SELECT
    PName,
    [RN],
    [HA],
    [LVN],
    [MSW],
    [SC]
FROM (
    Query
) AS src
PIVOT (
    MAX(Visits)
    FOR Discipline IN ([RN], [HA], [LVN], [MSW], [SC])
) AS piv
I am getting the output as follows
Pname RN HA LVN MSW SC
AA AG-2/W LO-1/W NA-1/W SK-2/W NO-2/MON
AA JL-2/W NULL NULL NULL NULL
Because there have been 2 RNs assigned to 1 PN I want to summarize the results only in 1 Row and select only 1 value to be displayed in the RN column so that the result is only as follows based on my condition.
Pname RN HA LVN MSW SC
AA JL-2/W LO-1/W NA-1/W SK-2/W NO-2/MON
Without seeing your full query, you should be able to apply a row_number to the inner query and then use a WHERE clause similar to this:
-- Adds a row number to the source query and keeps only row 1 after pivoting.
-- The alias is row_num rather than rn: under a case-insensitive collation "rn"
-- is the same identifier as the pivoted [RN] column, and PIVOT rejects a source
-- column whose name matches one of its output columns.
SELECT PName, [RN], [HA], [LVN], [MSW], [SC]
FROM
(
<yourQuery>, row_number() over(order by somefield) row_num -- add a rownumber here
) src
pivot
(
max(Visits)
for Discipline in ([RN], [HA], [LVN], [MSW], [SC])
) piv
where row_num = 1
If you post your full query, there might be other ways to do this.
edit, using your info from a previous question, your query would be like this:
-- Builds "sname schedule" per patient and discipline, numbers the rows within
-- each (patname, disc) pair, pivots the disciplines into columns and keeps the
-- rows numbered 1.
SELECT
    patname,
    [HA],
    [MSW],
    [RN]
FROM (
    SELECT
        patName,
        Disc,
        sname+' '+schedule AS new_value,
        ROW_NUMBER() OVER (PARTITION BY patname, disc ORDER BY disc) AS rowNum
    FROM yourquery
) AS src
PIVOT (
    MAX(new_value)
    FOR disc IN ([HA], [MSW], [RN])
) AS piv
WHERE rownum = 1
See SQL Fiddle with Demo
Or you can use:
-- Pivots first, then numbers the pivoted rows per PName and keeps row 1.
-- The row-number alias is row_num rather than rn: under a case-insensitive
-- collation "rn" duplicates the pivoted [RN] column, and a derived table cannot
-- expose the same column name twice.
select *
from
(
SELECT PName, [RN], [HA], [LVN], [MSW], [SC] , row_number() over(partition by PName order by PName) row_num
FROM
(
<yourQuery>
) src
pivot
(
max(Visits)
for Discipline in ([RN], [HA], [LVN], [MSW], [SC])
) piv
) x
where row_num = 1
You'll have to remove the data AFTER the pivoting, because [RN] itself is subject to a MAX condition, i.e. it is itself a pivot column. Wrap it in a CTE, apply a RowNumber() function and partition correctly, then filter it for just 1 row per partition.
-- Pivot inside a CTE, number the pivoted rows per PName by [RN] ascending,
-- then keep only the first row of each PName.
;WITH CTE AS (
    SELECT
        PName,
        [RN],
        [HA],
        [LVN],
        [MSW],
        [SC],
        ROW_NUMBER() OVER (PARTITION BY PName ORDER BY [RN] ASC) AS RowNum
    FROM (
        Query
    ) AS src
    PIVOT (
        MAX(Visits)
        FOR Discipline IN ([RN], [HA], [LVN], [MSW], [SC])
    ) AS piv
)
SELECT *
FROM CTE
WHERE RowNum = 1;