MSSQL SELECT -get one result per group

MSSQL SELECT -get one result per group - sql

I can't figure how to write a mssql select to solve this.
Example of a existing table data:
Id GroupID Pass
___ ________ _____
1 1 1
2 1 0
3 2 1
4 2 0
5 2 0
6 3 1
7 3 1
What i need is the following example (if one of Group is not passed, then 0 else 1):
GroupID Pass
________ _____
1 0
2 0
3 1
Thanks for your help!

Use MIN aggregation
SELECT groupid, MIN(pass)
FROM tablename
GROUP BY groupid

use window function row_number()
select GroupID,Pass from (
select *,
row_number() over(
partition by GroupID
order by Pass asc
) as [rn]
from [yourtable] as [t]
) as [t1]
where [rn] = 1;

Use MIN function with GROUP BY.
Query
select [GroupID], min([Pass]) as [Pass]
from [your_table_name]
group by [GroupID]
order by [GroupID];
And if the Pass column is in BIT type, then need to cast in to INT.

Related

ROW_Number with Custom Group

I am trying to have row_number based on custom grouping but I am not able to produce it.
Below is my Query
CREATE TABLE mytbl (wid INT, id INT)
INSERT INTO mytbl Values(1,1),(2,1),(3,0),(4,2),(5,3)
Current Output
wid id
1 1
2 1
3 0
4 2
5 3
Query
SELECT *, RANK() OVER(PARTITION BY wid, CASE WHEN id = 0 THEN 0 ELSE 1 END ORDER BY ID)
FROM mytbl
I would like to rank the rows based on custom condition like if ID is 0 then I have start new group until I have non 0 ID.
Expected Output
wid id RN
1 1 1
2 1 1
3 0 1
4 2 2
5 3 2

Guessing here, as we don't have much clarification, but perhaps this:
SELECT wid,
id,
COUNT(CASE id WHEN 0 THEN 1 END) OVER (ORDER BY wid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) +1 AS [Rank]
FROM mytbl ;

If I understand you correctly, you may use the next approach. Note, that you need to have an ordering column (I assume this is wid column):
Statement:
;WITH ChangesCTE AS (
SELECT
*,
CASE WHEN LAG(id) OVER (ORDER BY wid) = 0 THEN 1 ELSE 0 END AS ChangeIndex
FROM mytbl
), GroupsCTE AS (
SELECT
*,
SUM(ChangeIndex) OVER (ORDER BY wid) AS GroupIndex
FROM ChangesCTE
)
SELECT
wid,
id,
DENSE_RANK() OVER (ORDER BY GroupIndex) AS Rank
FROM GroupsCTE
Result:
wid id Rank
1 1 1
2 1 1
3 0 1
4 2 2
5 3 2

without much clarification on the logic required, my understanding is you want to increase the Rank by 1 whenever id = 0
select wid, id,
[Rank] = sum(case when id = 0 then 1 else 0 end) over(order by wid)
+ case when id <> 0 then 1 else 0 end
from mytbl

Try this,
CREATE TABLE #mytbl (wid INT, id INT)
INSERT INTO #mytbl Values(1,1),(2,1),(3,0)
,(4,2),(5,3),(6,0),(7,4),(8,5),(9,6)
;with CTE as
(
select *,ROW_NUMBER()over(order by wid)rn
from #mytbl where id=0
)
,CTE1 as
(
select max(rn)+1 ExtraRN from CTE
)
select a.* ,isnull(ca.rn,ca1.ExtraRN) from #mytbl a
outer apply(select top 1 * from CTE b
where a.wid<=b.wid )ca
cross apply(select ExtraRN from CTE1)ca1
drop table #mytbl
Here both OUTER APPLY and CROSS APPLY will not increase cardianility estimate.It will always return only one rows.

Select row based on max value in column SQL Server

How can I get the output from the input using Microsoft SQL Server? (Basically select the row per ID where vote is max).
Input
ID Label Vote
-----------------------
79185673 2 3
79185673 0 17
79185724 4 5
79185724 1 13
79185724 0 2
79185900 1 17
79185900 2 1
79185900 4 2
79186190 3 3
79186190 2 17
Output
ID Label Vote
-----------------------
79185673 0 17
79185724 1 13
79185900 1 17
79186190 2 17

Use ROW_NUMBER or DENSE_RANK function to give a rank per ID in the descending order of Vote column and then select the rows having rank 1.
I prefer DENSE_RANK function, because it will give same rank for the same Vote values.
Query
;with cte as(
select [rank] = DENSE_RANK() over(
partition by [ID]
order by [Vote] desc
), *
from [your_table_name]
)
select [ID], [Label], [Vote] from cte
where [rank] = 1;

Count of duplicate values by two columns in SQL Server

From this table:
Number Value
1 a
2 b
3 a
2 c
2 b
3 a
2 b
I need to get count of all duplicate rows by Number and Value, i.e. 5.
Thanks.

I think this query is what you want:
SELECT SUM(t.cnt)
FROM
(
SELECT COUNT(*) cnt
FROM table_name
GROUP BY number, value
HAVING COUNT(*) > 1
)t;

Maybe something like this?
select value,number,max(cnt) as Count_distinct from (
select *,row_number () over (partition by value,number order by number) as cnt
from #sample
)t
group by value,number
Output
+---------------------------------+
| Value | Number | Count_Distinct |
| a | 1 | 1 |
| b | 2 | 3 |
| c | 2 | 1 |
| a | 3 | 2 |
+---------------------------------+

Select
count(distinct Number) as Distinct_Numbers,
count(distinct Value) as Distinct_Values
from
Table
This shows how many distinct values are in each column. Does this help?

Give a row number partition by both the columns and order by both the columns. Then count the number of rows where row number greater than 1.
Query
;with cte as(
select [rn] = row_number() over(
partition by [Number], [Value]
order by [Number], [Value]
), *
from [your_table_name]
)
select count(*) from cte
where [rn] > 1;

I think you mean number of unique number - value pairs, you can use:
SELECT count(*)
FROM
(SELECT ROW_NUMBER() OVER (PARTITION BY number, value ORDER BY (select 1)) from mytable rnk) i
where i.rnk = 1

May be this query may help you
select * from [dbo].[Sample_table1]
;WITH
DupContactRecords(number,value,DupsCount)
AS
(
SELECT number,value, COUNT() AS TotalCount FROM [Sample_table1] GROUP BY number,value HAVING COUNT() > 1
)
--to get the duplicats
/*select * from DupContactRecords*/
SELECT sum(DupsCount) FROM DupContactRecords

Any other alternative to write this SQL query

I need to select data base upon three conditions
Find the latest date (StorageDate Column) from the table for each record
See if there is more then one entry for date (StorageDate Column) found in first step for same ID (ID Column)
and then see if DuplicateID is = 2
So if table has following data:
ID |StorageDate | DuplicateTypeID
1 |2014-10-22 | 1
1 |2014-10-22 | 2
1 |2014-10-18 | 1
2 |2014-10-12 | 1
3 |2014-10-11 | 1
4 |2014-09-02 | 1
4 |2014-09-02 | 2
Then I should get following results
ID
1
4
I have written following query but it is really slow, I was wondering if anyone has better way to write it.
SELECT DISTINCT(TD.RecordID)
FROM dbo.MyTable TD
JOIN (
SELECT T1.RecordID, T2.MaxDate,COUNT(*) AS RecordCount
FROM MyTable T1 WITH (nolock)
JOIN (
SELECT RecordID, MAX(StorageDate) AS MaxDate
FROM MyTable WITH (nolock)
GROUP BY RecordID)T2
ON T1.RecordID = T2.RecordID AND T1.StorageDate = T2.MaxDate
GROUP BY T1.RecordID, T2.MaxDate
HAVING COUNT(*) > 1
)PT ON TD.RecordID = PT.RecordID AND TD.StorageDate = PT.MaxDate
WHERE TD.DuplicateTypeID = 2

Try this and see how the performance goes:
;WITH
tmp AS
(
SELECT *,
RANK() OVER (PARTITION BY ID ORDER BY StorageDate DESC) AS StorageDateRank,
COUNT(ID) OVER (PARTITION BY ID, StorageDate) AS StorageDateCount
FROM MyTable
)
SELECT DISTINCT ID
FROM tmp
WHERE StorageDateRank = 1 -- latest date for each ID
AND StorageDateCount > 1 -- more than 1 entry for date
AND DuplicateTypeID = 2 -- DuplicateTypeID = 2

You can use analytic function rank , can you try this query ?
Select recordId from
(
select *, rank() over ( partition by recordId order by [StorageDate] desc) as rn
from mytable
) T
where rn =1
group by recordId
having count(*) >1
and sum( case when duplicatetypeid =2 then 1 else 0 end) >=1

Add a column with the max value of the group

I want to add an extra column, where the max values of each group (ID) will appear.
Here how the table looks like:
select ID, VALUE from mytable
ID VALUE
1 4
1 1
1 7
2 2
2 5
3 7
3 3
Here is the result I want to get:
ID VALUE max_values
1 4 7
1 1 7
1 7 7
2 2 5
2 5 5
3 7 7
3 3 7
Thank you for your help in advance!

Your previous questions indicate that you are using SQL Server, in which case you can use window functions:
SELECT ID,
Value,
MaxValue = MAX(Value) OVER(PARTITION BY ID)
FROM mytable;
Based on your comment on another answer about first summing value, you may need to use a subquery to actually get this:
SELECT ID,
Date,
Value,
MaxValue = MAX(Value) OVER(PARTITION BY ID)
FROM ( SELECT ID, Date, Value = SUM(Value)
FROM mytable
GROUP BY ID, Date
) AS t;

There is no need to use GROUP BY in subselect.
select ID, VALUE,
(select MAX(VALUE) from mytable where ID = t.ID) as MaxValue
from mytable t

Use this query.
SELECT ID
,value
,(
SELECT MAX(VALUE)
FROM GetMaxValue gmv
WHERE gmv.ID = gmv1.ID
GROUP BY ID
) as max_value
FROM GetMaxValue gmv1
ORDER BY ID

Try it with a sub select and group by, then grab the MAX of this group:
select
ID,
VALUE,
(select MAX(VALUE)
from mytable
group by ID
having ID = t.ID
) as max_values
from mytable t
Edit:
I built a SQL fiddle, which shows that my solution works, but also VDohnal is correct and doesn't need the group by, so I'll upvote his answer.

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

MSSQL SELECT -get one result per group - sql

I can't figure how to write a mssql select to solve this. Example of a existing table data: Id GroupID Pass _ ____ _ 1 1 1 2 1 0 3 2 1 4 2 0 5 2 0 6 3 1 7 3 1 What i need is the following example (if one of Group is not passed, then 0 else 1): GroupID Pass __ _ 1 0 2 0 3 1 Thanks for your help!

Use MIN aggregation SELECT groupid, MIN(pass) FROM tablename GROUP BY groupid

use window function row_number() select GroupID,Pass from ( select *, row_number() over( partition by GroupID order by Pass asc ) as [rn] from [yourtable] as [t] ) as [t1] where [rn] = 1;

Use MIN function with GROUP BY. Query select [GroupID], min([Pass]) as [Pass] from [your_table_name] group by [GroupID] order by [GroupID]; And if the Pass column is in BIT type, then need to cast in to INT.

Related

ROW_Number with Custom Group

Select row based on max value in column SQL Server

Count of duplicate values by two columns in SQL Server

Any other alternative to write this SQL query

Add a column with the max value of the group

Categories

Resources