Group By and get distinct value that occurs most often - sql

I want to group by a varchar column and find the foreignkey value that occurs the most. The problem is that multiple fiModel can be assigned to the same TAC(first 8 digits of a 15-digit value called SSN_Number).
Here is a simplified model and query with sample-data:
create table #data(
SSN_Number varchar(15),
fiModel int
)
insert into #data
SELECT '351806038155151',451 UNION ALL SELECT '353797028764243',232 UNION ALL SELECT '353797028764243',438 UNION ALL SELECT '353797028764243',438 UNION ALL SELECT '353797028764243',447 UNION ALL SELECT '358372015611578',318 UNION ALL SELECT '352045039834626',279 UNION ALL SELECT '352045031234567',279 UNION ALL SELECT '351806035647381',451 UNION ALL SELECT '352045037654321',207
--- following query returns all records(10)
select * from #data Order By SSN_Number
--- following query gives the distinct TAC's+fiModel, but TACs can repeat (9)
select substring(ssn_number,1,8)as TAC,fiModel,count(*) from #data
group by substring(ssn_number,1,8),fiModel
Order By substring(ssn_number,1,8),fiModel
--- following query gives the correct(distinct) TAC's (4),
--- but i need the fiModel that occurs most often with the assigned TAC
--- if the number is the same, it doesn't matter what to take
select substring(ssn_number,1,8)as TAC,count(*) from #data
group by substring(ssn_number,1,8)
Order By substring(ssn_number,1,8)
drop table #data
So this is the desired result:
TAC fiModel
35180603 451
35204503 279
35379702 438
35837201 318

This should do the trick (CTE's to the rescue!):
;with cte as (
select substring(ssn_number,1,8) as TAC, fiModel, ROW_NUMBER() OVER (PARTITION BY substring(ssn_number,1,8) ORDER BY count(*) desc) as row
from #data
group by substring(ssn_number,1,8),fiModel
)
select TAC, fiModel
from cte
where row = 1
As subquery:
Select TAC,fiModel
from(
Select substring(ssn_number,1,8)as TAC, fiModel
,ROW_NUMBER() OVER (PARTITION BY substring(ssn_number,1,8) ORDER BY count(*) desc) as row
from #data
group by substring(ssn_number,1,8),fiModel
)as data
where row=1

Related

Perform a function after union of two tables

I have two tables that I want to union together then perform some math functions on the combined table.
I know how to do the math for each separate table, but throwing in a union table to go off of is out of my league.
Here's the math for one table using column header "UnitsReceived" and "AsnPsUnits"
The other table would have headers: "cUnitsReceived" and "cAsnPsUnits"
select VendName,
1-abs(((cast(sum(UnitsReceived) as decimal(5,0))) - (cast(sum(AsnPsUnits) as decimal(5,0)))) /(cast(sum(AsnPsUnits) as decimal(5,0)))) as ASNpsAcc
from VenTest2
where ID<20
group by VendName
How would I perform this function after the union of two tables?
You'll need to get the unioned tables into some table object before performing your function. This could be done using:
A Common Table Expression
with cte as (
select ID, VALUE from A
union all
select ID, VALUE from B
)
select
*
,myfunction(VALUE) as MyFunctionResult
from
cte
A temp table
select ID, VALUE into #myTempTable from A
insert into #myTempTable select ID, VALUE from B
select
*
,myfunction(VALUE) as MyFunctionResult
from
#myTempTable
A table variable
declare #myTableVariable table (ID int, VALUE decimal)
insert into #myTableVariable
select ID, VALUE from A
union all
select ID, VALUE from B
select
*
,myfunction(VALUE) as MyFunctionResult
from
#myTableVariable
A sub query
select
*
,myfunction(VALUE) as MyFunctionResult
from
(
select ID, VALUE from A
union all
select ID, VALUE from B
) mySubQuery
This will help with the subq being the union
select VendName,
1-abs(((cast(sum(UnitsReceived) as decimal(5,0))) - (cast(sum(AsnPsUnits) as decimal(5,0)))) /(cast(sum(AsnPsUnits) as decimal(5,0)))) as ASNpsAcc
from
(
select ID, UnitsReceived, AsnPsUnits from VenTest2 where ID<20
union
select ID1, UnitsReceived1, AsnPsUnits1 from VenTest1
)a
group by VendName
This is not the way, brothers:
select VendName,
1-abs(((cast(sum(UnitsReceived) as decimal(10,2))) - (cast(sum(AsnPsUnits) as decimal(10,2)))) /(cast(sum(AsnPsUnits) as decimal(10,2)))) as ASNpsAcc
from VenTest2
where ID<10
group by VendName
union
select cVendName,
1-abs(((cast(sum(cUnitsReceived) as decimal(10,2))) - (cast(sum(casnpsunits) as decimal(10,2)))) /(cast(sum(cAsnPsUnits) as decimal(10,2)))) as ASNpsAcc
from CTest
where id <10
group by cvendname

Recursive CTE to find present and previous WorkState and entry time of the WorkState

I have the below
The schema is as under
declare #t table(CustomerId varchar(10),WorkState varchar(10),statechangedate datetimeoffset, stateorder int)
insert into #t
select '1','WorkStateA','2018-10-30 13:38:53.5133333 +00:00',1 union all
select '2','WorkStateA','2018-05-18 17:04:56.9900000 +00:00',1 union all
select '2','WorkStateA','2018-05-18 16:22:20.3266667 +00:00',2 union all
select '2','WorkStateB','2018-05-09 12:46:33.8300000 +00:00',3 union all
select '3','WorkStateF','2018-06-21 12:40:03.2933333 +00:00',1 union all
select '3','WorkStateE','2018-06-21 12:38:43.9000000 +00:00',2 union all
select '3','WorkStateD','2018-06-21 12:38:24.7533333 +00:00',3 union all
select '3','WorkStateC','2018-06-21 12:38:11.0233333 +00:00',4 union all
select '3','WorkStateB','2018-06-21 12:38:04.1933333 +00:00',5 union all
select '3','WorkStateA','2018-06-21 12:36:51.4633333 +00:00',6
select * from #t
What I am looking for
Means I need to capture the Present and Previous WorkState and Entry time of the WorkState on customer basis.
I have tried with the below recursive CTE
;with cte as(
select
t.CustomerId,
PresentWorkState = t.WorkState,
PresentStatechangedate = t.statechangedate,
t.stateorder,
PreviousWorkState = null ,
PreviousStatechangedate= null
from #t t where t.stateorder=1
union all
select
t1.CustomerId,
t1.WorkState,
t1.statechangedate,
c.stateorder,
c.PreviousWorkState,
c.PreviousStatechangedate
from #t t1
join cte c on t1.stateorder !=c.stateorder)
select *
from cte
but could not.
Like Andrew said LAG() should work for your purpose instead of using recursive cte. Check this:
SELECT *
,LAG(WorkState) OVER(PARTITION BY CustomerId ORDER BY statechangedate)
,LAG(statechangedate) OVER(PARTITION BY CustomerId ORDER BY statechangedate)
FROM #T
ORDER BY CustomerId,statechangedate
(Posted on behalf of the question author).
LEAD function solved it. Here is the output:
select
CustomerId,
PresentWorkState = WorkState,
PresentStatechangedate = statechangedate,
PreviousWorkState = LEAD(WorkState) OVER (partition by CustomerId ORDER BY stateorder) ,
PreviousStatechangedate = LAG(statechangedate) OVER (partition by CustomerId ORDER BY stateorder)
from #t

Select unique field

I have this table:
TableA
----------------
ID (pk) Name
1 A
2 B
3 C
4 A
5 D
6 A
7 B
8 A
9 D
10 C
....
I need to randomly extract with a SELECT TOP 5 ID, Name FROM TableA
with Name that must be unique within the 5 records.
I'm trying :
;WITH group
AS
(
SELECT ID, Name,
ROW_NUMBER() OVER (PARTITION BY Name ORDER BY NewId()) rn
FROM TableA
)
SELECT ID, Name
FROM group
WHERE rn = 1
but every time I have quite the same results.
I need to select between all the values for ID at random, assuring that Name will always be different for each record.
I hope the problem is understandable. Any ideas?
Found a solution. It seems to work!
;WITH group
AS (
SELECT ID, Name, ROW_NUMBER() OVER (PARTITION BY Name ORDER BY NewId()) rn FROM TableA )
SELECT top 5 ID, Name, NewId() [NewId]
FROM group
WHERE rn = 1
ORDER BY [newid]
Perhaps the problem is that although newid() is random, it may tend to be sequential. Does this fix the problem?
WITH g as (
SELECT ID, Name,
ROW_NUMBER() OVER (PARTITION BY Name ORDER BY RAND(CHECKSUM(NewId()))) as rn
FROM TableA
)
SELECT ID, Name
FROM g
WHERE rn = 1;
CREATE TABLE #test(ID INT ,Name VARCHAR(1)) INSERT INTO #test(ID ,Name )
SELECT 1,'A' UNION ALL SELECT 2,'B' UNION ALL SELECT 3,'C' UNION ALL
SELECT 4,'A' UNION ALL SELECT 5,'D'UNION ALL SELECT 6,'A' UNION ALL
SELECT 7,'B' UNION ALL SELECT 8,'A'UNION ALL SELECT 9,'D' UNION ALL
SELECT 10,'C'
SELECT T1.ID ,T1.Name FROM #test T1
JOIN ( SELECT TOP 5 Name FROM #test T2 ORDER BY NEWID()
) A ON T1.Name = A.Name ORDER BY A.Name
;WITH group
AS
(
SELECT ID, Name,
ROW_NUMBER() OVER (PARTITION BY Name ORDER BY NewId()) rn
FROM TableA
)
SELECT top 5 ID, Name, NewId() [NewId]
FROM group
WHERE rn = 1
ORDER BY [newid]

Display only the first row per match

I have a table (AreaPartners), and I want to match only the first "Name" record in each group, order by "ID", grouped by "Area". So for the table below:
Area Name ID
AB ISmith 748
AB AWood 750
AB HArcher 751
AB DMunslow 753
AB DCornelius 754
BH MLee 301
BH NMcClean 307
BH DMiles 309
BH LPayze 325
BH MPinnock 329
I'd want to return the results ISmith for AB and MLee for BH.
How do I go about doing this? I believe it's something to do with the Group By function, but I can't for the life of me get it to work.
Try this:
SELECT yourTable.Area, yourTable.Name
FROM yourTable INNER JOIN (
SELECT MIN(Id) AS MinId
FROM yourTable
GROUP BY Area) M ON yourTable.Id = M.MinId
Update because of comment (There is no table variable and Partition over is not a MS access statement). You can also do it with an IN statement:
SELECT
yourTable.Area,
yourTable.Name
FROM yourTable
WHERE yourTable.Id IN
(
SELECT
MIN(tbl.Id) AS MinId
FROM
yourTable as tbl
GROUP BY
tbl.Area
)
In MSSQL you can write this:
DECLARE #tbl TABLE
(
Area VARCHAR(100),
Name VARCHAR(100),
ID INT
)
INSERT INTO #tbl
SELECT 'AB','ISmith',748
UNION ALL
SELECT 'AB','AWood',750
UNION ALL
SELECT 'AB','HArcher',751
UNION ALL
SELECT 'AB','DMunslow',753
UNION ALL
SELECT 'AB','DCornelius',754
UNION ALL
SELECT 'BH','MLee',301
UNION ALL
SELECT 'BH','NMcClean',307
UNION ALL
SELECT 'BH','DMiles',309
UNION ALL
SELECT 'BH','LPayze',325
UNION ALL
SELECT 'BH','MPinnock',325
;WITH CTE
AS
(
SELECT
RANK() OVER(PARTITION BY tbl.Area ORDER BY ID) AS iRank,
tbl.ID,
tbl.Area,
tbl.Name
FROM
#tbl AS tbl
)
SELECT
*
FROM
CTE
WHERE
CTE.iRank=1

Select Rows with Maximum Column Value group by Another Column

This should be a simple question, but I can't get it to work :(
How to select rows that have the maximum column value,as group by another column?
For example,
I have the following table definition:
ID
Del_Index
docgroupviewid
The issue now is that I want to group by results by docgroupviewid first, and then choose one row from each docgroupviewid group, depending on which one has the highest del_index.
I tried
SELECT docgroupviewid, max(del_index),id FROM table
group by docgroupviewid
But instead of return me with the correct id, it returns me with the earliest id from the group with the same docgroupviewid.
Any ideas?
I've struggled with this many times myself and the solution is to think about your query differently.
I want each DocGroupViewID row where the Del_Index is the highest(max) for all rows with that DocGroupViewID:
SELECT
T.DocGroupViewID,
T.Del_Index,
T.ID
FROM MyTable T
WHERE T.Del_Index = (
SELECT MAX( T1.Del_Index ) FROM MyTable T1
WHERE T1.DocGroupViewID = T.DocGroupViewID
)
It gets more complex when more than one row can have the same Del_Index, since then you need some way to choose which one to show.
EDIT: wanted to follow up with another option
You can use the RANK() or ROW_NUMBER() functions with a CTE to get more control over the results, as follows:
-- fake a source table
DECLARE #t TABLE (
ID int IDENTITY(1,1) PRIMARY KEY,
Del_Index int,
DocGroupViewID int
)
INSERT INTO #t
SELECT 1, 1 UNION ALL
SELECT 2, 1 UNION ALL
SELECT 3, 1 UNION ALL
SELECT 1, 2 UNION ALL
SELECT 2, 2 UNION ALL
SELECT 2, 2 UNION ALL
SELECT 1, 3 UNION ALL
SELECT 2, 3 UNION ALL
SELECT 3, 3 UNION ALL
SELECT 4, 3
-- show our source
SELECT * FROM #t
-- select using RANK (can have duplicates)
;WITH cteRank AS
(
SELECT
DocGroupViewID,
Del_Index,
ID,
RANK() OVER
(PARTITION BY DocGroupViewID ORDER BY Del_Index DESC)
AS RowRank,
ROW_NUMBER() OVER
(PARTITION BY DocGroupViewID ORDER BY Del_Index DESC)
AS RowNumber
FROM #t
)
SELECT *
FROM cteRank
WHERE RowRank = 1
-- select using ROW_NUMBER
;WITH cteRowNumber AS
(
SELECT
DocGroupViewID,
Del_Index,
ID,
RANK() OVER
(PARTITION BY DocGroupViewID ORDER BY Del_Index DESC)
AS RowRank,
ROW_NUMBER() OVER
(PARTITION BY DocGroupViewID ORDER BY Del_Index DESC)
AS RowNumber
FROM #t
)
SELECT *
FROM cteRowNumber
WHERE RowNumber = 1
If you have ways to sort out ties, just add it to the ORDER BY.
You will have to complicate your query a little bit:
select a.docgroupviewid, a.del_index, a.id from table a
where a.del_index = (select max(b.del_index) from table
where b.docgroupviewid = a.docgroupviewid)