TSQL - Parent Child (1 to zero/many) Grouping/Aggregation - sql

Code (Sample Data Staging):
-- Sample data staging for the employee / phone-contact example.
-- Local temp tables are created with CREATE TABLE #name; "DECLARE #name TABLE"
-- is not valid T-SQL (DECLARE ... TABLE requires an @-prefixed table variable).
-- Temp tables also remain visible to the queries that reference #Emp and
-- #EmpPhCont later in the same session.
IF OBJECT_ID(N'tempdb..#EmpPhCont') IS NOT NULL DROP TABLE #EmpPhCont;
IF OBJECT_ID(N'tempdb..#Emp') IS NOT NULL DROP TABLE #Emp;

-- One row per employee; EId is generated by the IDENTITY column.
CREATE TABLE #Emp
(
    [EId] INT IDENTITY(1, 1)
  , [FN]  NVARCHAR(50)
  , [LN]  NVARCHAR(50)
) ;

-- Zero or more phone contact rows per employee.
CREATE TABLE #EmpPhCont
(
    [EId]       INT
  , [PhType]    VARCHAR(10)
  , [PhNum]     VARCHAR(16)
  , [PhExt]     VARCHAR(10)
  , [IsMain]    BIT
  , [CreatedOn] DATETIME
) ;

-- Explicit column lists keep the inserts valid if columns are added or reordered.
INSERT INTO #Emp ( [FN], [LN] )
VALUES
  ( N'Emp1', N'Emp1' )
, ( N'Emp2', N'Emp2' )
, ( N'Emp3', N'Emp3' )
, ( N'Emp4', N'Emp4' )
, ( N'Emp5', N'Emp5' )
, ( N'Emp6', N'Emp5' ) ;   -- Emp6 intentionally has no phone rows below

INSERT INTO #EmpPhCont ( [EId], [PhType], [PhNum], [PhExt], [IsMain], [CreatedOn] )
VALUES
  ( 1, 'Home', '111111111', NULL, 0, '2020-01-01 00:00:01' )
, ( 1, 'Mobile', '222222222', NULL, 1, '2020-01-01 00:00:02' )
, ( 1, 'Work', '333333333', NULL, 0, '2020-01-01 00:00:03' )
, ( 2, 'Work', '444444444', '567', 1, '2020-01-01 00:00:04' )
, ( 2, 'Mobile', '555555555', NULL, 0, '2020-01-01 00:00:05' )
, ( 2, 'Mobile', '454545454', NULL, 0, '2020-01-01 00:00:06' )
, ( 3, 'Home', '777777777', NULL, 0, '2020-01-01 00:00:07' )
, ( 3, 'Mobile', '888888888', NULL, 1, '2020-01-01 00:00:08' )
, ( 3, 'Mobile', '12121212', NULL, 0, '2020-01-01 00:00:09' )
, ( 4, 'Work', '101010101', '111', 1, '2020-01-01 00:00:10' )
, ( 4, 'Work', '101010102', '232', 0, '2020-01-01 00:00:11' )
, ( 5, 'Work', '545454545', '456', 0, '2020-01-01 00:00:10' )
, ( 5, 'Work', '456456456', NULL, 1, '2020-01-01 00:00:11' ) ;
Description:
#Emp is the sample Employee table (Unique Employee records).
EId = Employee Id
FN = First Name
LN = Last Name
#EmpPhCont is the sample Employee Phone Contact table (Each Emp from #Emp table can have zero, one, or multiple phone numbers here - unique by Emp/Type).
PhType = Phone Type (home, mobile, work, and etc)
PhNum = Phone Number
PhExt = Phone Extension (mostly available for "Work" PhType)
IsMain = Is it main contact number. Each employee with a phone num will have exactly 1 record marked as IsMain.
CreatedOn = Date the record was created
Goal:
To output 1 record per employee with the following Columns
EId | HomeNum | MobileNum | WorkNum | WorkNumExt | MainPhType
Rules:
Return all EId for all records from #Emp, whether they have a #EmpPhCont record or not.
For each emp that has #EmpPhCont record avail, return the newest created PhNum and PhExt for the corresponding PhType, UNLESS an older record for the same Emp/PhType is marked as IsMain = 1 (For any emp, for whichever PhType, if IsMain = 1, always return that PhNum and PhExt value).
Expected Output:
EId HomeNum MobileNum WorkNum WorkNumExt MainPhType
1 111111111 222222222 333333333 NULL Mobile
2 NULL 454545454 444444444 567 Work
3 777777777 888888888 NULL NULL Mobile
4 NULL NULL 101010101 111 Work
5 NULL NULL 456456456 NULL Work
6 NULL NULL NULL NULL NULL
My unsuccessful try:
-- Asker's first attempt: one row per employee with one phone column per type.
-- MAX() simply keeps the largest PhNum string for each type; neither IsMain
-- nor CreatedOn is consulted, and no extension / main-type column is produced.
SELECT
    [EM].[EId]
  , MAX(CASE WHEN [PH].[PhType] = 'Home'   THEN [PH].[PhNum] END) AS [HomePhNum]
  , MAX(CASE WHEN [PH].[PhType] = 'Mobile' THEN [PH].[PhNum] END) AS [MobilePhNum]
  , MAX(CASE WHEN [PH].[PhType] = 'Work'   THEN [PH].[PhNum] END) AS [WorkPhNum]
FROM #Emp AS [EM]
    LEFT JOIN #EmpPhCont AS [PH]
        ON [PH].[EId] = [EM].[EId]
GROUP BY
    [EM].[EId] ;

Use ROW_NUMBER() window function inside a CTE to get the rows from #EmpPhCont that you want returned and join this CTE to #Emp:
-- Rank each employee's phone rows within a phone type so that the IsMain row
-- (if any) comes first, otherwise the most recently created one.
WITH ranked_phone AS (
    SELECT
        p.[EId],
        p.[PhType],
        p.[PhNum],
        p.[PhExt],
        p.[IsMain],
        ROW_NUMBER() OVER (
            PARTITION BY p.[EId], p.[PhType]
            ORDER BY p.[IsMain] DESC, p.[CreatedOn] DESC
        ) AS rn
    FROM #EmpPhCont AS p
)
-- Keep only the top-ranked row per type and pivot the types into columns.
-- The rn = 1 filter lives in the ON clause so employees without phones survive.
SELECT
    e.[EId],
    MAX(CASE WHEN rp.[PhType] = 'Home'   THEN rp.[PhNum] END) AS HomeNum,
    MAX(CASE WHEN rp.[PhType] = 'Mobile' THEN rp.[PhNum] END) AS MobileNum,
    MAX(CASE WHEN rp.[PhType] = 'Work'   THEN rp.[PhNum] END) AS WorkNum,
    MAX(CASE WHEN rp.[PhType] = 'Work'   THEN rp.[PhExt] END) AS WorkNumExt,
    MAX(CASE WHEN rp.[IsMain] = 1        THEN rp.[PhType] END) AS MainPhType
FROM #Emp AS e
LEFT JOIN ranked_phone AS rp
    ON  rp.[EId] = e.[EId]
    AND rp.rn = 1
GROUP BY e.[EId]
See the demo.
Results:
> EId | HomeNum | MobileNum | WorkNum | WorkNumExt | MainPhType
> --: | :-------- | :-------- | :-------- | :--------- | :---------
> 1 | 111111111 | 222222222 | 333333333 | null | Mobile
> 2 | null | 454545454 | 444444444 | 567 | Work
> 3 | 777777777 | 888888888 | null | null | Mobile
> 4 | null | null | 101010101 | 111 | Work
> 5 | null | null | 456456456 | null | Work
> 6 | null | null | null | null | null

I would implement that using APPLY:
-- One OUTER APPLY per phone type: each picks the single preferred row
-- (IsMain first, then newest) for the current employee, or no row at all.
SELECT
    e.[EId],
    h.HomeNum,
    m.MobileNum,
    w.WorkNum,
    w.WorkNumExt,
    -- Each *Main column is non-NULL only when that type's chosen row is the main one.
    COALESCE(h.HomeMain, m.MobileMain, w.WorkMain) AS MainPhType
FROM Emp AS e
OUTER APPLY (
    SELECT TOP (1)
        c.[PhNum] AS HomeNum,
        CASE WHEN c.[IsMain] = 1 THEN 'Home' END AS HomeMain
    FROM EmpPhCont AS c
    WHERE c.[PhType] = 'Home'
      AND c.[EId] = e.[EId]
    ORDER BY c.[IsMain] DESC, c.[CreatedOn] DESC
) AS h
OUTER APPLY (
    SELECT TOP (1)
        c.[PhNum] AS MobileNum,
        CASE WHEN c.[IsMain] = 1 THEN 'Mobile' END AS MobileMain
    FROM EmpPhCont AS c
    WHERE c.[PhType] = 'Mobile'
      AND c.[EId] = e.[EId]
    ORDER BY c.[IsMain] DESC, c.[CreatedOn] DESC
) AS m
OUTER APPLY (
    SELECT TOP (1)
        c.[PhNum] AS WorkNum,
        c.[PhExt] AS WorkNumExt,
        CASE WHEN c.[IsMain] = 1 THEN 'Work' END AS WorkMain
    FROM EmpPhCont AS c
    WHERE c.[PhType] = 'Work'
      AND c.[EId] = e.[EId]
    ORDER BY c.[IsMain] DESC, c.[CreatedOn] DESC
) AS w
See SQL Fiddle for demo.
Output
EId | HomeNum | MobileNum | WorkNum | WorkNumExt | MainPhType
1 | 111111111 | 222222222 | 333333333 | (null) | Mobile
2 | (null) | 454545454 | 444444444 | 567 | Work
3 | 777777777 | 888888888 | (null) | (null) | Mobile
4 | (null) | (null) | 101010101 | 111 | Work
5 | (null) | (null) | 456456456 | (null) | Work
6 | (null) | (null) | (null) | (null) | (null)
Note: This solution will only be viable for large data sets if the EmpPhCont table has an index on [EId], [PhType], otherwise it'll be too slow.

row_number(), outer apply and aggregation:
-- For every employee, rank that employee's phone rows per type inside the
-- APPLY, keep the top row of each type, and collapse them into one row.
-- Because the APPLY body is an aggregate without GROUP BY it always yields
-- exactly one row, so employees with no phones come back with NULL columns.
SELECT
    e.*,
    ph.*
FROM #Emp AS e
OUTER APPLY
(
    SELECT
        MAX(CASE WHEN top_row.[PhType] = 'Home'   THEN top_row.[PhNum] END) AS [HomePhNum]
      , MAX(CASE WHEN top_row.[PhType] = 'Mobile' THEN top_row.[PhNum] END) AS [MobilePhNum]
      , MAX(CASE WHEN top_row.[PhType] = 'Work'   THEN top_row.[PhNum] END) AS [WorkPhNum]
      , MAX(CASE WHEN top_row.[PhType] = 'Work'   THEN top_row.[PhExt] END) AS [WorkNumExt]
        -- If more than one type were flagged as main, MAX would return the
        -- alphabetically last type (e.g. 'Work' over 'Mobile').
      , MAX(CASE WHEN top_row.[IsMain] = 1 THEN top_row.[PhType] END) AS MainPhType
    FROM
    (
        SELECT
            p.[PhType]
          , p.[PhNum]
          , p.[PhExt]
          , p.[IsMain]
          , ROW_NUMBER() OVER (
                PARTITION BY p.[PhType]
                ORDER BY p.[IsMain] DESC, p.[CreatedOn] DESC
            ) AS rownum
        FROM #EmpPhCont AS p
        WHERE p.[EId] = e.[EId]
    ) AS top_row
    WHERE top_row.rownum = 1
) AS ph;

Related

How to include empty field values in query count when the value in a second field has a certain value for a given period (3rd field)

My query needs to display, for each student, for each financial award received, the number of terms (semesters) the student was enrolled full time (greater than or equal to 12 hours) whether or not the award was received that semester PLUS the number of terms the student was enrolled part time (less than 12 hours, but greater than 1 hour) AND the student received the award. Currently, I can only get the query to count the number of terms each award was received.
For example, let's say that the student with the ID of 000001 has the following awards and enrollment (displayed by term):
| AWARD_111 | AWARD_222 | AWARD_333 | AWARD_444 | HRS_ENROLLED
--------------------------------------------------------------------------
FALL_2015 | Y | | Y | | 15
SPRING_2016 | Y | | Y | | 13
FALL_2016 | Y | | Y | Y | 17
SPRING_2017 | Y | | Y | Y | 15
FALL_2017 | Y | | Y | | 17
SPRING_2018 | Y | | Y | | 15
SUMMER_2018 | | | | | 3
FALL_2018 | Y | Y | Y | | 15
SPRING_2019 | Y | Y | Y | | 17
SUMMER_2019 | | Y | | | 1
FALL_2019 | | | Y | | 12
The result I'm currently getting is this (just showing for the one student above):
STUDENT_ID | AWARD_CODE | AWARD_COUNT
-------------------------------------
000001 | AWARD_1111 | 8
000001 | AWARD_2222 | 3
000001 | AWARD_3333 | 9
000001 | AWARD_4444 | 2
The result I want is:
STUDENT_ID | AWARD_CODE | AWARD_COUNT
-------------------------------------
000001 | AWARD_1111 | 9
000001 | AWARD_2222 | 10
000001 | AWARD_3333 | 9
000001 | AWARD_4444 | 9
Here is my stripped-down query (actually, one of many completely different types of queries I tried that each produced the same wrong results):
-- Asker's stripped-down query: counts terms per student and award code.
-- Because TERM is INNER JOINed to AWARDS on both student and term, only terms
-- in which the award was actually received reach the SUM, which is why the
-- result equals "number of terms the award was received".
SELECT TERM.STUDENT_ID          AS STUDENT_ID,
       AWARDS.AWARDS_FUND_CODE  AS AWARD_CODE,
       SUM ( CASE WHEN TERM.HRS_ENROLLED >= 12
                  THEN 1
                  -- Column name aligned with the AWARDS_STUDENT_ID used in the joins.
                  WHEN ( ( TERM.HRS_ENROLLED >= 1 ) AND ( AWARDS.AWARDS_STUDENT_ID = TERM.STUDENT_ID ) )
                  THEN 1
                  ELSE 0
             END )              AS AWARD_COUNT
FROM ( SELECT ENROLLMENT_STUDENT_ID   AS STUDENT_ID,
              ENROLLMENT_TERM_CODE    AS TERM_CODE,
              ENROLLMENT_ENROLLED_HRS AS HRS_ENROLLED
       FROM ENROLLMENT
       -- Only students that have at least one award row.
       WHERE EXISTS ( SELECT AWARDS_FUND_CODE
                      FROM AWARDS
                      WHERE ENROLLMENT_STUDENT_ID = AWARDS_STUDENT_ID ) ) TERM
INNER JOIN AWARDS
    ON  TERM.STUDENT_ID = AWARDS.AWARDS_STUDENT_ID
    AND TERM.TERM_CODE  = AWARDS.AWARDS_TERM_CODE
GROUP BY TERM.STUDENT_ID, AWARDS.AWARDS_FUND_CODE
-- Fixed typo: the original ordered by the non-existent AWRDS_FUND_CODE.
ORDER BY AWARDS.AWARDS_FUND_CODE
As usual, the difficult part was setting up the test data. I will leave the end pivot to you.
-- Oracle demo: builds the sample term grid inline, tags it with a student id,
-- attaches per-student award totals, then counts qualifying terms per award.
WITH
    -- One row per term: which awards were received and the hours enrolled.
    sample_terms AS (
        SELECT 1             AS sortval,
               'FALL_2015'   AS quarter,
               'Y'           AS award_111,
               NULL          AS award_222,
               'Y'           AS award_333,
               NULL          AS award_444,
               15            AS hours_enrolled
        FROM DUAL
        UNION ALL SELECT  2, 'SPRING_2016', 'Y',  NULL, 'Y',  NULL, 13 FROM DUAL
        UNION ALL SELECT  3, 'FALL_2016',   'Y',  NULL, 'Y',  'Y',  17 FROM DUAL
        UNION ALL SELECT  4, 'SPRING_2017', 'Y',  NULL, 'Y',  'Y',  15 FROM DUAL
        UNION ALL SELECT  5, 'FALL_2017',   'Y',  NULL, 'Y',  NULL, 17 FROM DUAL
        UNION ALL SELECT  6, 'SPRING_2018', 'Y',  NULL, 'Y',  NULL, 15 FROM DUAL
        UNION ALL SELECT  7, 'SUMMER_2018', NULL, NULL, NULL, NULL,  3 FROM DUAL
        UNION ALL SELECT  8, 'FALL_2018',   'Y',  'Y',  'Y',  NULL, 15 FROM DUAL
        UNION ALL SELECT  9, 'SPRING_2019', 'Y',  'Y',  'Y',  NULL, 17 FROM DUAL
        UNION ALL SELECT 10, 'SUMMER_2019', NULL, 'Y',  NULL, NULL,  1 FROM DUAL
        UNION ALL SELECT 11, 'FALL_2019',   NULL, NULL, 'Y',  NULL, 12 FROM DUAL
    ),
    -- Attach the (single) sample student to every term row.
    student_terms AS (
        SELECT st.*, '00001' AS student_id
        FROM sample_terms st
    ),
    -- Per-student totals of each award, repeated on every term row.
    -- COUNT(col) ignores NULLs, so cnt_xxx > 0 means "ever received".
    terms_with_totals AS (
        SELECT t.*,
               COUNT( award_111 ) OVER ( PARTITION BY student_id ) AS cnt_111,
               COUNT( award_222 ) OVER ( PARTITION BY student_id ) AS cnt_222,
               COUNT( award_333 ) OVER ( PARTITION BY student_id ) AS cnt_333,
               COUNT( award_444 ) OVER ( PARTITION BY student_id ) AS cnt_444
        FROM student_terms t
    )
-- A term counts toward an award the student ever received when the student
-- was enrolled for 12+ hours, or received that award in the term.
SELECT SUM( CASE WHEN cnt_111 > 0 AND ( award_111 = 'Y' OR hours_enrolled >= 12 ) THEN 1 ELSE 0 END ) AS award_111,
       SUM( CASE WHEN cnt_222 > 0 AND ( award_222 = 'Y' OR hours_enrolled >= 12 ) THEN 1 ELSE 0 END ) AS award_222,
       SUM( CASE WHEN cnt_333 > 0 AND ( award_333 = 'Y' OR hours_enrolled >= 12 ) THEN 1 ELSE 0 END ) AS award_333,
       SUM( CASE WHEN cnt_444 > 0 AND ( award_444 = 'Y' OR hours_enrolled >= 12 ) THEN 1 ELSE 0 END ) AS award_444
FROM terms_with_totals
GROUP BY student_id
ORDER BY student_id
This results in the following output
AWARD_111 AWARD_222 AWARD_333 AWARD_444
9 10 9 9
Here is the code that ultimately worked:
-- Per student and fund code, count the terms that qualify:
--   * every term with 12 or more enrolled hours, and
--   * terms with at least 1 enrolled hour in which that fund was awarded.
-- TERM  = every enrollment term of a student, repeated for each fund code the
--         student has ever been awarded.
-- AWARD = the (student, term, fund) combinations actually awarded.
-- Fixes over the posted version: the outer query referenced ENROLLMENT.* and
-- PIDM, which are not in scope (only TERM and AWARD are); the part-time branch
-- did not check whether the award was received that term; the ORDER BY inside
-- the inline view had no effect on the grouped result.
SELECT TERM.STUDENT_ID AS STUDENT_ID,
       TERM.FUND_CODE  AS FUND_CODE,
       SUM( CASE WHEN TERM.HRS_ENROLLED >= 12
                 THEN 1
                 -- AWARD.STUDENT_ID is NULL when the left join found no award
                 -- for this student/term/fund.
                 WHEN TERM.HRS_ENROLLED >= 1 AND AWARD.STUDENT_ID IS NOT NULL
                 THEN 1
                 ELSE 0
            END )      AS AWARD_COUNT
FROM ( SELECT ENROLLMENT_STUDENT_ID   AS STUDENT_ID,
              ENROLLMENT_TERM_CODE    AS TERM_CODE,
              ENROLLMENT_ENROLLED_HRS AS HRS_ENROLLED,
              FUND.FUND_CODE          AS FUND_CODE
       FROM ENROLLMENT
       INNER JOIN ( SELECT TERM_AWARD_STUDENT_ID AS STUDENT_ID,
                           TERM_AWARD_FUND_CODE  AS FUND_CODE
                    FROM TERM_AWARD
                    GROUP BY TERM_AWARD_STUDENT_ID, TERM_AWARD_FUND_CODE ) FUND
           ON ENROLLMENT_STUDENT_ID = FUND.STUDENT_ID ) TERM
-- DISTINCT guards against double counting if an award is recorded more than
-- once for the same student, term and fund.
LEFT JOIN ( SELECT DISTINCT
                   TERM_AWARD_STUDENT_ID AS STUDENT_ID,
                   TERM_AWARD_PERIOD     AS PERIOD,
                   TERM_AWARD_FUND_CODE  AS FUND_CODE
            FROM TERM_AWARD ) AWARD
    ON  TERM.STUDENT_ID = AWARD.STUDENT_ID
    AND TERM.TERM_CODE  = AWARD.PERIOD
    AND TERM.FUND_CODE  = AWARD.FUND_CODE
GROUP BY TERM.STUDENT_ID, TERM.FUND_CODE
ORDER BY TERM.STUDENT_ID, TERM.FUND_CODE

how to separate and sum 2 columns based on condition

I'm doing a select statement and I have a column I would like to separate into 2 columns based on their type, and then get the sum of the amounts grouped by an ID
I want all the gold and platinum types in one column, and all the silver and bronze in a 2nd column, then summed and grouped by the ID so it looks like this :
I tried doing a union like this:
-- Asker's attempt. Why it misbehaves: UNION ALL combines branches by column
-- POSITION, and the result column names come from the first SELECT only.
-- The aliases in the second branch are therefore ignored.
-- ("table" is a placeholder name; TABLE is a reserved word.)
SELECT
ID,
SUM(Amount) AS "Gold/Platinum",
0 AS "Bronze/Silver"
FROM
table
WHERE
Type IN ('gold', 'platinum')
GROUP BY
ID
UNION ALL
-- Second branch: SUM(Amount) is in position 2 and the literal 0 in position 3,
-- so the bronze/silver totals land in the "Gold/Platinum" column (as separate
-- rows per ID) and the "Bronze/Silver" column is 0 on every row.
SELECT
ID,
SUM(Amount) AS "Bronze/Silver",
0 AS "Gold/Platinum"
FROM
table
WHERE
Type IN ('bronze', 'silver')
GROUP BY
ID
The gold/platinum column will be correct, but I get nothing in the bronze/silver column
Use conditional aggregation:
-- One pass over the table: each SUM only adds the amounts whose Type belongs
-- to its bucket, so both totals appear side by side on a single row per id.
SELECT
    id,
    SUM(CASE WHEN Type IN ('gold', 'platinum') THEN amount ELSE 0 END) AS gold_platinum,
    SUM(CASE WHEN Type IN ('bronze', 'silver') THEN amount ELSE 0 END) AS bronze_silver
FROM t
GROUP BY id
ORDER BY id;
You can run this in SSMS:
-- Self-contained demo. A local temp table is used because
-- "DECLARE #data TABLE" is not valid T-SQL (table variables need an @ prefix).
IF OBJECT_ID(N'tempdb..#data') IS NOT NULL DROP TABLE #data;

CREATE TABLE #data ( [ID] INT, [Type] VARCHAR(10), [Amount] INT );

INSERT INTO #data ( [ID], [Type], [Amount] ) VALUES
  ( 1, 'gold', 100 )
, ( 1, 'gold', 50 )
, ( 1, 'bronze', 75 )
, ( 2, 'silver', 10 )
, ( 2, 'bronze', 20 )
, ( 3, 'gold', 35 )
, ( 4, 'silver', 20 )
, ( 4, 'platinum', 30 );

-- Conditional aggregation: ELSE 0 makes an ID with no rows in a bucket show 0
-- rather than NULL.
SELECT
  [ID]
, SUM( CASE WHEN [Type] IN ( 'gold', 'platinum' ) THEN [Amount] ELSE 0 END ) AS [Gold/Platinum]
, SUM( CASE WHEN [Type] IN ( 'bronze', 'silver' ) THEN [Amount] ELSE 0 END ) AS [Bronze/Silver]
FROM #data
GROUP BY [ID]
ORDER BY [ID];
Returns
+----+---------------+---------------+
| ID | Gold/Platinum | Bronze/Silver |
+----+---------------+---------------+
| 1 | 150 | 75 |
| 2 | 0 | 30 |
| 3 | 35 | 0 |
| 4 | 30 | 20 |
+----+---------------+---------------+

T-SQL How to "Flatten" top 3 rows into a single row

I've searched for an answer to this question and found questions similar to my own, however I do not have a "ColumnHeader" column to denote which field the record should go into. Ex:
TSQL Pivot without aggregate function
trying to flatten rows into columns
Fetching Columns of a multiple rows in one row
My problem is thus - I have data in this format (selected as a top 3 result from a product recommendation query):
------------------------------
CustID | StyleNo | Brand | ID
------------------------------
1 | ABC | BrandA| 1
------------------------------
1 | DEF | BrandB| 2
------------------------------
1 | GHI | BrandC| 3
------------------------------
2 | JKL | BrandA| 4
------------------------------
2 | MNO | BrandB| 5
------------------------------
2 | PQR | BrandD| 6
------------------------------
That I'd like to make look like this:
-----------------------------------------------------------------
CustID | StyleNo1| StyleNo2| StyleNo3 | Brand1 | Brand2 | Brand3
-----------------------------------------------------------------
1 | ABC | DEF | GHI | BrandA | BrandB | BrandC
-----------------------------------------------------------------
2 | JKL | MNO | PQR | BrandA | BrandB | BrandD
-----------------------------------------------------------------
In order for my program to simply read the row of recommendations for each customer.
What I have attempted is a PIVOT - however I have nothing to really aggregate upon. I've also attempted the Min(Case...When...Then...End) as outlined in the second linked question, but as stated I don't have reference to a "Header" column.
The ID column is completely inconsequential for the time being, but it may help to solve this problem. It is NOT needed in the end result.
I am currently using SQLServer 2012
With the window function Row_Number() and a conditional aggregation
-- Number each customer's rows (ordered by StyleNo, then Brand), keep the
-- first three, and spread them across columns with conditional aggregation.
SELECT
    numbered.CustID,
    MAX(CASE WHEN numbered.RN = 1 THEN numbered.StyleNo END) AS StyleNo1,
    MAX(CASE WHEN numbered.RN = 2 THEN numbered.StyleNo END) AS StyleNo2,
    MAX(CASE WHEN numbered.RN = 3 THEN numbered.StyleNo END) AS StyleNo3,
    MAX(CASE WHEN numbered.RN = 1 THEN numbered.Brand   END) AS Brand1,
    MAX(CASE WHEN numbered.RN = 2 THEN numbered.Brand   END) AS Brand2,
    MAX(CASE WHEN numbered.RN = 3 THEN numbered.Brand   END) AS Brand3
FROM (
    SELECT
        src.CustID,
        src.StyleNo,
        src.Brand,
        ROW_NUMBER() OVER (PARTITION BY src.CustID ORDER BY src.StyleNo, src.Brand) AS RN
    FROM YourTable AS src
) AS numbered
WHERE numbered.RN <= 3
GROUP BY numbered.CustID
Returns
What you are doing is called "pivoting" - for this you could use PIVOT. A better way IMHO is to use approach that Jeff Moden talks about in this article.
-- Cross-tab: number each customer's rows by ID, then pick the 1st/2nd/3rd row's
-- values into fixed columns. MAX() only collapses the group; each CASE matches
-- at most one row per customer.
WITH numbered AS
(
    SELECT
        src.CustID,
        src.StyleNo,
        src.Brand,
        ROW_NUMBER() OVER (PARTITION BY src.CustID ORDER BY src.ID) AS rn
    FROM #yourTable AS src
)
SELECT
    CustID,
    MAX(CASE WHEN rn = 1 THEN StyleNo END) AS StyleNo1,
    MAX(CASE WHEN rn = 2 THEN StyleNo END) AS StyleNo2,
    MAX(CASE WHEN rn = 3 THEN StyleNo END) AS StyleNo3,
    MAX(CASE WHEN rn = 1 THEN Brand   END) AS Brand1,
    MAX(CASE WHEN rn = 2 THEN Brand   END) AS Brand2,
    MAX(CASE WHEN rn = 3 THEN Brand   END) AS Brand3
FROM numbered
GROUP BY CustID;
Another approach is to use CTEs together with APPLY.
-- Sample data: three recommendation rows for each of two customers.
CREATE TABLE #UnFlattenedData
(
    CustID  TINYINT,
    StyleNo CHAR(3),
    Brand   CHAR(6),
    ID      TINYINT
);

INSERT INTO #UnFlattenedData ( CustID, StyleNo, Brand, ID )
VALUES
    ( 1, 'ABC', 'BrandA', 1 ),
    ( 1, 'DEF', 'BrandB', 2 ),
    ( 1, 'GHI', 'BrandC', 3 ),
    ( 2, 'JKL', 'BrandA', 4 ),
    ( 2, 'MNO', 'BrandB', 5 ),
    ( 2, 'PQR', 'BrandD', 6 );
-- Flatten each customer's first three rows (by ID) into a single row.
-- ranked: every row with its position R1 within the customer.
-- The outer query starts from the first row of each customer and uses
-- OUTER APPLY to fetch the second and third rows. OUTER (rather than CROSS)
-- APPLY keeps customers that have fewer than three rows, with NULLs in the
-- missing slots. The original self-joins back to #UnFlattenedData were
-- redundant: the ranked CTE already carries StyleNo and Brand.
WITH ranked
          AS ( SELECT   src.CustID ,
                        src.StyleNo ,
                        src.Brand ,
                        ROW_NUMBER() OVER ( PARTITION BY src.CustID ORDER BY src.ID ) AS R1
               FROM     #UnFlattenedData AS src
             )
    SELECT  u1.CustID ,
            u1.StyleNo AS StyleNo1 ,
            u2.StyleNo AS StyleNo2 ,
            u3.StyleNo AS StyleNo3 ,
            u1.Brand AS Brand1 ,
            u2.Brand AS Brand2 ,
            u3.Brand AS Brand3
    FROM    ranked AS u1
            OUTER APPLY ( SELECT    r.StyleNo ,
                                    r.Brand
                          FROM      ranked AS r
                          WHERE     r.CustID = u1.CustID
                                    AND r.R1 = 2
                        ) AS u2
            OUTER APPLY ( SELECT    r.StyleNo ,
                                    r.Brand
                          FROM      ranked AS r
                          WHERE     r.CustID = u1.CustID
                                    AND r.R1 = 3
                        ) AS u3
    WHERE   u1.R1 = 1;

Selecting records with maximum value in group

I have a transaction table with the following structure:
-- Every transaction together with the description of its status.
SELECT
    t.[GUID],
    t.[ID],
    ts.Description AS "Status",
    t.Payee,
    t.Amount,
    t.SequenceNumber
FROM [Transaction] AS t
INNER JOIN TransactionStatus AS ts
    ON ts.ID = t.StatusID
GUID | ID | Status | Payee | Amount | SequenceNumber
AF732CF5-E6C0-E411-B8F6-004056AB77C2 | 1 | Posted | Amy | 500.00 | 1
AF732CF5-E6C0-E411-B8F6-004056AB77C2 | 2 | Voided | Amy | 500.00 | 2
1F7D880C-E7C0-E411-B8F6-004056AB77C2 | 3 | Posted | Bob | 70.00 | 1
AF732CF5-E6C0-E411-B8F6-004056AB77C2 | 4 | Posted | Amy | 512.50 | 3
1F7D880C-E7C0-E411-B8F6-004056AB77C2 | 5 | Posted | Bob | 66.00 | 2
F2CC0B03-76C7-E411-A48D-004056AB787C | 6 | Pending | Carol | 240.00 | NULL
I'm trying to construct a query to group the records by GUID and select the single record with the largest SequenceNumber (if it isn't NULL):
GUID | ID | Status | Payee | Amount | SequenceNumber
AF732CF5-E6C0-E411-B8F6-004056AB77C2 | 4 | Posted | Amy | 512.50 | 3
1F7D880C-E7C0-E411-B8F6-004056AB77C2 | 5 | Posted | Bob | 66.00 | 2
F2CC0B03-76C7-E411-A48D-004056AB787C | 6 | Pending | Carol | 240.00 | NULL
I've tried adding this line:
where SequenceNumber = (select MAX(SequenceNumber) from [Transaction] t2 where t.[GUID] = t2.[GUID])
but that doesn't get me any transactions where the status is Pending (they don't have sequence numbers). How can I fix this query?
If it's SQL-Server you can use a CTE + ROW_NUMBER:
-- Number the rows of each GUID from highest to lowest SequenceNumber and keep
-- the first one. A GUID whose only row has a NULL SequenceNumber still gets
-- rn = 1, so it is not filtered out.
WITH ranked_txn AS
(
    SELECT
        t.[GUID],
        t.[ID],
        ts.Description AS "Status",
        t.Payee,
        t.Amount,
        t.SequenceNumber,
        ROW_NUMBER() OVER (PARTITION BY t.[GUID] ORDER BY t.SequenceNumber DESC) AS rn
    FROM [Transaction] AS t
    INNER JOIN TransactionStatus AS ts
        ON ts.ID = t.StatusID
)
SELECT
    [GUID],
    [ID],
    [Status],
    Payee,
    Amount,
    SequenceNumber
FROM ranked_txn
WHERE rn = 1
This will include the row where SequenceNumber is null. If you want all rows with the maximum SequenceNumber(in case of ties) use DENSE_RANK instead of ROW_NUMBER.
You can calculate the MAX(ID) and its related [GUID] in a subquery and JOIN to it in order to get the desired results:
Sample subquery:
-- Highest ID per GUID. TRANSACTION is a reserved keyword in T-SQL, so the
-- table name must be delimited with brackets.
SELECT  [GUID] ,
        MAX([ID]) AS MaxId
FROM    [Transaction]
GROUP BY [GUID]
Would produce:
GUID MaxId
1F7D880C-E7C0-E411-B8F6-004056AB77C2 5
AF732CF5-E6C0-E411-B8F6-004056AB77C2 4
F2CC0B03-76C7-E411-A48D-004056AB787C 6
Full Demo:
-- Demo: return, for each GUID, the row with the highest ID.
-- NOTE(review): this relies on ID increasing together with SequenceNumber
-- within a GUID, as it does in the sample data — confirm for real data.
CREATE TABLE #Transaction
    (
      [GUID] VARCHAR(36) ,
      [ID] INT ,
      [Status] VARCHAR(7) ,
      [Payee] VARCHAR(5) ,
      -- DECIMAL keeps the cents; an INT column would store 512.50 as 512.
      [Amount] DECIMAL(10, 2) ,
      -- Numeric, so ordering/MAX compare by value rather than as text.
      [SequenceNumber] INT
    );
INSERT  INTO #Transaction
        ( [GUID], [ID], [Status], [Payee], [Amount], [SequenceNumber] )
VALUES  ( 'AF732CF5-E6C0-E411-B8F6-004056AB77C2', 1, 'Posted', 'Amy', 500.00, 1 ),
        ( 'AF732CF5-E6C0-E411-B8F6-004056AB77C2', 2, 'Voided', 'Amy', 500.00, 2 ),
        ( '1F7D880C-E7C0-E411-B8F6-004056AB77C2', 3, 'Posted', 'Bob', 70.00, 1 ),
        ( 'AF732CF5-E6C0-E411-B8F6-004056AB77C2', 4, 'Posted', 'Amy', 512.50, 3 ),
        ( '1F7D880C-E7C0-E411-B8F6-004056AB77C2', 5, 'Posted', 'Bob', 66.00, 2 ),
        ( 'F2CC0B03-76C7-E411-A48D-004056AB787C', 6, 'Pending', 'Carol', 240.00, NULL );
-- Join each row to its GUID's highest ID; only the matching row survives.
SELECT  #Transaction.*
FROM    #Transaction
        INNER JOIN ( SELECT [GUID] ,
                            MAX([ID]) AS MaxId
                     FROM   #Transaction
                     GROUP BY [GUID]
                   ) t ON t.[GUID] = #Transaction.[GUID]
                          AND t.MaxId = #Transaction.[ID]
ORDER BY #Transaction.[ID]
Try this way to get maximum SequenceNumber
-- Yields MAX(SequenceNumber) only when no row in the group has a NULL
-- SequenceNumber; otherwise NULL. A boolean expression such as
-- "SequenceNumber IS NULL" cannot be passed to MAX() in SQL Server, so the
-- NULL test is turned into 1/0 with CASE first.
CASE WHEN MAX(CASE WHEN SequenceNumber IS NULL THEN 1 ELSE 0 END) = 0 THEN MAX(SequenceNumber) ELSE NULL END AS SequenceNumber
I don't know if SQL Server has windowing functions, so you may be able to do this more cleanly, but here's a vanilla SQL solution:
-- Anti-join: keep a transaction only if no other transaction with the same
-- GUID has a greater SequenceNumber. A row whose SequenceNumber is NULL never
-- satisfies the ">" comparison, so it finds no "higher" row and is kept.
select highest.[GUID],
       highest.[ID],
       ts.Description "Status",
       highest.Payee,
       highest.Amount,
       highest.SequenceNumber
from [Transaction] highest
  join TransactionStatus ts
    -- The status lookup is keyed by the transaction's StatusID, not by its own ID.
    on ts.ID = highest.StatusID
  left join [Transaction] higher
    on higher.[GUID] = highest.[GUID]
    and higher.SequenceNumber > highest.SequenceNumber
where higher.[GUID] is null;
Something like this:
-- Rank each GUID's rows by SequenceNumber (highest first) and keep the top one.
-- Fixes: the window specification must be written OVER (PARTITION BY ... ORDER BY ...),
-- and SequenceNumber is now returned as the requested output requires.
SELECT * FROM
(
  select
    t.[GUID], t.[ID], ts.Description "Status", t.Payee, t.Amount, t.SequenceNumber,
    ROW_NUMBER() OVER (PARTITION BY t.[GUID]
                       ORDER BY t.SequenceNumber DESC) AS rownum
  from [Transaction] t
  inner join TransactionStatus ts on t.StatusID = ts.ID
) vals where vals.rownum = 1

How to pivot rows to columns with known max number of columns

I have a table structured as such:
Pricing_Group
GroupID | QTY
TestGroup1 | 1
TestGroup1 | 2
TestGroup1 | 4
TestGroup1 | 8
TestGroup1 | 22
TestGroup2 | 2
TestGroup3 | 2
TestGroup3 | 5
What I'm looking for is a result like this:
Pricing_Group
GroupID | QTY1 | QTY2 | QTY3 | QTY4 | QTY5
TestGroup1 | 1 | 2 | 4 | 8 | 22
TestGroup2 | 2 | NULL | NULL | NULL | NULL
TestGroup3 | 2 | 5 | NULL | NULL | NULL
Note that there can only ever be a maximum of 5 different quantities for a given GroupID, there's just no knowing what those 5 quantities will be.
This seems like an application of PIVOT, but I can't quite wrap my head around the syntax that would be required for an application like this.
Thanks for taking the time to look into this!
Perfect case for pivot and you don't need a CTE:
-- Demo data in a local temp table ("Declare #T Table" is not valid T-SQL;
-- table variables need an @ prefix).
IF OBJECT_ID(N'tempdb..#T') IS NOT NULL DROP TABLE #T;

Create Table #T (GroupID varchar(10) not null,
                 QTY int);

Insert Into #T (GroupID, QTY)
Values ('TestGroup1', 1),
       ('TestGroup1', 2),
       ('TestGroup1', 4),
       ('TestGroup1', 8),
       ('TestGroup1', 22),
       ('TestGroup2', 2),
       ('TestGroup3', 2),
       ('TestGroup3', 5);

-- Label each group's quantities QTY1..QTYn in ascending order, then pivot the
-- labels into columns. Each (GroupID, label) pair holds one value, so Min()
-- only serves to satisfy PIVOT's aggregate requirement.
Select GroupID, [QTY1], [QTY2], [QTY3], [QTY4], [QTY5]
From (Select GroupID, QTY,
             -- Explicit length avoids relying on the default varchar size in CAST.
             RowID = 'QTY' + Cast(ROW_NUMBER() Over (Partition By GroupID Order By QTY) as varchar(10))
      from #T) As Pvt
Pivot (Min(QTY)
       For RowID In ([QTY1], [QTY2], [QTY3], [QTY4], [QTY5])
      ) As Pvt2
You can pivot on a generated rank;
-- Rank the quantities inside each group, then pivot the rank values 1..5
-- into columns; ranks without a row come out as NULL.
;WITH ranked_qty AS (
    SELECT
        RANK() OVER (PARTITION BY src.GroupID ORDER BY src.GroupID, src.QTY) AS rank,
        src.GroupID,
        src.QTY
    FROM THE_TABLE AS src
)
SELECT
    pvt.*
FROM ranked_qty
PIVOT (
    MAX(QTY)
    FOR rank IN ([1], [2], [3], [4], [5])
) AS pvt
>>
GroupID 1 2 3 4 5
----------------------------------------
TestGroup1 1 2 4 8 22
TestGroup2 2 NULL NULL NULL NULL
TestGroup3 2 5 NULL NULL NULL
You can also use case statement to perform the pivot:
-- Demo data in a local temp table ("declare #t table" is not valid T-SQL;
-- table variables need an @ prefix).
if object_id(N'tempdb..#t') is not null drop table #t;

create table #t ( GroupID varchar(25), QTY int);

insert into #t (GroupID, QTY)
values  ('TestGroup1', 1),
        ('TestGroup1', 2),
        ('TestGroup1', 4),
        ('TestGroup1', 8),
        ('TestGroup1', 22),
        ('TestGroup2', 2),
        ('TestGroup3', 2),
        ('TestGroup3', 5);

-- Number each group's quantities in ascending order, then place the r-th
-- quantity in column QTY<r>. Each numbered slot matches at most one row, so
-- sum() just collapses the group; QTYX adds up anything beyond the fifth row.
;with cte_Stage (r, GroupId, QTY)
as  (   select  row_number() over(partition by GroupId order by QTY ),
                GroupId,
                QTY
        from    #t
    )
select  GroupId,
        [QTY1] = sum(case when r = 1 then QTY else null end),
        [QTY2] = sum(case when r = 2 then QTY else null end),
        [QTY3] = sum(case when r = 3 then QTY else null end),
        [QTY4] = sum(case when r = 4 then QTY else null end),
        [QTY5] = sum(case when r = 5 then QTY else null end),
        [QTYX] = sum(case when r > 5 then QTY else null end)
from    cte_Stage
group
by      GroupId;