Selecting records with maximum value in group - sql

I have a transaction table with the following structure:
-- Every transaction together with the description of its status.
SELECT
    t.[GUID],
    t.[ID],
    ts.Description AS "Status",
    t.Payee,
    t.Amount,
    t.SequenceNumber
FROM [Transaction] AS t
INNER JOIN TransactionStatus AS ts
    ON ts.ID = t.StatusID
GUID | ID | Status | Payee | Amount | SequenceNumber
AF732CF5-E6C0-E411-B8F6-004056AB77C2 | 1 | Posted | Amy | 500.00 | 1
AF732CF5-E6C0-E411-B8F6-004056AB77C2 | 2 | Voided | Amy | 500.00 | 2
1F7D880C-E7C0-E411-B8F6-004056AB77C2 | 3 | Posted | Bob | 70.00 | 1
AF732CF5-E6C0-E411-B8F6-004056AB77C2 | 4 | Posted | Amy | 512.50 | 3
1F7D880C-E7C0-E411-B8F6-004056AB77C2 | 5 | Posted | Bob | 66.00 | 2
F2CC0B03-76C7-E411-A48D-004056AB787C | 6 | Pending | Carol | 240.00 | NULL
I'm trying to construct a query to group the records by GUID and select the single record with the largest SequenceNumber (if it isn't NULL):
GUID | ID | Status | Payee | Amount | SequenceNumber
AF732CF5-E6C0-E411-B8F6-004056AB77C2 | 4 | Posted | Amy | 512.50 | 3
1F7D880C-E7C0-E411-B8F6-004056AB77C2 | 5 | Posted | Bob | 66.00 | 2
F2CC0B03-76C7-E411-A48D-004056AB787C | 6 | Pending | Carol | 240.00 | NULL
I've tried adding this line:
-- Correlated filter: keep a row only when its SequenceNumber equals the largest SequenceNumber of the same GUID.
-- A NULL SequenceNumber never compares equal to anything, so rows without a sequence number are filtered out.
where SequenceNumber = (select MAX(SequenceNumber) from [Transaction] t2 where t.[GUID] = t2.[GUID])
but that doesn't get me any transactions where the status is Pending (they don't have sequence numbers). How can I fix this query?

If it's SQL-Server you can use a CTE + ROW_NUMBER:
-- Rank each GUID's rows from highest to lowest SequenceNumber, then keep the top-ranked row of every GUID.
WITH ranked AS
(
    SELECT
        t.[GUID],
        t.[ID],
        ts.Description AS "Status",
        t.Payee,
        t.Amount,
        t.SequenceNumber,
        ROW_NUMBER() OVER (PARTITION BY t.[GUID] ORDER BY t.SequenceNumber DESC) AS rn
    FROM [Transaction] AS t
    INNER JOIN TransactionStatus AS ts
        ON ts.ID = t.StatusID
)
SELECT GUID, ID, Status, Payee, Amount, SequenceNumber
FROM ranked
WHERE rn = 1
This will include the row where SequenceNumber is NULL. If you want all rows with the maximum SequenceNumber (in case of ties), use DENSE_RANK instead of ROW_NUMBER.

You can calculate the MAX(ID) and its related [GUID] in a subquery and JOIN to it in order to get the desired results:
Sample subquery:
-- Highest ID recorded for each GUID.
-- TRANSACTION is a reserved keyword in T-SQL, so the table name has to be bracketed.
SELECT [GUID],
       MAX([ID]) AS MaxId
FROM [Transaction]
GROUP BY [GUID]
Would produce:
GUID MaxId
1F7D880C-E7C0-E411-B8F6-004056AB77C2 5
AF732CF5-E6C0-E411-B8F6-004056AB77C2 4
F2CC0B03-76C7-E411-A48D-004056AB787C 6
Full Demo:
-- Scratch table holding the sample transactions used by the demo.
CREATE TABLE #Transaction
(
    [GUID]           VARCHAR(36),
    [ID]             INT,
    [Status]         VARCHAR(7),
    [Payee]          VARCHAR(5),
    [Amount]         DECIMAL(10, 2),  -- exact decimal: an INT column would truncate 512.50 to 512
    [SequenceNumber] INT              -- numeric so that 10 sorts after 9; NULL when the row has no sequence
);
-- Load the six sample transactions from the question, one row per line.
INSERT INTO #Transaction
    ([GUID], [ID], [Status], [Payee], [Amount], [SequenceNumber])
VALUES
    ('AF732CF5-E6C0-E411-B8F6-004056AB77C2', 1, 'Posted',  'Amy',   500.00, '1'),
    ('AF732CF5-E6C0-E411-B8F6-004056AB77C2', 2, 'Voided',  'Amy',   500.00, '2'),
    ('1F7D880C-E7C0-E411-B8F6-004056AB77C2', 3, 'Posted',  'Bob',   70.00,  '1'),
    ('AF732CF5-E6C0-E411-B8F6-004056AB77C2', 4, 'Posted',  'Amy',   512.50, '3'),
    ('1F7D880C-E7C0-E411-B8F6-004056AB77C2', 5, 'Posted',  'Bob',   66.00,  '2'),
    ('F2CC0B03-76C7-E411-A48D-004056AB787C', 6, 'Pending', 'Carol', 240.00, NULL);
-- Return the full row that carries the highest ID within each GUID.
-- NOTE(review): rows are picked by ID, not by SequenceNumber; this matches the requested
-- result only while ID grows together with SequenceNumber — confirm for real data.
SELECT tx.*
FROM #Transaction AS tx
INNER JOIN
(
    SELECT [GUID],
           MAX(ID) AS MaxId
    FROM #Transaction
    GROUP BY [GUID]
) AS latest
    ON latest.[GUID] = tx.[GUID]
   AND latest.MaxId = tx.ID
ORDER BY tx.ID

Try this way to get maximum SequenceNumber
-- Largest SequenceNumber of the group, or NULL as soon as any row of the group has no SequenceNumber.
-- The NULL test is wrapped in a CASE because T-SQL cannot aggregate a bare predicate such as MAX(x IS NULL).
CASE WHEN MAX(CASE WHEN SequenceNumber IS NULL THEN 1 ELSE 0 END) = 0 THEN MAX(SequenceNumber) ELSE NULL END AS SequenceNumber

I don't know if SQL Server has windowing functions, so you may be able to do this more cleanly, but here's a vanilla SQL solution:
-- Anti-join: a transaction is the "highest" of its GUID when no other row of the same GUID
-- has a larger SequenceNumber. A row whose SequenceNumber is NULL never finds a "higher"
-- match (the comparison is unknown), so it is returned as well.
select highest.[GUID],
       highest.[ID],
       ts.Description "Status",
       highest.Payee,
       highest.Amount,
       highest.SequenceNumber
from [Transaction] highest
inner join TransactionStatus ts
    on ts.ID = highest.StatusID            -- look the status up by the transaction's StatusID, not by its own ID
left join [Transaction] higher
    on higher.[GUID] = highest.[GUID]
   and higher.SequenceNumber > highest.SequenceNumber
where higher.[GUID] is null;               -- no higher row exists for this GUID

Something like this:
-- Number each GUID's rows from highest to lowest SequenceNumber and keep the first one.
SELECT
    vals.[GUID],
    vals.[ID],
    vals.[Status],
    vals.Payee,
    vals.Amount,
    vals.SequenceNumber
FROM
(
    SELECT
        t.[GUID], t.[ID], ts.Description AS "Status", t.Payee, t.Amount, t.SequenceNumber,
        -- the whole window specification belongs inside the parentheses that follow OVER
        ROW_NUMBER() OVER (PARTITION BY t.[GUID]
                           ORDER BY t.SequenceNumber DESC) AS rownum
    FROM [Transaction] AS t
    INNER JOIN TransactionStatus AS ts ON t.StatusID = ts.ID
) AS vals
WHERE vals.rownum = 1

Related

TSQL - GROUP BY on continuous rows only

I am working on a SQL Server 2017 (v14.0).
I have a table like this:
Key | State | from | until |
----+----------+------------+------------+
100 | open | 01.01.2021 | 01.01.2021 |
100 | open | 02.01.2021 | 02.01.2021 |
100 | closed | 03.01.2021 | 13.01.2021 |
100 | open | 14.01.2021 | 20.01.2021 |
100 | open | 20.01.2021 | 30.01.2021 |
I want to group it by Key and State, but only for continuous rows.
So my expected result would be something like:
Key | State | from | until |
----+----------+------------+------------+
100 | open | 01.01.2021 | 02.01.2021 |
100 | closed | 03.01.2021 | 13.01.2021 |
100 | open | 14.01.2021 | 30.01.2021 |
Any idea on how to do this? I have a strong feeling that this should be possible with the help of ROW_NUMBER somehow, but I have not been able to figure it out yet...
(In this example data some weird group by calendarweek or something similar might be possible, but this is not my intention)
It is a Gaps and Islands problem. One solution is this:
WITH cte1 AS (
    -- chg is 1 on the first row of a key and whenever the state differs from the
    -- previous row (ordered by [from]); otherwise 0
    SELECT *,
           CASE WHEN LAG([state]) OVER (PARTITION BY [key] ORDER BY [from]) = [state] THEN 0 ELSE 1 END AS chg
    FROM t
), cte2 AS (
    -- running total of the change flags: all rows of one uninterrupted run share the same grp.
    -- The frame is spelled out as ROWS; the default RANGE frame treats rows with equal [from] as peers.
    SELECT *,
           SUM(chg) OVER (PARTITION BY [key] ORDER BY [from] ROWS UNBOUNDED PRECEDING) AS grp
    FROM cte1
)
-- One output row per run; every column is named so the result can feed a view or SELECT ... INTO.
SELECT [key],
       grp,
       MIN([state]) AS [state],
       MIN([from])  AS [from],
       MAX([until]) AS [until]
FROM cte2
GROUP BY [key], grp
ORDER BY [key], grp
One possibility is shown below:
-- Sample table: one row per key/state period.
CREATE TABLE myTable_o1
(
    [key]   INT,
    [state] VARCHAR(100),
    [From]  DATE,
    [Until] DATE
)
-- Sample rows from the question, dates written as ISO yyyy-mm-dd.
insert into myTable_o1 ([key], [state], [From], [Until])
values
    (100, 'open',   '2021-01-01', '2021-01-01'),
    (100, 'open',   '2021-01-02', '2021-01-02'),
    (100, 'closed', '2021-01-03', '2021-01-13'),
    (100, 'open',   '2021-01-14', '2021-01-20'),  -- the question's row starts on 14.01.2021, not on the 4th
    (100, 'open',   '2021-01-20', '2021-01-30')
-- Collapses the sample rows into periods by splitting them into two buckets and
-- keeping the 1st and 3rd row of each bucket.
-- NOTE(review): the bucket count (2) and the kept positions (1, 3) are hard-coded, and every
-- window below orders by [key] only — all sample rows share key 100, so the order inside a
-- bucket is not fixed by the query. Verify before using on other data.
SELECT
[key]
,[state]
,[From]
,[until]
FROM
(
Select
[key]
, [state]
, [From]
-- position of the row inside its bucket
, row_number() over (partition by tiles order by [key]) row_num
-- Until of the next row in the same bucket; the row's own Until when there is no next row
, ISNULL(Lead(Until) over (partition by tiles order by [key]) , Until) [until]
FROM
(
SELECT * ,
-- split all rows into two buckets
Ntile(2) over ( order by [Key]) as [tiles]
from myTable_o1
) AS A
) AS B WHERE B.row_num in (1,3)

SQL return second max date for each id, date and channel

I have the following table:
id channel_id date
1 | 1 | 2017-01-10
1 | 2 | 2018-02-05
1 | 1 | 2019-03-07
1 | 2 | 2020-03-15
2 | 1 | 2018-01-17
2 | 1 | 2019-07-20
2 | 1 | 2020-01-10
For each row, I want to return the most recent earlier date for the same id, in two separate columns, one per channel_id. That is, one column holds the latest previous date among rows with channel_id equal to 1, and another holds the latest previous date among rows with channel_id equal to 2. What I want to get is shown below:
id channel_id date prev_date_channel_id1 prev_date_channel_id2
1 | 1 | 2017-01-10 | NULL | NULL |
1 | 2 | 2018-02-05 | 2017-01-10 | NULL |
1 | 1 | 2019-03-07 | 2017-01-10 | 2018-02-05 |
1 | 2 | 2020-03-15 | 2019-03-07 | 2018-02-05 |
2 | 1 | 2018-01-17 | NULL | NULL |
2 | 1 | 2019-07-20 | 2018-01-17 | NULL |
2 | 1 | 2020-01-10 | 2019-07-20 | NULL |
I wrote the query below, which returns what I want but takes too much time. I'd appreciate any optimization suggestions!
-- For each (id, date): the latest earlier date on channel 1 and the latest earlier date on channel 2.
-- Each LEFT JOIN matches a row with ALL earlier rows of the same id on that channel, and the two
-- joins multiply each other before GROUP BY collapses them again.
-- NOTE(review): "table" is a placeholder name here; TABLE is a reserved word and would need quoting.
SELECT
a.id,
a.date,
MAX(c.date) AS prev_date_channel_id1,
MAX(d.date) AS prev_date_channel_id2
FROM
table a
LEFT JOIN
-- every earlier channel-1 row of the same id
table c ON a.id=c.id AND a.date>c.date AND c.channel_id=1
LEFT JOIN
-- every earlier channel-2 row of the same id
table d ON a.id=d.id AND a.date>d.date AND d.channel_id=2
GROUP BY a.id, a.date
Use lag() for the previous date and a cumulative conditional max for the channel 2 date:
select t.*,
       -- date of the immediately preceding row of the same id, whatever its channel
       lag([date]) over (partition by id order by [date]) as prev_date,
       -- latest channel-1 date among the rows before the current one
       -- (lag() alone cannot give this: the preceding row may belong to the other channel)
       max(case when channel_id = 1 then [date] end) over
           (partition by id
            order by [date]
            rows between unbounded preceding and 1 preceding
           ) as prev_date_channel1,
       -- latest channel-2 date among the rows before the current one
       max(case when channel_id = 2 then [date] end) over
           (partition by id
            order by [date]
            rows between unbounded preceding and 1 preceding
           ) as prev_date_channel2
from t;
I think there's an error in your "expected output" for the value of prev_date_channel_id1 on the last row (it should be 2019-07-20).
In any case, with appropriate indexing an outer apply top 1 construct might serve you better:
-- Demo table with a clustered primary key on (id, channel_id, [date]).
CREATE TABLE t
(
    id         INT,
    channel_id INT,
    [date]     DATE,
    CONSTRAINT pk_t PRIMARY KEY CLUSTERED (id, channel_id, [date])
);
-- Sample rows from the question.
INSERT INTO t (id, channel_id, [date])
VALUES
    (1, 1, '2017-01-10'),
    (1, 2, '2018-02-05'),
    (1, 1, '2019-03-07'),
    (1, 2, '2020-03-15'),
    (2, 1, '2018-01-17'),
    (2, 1, '2019-07-20'),
    (2, 1, '2020-01-10');
-- For every row, look up the latest earlier date of the same id on channel 1 and on channel 2.
SELECT
    cur.id,
    cur.channel_id,
    cur.[date],
    ch1.dt AS prev_date_channel_id1,
    ch2.dt AS prev_date_channel_id2
FROM t AS cur
OUTER APPLY
(
    -- newest channel-1 row strictly before the current date
    SELECT TOP (1) p.[date]
    FROM t AS p
    WHERE p.id = cur.id
      AND p.channel_id = 1
      AND p.[date] < cur.[date]
    ORDER BY p.[date] DESC
) AS ch1 (dt)
OUTER APPLY
(
    -- newest channel-2 row strictly before the current date
    SELECT TOP (1) p.[date]
    FROM t AS p
    WHERE p.id = cur.id
      AND p.channel_id = 2
      AND p.[date] < cur.[date]
    ORDER BY p.[date] DESC
) AS ch2 (dt)
ORDER BY cur.id, cur.[date];
Or possibly faster still, especially with the key changed to constraint pk_t primary key clustered (id, [date], [channel_id]):
-- One correlated pass per row: aggregate all earlier rows of the same id and
-- pick the latest date per channel with conditional MAX.
SELECT
    cur.id,
    cur.channel_id,
    cur.[date],
    prev.c1 AS prev_date_channel_id1,
    prev.c2 AS prev_date_channel_id2
FROM t AS cur
OUTER APPLY
(
    SELECT
        MAX(CASE WHEN p.channel_id = 1 THEN p.[date] END) AS c1,
        MAX(CASE WHEN p.channel_id = 2 THEN p.[date] END) AS c2
    FROM t AS p
    WHERE p.id = cur.id
      AND p.[date] < cur.[date]
) AS prev
Assuming you have an index on those three columns, you can use subqueries:
-- Each output column is a scalar subquery returning the latest earlier date
-- of the same id on one specific channel.
SELECT
    cur.[id],
    cur.[channel_id],
    cur.[date],
    (
        SELECT MAX(p.[date])
        FROM [t] AS p
        WHERE p.[id] = cur.[id]
          AND p.[channel_id] = 1
          AND p.[date] < cur.[date]
    ) AS [prev_date_channel_id1],
    (
        SELECT MAX(p.[date])
        FROM [t] AS p
        WHERE p.[id] = cur.[id]
          AND p.[channel_id] = 2
          AND p.[date] < cur.[date]
    ) AS [prev_date_channel_id2]
FROM [t] AS cur;

TSQL - Parent Child (1 to zero/many) Grouping/Aggregation

Code (Sample Data Staging):
-- Employee master: one row per employee, EId generated automatically.
-- "#" names are temporary tables and are created with CREATE TABLE
-- (DECLARE ... TABLE is only valid for "@" table variables).
CREATE TABLE #Emp
(
    [EId] INT IDENTITY(1, 1)
  , [FN]  NVARCHAR(50)
  , [LN]  NVARCHAR(50)
) ;
-- Phone contacts: zero, one or many rows per employee.
CREATE TABLE #EmpPhCont
(
    [EId]       INT
  , [PhType]    VARCHAR(10)
  , [PhNum]     VARCHAR(16)
  , [PhExt]     VARCHAR(10)
  , [IsMain]    BIT
  , [CreatedOn] DATETIME
) ;
-- Six employees; EId is assigned by the identity column in insertion order.
INSERT INTO #Emp ([FN], [LN])
VALUES
    (N'Emp1', N'Emp1'),
    (N'Emp2', N'Emp2'),
    (N'Emp3', N'Emp3'),
    (N'Emp4', N'Emp4'),
    (N'Emp5', N'Emp5'),
    (N'Emp6', N'Emp5');
-- Phone rows for employees 1 to 5; employee 6 deliberately has none.
INSERT INTO #EmpPhCont ([EId], [PhType], [PhNum], [PhExt], [IsMain], [CreatedOn])
VALUES
    (1, 'Home',   '111111111', NULL,  0, '2020-01-01 00:00:01'),
    (1, 'Mobile', '222222222', NULL,  1, '2020-01-01 00:00:02'),
    (1, 'Work',   '333333333', NULL,  0, '2020-01-01 00:00:03'),
    (2, 'Work',   '444444444', '567', 1, '2020-01-01 00:00:04'),
    (2, 'Mobile', '555555555', NULL,  0, '2020-01-01 00:00:05'),
    (2, 'Mobile', '454545454', NULL,  0, '2020-01-01 00:00:06'),
    (3, 'Home',   '777777777', NULL,  0, '2020-01-01 00:00:07'),
    (3, 'Mobile', '888888888', NULL,  1, '2020-01-01 00:00:08'),
    (3, 'Mobile', '12121212',  NULL,  0, '2020-01-01 00:00:09'),
    (4, 'Work',   '101010101', '111', 1, '2020-01-01 00:00:10'),
    (4, 'Work',   '101010102', '232', 0, '2020-01-01 00:00:11'),
    (5, 'Work',   '545454545', '456', 0, '2020-01-01 00:00:10'),
    (5, 'Work',   '456456456', NULL,  1, '2020-01-01 00:00:11');
Description:
#Emp is the sample Employee table (Unique Employee records).
EId = Employee Id
FN = First Name
LN = Last Name
#EmpPhCont is the sample Employee Phone Contact table (Each Emp from #Emp table can have zero, one, or multiple phone numbers here - unique by Emp/Type).
PhType = Phone Type (home, mobile, work, and etc)
PhNum = Phone Number
PhExt = Phone Extension (mostly available for "Work" PhType)
IsMain = Is it main contact number. Each employee with a phone num will have exactly 1 record marked as IsMain.
CreatedOn = Date the record was created
Goal:
To output 1 record per employee with the following Columns
EId | HomeNum | MobileNum | WorkNum | WorkNumExt | MainPhType
Rules:
Return all EId for all records from #Emp, whether they have a #EmpPhCont record or not.
For each emp that has #EmpPhCont record avail, return the newest created PhNum and PhExt for the corresponding PhType, UNLESS an older record for the same Emp/PhType is marked as IsMain = 1 (For any emp, for whichever PhType, if IsMain = 1, always return that PhNum and PhExt value).
Expected Output:
EId HomeNum MobileNum WorkNum WorkNumExt MainPhType
1 111111111 222222222 333333333 NULL Mobile
2 NULL 454545454 444444444 567 Work
3 777777777 888888888 NULL NULL Mobile
4 NULL NULL 101010101 111 Work
5 NULL NULL 456456456 NULL Work
6 NULL NULL NULL NULL NULL
My unsuccessful try:
-- Pivot the phone numbers into one column per phone type, one row per employee.
-- MAX picks the largest number text per type; it does not look at IsMain or CreatedOn.
SELECT
    emp.[EId],
    MAX(CASE WHEN ph.[PhType] = 'Home'   THEN ph.[PhNum] END) AS [HomePhNum],
    MAX(CASE WHEN ph.[PhType] = 'Mobile' THEN ph.[PhNum] END) AS [MobilePhNum],
    MAX(CASE WHEN ph.[PhType] = 'Work'   THEN ph.[PhNum] END) AS [WorkPhNum]
FROM #Emp AS emp
LEFT JOIN #EmpPhCont AS ph
    ON ph.[EId] = emp.[EId]
GROUP BY emp.[EId] ;
Use ROW_NUMBER() window function inside a CTE to get the rows from #EmpPhCont that you want returned and join this CTE to #Emp:
-- picked: per employee and phone type, rows ranked so that the main number comes first
-- and, among the rest, the newest one.
WITH picked AS (
    SELECT
        p.*,
        ROW_NUMBER() OVER (PARTITION BY p.[EId], p.[PhType]
                           ORDER BY p.[IsMain] DESC, p.[CreatedOn] DESC) AS rn
    FROM #EmpPhCont AS p
)
-- Pivot the top-ranked row of each phone type into columns; employees without phones keep NULLs.
SELECT
    emp.[EId],
    MAX(CASE WHEN picked.[PhType] = 'Home'   THEN picked.[PhNum]  END) AS HomeNum,
    MAX(CASE WHEN picked.[PhType] = 'Mobile' THEN picked.[PhNum]  END) AS MobileNum,
    MAX(CASE WHEN picked.[PhType] = 'Work'   THEN picked.[PhNum]  END) AS WorkNum,
    MAX(CASE WHEN picked.[PhType] = 'Work'   THEN picked.[PhExt]  END) AS WorkNumExt,
    MAX(CASE WHEN picked.[IsMain] = 1        THEN picked.[PhType] END) AS MainPhType
FROM #Emp AS emp
LEFT JOIN picked
    ON picked.[EId] = emp.[EId]
   AND picked.rn = 1
GROUP BY emp.[EId]
See the demo.
Results:
> EId | HomeNum | MobileNum | WorkNum | WorkNumExt | MainPhType
> --: | :-------- | :-------- | :-------- | :--------- | :---------
> 1 | 111111111 | 222222222 | 333333333 | null | Mobile
> 2 | null | 454545454 | 444444444 | 567 | Work
> 3 | 777777777 | 888888888 | null | null | Mobile
> 4 | null | null | 101010101 | 111 | Work
> 5 | null | null | 456456456 | null | Work
> 6 | null | null | null | null | null
I would implement that using APPLY:
-- One OUTER APPLY per phone type: each picks the main number of that type if there is one,
-- otherwise the newest, and reports the type name when the picked row is the main number.
SELECT
    e.EId,
    h.HomeNum,
    m.MobileNum,
    w.WorkNum,
    w.WorkNumExt,
    COALESCE(h.HomeMain, m.MobileMain, w.WorkMain) AS MainPhType
FROM Emp AS e
OUTER APPLY
(
    SELECT TOP (1)
        ph.[PhNum] AS HomeNum,
        CASE WHEN ph.[IsMain] = 1 THEN 'Home' END AS HomeMain
    FROM EmpPhCont AS ph
    WHERE ph.[EId] = e.[EId]
      AND ph.[PhType] = 'Home'
    ORDER BY ph.[IsMain] DESC, ph.[CreatedOn] DESC
) AS h
OUTER APPLY
(
    SELECT TOP (1)
        ph.[PhNum] AS MobileNum,
        CASE WHEN ph.[IsMain] = 1 THEN 'Mobile' END AS MobileMain
    FROM EmpPhCont AS ph
    WHERE ph.[EId] = e.[EId]
      AND ph.[PhType] = 'Mobile'
    ORDER BY ph.[IsMain] DESC, ph.[CreatedOn] DESC
) AS m
OUTER APPLY
(
    SELECT TOP (1)
        ph.[PhNum] AS WorkNum,
        ph.[PhExt] AS WorkNumExt,
        CASE WHEN ph.[IsMain] = 1 THEN 'Work' END AS WorkMain
    FROM EmpPhCont AS ph
    WHERE ph.[EId] = e.[EId]
      AND ph.[PhType] = 'Work'
    ORDER BY ph.[IsMain] DESC, ph.[CreatedOn] DESC
) AS w
See SQL Fiddle for demo.
Output
EId | HomeNum | MobileNum | WorkNum | WorkNumExt | MainPhType
1 | 111111111 | 222222222 | 333333333 | (null) | Mobile
2 | (null) | 454545454 | 444444444 | 567 | Work
3 | 777777777 | 888888888 | (null) | (null) | Mobile
4 | (null) | (null) | 101010101 | 111 | Work
5 | (null) | (null) | 456456456 | (null) | Work
6 | (null) | (null) | (null) | (null) | (null)
Note: This solution will only be viable for large data sets if the EmpPhCont table has an index on [EId], [PhType], otherwise it'll be too slow.
row_number(), outer apply and aggregation:
-- Per employee: rank that employee's phone rows within each phone type (main first, then newest),
-- keep the top row of every type and pivot those rows into columns.
SELECT e.*, ph.*
FROM #Emp AS e
OUTER APPLY
(
    SELECT
        MAX(CASE WHEN pick.[PhType] = 'Home'   THEN pick.[PhNum] END) AS [HomePhNum],
        MAX(CASE WHEN pick.[PhType] = 'Mobile' THEN pick.[PhNum] END) AS [MobilePhNum],
        MAX(CASE WHEN pick.[PhType] = 'Work'   THEN pick.[PhNum] END) AS [WorkPhNum],
        MAX(CASE WHEN pick.[PhType] = 'Work'   THEN pick.[PhExt] END) AS [WorkNumExt],
        -- if more than one type were flagged as main, MAX returns the alphabetically last type
        MAX(CASE WHEN pick.IsMain = 1 THEN pick.[PhType] END) AS MainPhType
    FROM
    (
        SELECT
            p.*,
            ROW_NUMBER() OVER (PARTITION BY p.PhType
                               ORDER BY p.IsMain DESC, p.CreatedOn DESC) AS rownum
        FROM #EmpPhCont AS p
        WHERE p.EId = e.EId
    ) AS pick
    WHERE pick.rownum = 1
) AS ph;

Selecting most recent record for each group with a sum function

This is a sample table
ID | Serial | Quantity | Date_Created
-------------------------------------
1 | AS1GD | 10 | 2014-12-25 8:00:00 AM
1 | GO9A4 | 5 | 2014-12-28 9:04:32 AM
2 | JF8WS | 15 | 2014-12-29 9:23:43 AM
2 | JFLE0 | 15 | 2015-01-04 10:53:12 AM
2 | S8A4A | 10 | 2015-01-05 9:12:46 AM
3 | FXOE3 | 20 | 2015-01-03 9:31:52 AM
3 | LSOR9 | 22 | 2015-01-06 12:00:44 PM
My expected result
ID | Serial | Total_Quantity | Last_DateCreated
-------------------------------------------------
1 | GO9A4 | 15 | 2014-12-28 9:04:32 AM
2 | S8A4A | 40 | 2015-01-05 9:12:46 AM
3 | LSOR9 | 42 | 2015-01-06 12:00:44 PM
Here's a query I tried but it's not returning the sum but only the quantity of the record
-- Attempt: latest row per ID together with a quantity total.
-- The GROUP BY lists ID, date_created and serial, so sum(quantity) is computed per
-- (ID, date_created, serial) group rather than per ID.
WITH total AS
( SELECT [ID], [date_created], [serial], sum(quantity) as qty,
-- rownum 1 marks the most recently created group of each ID
ROW_NUMBER() OVER (PARTITION BY [ID] ORDER BY [date_created] DESC) AS rownum
FROM [table]
group by ID, date_created, serial
)
SELECT ID, Serial, qty, date_created
FROM total
WHERE rownum = 1
Since you are grouping by more than the ID but want the SUM() at the ID level, you can add OVER() to your SUM():
-- Keep the original grouping, then add the per-group sums up across the whole ID with a
-- windowed SUM; rownum 1 marks the most recently created group of each ID.
;WITH total AS
(
    SELECT
        [ID],
        [date_created],
        [serial],
        SUM(SUM(quantity)) OVER (PARTITION BY [ID]) AS qty,
        ROW_NUMBER() OVER (PARTITION BY [ID] ORDER BY [date_created] DESC) AS rownum
    FROM [table]
    GROUP BY ID, date_created, serial
)
SELECT ID, Serial, qty, date_created
FROM total
WHERE rownum = 1
The above creates an oddity in which you need two SUM() in order to use the OVER(), but you can ditch the GROUP BY altogether in your example:
-- No GROUP BY: every row carries the total quantity of its ID, and rownum 1 marks
-- the most recently created row of each ID.
;WITH total AS
(
    SELECT
        [ID],
        [date_created],
        [serial],
        SUM(quantity) OVER (PARTITION BY [ID]) AS qty,
        ROW_NUMBER() OVER (PARTITION BY [ID] ORDER BY [date_created] DESC) AS rownum
    FROM Table1
)
SELECT ID, Serial, qty, date_created
FROM total
WHERE rownum = 1
Demo: SQL Fiddle
This will work as long as you don't have two records with the same ID created in the same second:
-- RecentSUM: per ID, the most recent creation time and the total quantity.
-- The source column is [date_created]; DateCreated is only the alias of the aggregated value.
WITH RecentSUM AS
(
    SELECT ID, MAX([date_created]) AS DateCreated, SUM(Quantity) AS TotalQuantity
    FROM [table]
    GROUP BY ID
)
-- Join back on (ID, latest creation time) to fetch the serial of the most recent row.
-- Two rows of one ID sharing that timestamp would both be returned.
SELECT t.ID, t.Serial, r.TotalQuantity, r.DateCreated
FROM RecentSUM AS r
INNER JOIN [table] AS t
    ON t.ID = r.ID
   AND t.[date_created] = r.DateCreated;

Aggregating Several Columns in SQL

Suppose I have a table that looks like the following
id | location | dateHired | dateRehired | dateTerminated
1 | 1 | 10/1/2011 | NULL | 12/1/2011
2 | 1 | 10/3/2011 | 11/1/2011 | 12/31/2011
3 | 5 | 10/5/2011 | NULL | NULL
4 | 5 | 10/5/2011 | NULL | NULL
5 | 7 | 11/5/2011 | NULL | 12/1/2011
6 | 10 | 11/2/2011 | NULL | NULL
and I wanted to condense that into a summary table such that:
location | date | hires | rehires | terms
1 | 10/1/2011 | 1 | 0 | 0
1 | 10/3/2011 | 1 | 0 | 0
1 | 11/1/2011 | 0 | 1 | 0
1 | 12/1/2011 | 0 | 0 | 1
1 | 12/31/2011 | 0 | 0 | 1
5 | 10/5/2011 | 2 | 0 | 0
etc.
-- what would that SQL look like? I was thinking it would be something to the effect of:
-- Draft of the summary query. Each SUM counts the rows of a location whose respective
-- date column is filled in; the "?" placeholders mark the date column still to be decided.
SELECT
e.location
, -- ?
,SUM(CASE WHEN e.dateHired IS NOT NULL THEN 1 ELSE 0 END) AS Hires
,SUM(CASE WHEN e.dateRehired IS NOT NULL THEN 1 ELSE 0 END) As Rehires
,SUM(CASE WHEN e.dateTerminated IS NOT NULL THEN 1 ELSE 0 END) As Terms
FROM
Employment e
GROUP BY
e.Location
,--?
But I'm not real keen if that's entirely correct or not?
EDIT - This is for SQL 2008 R2.
Also,
INNER JOIN on the date columns assumes that there are values for all three categories, which is false; which is the original problem I was trying to solve. I was thinking something like COALESCE, but that doesn't really make sense either.
I am sure there is probably an easier, more elegant way to solve this. However, this is the simplest, quickest that I can think of this late that works.
-- Summary of hires, rehires and terminations per (location, date).
-- Step 1 seeds one row for every date on which something happened at a location;
-- steps 2-4 fill in the three counters; rows without an event of a given kind keep 0.
CREATE TABLE #Temp
(
    Location INT,
    Date DATETIME,
    HireCount INT NOT NULL DEFAULT 0,
    RehireCount INT NOT NULL DEFAULT 0,
    DateTerminatedCount INT NOT NULL DEFAULT 0
);

-- Step 1: every distinct (location, date) pair. UNION already removes duplicates, and
-- NULL dates are skipped so that no (location, NULL) row is created.
INSERT INTO #Temp (Location, Date)
SELECT Location, DateHired FROM Employment WHERE DateHired IS NOT NULL
UNION
SELECT Location, DateRehired FROM Employment WHERE DateRehired IS NOT NULL
UNION
SELECT Location, DateTerminated FROM Employment WHERE DateTerminated IS NOT NULL;

-- Step 2: hires per location and date.
UPDATE #Temp
SET HireCount = Hired.HireCount
FROM #Temp
JOIN
(
    SELECT Location, DateHired AS Date, COUNT(*) AS HireCount
    FROM Employment
    GROUP BY Location, DateHired
) AS Hired
ON Hired.Location = #Temp.Location AND Hired.Date = #Temp.Date;

-- Step 3: rehires per location and date.
UPDATE #Temp
SET RehireCount = Rehire.RehireCount
FROM #Temp
JOIN
(
    SELECT Location, DateRehired AS Date, COUNT(*) AS RehireCount
    FROM Employment
    GROUP BY Location, DateRehired
) AS Rehire
ON Rehire.Location = #Temp.Location AND Rehire.Date = #Temp.Date;

-- Step 4: terminations per location and date.
UPDATE #Temp
SET DateTerminatedCount = Terminated.DateTerminatedCount
FROM #Temp
JOIN
(
    SELECT Location, DateTerminated AS Date, COUNT(*) AS DateTerminatedCount
    FROM Employment
    GROUP BY Location, DateTerminated
) AS Terminated
ON Terminated.Location = #Temp.Location AND Terminated.Date = #Temp.Date;

SELECT Location, Date, HireCount, RehireCount, DateTerminatedCount
FROM #Temp
ORDER BY Location, Date;
How about something like:
-- dates: every distinct (location, date) on which a hire, rehire or termination took place.
with dates as (
    select distinct location, d
    from (
        select location, dateHired as [d]
        from tbl
        where dateHired is not null
        union all
        select location, dateRehired
        from tbl
        where dateRehired is not null
        union all
        select location, dateTerminated
        from tbl
        where dateTerminated is not null
    ) as all_dates   -- T-SQL requires an alias on every derived table
)
-- For each such date, count the matching rows of each kind; count(*) yields 0 when none match.
select location, [d],
    (
        select count(*)
        from tbl
        where location = dates.location
          and dateHired = dates.[d]
    ) as hires,
    (
        select count(*)
        from tbl
        where location = dates.location
          and dateRehired = dates.[d]
    ) as rehires,
    (
        select count(*)
        from tbl
        where location = dates.location
          and dateTerminated = dates.[d]
    ) as terms
from dates
I don't have a SQL server handy, or I'd test it out.
-- Per-location, per-date counts of hires, rehires and terminations.
-- FULL OUTER JOINs keep dates on which only one or two kinds of event occurred
-- (INNER JOINs would drop them); missing counts are reported as 0.
SELECT
    COALESCE(H.location, R.location, T.location) AS location,
    COALESCE(H.[date], R.[date], T.[date])       AS [date],
    COALESCE(H.hires, 0)                         AS hires,
    COALESCE(R.rehires, 0)                       AS rehires,
    COALESCE(T.terminated, 0)                    AS terminated
FROM
    -- GROUP BY names the source column: a select-list alias is not visible there
    (SELECT location, dateHired AS [date], COUNT(1) AS hires
     FROM mytable
     WHERE dateHired IS NOT NULL
     GROUP BY location, dateHired) AS H
FULL OUTER JOIN
    (SELECT location, dateRehired AS [date], COUNT(1) AS rehires
     FROM mytable
     WHERE dateRehired IS NOT NULL
     GROUP BY location, dateRehired) AS R
    ON R.location = H.location AND R.[date] = H.[date]
FULL OUTER JOIN
    (SELECT location, dateTerminated AS [date], COUNT(1) AS terminated
     FROM mytable
     WHERE dateTerminated IS NOT NULL
     GROUP BY location, dateTerminated) AS T
    -- match against whichever of the first two sides is present
    ON T.location = COALESCE(H.location, R.location)
   AND T.[date]   = COALESCE(H.[date], R.[date])