T-SQL pivot with count on pivoted results - sql

I have the following data that I would like to pivot and get a count based on the pivoted results.
DECLARE #tempMusicSchoolStudent TABLE
(school VARCHAR(50),
studentname VARCHAR(50),
instrumentname VARCHAR(255),
expertise INT)
INSERT INTO #tempMusicSchoolStudent(school, studentname, instrumentname, expertise)
SELECT 'Foster','Matt','Guitar','10'
UNION
SELECT 'Foster','Jimmy','Guitar','5'
UNION
SELECT 'Foster','Jimmy','Keyboard','8'
UNION
SELECT 'Foster','Ryan','Keyboard','9'
UNION
SELECT 'Midlothean','Kyle','Keyboard','10'
UNION
SELECT 'Midlothean','Mary','Guitar','4'
UNION
SELECT 'Midlothean','Mary','Keyboard','7'
Raw data:
I'd like the results to look like the data below....
I got this data using the sql query below. The problem with this query is that I have a dynamic amount of instruments (I've only shown 2 in this example for simplicity sake). I'd like to use pivot because it will be cleaner dynamic sql. Otherwise I would have to dynamically left join the table to itself for each instrument.
SELECT
t.school, t.instrumentname, t.expertise,
t1.instrumentname, t1.expertise,
COUNT(DISTINCT t.studentname) [DistinctStudentCount]
FROM
#tempMusicSchoolStudent t
LEFT JOIN
#tempMusicSchoolStudent t1 ON t1.school = t.school
AND t1.studentname = t.studentname
AND t.instrumentname <> t1.instrumentname
GROUP BY
t.school, t.instrumentname, t.expertise, t1.instrumentname, t1.expertise
ORDER BY
t.school, t.instrumentname, t.expertise, t1.instrumentname, t1.expertise
If anyone has any ideas on how I can do this in a cleaner way than dynamically left joining the table to itself it would be much appreciated. Thanks.

You just need conditional aggregation:
SELECT t.school, t.instrumentname, t.expertise, t.instrumentname,
COUNT(DISTINCT t.studentname) as DistinctStudentCount
FROM #tempMusicSchoolStudent t
GROUP BY t.school, t.instrumentname, t.expertise, t.instrumentname;
You have rows with NULL values. It is entirely unclear where those come from. Your question is focused on some notion of "pivoting" where it seems that you only need aggregation. But it doesn't explain where the NULL rows comes from.

You can try to make it dynamic for multipe instruments. Refer
;with cte
as
(
SELECT * from
(SELECT * FROM #tempMusicSchoolStudent t) x
PIVOT
(MAX(expertise) FOR instrumentname in ([Guitar], [Keyboard])) y
)
SELECT school, studentname,
expertise = case when Guitar is not null then 'Guitar' else NULL end,
Guitar AS instrumentname,
expertise = case when Keyboard is not null then 'Keyboard' else NULL end,
Keyboard AS instrumentname,
count(distinct studentname) AS [DistinctStudentCount]
from cte
group by school,studentname, Guitar, Keyboard
OUTPUT:
Foster Jimmy Guitar 5 Keyboard 8 1
Foster Matt Guitar 10 NULL NULL 1
Foster Ryan NULL NULL Keyboard 9 1
Midlothean Kyle NULL NULL Keyboard 10 1
Midlothean Mary Guitar 4 Keyboard 7 1

Here's the solution I was looking for, I had to use unpivot + pivot.
The real thing that I was struggling with was selecting multiple values for the column that is being pivoted, instead of the max value.
So in this case I wanted multiple "expertise" numbers under a given "instrument expertise" column. Not just the maximum expertise for that instrument.
The first key to understanding the solution is that the pivot statement is doing an implicit group by on the columns being selected. So in order to achieve multiple values under your pivoted column you have to keep the integrity of the column you are grouping on by including some type of dense_rank/rank/row_number. This basically represents changes in the value of the column you are pivoting on and is then included in the implicit group by the pivot is doing, which results in getting multiple values in the pivoted column, not just the max.
So in the code below the "expertisenum" column is keeping the integrity of the expertise data.
DECLARE #tempMusicSchoolStudent TABLE
(school VARCHAR(50),
studentname VARCHAR(50),
instrumentname VARCHAR(255),
expertise INT)
INSERT INTO #tempMusicSchoolStudent(school, studentname, instrumentname, expertise)
SELECT 'Foster','Matt','Guitar','10'
UNION
SELECT 'Foster','Jimmy','Guitar','5'
UNION
SELECT 'Foster','Jimmy','Keyboard','8'
UNION
SELECT 'Foster','Ryan','Keyboard','9'
UNION
SELECT 'Midlothean','Kyle','Keyboard','10'
UNION
SELECT 'Midlothean','Mary','Guitar','4'
UNION
SELECT 'Midlothean','Mary','Keyboard','7'
SELECT school, [Guitar expertise], [Keyboard expertise], COUNT(*) [Count]
FROM
(
SELECT school,[expertiseNum],
CASE WHEN [Columns]='expertise' THEN instrumentname + ' expertise'
END [Columns1], [Values] AS [Values1]
FROM
(
SELECT school, studentname, instrumentname, DENSE_RANK() OVER(PARTITION BY school,instrumentname ORDER BY expertise) AS [expertiseNum],
CONVERT(VARCHAR(255),expertise) AS [expertise]
FROM #tempMusicSchoolStudent
) x
UNPIVOT (
[Values] FOR [Columns] IN ([expertise])
) unpvt
) p
PIVOT (
MAX([Values1]) FOR [Columns1] IN ([Guitar expertise], [Keyboard expertise])
) pvt
GROUP BY school,[Guitar expertise], [Keyboard expertise]

Related

2 sql queries 2 different results using UNION

I have 2 queries that start with same tables but filter different columns. Both queries are unioned together so I can get a single count of people without duplication.
If I run the queries with the union commented out I have the same number of rows in each 1,953. When I run with the union I get 1,816 in one and 1,922 in the other.
My data is just an account # like 123456 in the first column and a 1/0 in the second column. Help me understand how this can happen if I am starting with the same number of rows.
Here is one of the queries
select distinct acct#,
case
when (lastFilledDate is not null and lastFilledDate<>'00/00/00') or
([Last Filled DC] is not null and [Last Filled DC]<>'00/00/00') or
(vivitrol is not null and vivitrol <>'00/00/00') or
(sublocade is not null and sublocade <>'00/00/00') or
(naltrexone is not null and naltrexone <>'00/00/00') then 1
else 0 end as result
from
(
select Acct#, DOB, [COE Contact Note], [COE-INTAKA Doc], [COE-MOM
Doc], lastFilledDate, [Last Filled DC],vivitrol,sublocade,naltrexone,
ROW_NUMBER() over (partition by Acct# order by [COE-INTAKA Doc] desc)
as apptRows
from tblAppBSCImportDashCOE2279 as main
where (([COE-MOM Doc]='Yes' and [COE Contact Note] is not null) or
[COE-MOM Doc]='No') and Appt is not null
) as sub
where apptRows=1
union
select distinct acctNo,
case
when
providerMAT='The Wright Center' and [COE-MOM Doc] is not null then
1
else 0
end as result
from
(
select acctNo, [COE-MOM Doc], MAT, providerMAT,
ROW_NUMBER() over (partition by acctNo order by COEBNMOM, [COE-MOM Doc]
desc) as apptRows
from tblAppBSCImportDashCOEHM2544 as main
where [COE-MOM Doc] is not null or COEBNMOM is not null
) as sub
where apptRows=1
results look like
acct# result
123456 1
234567 0
There is one possibility. The records you selected may result in duplicate records WITHIN each select statement. Let me try to illustrate with an example. (you can input the following query into your session to follow along)
IF OBJECT_ID('TEMPDB..#TEMP1') IS NOT NULL
DROP TABLE #TEMP1
IF OBJECT_ID('TEMPDB..#TEMP2') IS NOT NULL
DROP TABLE #TEMP2
CREATE TABLE #TEMP1(
id INT
,account INT
,amount INT
,yes_no INT
)
INSERT INTO #TEMP1 (id,account,amount,yes_no)
VALUES(1,123456,5,0)
,(2,123456,10,0)
,(3,123456,20,0)
CREATE TABLE #TEMP2(
id INT
,account INT
,amount INT
,yes_no INT
)
INSERT INTO #TEMP2 (id,account,amount,yes_no)
VALUES(4,123456,5,0)
,(5,123456,10,0)
,(6,123456,20,0)
SELECT *
FROM #TEMP1
SELECT *
FROM #TEMP2
Output of this is 2 tables with distinct records:
Now suppose I write queries that select account and the 'yes_no' column:
SELECT account,yes_no
FROM #TEMP1
SELECT account,yes_no
FROM #TEMP2
You can see that now all of the records are the same values within each select statement. So what do you think happens when I union these queries together?
SELECT account,yes_no
FROM #TEMP1
UNION
SELECT account,yes_no
FROM #TEMP2
UNION will output the distinct values of the ENTIRE OUTPUT, which also applies within each query. This is an extreme example of what I think you are experiencing. You need to include some sort of ID for each query such that it can be distinguished from other records within the query, like;
SELECT id,account,yes_no
FROM #TEMP1
UNION
SELECT id,account,yes_no
FROM #TEMP2

Get distinct individual column values (not distinct pairs) from two tables in single query

I have two tables like the following. One is for sport talents of some people and second for arts talents. One may not have a sport talent to list and same applies for art talent.
CREATE TABLE SPORT_TALENT(name varchar(10), TALENT varchar(10));
CREATE TABLE ART_TALENT(name varchar(10), TALENT varchar(10));
INSERT INTO SPORT_TALENT(name, TALENT) VALUES
('Steve', 'Footbal')
,('Steve', 'Golf')
,('Bob' , 'Golf')
,('Mary' , 'Tennnis');
INSERT INTO ART_TALENT(name, TALENT) VALUES
('Steve', 'Dancer')
, ('Steve', 'Singer')
, ('Bob' , 'Dancer')
, ('Bob' , 'Singer')
, ('John' , 'Dancer');
Now I want to list down sport talent and art talent of one person. I would like to avoid duplication. But I don't mind if there is a "null" in any output. I tried the following
select distinct sport_talent.talent as s_talent,art_talent.talent as a_talent
from sport_talent
JOIN art_talent on sport_talent.name=art_talent.name
where (sport_talent.name='Steve' or art_talent.name='Steve');
s_talent | a_talent
----------+----------
Footbal | Dancer
Golf | Singer
Footbal | Singer
Golf | Dancer
I would like to avoid redundancy and need something like the following (distinct values of sport talents + distinct values of art talents).
s_talent | a_talent
----------+----------
Footbal | Dancer
Golf | Singer
As mentioned in subject, I am not looking for distinct combinations. But at the same time, it's OK if there are some records with "null" value in one column. I am relatively new to SQL.
Try:
SELECT s_talent, a_talent
FROM (
SELECT distinct on (talent) talent as s_talent,
dense_rank() over (order by talent) as x
FROM SPORT_TALENT
WHERE name='Steve'
) x
FULL OUTER JOIN (
SELECT distinct on (talent) talent as a_talent,
dense_rank() over (order by talent) as x
FROM ART_TALENT
WHERE name='Steve'
) y
ON x.x = y.x
Demo: http://sqlfiddle.com/#!15/66e04/3
There are no duplicates in your query. Each of the four records in your query return is unique. This result may not be what you want, but seems like its problem is not the duplicate.
Postgres 9.4
... introduces unnest() with multiple arguments. Does exactly what you want, and should be fast, too. Per documentation:
The special table function UNNEST may be called with any number of
array parameters, and it returns a corresponding number of columns, as
if UNNEST (Section 9.18) had been called on each parameter separately
and combined using the ROWS FROM construct.
About ROWS FROM:
Compare result of two table functions using one column from each
SELECT *
FROM unnest(
ARRAY(SELECT DISTINCT talent FROM sport_talent WHERE name = 'Steve')
, ARRAY(SELECT DISTINCT talent FROM art_talent WHERE name = 'Steve')
) AS t(s_talent, a_talent);
Postgres 9.3 or older
SELECT s_talent, a_talent
FROM (
SELECT talent AS s_talent, row_number() OVER () AS rn
FROM sport_talent
WHERE name = 'Steve'
GROUP BY 1
) s
FULL JOIN (
SELECT talent AS a_talent, row_number() OVER () AS rn
FROM art_talent
WHERE name = 'Steve'
GROUP BY 1
) a USING (rn);
Similar previous answers with more explanation:
What type of JOIN to use
Sort columns independently, such that all nulls are last per column
This is similar to what #kordirko posted, but uses GROUP BY to get distinct talents, which is evaluated before window functions. So we only need a bare row_number() and not the more expensive dense_rank().
About the sequence of events in a SELECT query:
Best way to get result count before LIMIT was applied
SQL Fiddle.

PostgreSQL 9.3: Pivot table query

I want to show the pivot table(crosstab) for the given below table.
Table: Employee
CREATE TABLE Employee
(
Employee_Number varchar(10),
Employee_Role varchar(50),
Group_Name varchar(10)
);
Insertion:
INSERT INTO Employee VALUES('EMP101','C# Developer','Group_1'),
('EMP102','ASP Developer','Group_1'),
('EMP103','SQL Developer','Group_2'),
('EMP104','PLSQL Developer','Group_2'),
('EMP101','Java Developer',''),
('EMP102','Web Developer','');
Now I want to show the pivot table for the above data as shown below:
Expected Result:
Employee_Number TotalRoles TotalGroups Available Others Group_1 Group_2
---------------------------------------------------------------------------------------------------
EMP101 2 2 1 1 1 0
EMP102 2 2 1 1 1 0
EMP103 1 2 1 0 0 1
EMP104 1 2 1 0 0 1
Explanation: I want to show the Employee_Number, the TotalRoles which each employee has,
the TotalGroups which are present to all employees, the Available shows the employee available
in how many groups, the Others have to show the employee is available in other's also for which
the group_name have not assigned and finally the Group_Names must be shown in the pivot format.
SELECT * FROM crosstab(
$$SELECT grp.*, e.group_name
, CASE WHEN e.employee_number IS NULL THEN 0 ELSE 1 END AS val
FROM (
SELECT employee_number
, count(employee_role)::int AS total_roles
, (SELECT count(DISTINCT group_name)::int
FROM employee
WHERE group_name <> '') AS total_groups
, count(group_name <> '' OR NULL)::int AS available
, count(group_name = '' OR NULL)::int AS others
FROM employee
GROUP BY 1
) grp
LEFT JOIN employee e ON e.employee_number = grp.employee_number
AND e.group_name <> ''
ORDER BY grp.employee_number, e.group_name$$
,$$VALUES ('Group_1'::text), ('Group_2')$$
) AS ct (employee_number text
, total_roles int
, total_groups int
, available int
, others int
, "Group_1" int
, "Group_2" int);
SQL Fiddle demonstrating the base query, but not the crosstab step, which is not installed on sqlfiddle.com
Basics for crosstab:
PostgreSQL Crosstab Query
Special in this crosstab: all the "extra" columns. Those columns are placed in the middle, after the "row name" but before "category" and "value":
Pivot on Multiple Columns using Tablefunc
Once again, if you have a dynamic set of groups, you need to build this statement dynamically and execute it in a second call:
Selecting multiple max() values using a single SQL statement
You can use the crosstab function for this.
First of all you need to add the tablefunc extension if you haven't already:
CREATE EXTENSION tablefunc;
The crosstab functions require you to pass it a query returning the data you need to pivot, then a list of the columns in the output. (In other ways "tell me the input and the output format you want"). The sort order is important!
In your case, the input query is quite complicated - I think you need to do a load of separate queries, then UNION ALL them to get the desired data. I'm not entirely sure how you calculate the values "TotalGroups" and "Available", but you can modify the below in the relevant place to get what you need.
SELECT * FROM crosstab(
'SELECT employee_number, attribute, value::integer AS value FROM (with allemployees AS (SELECT distinct employee_number FROM employee) -- use a CTE to get distinct employees
SELECT employee_number,''attr_0'' AS attribute,COUNT(distinct employee_role) AS value FROM employee GROUP BY employee_number -- Roles by employee
UNION ALL
SELECT employee_number,''attr_1'' AS attribute,value from allemployees, (select count (distinct group_name) as value from employee where group_name <> '''') a
UNION ALL
SELECT employee_number,''attr_2'' AS attribute, COUNT(distinct group_name) AS value FROM employee where group_name <> '''' GROUP BY employee_number -- Available, do not know how this is calculate
UNION ALL
SELECT a.employee_number, ''attr_3'' AS attribute,coalesce(value,0) AS value FROM allemployees a LEFT JOIN -- other groups. Use a LEFT JOIN to avoid nulls in the output
(SELECT employee_number,COUNT(*) AS value FROM employee WHERE group_name ='''' GROUP BY employee_number) b on a.employee_number = b.employee_number
UNION ALL
SELECT a.employee_number, ''attr_4'' AS attribute,coalesce(value,0) AS value FROM allemployees a LEFT JOIN -- group 1
(SELECT employee_number,COUNT(*) AS value FROM employee WHERE group_name =''Group_1'' GROUP BY employee_number) b on a.employee_number = b.employee_number
UNION ALL
SELECT a.employee_number, ''attr_5'' AS attribute,coalesce(value,0) AS value FROM allemployees a LEFT JOIN -- group 2
(SELECT employee_number,COUNT(*) AS value FROM employee WHERE group_name =''Group_2'' GROUP BY employee_number) b on a.employee_number = b.employee_number) a order by 1,2')
AS ct(employee_number varchar,"TotalRoles" integer,"TotalGroups" integer,"Available" integer, "Others" integer,"Group_1" integer, "Group_2" integer)

SQL Left Join first match only

I have a query against a large number of big tables (rows and columns) with a number of joins, however one of tables has some duplicate rows of data causing issues for my query. Since this is a read only realtime feed from another department I can't fix that data, however I am trying to prevent issues in my query from it.
Given that, I need to add this crap data as a left join to my good query. The data set looks like:
IDNo FirstName LastName ...
-------------------------------------------
uqx bob smith
abc john willis
ABC john willis
aBc john willis
WTF jeff bridges
sss bill doe
ere sally abby
wtf jeff bridges
...
(about 2 dozen columns, and 100K rows)
My first instinct was to perform a distinct gave me about 80K rows:
SELECT DISTINCT P.IDNo
FROM people P
But when I try the following, I get all the rows back:
SELECT DISTINCT P.*
FROM people P
OR
SELECT
DISTINCT(P.IDNo) AS IDNoUnq
,P.FirstName
,P.LastName
...etc.
FROM people P
I then thought I would do a FIRST() aggregate function on all the columns, however that feels wrong too. Syntactically am I doing something wrong here?
Update:
Just wanted to note: These records are duplicates based on a non-key / non-indexed field of ID listed above. The ID is a text field which although has the same value, it is a different case than the other data causing the issue.
distinct is not a function. It always operates on all columns of the select list.
Your problem is a typical "greatest N per group" problem which can easily be solved using a window function:
select ...
from (
select IDNo,
FirstName,
LastName,
....,
row_number() over (partition by lower(idno) order by firstname) as rn
from people
) t
where rn = 1;
Using the order by clause you can select which of the duplicates you want to pick.
The above can be used in a left join, see below:
select ...
from x
left join (
select IDNo,
FirstName,
LastName,
....,
row_number() over (partition by lower(idno) order by firstname) as rn
from people
) p on p.idno = x.idno and p.rn = 1
where ...
Add an identity column (PeopleID) and then use a correlated subquery to return the first value for each value.
SELECT *
FROM People p
WHERE PeopleID = (
SELECT MIN(PeopleID)
FROM People
WHERE IDNo = p.IDNo
)
After careful consideration this dillema has a few different solutions:
Aggregate Everything
Use an aggregate on each column to get the biggest or smallest field value. This is what I am doing since it takes 2 partially filled out records and "merges" the data.
http://sqlfiddle.com/#!3/59cde/1
SELECT
UPPER(IDNo) AS user_id
, MAX(FirstName) AS name_first
, MAX(LastName) AS name_last
, MAX(entry) AS row_num
FROM people P
GROUP BY
IDNo
Get First (or Last record)
http://sqlfiddle.com/#!3/59cde/23
-- ------------------------------------------------------
-- Notes
-- entry: Auto-Number primary key some sort of unique PK is required for this method
-- IDNo: Should be primary key in feed, but is not, we are making an upper case version
-- This gets the first entry to get last entry, change MIN() to MAX()
-- ------------------------------------------------------
SELECT
PC.user_id
,PData.FirstName
,PData.LastName
,PData.entry
FROM (
SELECT
P2.user_id
,MIN(P2.entry) AS rownum
FROM (
SELECT
UPPER(P.IDNo) AS user_id
, P.entry
FROM people P
) AS P2
GROUP BY
P2.user_id
) AS PC
LEFT JOIN people PData
ON PData.entry = PC.rownum
ORDER BY
PData.entry
Use Cross Apply or Outer Apply, this way you can limit the amount of data to be joined from the table with the duplicates to the first hit.
Select
x.*,
c.*
from
x
Cross Apply
(
Select
Top (1)
IDNo,
FirstName,
LastName,
....,
from
people As p
where
p.idno = x.idno
Order By
p.idno //unnecessary if you don't need a specific match based on order
) As c
Cross Apply behaves like an inner join, Outer Apply like a left join
SQL Server CROSS APPLY and OUTER APPLY
Turns out I was doing it wrong, I needed to perform a nested select first of just the important columns, and do a distinct select off that to prevent trash columns of 'unique' data from corrupting my good data. The following appears to have resolved the issue... but I will try on the full dataset later.
SELECT DISTINCT P2.*
FROM (
SELECT
IDNo
, FirstName
, LastName
FROM people P
) P2
Here is some play data as requested: http://sqlfiddle.com/#!3/050e0d/3
CREATE TABLE people
(
[entry] int
, [IDNo] varchar(3)
, [FirstName] varchar(5)
, [LastName] varchar(7)
);
INSERT INTO people
(entry,[IDNo], [FirstName], [LastName])
VALUES
(1,'uqx', 'bob', 'smith'),
(2,'abc', 'john', 'willis'),
(3,'ABC', 'john', 'willis'),
(4,'aBc', 'john', 'willis'),
(5,'WTF', 'jeff', 'bridges'),
(6,'Sss', 'bill', 'doe'),
(7,'sSs', 'bill', 'doe'),
(8,'ssS', 'bill', 'doe'),
(9,'ere', 'sally', 'abby'),
(10,'wtf', 'jeff', 'bridges')
;
Try this
SELECT *
FROM people P
where P.IDNo in (SELECT DISTINCT IDNo
FROM people)
Depending on the nature of the duplicate rows, it looks like all you want is to have case-sensitivity on those columns. Setting the collation on these columns should be what you're after:
SELECT DISTINCT p.IDNO COLLATE SQL_Latin1_General_CP1_CI_AS, p.FirstName COLLATE SQL_Latin1_General_CP1_CI_AS, p.LastName COLLATE SQL_Latin1_General_CP1_CI_AS
FROM people P
http://msdn.microsoft.com/en-us/library/ms184391.aspx

More efficient way of doing multiple joins to the same table and a "case when" in the select

At my organization clients can be enrolled in multiple programs at one time. I have a table with a list of all of the programs a client has been enrolled as unique rows in and the dates they were enrolled in that program.
Using an External join I can take any client name and a date from a table (say a table of tests that the clients have completed) and have it return all of the programs that client was in on that particular date. If a client was in multiple programs on that date it duplicates the data from that table for each program they were in on that date.
The problem I have is that I am looking for it to only return one program as their "Primary Program" for each client and date even if they were in multiple programs on that date. I have created a hierarchy for which program should be selected as their primary program and returned.
For Example:
1.)Inpatient
2.)Outpatient Clinical
3.)Outpatient Vocational
4.)Outpatient Recreational
So if a client was enrolled in Outpatient Clinical, Outpatient Vocational, Outpatient Recreational at the same time on that date it would only return "Outpatient Clinical" as the program.
My way of thinking for doing this would be to join to the table with the previous programs multiple times like this:
FROM dbo.TestTable as TestTable
LEFT OUTER JOIN dbo.PreviousPrograms as PreviousPrograms1
ON TestTable.date = PreviousPrograms1.date AND PreviousPrograms1.type = 'Inpatient'
LEFT OUTER JOIN dbo.PreviousPrograms as PreviousPrograms2
ON TestTable.date = PreviousPrograms2.date AND PreviousPrograms2.type = 'Outpatient Clinical'
LEFT OUTER JOIN dbo.PreviousPrograms as PreviousPrograms3
ON TestTable.date = PreviousPrograms3.date AND PreviousPrograms3.type = 'Outpatient Vocational'
LEFT OUTER JOIN dbo.PreviousPrograms as PreviousPrograms4
ON TestTable.date = PreviousPrograms4.date AND PreviousPrograms4.type = 'Outpatient Recreational'
and then do a condition CASE WHEN in the SELECT statement as such:
SELECT
CASE
WHEN PreviousPrograms1.name IS NOT NULL
THEN PreviousPrograms1.name
WHEN PreviousPrograms1.name IS NULL AND PreviousPrograms2.name IS NOT NULL
THEN PreviousPrograms2.name
WHEN PreviousPrograms1.name IS NULL AND PreviousPrograms2.name IS NULL AND PreviousPrograms3.name IS NOT NULL
THEN PreviousPrograms3.name
WHEN PreviousPrograms1.name IS NULL AND PreviousPrograms2.name IS NULL AND PreviousPrograms3.name IS NOT NULL AND PreviousPrograms4.name IS NOT NULL
THEN PreviousPrograms4.name
ELSE NULL
END as PrimaryProgram
The bigger problem is that in my actual table there are a lot more than just four possible programs it could be and the CASE WHEN select statement and the JOINs are already cumbersome enough.
Is there a more efficient way to do either the SELECTs part or the JOIN part? Or possibly a better way to do it all together?
I'm using SQL Server 2008.
You can simplify (replace) your CASE by using COALESCE() instead:
SELECT
COALESCE(PreviousPrograms1.name, PreviousPrograms2.name,
PreviousPrograms3.name, PreviousPrograms4.name) AS PreviousProgram
COALESCE() returns the first non-null value.
Due to your design, you still need the JOINs, but it would be much easier to read if you used very short aliases, for example PP1 instead of PreviousPrograms1 - it's just a lot less code noise.
You can simplify the Join by using a bridge table containing all the program types and their priority (my sql server syntax is a bit rusty):
create table BridgeTable (
programType varchar(30),
programPriority smallint
);
This table will hold all the program types and the program priority will reflect the priority you've specified in your question.
As for the part of the case, that will depend on the number of records involved. One of the tricks that I usually do is this (assuming programPriority is a number between 10 and 99 and no type can have more than 30 bytes, because I'm being lazy):
Select patient, date,
substr( min(cast(BridgeTable.programPriority as varchar) || PreviousPrograms.type), 3, 30)
From dbo.TestTable as TestTable
Inner Join dbo.BridgeTable as BridgeTable
Left Outer Join dbo.PreviousPrograms as PreviousPrograms
on PreviousPrograms.type = BridgeTable.programType
and TestTable.date = PreviousPrograms.date
Group by patient, date
You can achieve this using sub-queries, or you could refactor it to use CTEs, take a look at the following and see if it makes sense:
DECLARE #testTable TABLE
(
[id] INT IDENTITY(1, 1),
[date] datetime
)
DECLARE #previousPrograms TABLE
(
[id] INT IDENTITY(1,1),
[date] datetime,
[type] varchar(50)
)
INSERT INTO #testTable ([date])
SELECT '2013-08-08'
UNION ALL SELECT '2013-08-07'
UNION ALL SELECT '2013-08-06'
INSERT INTO #previousPrograms ([date], [type])
-- a sample user as an inpatient
SELECT '2013-08-08', 'Inpatient'
-- your use case of someone being enrolled in all 3 outpation programs
UNION ALL SELECT '2013-08-07', 'Outpatient Recreational'
UNION ALL SELECT '2013-08-07', 'Outpatient Clinical'
UNION ALL SELECT '2013-08-07', 'Outpatient Vocational'
-- showing our workings, this is what we'll join to
SELECT
PPP.[date],
PPP.[type],
ROW_NUMBER() OVER (PARTITION BY PPP.[date] ORDER BY PPP.[Priority]) AS [RowNumber]
FROM (
SELECT
[type],
[date],
CASE
WHEN [type] = 'Inpatient' THEN 1
WHEN [type] = 'Outpatient Clinical' THEN 2
WHEN [type] = 'Outpatient Vocational' THEN 3
WHEN [type] = 'Outpatient Recreational' THEN 4
ELSE 999
END AS [Priority]
FROM #previousPrograms
) PPP -- Previous Programs w/ Priority
SELECT
T.[date],
PPPO.[type]
FROM #testTable T
LEFT JOIN (
SELECT
PPP.[date],
PPP.[type],
ROW_NUMBER() OVER (PARTITION BY PPP.[date] ORDER BY PPP.[Priority]) AS [RowNumber]
FROM (
SELECT
[type],
[date],
CASE
WHEN [type] = 'Inpatient' THEN 1
WHEN [type] = 'Outpatient Clinical' THEN 2
WHEN [type] = 'Outpatient Vocational' THEN 3
WHEN [type] = 'Outpatient Recreational' THEN 4
ELSE 999
END AS [Priority]
FROM #previousPrograms
) PPP -- Previous Programs w/ Priority
) PPPO -- Previous Programs w/ Priority + Order
ON T.[date] = PPPO.[date] AND PPPO.[RowNumber] = 1
Basically we have our deepest sub-select giving all PreviousPrograms a priority based on type, then our wrapping sub-select gives them row numbers per date so we can select only the ones with a row number of 1.
I am guessing you would need to include a UR Number or some other patient identifier, simply add that as an output to both sub-selects and change the join.