TSQL Select only the highest credential - sql

I have a query that is returning multiple lines for a single service because an individual may have multiple credentials. In the medical field you retain several credentials, but for simplicity's sake I will use just the standard credentials: PhD, MA, MS, BA, BS, AS.
I need to know the simplest way to ignore rows where Z_ServiceLedger.clientvisit_id has any Credentials.credentials lower in the hierarchy. So if an employee does a service and he has a PhD and an MA, only return the lines for PhD; and if he has a PhD, an MA and a BA, only return the lines for PhD. We have around 50 credentials, so if I use CASE for each credential you can see how messy that will get, and I'm hoping there is a better way to avoid that.
Here is my current query:
-- Question query: sums visit minutes (divided by 60) per credential name, overall
-- and for payer_id = 63.
-- Each visit is joined to every EmployeeCredential row of its employee, so a visit
-- by an employee holding several credentials is summed once under each of them.
-- NOTE(review): #param1 / #param2 are presumably @param1 / @param2 -- confirm.
SELECT DISTINCT
SUM(CASE WHEN v.non_billable = 0 THEN v.duration ELSE 0 END) / 60 AS billable_hours,
SUM(CASE WHEN (v.non_billable = 0 AND Z_ServiceLedger.payer_id = 63) THEN v.duration ELSE 0 END) / 60 AS billable_mro_hours,
Credentials.credentials
FROM
Z_ServiceLedger
INNER JOIN
ClientVisit v ON Z_ServiceLedger.clientvisit_id = v.clientvisit_id
LEFT JOIN
Employees ON v.emp_id = Employees.emp_id
LEFT JOIN
EmployeeCredential ON Employees.emp_id = EmployeeCredential.emp_id
LEFT JOIN
Credentials ON Credentials.credential_id = EmployeeCredential.credential_id
WHERE
-- Visit must not be later than the credential's end_date (or now, when there is no end_date).
v.rev_timein <= CASE
WHEN EmployeeCredential.end_date IS NOT NULL
THEN EmployeeCredential.end_date
ELSE GETDATE()
END
-- Half-open date window: from #param1 up to (not including) the day after #param2.
AND v.rev_timein >= #param1
AND v.rev_timein < DateAdd(d, 1, #param2)
AND Z_ServiceLedger.amount > 0
AND v.splitprimary_clientvisit_id IS NULL
AND v.gcode_primary_clientvisit_id IS NULL
-- The same column is tested twice, against 0 and against 'FALSE'.
AND v.non_billable = 0
AND v.non_billable = 'FALSE'
-- If duration is an integer type this is integer division, i.e. true from 60 upward.
AND v.duration / 60 > 0
AND Z_ServiceLedger.action_type NOT IN ('SERVICE RATE CHANGE', 'CLIENT STATEMENT')
AND (EmployeeCredential.is_primary IS NULL OR EmployeeCredential.is_primary != 'False')
-- The literal is a string with a trailing space.
AND v.client_id != '331771 '
GROUP BY
Credentials.credentials,
v.non_billable
ORDER BY
Credentials.credentials

Some aliases and formatting really shed some light on some major logical flaws here. You have at least two predicates in your where clause that logically turn a left join into an inner join. This is total shot in the dark since from both of your questions today we don't have anything to actually work with for tables or sample data.
The biggest concern though is your where clause is trying to get rows v.non_billable = 0 and where it equals 'FALSE'. It can't be both.
-- Visit minutes / 60 per credential name, overall and for payer_id = 63.
-- Credential-side conditions live in the LEFT JOIN's ON clause so that visits
-- whose employee has no qualifying credential are still returned (with NULL
-- credentials) instead of being filtered out.
-- @param1 / @param2: inclusive start date and inclusive end date of the report.
SELECT
    SUM(CASE WHEN v.non_billable = 0 THEN v.duration ELSE 0 END) / 60 AS billable_hours
  , SUM(CASE WHEN v.non_billable = 0 AND sl.payer_id = 63 THEN v.duration ELSE 0 END) / 60 AS billable_mro_hours
  , c.credentials
FROM Z_ServiceLedger sl
INNER JOIN ClientVisit v ON sl.clientvisit_id = v.clientvisit_id
LEFT JOIN Employees e ON v.emp_id = e.emp_id
LEFT JOIN EmployeeCredential ec ON e.emp_id = ec.emp_id
    -- If these predicates were in the WHERE clause they would turn the left join into an inner join.
    AND v.rev_timein <= ISNULL(ec.end_date, GETDATE())
    AND (ec.is_primary IS NULL OR ec.is_primary != 'False')
LEFT JOIN Credentials c ON c.credential_id = ec.credential_id
WHERE v.rev_timein >= @param1
  AND v.rev_timein < DATEADD(day, 1, @param2)
  AND v.splitprimary_clientvisit_id IS NULL
  AND v.gcode_primary_clientvisit_id IS NULL
  -- One test is enough; the second comparison against 'FALSE' was redundant at best.
  -- NOTE(review): assumes non_billable is a bit/numeric column -- confirm.
  AND v.non_billable = 0
  -- SARGable form of "v.duration / 60 > 0". With integer division that expression
  -- is true from 60 minutes upward, so the boundary is >= 60, not > 60.
  -- NOTE(review): assumes duration is an integer number of minutes -- confirm.
  AND v.duration >= 60
  AND sl.amount > 0
  AND sl.action_type NOT IN ('SERVICE RATE CHANGE', 'CLIENT STATEMENT')
  AND v.client_id != '331771 '
-- non_billable is fixed to 0 by the WHERE clause, so grouping by it adds nothing.
GROUP BY c.credentials
ORDER BY c.credentials;

EDIT: Modified query to add a CTE to calculate the credential_rank, using a FROM (VALUES (...)) table-value-constructor syntax. This works in SQL 2008+. (https://learn.microsoft.com/en-us/sql/t-sql/queries/table-value-constructor-transact-sql?view=sql-server-2017)
SQL Fiddle
First, I'll build out a very simple piece of data.
SETUP:
-- Minimal fixture for the credential query: two employees, three ranked
-- credentials, four ledger rows and four visits.
CREATE TABLE Employees ( emp_id int, emp_name varchar(20) ) ;
INSERT INTO Employees (emp_id, emp_name)
VALUES (1,'Jay'),(2,'Bob')
;

-- credential_rank: lower number = higher credential (PhD = 1).
CREATE TABLE Credentials ( credential_id int, credentials varchar(20), credential_rank int ) ;
INSERT INTO Credentials (credential_id, credentials, credential_rank)
VALUES (1,'BA',3),(2,'MA',2),(3,'PhD',1)
;

CREATE TABLE EmployeeCredential (emp_id int, credential_id int, is_primary bit, end_date date ) ;
INSERT INTO EmployeeCredential (emp_id, credential_id, is_primary, end_date)
VALUES
  ( 1,2,null,'20200101' )
, ( 1,3,0,'20200101' ) /* NON-PRIMARY */
, ( 1,1,1,'20100101' ) /* EXPIRED CRED */
, ( 2,3,null,'20200101' )
, ( 2,3,1,'20200101' )
;

-- Named with the same casing the queries use (Z_ServiceLedger) so the fixture
-- also works on a case-sensitive collation.
CREATE TABLE Z_ServiceLedger ( payer_id int, clientvisit_id int, amount int, action_type varchar(50) ) ;
INSERT INTO Z_ServiceLedger ( payer_id, clientvisit_id, amount, action_type )
VALUES (63,1,10,'XXXXX'),(63,2,20,'XXXXX'),(63,3,10,'XXXXX'),(63,4,30,'XXXXX')
;

-- Visits 2 (non_billable NULL) and 3 (non_billable 1) exercise the billable filter.
CREATE TABLE ClientVisit ( clientvisit_id int, client_id int, non_billable bit, duration int, emp_id int , rev_timein date, splitprimary_clientvisit_id int, gcode_primary_clientvisit_id int ) ;
INSERT INTO ClientVisit ( clientvisit_id, client_id, non_billable, duration, emp_id, rev_timein, splitprimary_clientvisit_id, gcode_primary_clientvisit_id )
VALUES
  (1, 1234, 0, 110, 1, getDate(), null, null )
, (2, 1234, null, 120, 1, getDate(), null, null )
, (3, 1234, 1, 110, 2, getDate(), null, null )
, (4, 1234, 0, 130, 2, getDate(), null, null )
;
MAIN QUERY:
/*
 * Billable hours per credential, where each visit is attributed to exactly one
 * credential: the highest-ranked one its employee holds that is still valid at
 * the time of the visit.
 *
 * @param1 / @param2: inclusive start date and inclusive end date of the report.
 * Output: billable_hours, billable_mro_hours (payer_id = 63 only), credentials.
 *
 * The ranking is done per visit, after the expiry test. Ranking per employee
 * first and testing expiry afterwards would drop a visit entirely whenever the
 * employee's top-ranked credential had expired, even if a lower one was valid.
 */
; WITH creds AS (
    /* credential_rank: 1 = highest. Supplied inline through a table value constructor. */
    SELECT c.credential_id, c.credentials, r.credential_rank
    FROM Credentials c
    LEFT OUTER JOIN (VALUES (1,3),(2,2),(3,1) ) r(credential_id, credential_rank)
        ON c.credential_id = r.credential_id
)
, visit_creds AS (
    /* One row per (visit, candidate credential); rn = 1 marks the best candidate.
       NOTE(review): assumes clientvisit_id is unique in ClientVisit -- confirm. */
    SELECT v.clientvisit_id
         , v.duration
         , c.credentials
         , ROW_NUMBER() OVER (
               PARTITION BY v.clientvisit_id
               /* Unranked (NULL) credentials go last; NULL would otherwise sort first. */
               ORDER BY CASE WHEN c.credential_rank IS NULL THEN 1 ELSE 0 END
                      , c.credential_rank
           ) AS rn
    FROM ClientVisit v
    INNER JOIN Employees e ON v.emp_id = e.emp_id
    LEFT OUTER JOIN EmployeeCredential ec ON e.emp_id = ec.emp_id
        /* A NULL is_primary is treated as primary, mirroring the original query. */
        AND ISNULL(ec.is_primary,1) <> 0
    LEFT OUTER JOIN creds c ON ec.credential_id = c.credential_id
    WHERE v.rev_timein >= @param1
      AND v.rev_timein < DATEADD(d, 1, @param2)
      AND v.splitprimary_clientvisit_id IS NULL
      AND v.gcode_primary_clientvisit_id IS NULL
      AND ISNULL(v.non_billable,1) = 0
      /* Same rows as "v.duration*1.0 / 60 > 0", without arithmetic on the column. */
      AND v.duration > 0
      AND v.client_id <> 331771
      /* Credential not expired at rev_timein; no end_date means valid until now. */
      AND v.rev_timein <= ISNULL(ec.end_date, GETDATE())
)
SELECT
    /* visit_creds only contains billable visits, so no CASE on non_billable is needed.
       "* 1.0" forces decimal division so partial hours are kept. */
    SUM(vc.duration) * 1.0 / 60 AS billable_hours,
    SUM(CASE WHEN zsl.payer_id = 63 THEN vc.duration ELSE 0 END) * 1.0 / 60 AS billable_mro_hours,
    vc.credentials
FROM Z_ServiceLedger zsl
INNER JOIN visit_creds vc ON zsl.clientvisit_id = vc.clientvisit_id
    AND vc.rn = 1
WHERE zsl.amount > 0
  AND zsl.action_type NOT IN ('SERVICE RATE CHANGE', 'CLIENT STATEMENT')
/* GROUP BY already yields one row per credential; DISTINCT was redundant. */
GROUP BY vc.credentials
ORDER BY vc.credentials
Results:
| billable_hours | billable_mro_hours | credentials |
|----------------|--------------------|-------------|
| 1.833333 | 1.833333 | MA |
| 2.166666 | 2.166666 | PhD |
A couple of things to watch for:
1) Integer Division : duration/60 will return an integer. So if you had duration=70, then you'd have 70/60 = 1 and you'd lose the extra 10 minutes, because the result is truncated to an integer. Probably not what you intended. The easiest solution is to just multiply duration by 1.0 so that it is forced into a decimal datatype and the division is no longer treated as integer division.
2) EmployeeCredential.is_primary != 'False' : Rather than account for the strings of "True"/"False", you should use an actual boolean value (1/0). And a NULL value should indicate that the value is NOT TRUE or FALSE rather than implying TRUE. Also, in SQL, != will work to indicate NOT EQUAL TO, but you should use <> instead. It means the same thing, but is grammatically more correct for SQL.
3) v.non_billable = 0 AND v.non_billable = 'FALSE' : This can be shortened to ISNULL(v.non_billable,1)=0 to cover both checks, especially since non_billable can be NULL. You also avoid the implicit type conversion when comparing the number 0 and the string 'False'.
4) v.client_id != '331771 ' : Change to v.client_id<>331771. First, the != to <> that I mentioned earlier. Second, the string '331771 ' is implicitly converted to a number when compared with a numeric column. You should avoid implicit conversions.
5) You originally had v.non_billable in your GROUP BY. Since you aren't including it in your SELECT, grouping by it could only split the output into extra rows that you can't tell apart. Also, you're already filtering out everything other than non_billable=0, so you'd never have more than one value to GROUP BY anyway. Just exclude it.
6) CASE WHEN EmployeeCredential.end_date IS NOT NULL THEN EmployeeCredential.end_date ELSE GETDATE() END : This is the same as saying ISNULL(EmployeeCredential.end_date,GETDATE()).
7) Unless you actually need to filter out specific records for a specific reason, move your JOIN conditions into the JOIN rather than using them in the WHERE clause. This will help you be more efficient with the data your initial query returns before it is filtered or reduced. Also, when using a WHERE filter on the outer-joined table of a LEFT JOIN, you may end up with unexpected results, because the filter discards the unmatched rows the LEFT JOIN was supposed to keep.

Related

Update Statement SQL doesn't update every record

Question
I've been trying to solve this question for quite a while now. But I'm not getting any closer to fixing it. I select a group of people eligible for getting a contract renewal. Now I want to update everyone with a specific code, but some of the records are staying blank.
What I've tried
These are the queries I'm using. First for selecting the records:
-- Question code: copies contracts into SELECTION when the contract end date falls in
-- one of three windows (26-28, 19-21 or 12-14 days from now) and the opt-in / history
-- conditions of that window hold. The four OR-ed branches below are the windows.
-- NOTE(review): the column is spelled CLIENDTID here but CLIENTID in the joins -- confirm.
INSERT INTO SELECTION (CLIENDTID, CREATED_DT, FIRSTNAME, MIDDLENAME, LASTNAME, EMAIL, CONTRACTEND_DATE, PRODUCT, MOBILE,TELEPHONE, STREET, HOUSENUMBER, ADDITIVE, POSTALCODE, CITY)
SELECT CLIENDTID, GETDATE(),FIRSTNAME, MIDDLENAME, LASTNAME, EMAIL, CONTRACTEND_DATE, PRODUCT, MOBILE,TELEPHONE, STREET, HOUSENUMBER, ADDITIVE, POSTALCODE, CITY
FROM CONTRACTS C (NOLOCK)
INNER JOIN OPTINS O (NOLOCK) ON O.CLIENTID = C.CLIENTID
INNER JOIN HISTORY HIS(NOLOCK) ON HIS.CLIENTID = C.CLIENTID
WHERE
(
-- Branch 1: 26-28 days out.
((DATEDIFF(DD,CURRENT_TIMESTAMP, CONTRACTEND_DATE) BETWEEN 26 AND 28)
AND
-- AND binds tighter than OR, so "HIS.HISTORY IS NULL" only applies to the last
-- disjunct. That disjunct is written 0.TELEPHONE (digit zero), not O.TELEPHONE.
(O.MAIL=1 OR O.SMS=1 OR O.DM=1 OR 0.TELEPHONE=1 AND HIS.HISTORY IS NULL))
OR
-- Branch 2: 19-21 days out, SMS opt-in with a mobile number.
((DATEDIFF(DD,CURRENT_TIMESTAMP, CONTRACTEND_DATE) BETWEEN 19 AND 21)
AND
(HIS.HISTORY<10 OR HIS.HISTORY IS NULL)
AND
O.SMS=1 AND C.MOBILE IS NOT NULL)
OR
-- Branch 3: 19-21 days out, no usable SMS channel but a phone number is present.
((DATEDIFF(DD,CURRENT_TIMESTAMP, CONTRACTEND_DATE) BETWEEN 19 AND 21)
AND
(HIS.HISTORY<100 OR HIS.HISTORY IS NULL)
AND
(O.SMS=0 OR C.MOBILE IS NULL)
AND
-- This branch tests O.CALL; the matching branch of the UPDATE tests O.TELEPHONE.
O.CALL=1
AND
(C.MOBILE IS NOT NULL OR C.TELEPHONE IS NOT NULL))
OR
-- Branch 4: 12-14 days out, telephone opt-in with a phone number.
((DATEDIFF(DD,CURRENT_TIMESTAMP,CONTRACTEND_DATE) BETWEEN 12 AND 14)
AND
(HIS.HISTORY<100 OR HIS.HISTORY IS NULL)
AND
O.TELEPHONE=1
AND
(C.MOBILE IS NOT NULL OR C.TELEPHONE IS NOT NULL))
)
And then i use this query to update the records.
-- Question code: assigns a contact channel code to SELECTION rows created in the last
-- four hours. The first matching WHEN wins; rows matching no WHEN get NULL.
UPDATE S
SET CODE = CASE
-- 26-28 days out: every WHEN in this window requires HIS.HISTORY IS NULL and tries
-- MAIL, then DM, then SMS, then EXPORT.
WHEN ( DATEDIFF(DD, CURRENT_TIMESTAMP, C.CONTRACTEND_DATE) BETWEEN 26 AND 28) AND HIS.HISTORY IS NULL AND O.MAIL = 1 AND C.MAIL IS NOT NULL THEN 'MAIL'
WHEN ( DATEDIFF(DD, CURRENT_TIMESTAMP, C.CONTRACTEND_DATE) BETWEEN 26 AND 28) AND HIS.HISTORY IS NULL AND O.DM = 1 AND (O.MAIL=0 OR C.MAIL IS NULL) THEN 'DM'
WHEN ( DATEDIFF(DD, CURRENT_TIMESTAMP, C.CONTRACTEND_DATE) BETWEEN 26 AND 28) AND HIS.HISTORY IS NULL AND O.DM = 0 AND (O.MAIL=0 OR C.MAIL IS NULL) AND O.SMS=1 AND C.MOBILE IS NOT NULL THEN 'SMS'
WHEN ( DATEDIFF(DD, CURRENT_TIMESTAMP, C.CONTRACTEND_DATE) BETWEEN 26 AND 28) AND HIS.HISTORY IS NULL AND O.DM = 0 AND (O.MAIL=0 OR C.MAIL IS NULL) AND (O.SMS=0 OR C.MOBILE IS NULL) AND
O.TELEPHONE=1 AND (C.MOBILE IS NOT NULL OR C.TELEPHONE IS NOT NULL) THEN 'EXPORT'
-- 19-21 days out: SMS when possible, otherwise EXPORT.
WHEN ( DATEDIFF(DD, CURRENT_TIMESTAMP, C.CONTRACTEND_DATE) BETWEEN 19 AND 21) AND (HIS.HISTORY<10 OR HIS.HISTORY IS NULL)
AND O.SMS=1 AND C.MOBILE IS NOT NULL THEN 'SMS'
-- This WHEN tests O.TELEPHONE; the INSERT's branch for the same window tests O.CALL.
WHEN ( DATEDIFF(DD, CURRENT_TIMESTAMP, C.CONTRACTEND_DATE) BETWEEN 19 AND 21) AND (HIS.HISTORY<100 OR HIS.HISTORY IS NULL)
AND (O.SMS=0 OR C.MOBILE IS NULL) AND O.TELEPHONE=1 AND (C.MOBILE IS NOT NULL OR C.TELEPHONE IS NOT NULL) THEN 'EXPORT'
-- 12-14 days out: EXPORT only.
WHEN ( DATEDIFF(DD, CURRENT_TIMESTAMP, C.CONTRACTEND_DATE) BETWEEN 12 AND 14) AND (HIS.HISTORY<100 OR HIS.HISTORY IS NULL)
AND O.TELEPHONE=1 AND (C.MOBILE IS NOT NULL OR C.TELEPHONE IS NOT NULL) THEN 'EXPORT'
ELSE NULL
END
FROM SELECTION S(NOLOCK)
INNER JOIN CONTRACTS C (NOLOCK) ON C.CLIENTID = S.CLIENTID
INNER JOIN OPTINS O (NOLOCK) ON O.CLIENTID = C.CLIENTID
INNER JOIN HISTORY HIS(NOLOCK) ON HIS.CLIENTID = C.CLIENTID
-- Only rows inserted less than four hours ago are touched.
WHERE S.CREATED_DT>DATEADD(hh,-4,GETDATE())
So basically it's the same selection i'm using to extract the records. But while updating them quite a few stay blank. And when i check the blank records they should've been given a code.
Maybe a case when statement is not the way to go about it, but i don't know how else to pull this off.
Assuming that the use NOLOCK isn't introducing data oddities by allowing "dirty reads", I see several possibilities for why not all the data in your SELECTION table is being updated.
The UPDATE has the clause S.CREATED_DT>DATEADD(hh,-4,GETDATE()). If the INSERT was run more than 4 hours before the UPDATE then rows created by that INSERT won't be updated.
Your UPDATE for EXPORT (with BETWEEN 19 AND 21 days) has a condition of O.TELEPHONE = 1 while the INSERT uses O.CALL = 1. I'm guessing the latter is correct and you need to amend the UPDATE code accordingly.
The first part of the WHERE clause (for BETWEEN 26 AND 28) in your INSERT has some odd-looking logic related to the HISTORY field. I think the relevant code should be as I've given below. Because of the order of operations (AND takes precedence over OR), my code is not equivalent to your code.
There may be other ways in which the BETWEEN 26 AND 28 set of records is introducing issues, as the INSERT and UPDATE code for that window is not at all equivalent and seems to rely on business logic rather than logical equivalence.
Revised 26-28 Code for the INSERT
-- Revised 26-28 day branch for the INSERT's WHERE clause: HISTORY must be NULL
-- regardless of which opt-in channel matched. The outer parenthesis is closed on
-- the last line so the fragment can replace the original branch as-is.
((DATEDIFF(DD,CURRENT_TIMESTAMP, CONTRACTEND_DATE) BETWEEN 26 AND 28)
AND
HIS.HISTORY IS NULL
AND
(O.MAIL=1 OR O.SMS=1 OR O.DM=1 OR O.TELEPHONE=1))
Note: I have assumed that 0.TELEPHONE is a typo and should be O.TELEPHONE.
A Different Approach
If you are confident that one of the two pieces of code is correct, I'd suggest you use exactly the same code in all the relevant spots. Here's a simplified version of how to do that:
-- Simplified pattern: the INSERT and the UPDATE evaluate the very same CASE
-- expression, so a row is selected exactly when it can also be given a code.
-- The expression is computed once per statement through CROSS APPLY.
INSERT INTO SELECTION
SELECT src.*
FROM SOURCETABLE AS src
CROSS APPLY (
    SELECT CASE
               WHEN src.A = 1 THEN 'A'
               WHEN src.B = 1 THEN 'B'
           END AS code   -- no ELSE: falls through to NULL
) AS calc
WHERE calc.code IS NOT NULL;

UPDATE sel
SET sel.Target = calc.code
FROM SELECTION AS sel
INNER JOIN SOURCETABLE AS src
    ON sel.ID = src.ID
CROSS APPLY (
    SELECT CASE
               WHEN src.A = 1 THEN 'A'
               WHEN src.B = 1 THEN 'B'
           END AS code   -- no ELSE: falls through to NULL
) AS calc
WHERE calc.code IS NOT NULL;

( CASE WHEN [date] IS NULL THEN 0 ELSE 1 END) is not working in my pivot

I have written the following code expecting to get 0's and 1's returned by the CASE expressions, but I get 1's and NULLs.
any suggestions to get 0's?
-- Question code: one row per employee with 0/1 indicator columns for driver licence
-- categories and certificates, produced by two chained PIVOTs.
-- NOTE(review): #Today is presumably @Today (variable sigil) -- confirm.
DECLARE #Today DATETIME
-- Casting to FLOAT and flooring removes the fractional (time-of-day) part.
SET #Today = FLOOR(CAST(GETDATE() AS FLOAT))
SELECT*
FROM
-- SRC: one row per employee / licence category / certificate combination,
-- with 0/1 flags derived from whether the joined date columns are NULL.
( SELECT e.[Employeenr],
e.[Name],
dc.[Code],
c.[Description],
(CASE WHEN ec.[date] IS NULL THEN 0 ELSE 1 END) as 'Date',
(CASE WHEN dc.[creationdate] IS NULL THEN 0 ELSE 1 END) as 'Aanwezig'
FROM HR_Employee e
left join HR_EmployeeDriverLicense d ON e.[Employeenr] = d.[Employee]
left join DriverLicenseCategory dc ON d.[DriverLicenseCategory] = dc.
[DriverLicenseCategorynr]
left join HR_EmployeeCertificate ec ON e.[Employeenr] = ec.[Employee]
left join HR_Certificate c ON ec.[Certificate] = c.[Certificatenr]
left join HR_Function f ON e.[Function] = f.[Functionnr]
WHERE (e.[Date_out_of_employment] IS NULL
or e.[Date_out_of_employment] >= #today
or e.[Licensenumber] is not null)
and e.[LicenseExpireDate] is not null
-- The next three predicates test columns of left-joined tables, so rows without a
-- match in those tables are removed here.
and c.[Description] is not null
and ec.[Certificate] <> 5
and f.[Functionnr] = 1
) AS SRC
-- First pivot: one column per licence code. A code with no source row yields NULL
-- in its column, because the CASE above is never evaluated for a missing row.
PIVOT
( MAX(Aanwezig)
FOR [Code] IN ([C], [C1], [C1E], [CE])) AS PivotTable
-- Second pivot: one column per certificate description, with the same NULL behaviour.
PIVOT
( MAX ([Date])
FOR [Description] IN ([Kooiaap certificaat], [ADR Certificaat])) AS PivotTable
When you perform a PIVOT, you get a NULL in every pivoted column for which a row has no corresponding source value. Unfortunately, if you want to get 0 instead of NULL, you need to add this logic in the final SELECT statement.
Something like this:
-- Replace the NULLs produced by PIVOT with 0 in the outer select list.
-- Each expression is aliased back to its pivot column name; without the alias
-- the output columns would be unnamed.
SELECT ISNULL([Kooiaap certificaat], 0) AS [Kooiaap certificaat],
       ISNULL([ADR Certificaat], 0) AS [ADR Certificaat]
Instead of using MAX use COUNT function and CASE WHEN together.
Try below.
-- Counts only rows whose flag is 1. Every other value falls through to NULL,
-- which COUNT ignores, so a group without any flagged row yields 0.
COUNT(CASE WHEN AANWEZIG = 1 THEN 1 END)

Return row for GROUP BY CASE WHEN IS NULL THEN (...) ELSE (...) even if record does not exist

Let's consider the following scenario.
-- Two sample replicants, both with Gen below 10.
CREATE TABLE Replicant (
    Name NVARCHAR(10),
    Gen  INT
);

INSERT INTO Replicant (Name, Gen)
VALUES ('tymtam', 2),
       ('Roy', 6);

-- Row count per generation bucket. A bucket with no rows produces no output row.
SELECT
    CASE WHEN r.Gen < 10 THEN '<10' ELSE '>=10' END AS [Gen],
    COUNT(*)                                         AS [Count]
FROM Replicant AS r
GROUP BY
    CASE WHEN r.Gen < 10 THEN '<10' ELSE '>=10' END;
The result is a single row:
Gen Count
<10 2
Can I up-sophisticate the query so that I get a zero for the ELSE case?
Gen Count
<10 2
>=10 0
Update 2
My discriminator is 'is null'
-- Row count per "is Gen known" bucket. A bucket with no rows produces no output row.
SELECT
    CASE WHEN r.Gen IS NOT NULL THEN 'Known' ELSE 'Unknown' END AS [Gen],
    COUNT(*)                                                     AS [Count]
FROM Replicant AS r
GROUP BY
    CASE WHEN r.Gen IS NOT NULL THEN 'Known' ELSE 'Unknown' END;
The result is
Gen Count
Known 2
and I yearn for
Gen Count
Known 2
Unknown 0
Update 1
My context is that I have pairs of queries (metrics) for different generations of replicants:
-- Abbreviated sketch, not runnable as written: "(...)" and "(80_char_expression)"
-- stand in for a column list and for long expressions.
-- First statement: one metrics row for replicants with Gen >= 10 seen in the last week.
INSERT INTO [dbo].[Metrics] (...) SELECT
'Metric X for >=10' as 'Name',
COUNT(*) AS 'Count',
(80_char_expression) AS 'Sum',
(80_char_expression) AS 'Min',
(80_char_expression) AS 'Max',
0 AS 'StandardDeviation'
FROM Replicant
WHERE TimestampUtc > DATEADD(WEEK, -1, Current_Timestamp)
AND Gen >= 10
-- Second statement: the same select, differing only in the name and the Gen predicate.
INSERT INTO [dbo].[Metrics] (...) SELECT
'Metric X for <10' as 'Name',
--7 lines repeated from the 1st query
AND Gen < 10
I would prefer to have a single select to insert two rows, even if there are no records.
You can use UNION ALL to build a small comparison table of your ranges and then outer join your data to it.
Query 1:
-- One output row per bucket, including buckets with no matching rows.
-- t1 lists each bucket with inclusive integer bounds. The bounds span the whole
-- INT range so that negative Gen values and INT max fall into a bucket, exactly
-- as "Gen < 10" / "Gen >= 10" would classify them.
SELECT t1.word,
       -- Count the joined key rather than Name: Name is nullable and would undercount.
       COUNT(r.Gen) AS 'Count'
FROM
(
    SELECT '<10' AS word, CAST(-2147483648 AS INT) AS minval, 9 AS maxval
    UNION ALL
    SELECT '>=10', 10, 2147483647
) t1
LEFT JOIN Replicant r
    ON r.Gen BETWEEN t1.minval AND t1.maxval
GROUP BY t1.word
Results:
| word | Count |
|------|-------|
| <10 | 2 |
| >=10 | 0 |
You can use left join:
-- One row per bucket. Each bucket is a half-open range [lo, hi), where a NULL
-- bound means "unbounded on that side". Buckets without rows report 0 because
-- COUNT skips the NULL gen of an unmatched left-join row.
SELECT bucket.Gen,
       COUNT(rep.gen) AS cnt
FROM (
    VALUES (NULL, 10, '<10'),
           (10, NULL, '>=10')
) AS bucket (lo, hi, gen)
LEFT JOIN Replicant AS rep
    ON  (bucket.lo IS NULL OR rep.gen >= bucket.lo)
    AND (bucket.hi IS NULL OR rep.gen <  bucket.hi)
GROUP BY bucket.gen;
You can also use conditional aggregation and unpivoting:
-- Conditional aggregation followed by an unpivot into one row per bucket.
-- COUNT is used instead of SUM because an aggregate without GROUP BY still returns
-- one row for an empty table, and on that row SUM is NULL while COUNT is 0.
select v.cnt, v.gen
from (select count(case when r.gen < 10 then 1 end) as gen_1,
             count(case when r.gen >= 10 then 1 end) as gen_2
      from replicant r
     ) agg cross apply
     (values (agg.gen_1, '<10'), (agg.gen_2, '>=10')
     ) v(cnt, gen);

Updating a table using Case statements in SQL

I am trying to add a 0, 1, or null to a column in a specific category where a relativepersonid of a person has a diagdate up to a person's servicedate. Here are my tables:
-- ICDCodes_w: one column per diagnosis category, each holding ICD codes for it.
-- The DROP is guarded so the script also runs when the table does not exist yet.
IF OBJECT_ID(N'ICDCodes_w', N'U') IS NOT NULL
    DROP TABLE ICDCodes_w;
GO
CREATE TABLE ICDCodes_w
(
    AnxietyDisorder VARCHAR(6),
    DepressiveDisorder VARCHAR(6),
    PTSD VARCHAR(6)
);
-- NOTE(review): the three codes in a row look unrelated to each other; a row
-- seems to be just a slot in three parallel lists -- confirm.
INSERT INTO ICDCodes_w
(
    AnxietyDisorder,
    DepressiveDisorder,
    PTSD
)
VALUES
('293.84', '296.2', '309.81'),
('300', '296.21', 'F43.1'),
('305.42', 'F11.28', 'F31.76'),
('305.81', 'F43.8', 'F31.78'),
('F40.00', 'F43.10', '305.52');
GO
-- DiagHX_w: diagnosis history, one row per archived diagnosis of a relative.
-- The DROP is guarded so the script also runs when the table does not exist yet.
IF OBJECT_ID(N'DiagHX_w', N'U') IS NOT NULL
    DROP TABLE DiagHX_w;
GO
CREATE TABLE DiagHX_w
(
    ArchiveID VARCHAR(10),
    RelativePersonID VARCHAR(10),
    ICDCode VARCHAR(6),
    DiagDate DATE
);
INSERT INTO DiagHX_w
(
    ArchiveID,
    RelativePersonID,
    ICDCode,
    DiagDate
)
VALUES
('1275741', '754241', '293.84', '1989-01-03'),
('2154872', '754241', '293.84', '1995-04-07'),
('4587215', '754241', '998.4', '1999-12-07'),
('4588775', '711121', 'F11.28', '2001-02-07'),
('3545455', '711121', NULL, NULL),   -- row with neither a code nor a date
('9876352', '323668', '400.02', '1988-04-09'),
('3211514', '112101', 'F31.78', '2005-09-09'),
('3254548', '686967', 'F40.00', '1999-12-31'),
('4411144', '686967', '305.52', '2000-01-01'),
('6548785', '99999999','F40.00', '2000-02-03');
GO
-- PatientFlags_w: target of the UPDATE; links a person to a relative and holds
-- one flag column per diagnosis category. The flags start out NULL.
-- The DROP is guarded so the script also runs when the table does not exist yet.
IF OBJECT_ID(N'PatientFlags_w', N'U') IS NOT NULL
    DROP TABLE PatientFlags_w;
GO
CREATE TABLE PatientFlags_w
(
    PersonID VARCHAR(10),
    RelativePersonID VARCHAR(10),
    AnxietyDisorder VARCHAR(2),
    DepressiveDisorder VARCHAR(2),
    PTSD VARCHAR(2)   -- stray trailing comma after the last column removed
);
INSERT INTO PatientFlags_w
(
    PersonID,
    RelativePersonID
)
VALUES
('99999999', '754241'),
('88888888', '754241'),
('77777777', '754241'),
('66666666', '711121'),
('55555555', '711121'),
('44444444', '323668'),
('33333333', '112101'),
('22222222', '686967'),
('11111111', '686967'),
('32151111', '887878'),
('78746954', '771125'),
('54621333', '333114'),
('55648888', '333114');
GO
-- Person_w: one service date per person; diagnoses dated after it are ignored
-- by the UPDATE.
-- The DROP is guarded so the script also runs when the table does not exist yet.
IF OBJECT_ID(N'Person_w', N'U') IS NOT NULL
    DROP TABLE Person_w;
GO
CREATE TABLE Person_w
(
    PersonID VARCHAR(10),
    ServiceDate date
);
INSERT INTO Person_w
(
    PersonID,
    ServiceDate
)
VALUES
('99999999', '2000-12-31'),
('88888888', '2000-11-01'),
('69876541', '2000-09-04'),
('66666666', '2000-01-15'),
('55555555', '2000-07-22'),
('44444444', '2000-07-20'),
('65498711', '2000-11-17'),
('22222222', '2000-09-02'),
('11111111', '2000-02-04'),
('32151111', '2000-02-17'),
('78746954', '2000-03-29'),
('54621333', '2000-08-22'),
('55648888', '2000-10-20');
Here is my update statement:
-- Question code: sets the three category flags on PatientFlags_w from the relative's
-- diagnosis history, considering only diagnoses dated on or before the person's
-- service date.
-- The join can produce several DiagHX_w rows for one PatientFlags_w row. The flags
-- are computed from a single joined row's ICDCode, so which diagnosis ends up
-- deciding them is not controlled by this statement.
UPDATE a
-- Flag is 1 when the joined row's ICDCode appears in the category's column.
SET AnxietyDisorder = CASE
WHEN ICDCode IN
(
SELECT AnxietyDisorder FROM
Project..ICDCodes_w
) THEN
1
ELSE
0
END,
DepressiveDisorder = CASE
WHEN ICDCode IN
(
SELECT DepressiveDisorder FROM
Project..ICDCodes_w
) THEN
1
ELSE
0
END,
PTSD = CASE
WHEN ICDCode IN
(
SELECT PTSD FROM Project..ICDCodes_w
) THEN
1
ELSE
0
END
FROM PatientFlags_w a
JOIN DiagHX_w b
ON a.relativepersonid = b.RelativePersonID
JOIN Person_w p
ON a.personid = p.PersonID
-- Inner joins plus this filter: rows with no qualifying diagnosis are not updated
-- at all and keep whatever flag values they had.
WHERE diagdate <= p.servicedate;
This works on some values, but there are some that don't get updated. I know the issue is with my case statement and probably a join issue. What is a better way to write this? Here is an example query I used to check. The PTSD column should have a 1.
-- Verification query: every flag row together with the relative's diagnoses that
-- carry a PTSD code and are dated on or before the person's service date.
SELECT *
FROM project..patientflags_w AS pf
INNER JOIN project..diaghx_w AS dx
    ON pf.relativepersonid = dx.RelativePersonID
INNER JOIN project..person_w AS per
    ON pf.personid = per.personid
WHERE dx.diagdate <= per.servicedate
  AND EXISTS (
        SELECT 1
        FROM Project..ICDCodes_w AS codes
        WHERE codes.PTSD = dx.icdcode
      )
I did ask this question the other day, but my sample tables were all messed up, so I've verified that they work this time.
At first glance, the problem with your query is that you update the target (PatientFlags_w) multiple times: once for each flag. In some cases you seem to be ending up with the correct result, but it's just by luck.
It's hard to tell if you want one row per person in the flag table, or one row per flag.
Can you review these queries and let us know if they are close to your desired results:
-- One row per person (RelativePersonID).
-- A diagnosis is matched to an ICDCodes_w row when its code equals any of that
-- row's three columns; each flag is then 1 if the matched row has a non-NULL
-- value in the flag's column.
SELECT
    d.RelativePersonID,
    MAX(CASE WHEN c.AnxietyDisorder    IS NOT NULL THEN 1 ELSE 0 END) AS [AnxietyDisorder],
    MAX(CASE WHEN c.DepressiveDisorder IS NOT NULL THEN 1 ELSE 0 END) AS [DepressiveDisorder],
    MAX(CASE WHEN c.PTSD               IS NOT NULL THEN 1 ELSE 0 END) AS [PTSD]
FROM DiagHX_w AS d
LEFT JOIN ICDCodes_w AS c
    ON c.AnxietyDisorder    = d.ICDCode
    OR c.DepressiveDisorder = d.ICDCode
    OR c.PTSD               = d.ICDCode
GROUP BY d.RelativePersonID;
-- One row per matched diagnosis (no grouping).
-- Same matching rule as the per-person query: a diagnosis matches an ICDCodes_w
-- row when its code equals any of the row's three columns.
SELECT
    d.RelativePersonID,
    d.ICDCode,
    CASE WHEN c.AnxietyDisorder    IS NOT NULL THEN 1 ELSE 0 END AS [AnxietyDisorder],
    CASE WHEN c.DepressiveDisorder IS NOT NULL THEN 1 ELSE 0 END AS [DepressiveDisorder],
    CASE WHEN c.PTSD               IS NOT NULL THEN 1 ELSE 0 END AS [PTSD]
FROM DiagHX_w AS d
LEFT JOIN ICDCodes_w AS c
    ON c.AnxietyDisorder    = d.ICDCode
    OR c.DepressiveDisorder = d.ICDCode
    OR c.PTSD               = d.ICDCode;
If the diagnoses are not related to each other (I assumed since they are in the same table), you might want this instead:
-- One row per person, treating the three ICDCodes_w columns as independent lists:
-- a flag is 1 only when the diagnosis code equals the value in that flag's own column.
SELECT
    d.RelativePersonID,
    MAX(CASE WHEN c.AnxietyDisorder    = d.ICDCode THEN 1 ELSE 0 END) AS [AnxietyDisorder],
    MAX(CASE WHEN c.DepressiveDisorder = d.ICDCode THEN 1 ELSE 0 END) AS [DepressiveDisorder],
    MAX(CASE WHEN c.PTSD               = d.ICDCode THEN 1 ELSE 0 END) AS [PTSD]
FROM DiagHX_w AS d
LEFT JOIN ICDCodes_w AS c
    ON c.AnxietyDisorder    = d.ICDCode
    OR c.DepressiveDisorder = d.ICDCode
    OR c.PTSD               = d.ICDCode
GROUP BY d.RelativePersonID;

SQL Optimize - From History table get value from two different dates

Not sure where to start... But basically I have a report table, an account table, and an account history table. The account history table will have zero or more records, where each record is the state of the account cancelled flag after it changed.
There is other stuff going on, but basically i am looking to return the account detail data, with the state of account cancelled bit on the start date and enddate as different columns.
What is the best way to do this?
I have the following working query below
(Idea) Should I do separate joins on the history table, one for each date?
I guess I could do it in three separate queries ( Get Begin Snapshot, End Snapshot, Normal Report query with a join to each snapshot)
something else?
Expected output:
AccountID, OtherData, StartDateCancelled, EndDateCancelled
Test Tables:
-- Test fixture as local temp tables. "DECLARE #name TABLE" is not valid T-SQL, and
-- the queries that follow reference #Report, #ReportAccountDetail and
-- #AccountHistory, so the objects are created under exactly those names.
CREATE TABLE #Report (ReportID INT, StartDate DATETIME, EndDate DATETIME);
CREATE TABLE #ReportAccountDetail (ReportID INT, Accountid INT, Cancelled BIT);
CREATE TABLE #AccountHistory (AccountID INT, ModifiedDate DATETIME, Cancelled BIT);

-- One report period.
-- NOTE(review): the date literals depend on the session's DATEFORMAT -- confirm.
INSERT INTO #Report (ReportID, StartDate, EndDate)
VALUES (1, '1/1/2011', '2/1/2011');

-- Four accounts on report 1; accounts 3 and 4 are currently cancelled.
INSERT INTO #ReportAccountDetail (ReportID, Accountid, Cancelled)
VALUES (1, 1, 0),
       (1, 2, 0),
       (1, 3, 1),
       (1, 4, 1);

-- Cancelled-flag changes. Account 1 has no history rows at all.
INSERT INTO #AccountHistory (AccountID, ModifiedDate, Cancelled)
VALUES (2, '1/2/2010', 1),
       (3, '2/1/2011', 1),
       (4, '1/1/2010', 1),
       (4, '2/1/2010', 0),
       (4, '2/1/2011', 1);
Current Query:
-- Question code: per account, the Cancelled state as of the report's StartDate and
-- as of its EndDate, taken from the latest history row on or before each date and
-- falling back to DefaultCancel (0) when no such row exists.
SELECT Accountid, OtherData,
-- Only the rank-1 row contributes; its history value is used only if that row is
-- actually dated on or before the boundary (the *HistoryExists flag).
MAX(CASE WHEN BeginRank = 1 THEN CASE WHEN BeginHistoryExists = 1 THEN HistoryCancelled ELSE DefaultCancel END ELSE NULL END ) AS StartDateCancelled,
MAX(CASE WHEN EndRank = 1 THEN CASE WHEN EndHistoryExists = 1 THEN HistoryCancelled ELSE DefaultCancel END ELSE NULL END ) AS EndDateCancelled
FROM
(
SELECT c.Accountid,
'OtherData' AS OtherData,
--lots of other data
-- Rank per account: rows on or before StartDate first, newest first within each group.
ROW_NUMBER() OVER (PARTITION BY c.AccountID ORDER BY
CASE WHEN ch.ModifiedDate <= Report.StartDate THEN 1 ELSE 0 END DESC, ch.ModifiedDate desc) AS BeginRank,
CASE WHEN ch.ModifiedDate <= Report.StartDate THEN 1 ELSE 0 END AS BeginHistoryExists,
-- Same ranking relative to EndDate.
ROW_NUMBER() OVER ( PARTITION BY c.AccountID ORDER BY
CASE WHEN ch.ModifiedDate <= Report.EndDate THEN 1 ELSE 0 END DESC, ch.ModifiedDate desc) AS EndRank,
CASE WHEN ch.ModifiedDate <= Report.EndDate THEN 1 ELSE 0 END AS EndHistoryExists,
-- Cast so that MAX can be applied to the value in the outer query.
CAST( ch.Cancelled AS INT) AS HistoryCancelled,
0 AS DefaultCancel
FROM
#Report AS Report
INNER JOIN #ReportAccountDetail AS C ON Report.ReportID = C.ReportID
--Others joins related for data to return
-- LEFT JOIN keeps accounts that have no history rows (their ch columns are NULL).
LEFT JOIN #AccountHistory AS CH ON CH.AccountID = C.AccountID
WHERE Report.ReportID = 1
) AS x
GROUP BY AccountID, OtherData
Welcome input on writing stack overflow questions. Thanks!
ROW_NUMBER() often surprises me and out-performs my expectations. In this case, however, I'd be tempted to just use correlated sub-queries. At least, I'd test them against the alternatives.
Note: I would also use real tables, with real indexes, and a realistic volume of fake data. (If it's worth posting this question, I'm assuming that it's worth testing this realistically.)
-- Per report and account: the Cancelled value of the newest history row on or
-- before StartDate and on or before EndDate, defaulting to 0 when no such row
-- exists. Each lookup is a correlated TOP 1 probe, written with OUTER APPLY.
SELECT
    rpt.ReportID,
    acct.AccountID,
    acct.OtherData,
    ISNULL(start_state.Cancelled, 0) AS StartDateCancelled,
    ISNULL(end_state.Cancelled, 0)   AS EndDateCancelled
FROM Report AS rpt
LEFT JOIN ReportAccountDetail AS acct
    ON acct.ReportID = rpt.ReportID
OUTER APPLY (
    -- Newest history row on or before the report's start date.
    SELECT TOP 1 hist.Cancelled
    FROM AccountHistory AS hist
    WHERE hist.AccountID = acct.AccountID
      AND hist.ModifiedDate <= rpt.StartDate
    ORDER BY hist.ModifiedDate DESC
) AS start_state
OUTER APPLY (
    -- Newest history row on or before the report's end date.
    SELECT TOP 1 hist.Cancelled
    FROM AccountHistory AS hist
    WHERE hist.AccountID = acct.AccountID
      AND hist.ModifiedDate <= rpt.EndDate
    ORDER BY hist.ModifiedDate DESC
) AS end_state
ORDER BY
    rpt.ReportID,
    acct.AccountID
Note: For whatever reason, I've found that TOP 1 and ORDER BY is faster than MAX().
In terms of your suggested answer, I'd modify it slightly to just use ISNULL instead of trying to make the Exists columns work.
I'd also join on the "other data" after all of the working out, rather than inside the inner-most query, so as to avoid having to group by all the "other data".
-- Cancelled state per report/account at StartDate and EndDate, defaulting to 0.
--   HistoricData : every history row on or before EndDate, ranked two ways.
--   FlattenedData: collapses those rows to one row per report/account.
-- The final [OtherData] join is a placeholder for whatever extra data is needed.
WITH
HistoricData AS
(
    SELECT
        Report.ReportID,
        c.Accountid,
        c.OtherData,
        -- Rows on or before StartDate rank first, newest first within each group.
        ROW_NUMBER() OVER (PARTITION BY c.ReportID, c.AccountID ORDER BY CASE WHEN ch.ModifiedDate <= Report.StartDate THEN 1 ELSE 0 END DESC, ch.ModifiedDate DESC) AS BeginRank,
        -- When no row is on or before StartDate, BeginRank = 1 lands on a later row;
        -- this flag lets the outer query ignore it.
        CASE WHEN ch.ModifiedDate <= Report.StartDate THEN 1 ELSE 0 END AS IsOnOrBeforeStart,
        -- The join already limits rows to ModifiedDate <= EndDate, so newest first suffices.
        ROW_NUMBER() OVER (PARTITION BY c.ReportID, c.AccountID ORDER BY ch.ModifiedDate DESC) AS EndRank,
        -- MAX() does not accept BIT operands, hence the cast.
        CAST(CH.Cancelled AS INT) AS Cancelled
    FROM
        #Report AS Report
    INNER JOIN
        #ReportAccountDetail AS C
            ON Report.ReportID = C.ReportID
    LEFT JOIN
        #AccountHistory AS CH
            ON CH.AccountID = C.AccountID
            AND CH.ModifiedDate <= Report.EndDate
)
,
FlattenedData AS
(
    SELECT
        ReportID,
        Accountid,
        OtherData,
        -- ISNULL supplies the default of 0 when no qualifying history row exists.
        ISNULL(MAX(CASE WHEN BeginRank = 1 AND IsOnOrBeforeStart = 1 THEN Cancelled END), 0) AS StartDateCancelled,
        ISNULL(MAX(CASE WHEN EndRank = 1 THEN Cancelled END), 0) AS EndDateCancelled
    FROM
        [HistoricData]
    GROUP BY
        ReportID,
        AccountID,
        OtherData
)
SELECT
    *
FROM
    [FlattenedData]
LEFT JOIN
    [OtherData]
        ON Whatever = YouLike
WHERE
    [FlattenedData].ReportID = 1
And a final possible version...
-- Cancelled state per report/account at StartDate and EndDate, defaulting to 0.
--   ReportStartHistory: newest history row per report/account on or before StartDate.
--   ReportEndHistory  : newest history row per report/account on or before EndDate.
WITH
ReportStartHistory AS
(
    SELECT
        *
    FROM
    (
        SELECT
            [Report].ReportID,
            -- DESC so that SequenceID = 1 is the newest qualifying row; ascending
            -- order would pick the oldest change instead of the state in effect.
            ROW_NUMBER() OVER (PARTITION BY [Report].ReportID, [History].AccountID ORDER BY [History].ModifiedDate DESC) AS SequenceID,
            [History].*
        FROM
            Report AS [Report]
        INNER JOIN
            AccountHistory AS [History]
                ON [History].ModifiedDate <= [Report].StartDate
    )
        AS [data]
    WHERE
        SequenceID = 1
)
,
ReportEndHistory AS
(
    SELECT
        *
    FROM
    (
        SELECT
            [Report].ReportID,
            -- DESC: newest qualifying row first, as in ReportStartHistory.
            ROW_NUMBER() OVER (PARTITION BY [Report].ReportID, [History].AccountID ORDER BY [History].ModifiedDate DESC) AS SequenceID,
            [History].*
        FROM
            Report AS [Report]
        INNER JOIN
            AccountHistory AS [History]
                ON [History].ModifiedDate <= [Report].EndDate
    )
        AS [data]
    WHERE
        SequenceID = 1
)
SELECT
    [Report].ReportID,
    [Account].*,
    ISNULL([ReportStartHistory].Cancelled, 0) AS StartDateCancelled,
    ISNULL([ReportEndHistory].Cancelled, 0) AS EndDateCancelled
FROM
    Report AS [Report]
INNER JOIN
    -- An INNER JOIN needs an ON clause. The accounts of a report are taken from
    -- ReportAccountDetail, joined on ReportID like the correlated-subquery version.
    -- NOTE(review): the original named a table "Account" here with no join
    -- condition -- confirm the intended source.
    ReportAccountDetail AS [Account]
        ON [Account].ReportID = [Report].ReportID
LEFT JOIN
    [ReportStartHistory]
        ON [ReportStartHistory].ReportID = [Report].ReportID
        AND [ReportStartHistory].AccountID = [Account].AccountID
LEFT JOIN
    [ReportEndHistory]
        ON [ReportEndHistory].ReportID = [Report].ReportID
        AND [ReportEndHistory].AccountID = [Account].AccountID