Count with exists in SQL - sql

Why is this query not returning the count of the results? How do I get it to show the count
SELECT COUNT (*) AS MWith
FROM member m
JOIN Channel mc ON mc.MemberID = m.id
JOIN Client c ON c.id = m.clientid
JOIN packages p ON p.id = m.packageid
WHERE Enroll > '2018'
AND EXISTS (
SELECT * FROM
activity a
WHERE a.memberid = m.id
AND a.code IN ('785', 'a599')
)
GROUP BY m.id;
OUTPUT
MWith
1
1
1

The empty set is because of the clause group by .
The workarounds are:
Remove a GROUP BY, because m.id anyway is not part of the output
Use GROUP BY ALL
An original example:
SELECT COUNT(*) AS MWith
FROM member m
JOIN Channel mc
ON mc.MemberID = m.id
JOIN Client c
ON c.id = m.clientid
JOIN packages p
ON p.id = m.packageid
WHERE Enroll > '2018'
AND EXISTS
(
SELECT *
FROM activity a
WHERE a.memberid = m.id
AND a.code IN ( '785', 'a599' )
)
-- GROUP BY m.id;
Other, simpler examples to show a difference:
-- returns an empty resultset
SELECT COUNT(*) FROM sys.databases
WHERE 1=0
GROUP BY name
-- returns: a single row with 0
SELECT COUNT(*) FROM sys.databases
WHERE 1=0
-- Another example with GROUP BY ALL
-- it returns one row per grouped value, with expected count = 0
SELECT COUNT(*) FROM sys.databases
WHERE 1=0
GROUP BY ALL name

Related

Remove grouped data set when total of count is zero with subquery

I'm generating a data set that looks like this
category user total
1 jonesa 0
2 jonesa 0
3 jonesa 0
1 smithb 0
2 smithb 0
3 smithb 5
1 brownc 2
2 brownc 3
3 brownc 4
Where a particular user has 0 records in all categories is it possible to remove their rows form the set? If a user has some activity like smithb does, I'd like to keep all of their records. Even the zeroes rows. Not sure how to go about that, I thought a CASE statement may be of some help but I'm not sure, this is pretty complicated for me. Here is my query
SELECT DISTINCT c.category,
u.user_name,
CASE WHEN (
SELECT COUNT(e.entry_id)
FROM category c1
INNER JOIN entry e1
ON c1.category_id = e1.category_id
WHERE c1.category_id = c.category_id
AND e.user_name = u.user_name
AND e1.entered_date >= TO_DATE ('20140625','YYYYMMDD')
AND e1.entered_date <= TO_DATE ('20140731', 'YYYYMMDD')) > 0 -- I know this won't work
THEN 'Yes'
ELSE NULL
END AS TOTAL
FROM user u
INNER JOIN role r
ON u.id = r.user_id
AND r.id IN (1,2),
category c
LEFT JOIN entry e
ON c.category_id = e.category_id
WHERE c.category_id NOT IN (19,20)
I realise the case statement won't work, but it was an attempt on how this might be possible. I'm really not sure if it's possible or the best direction. Appreciate any guidance.
Try this:
delete from t1
where user in (
select user
from t1
group by user
having count(distinct category) = sum(case when total=0 then 1 else 0 end) )
The sub query can get all the users fit your removal requirement.
count(distinct category) get how many category a user have.
sum(case when total=0 then 1 else 0 end) get how many rows with activities a user have.
There are a number of ways to do this, but the less verbose the SQL is, the harder it may be for you to follow along with the logic. For that reason, I think that using multiple Common Table Expressions will avoid the need to use redundant joins, while being the most readable.
-- assuming user_name and category_name are unique on [user] and [category] respectively.
WITH valid_categories (category_id, category_name) AS
(
-- get set of valid categories
SELECT c.category_id, c.category AS category_name
FROM category c
WHERE c.category_id NOT IN (19,20)
),
valid_users ([user_name]) AS
(
-- get set of users who belong to valid roles
SELECT u.[user_name]
FROM [user] u
WHERE EXISTS (
SELECT *
FROM [role] r
WHERE u.id = r.[user_id] AND r.id IN (1,2)
)
),
valid_entries (entry_id, [user_name], category_id, entry_count) AS
(
-- provides a flag of 1 for easier aggregation
SELECT e.[entry_id], e.[user_name], e.category_id, CAST( 1 AS INT) AS entry_count
FROM [entry] e
WHERE e.entered_date BETWEEN TO_DATE('20140625','YYYYMMDD') AND TO_DATE('20140731', 'YYYYMMDD')
-- determines if entry is within date range
),
user_categories ([user_name], category_id, category_name) AS
( SELECT u.[user_name], c.category_id, c.category_name
FROM valid_users u
-- get the cartesian product of users and categories
CROSS JOIN valid_categories c
-- get only users with a valid entry
WHERE EXISTS (
SELECT *
FROM valid_entries e
WHERE e.[user_name] = u.[user_name]
)
)
/*
You can use these for testing.
SELECT COUNT(*) AS valid_categories_count
FROM valid_categories
SELECT COUNT(*) AS valid_users_count
FROM valid_users
SELECT COUNT(*) AS valid_entries_count
FROM valid_entries
SELECT COUNT(*) AS users_with_entries_count
FROM valid_users u
WHERE EXISTS (
SELECT *
FROM user_categories uc
WHERE uc.user_name = u.user_name
)
SELECT COUNT(*) AS users_without_entries_count
FROM valid_users u
WHERE NOT EXISTS (
SELECT *
FROM user_categories uc
WHERE uc.user_name = u.user_name
)
SELECT uc.[user_name], uc.[category_name], e.[entry_count]
FROM user_categories uc
INNER JOIN valid_entries e ON (uc.[user_name] = e.[user_name] AND uc.[category_id] = e.[category_id])
*/
-- Finally, the results:
SELECT uc.[user_name], uc.[category_name], SUM(NVL(e.[entry_count],0)) AS [entry_count]
FROM user_categories uc
LEFT OUTER JOIN valid_entries e ON (uc.[user_name] = e.[user_name] AND uc.[category_id] = e.[category_id])
Here's another method:
WITH totals AS (
SELECT
c.category,
u.user_name,
COUNT(e.entry_id) AS total,
SUM(COUNT(e.entry_id)) OVER (PARTITION BY u.user_name) AS user_total
FROM
user u
INNER JOIN
role r ON u.id = r.user_id
CROSS JOIN
category c
LEFT JOIN
entry e ON c.category_id = e.category_id
AND u.user_name = e.user_name
AND e1.entered_date >= TO_DATE ('20140625', 'YYYYMMDD')
AND e1.entered_date <= TO_DATE ('20140731', 'YYYYMMDD')
WHERE
r.id IN (1, 2)
AND c.category_id IN (19, 20)
GROUP BY
c.category,
u.user_name
)
SELECT
category,
user_name,
total
FROM
totals
WHERE
user_total > 0
;
The totals derived table calculates the totals per user and category as well as totals across all categories per user (using SUM() OVER ...). The main query returns only rows where the user total is greater than zero.

JOIN / LEFT JOIN conflict in SQL Server

I have a tricky query. I need to select all recent versions of 2 types of members of administrator groups. Here is the query:
SELECT refGroup.*
FROM tblSystemAdministratorGroups refGroup
JOIN tblGroup refMem ON refGroup.AttributeValue = refMem.ObjectUID
This query will return all the administrator groups. The next step will be getting the members of these groups. Since I have 2 types of memberships (Explicit, Computed), I will have to use a LEFT JOIN to make sure that I am not excluding any rows.
SELECT refGroup.*
FROM tblSystemAdministratorGroups refGroup
-- The JOIN bellow can be excluded but it is here just to clarify the architecture
JOIN tblGroup refMem ON refGroup.AttributeValue = refMem.ObjectUID
LEFT JOIN tblGroup_ComputedMember cm ON refMem.ObjectUID = cm.GroupObjectID
LEFT JOIN tblGroup_ExplicitMember em ON refMem.ObjectUID = em.GroupObjectID
The last piece in the puzzle is to get the latest version of each member. For that I will have to use JOIN to exclude older versions:
JOIN (
SELECT MAX([ID]) MaxId
FROM [OmadaReporting].[dbo].tblGroup_ComputedMember
GROUP BY ObjectID
) MostRecentCM ON MostRecentCM.MaxId = cm.Id
and
JOIN (
SELECT MAX([ID]) MaxId
FROM [OmadaReporting].[dbo].tblGroup_ExplicitMember
GROUP BY ObjectID
) MostRecentEM ON MostRecentEM.MaxId = em.Id
The full query will be:
SELECT refGroup.*
FROM tblSystemAdministratorGroups refGroup
JOIN tblGroup refMem ON refGroup.AttributeValue = refMem.ObjectUID
LEFT JOIN tblGroup_ComputedMember cm ON refMem.ObjectUID = cm.GroupObjectID
JOIN (
SELECT MAX([ID]) MaxId
FROM [OmadaReporting].[dbo].tblGroup_ComputedMember
GROUP BY ObjectID
) MostRecentCM ON MostRecentCM.MaxId = cm.Id
LEFT JOIN tblGroup_ExplicitMember em ON refMem.ObjectUID = em.GroupObjectID
JOIN (
SELECT MAX([ID]) MaxId
FROM [OmadaReporting].[dbo].tblGroup_ExplicitMember
GROUP BY ObjectID
) MostRecentEM ON MostRecentEM.MaxId = em.Id
The issue is clear: The 2 JOIN to exclude old versions are also applied to the select statement and clearly no rows are returned. What would be the best solution to escape such situation and to return the intended values?
SELECT refGroup.*
FROM tblSystemAdministratorGroups refGroup
JOIN tblGroup refMem ON refGroup.AttributeValue = refMem.ObjectUID
LEFT JOIN (
select GroupObjectID, ID, max(ID) over (partition by ObjectID) as maxID
from tblGroup_ComputedMember
) cm ON refMem.ObjectUID = cm.GroupObjectID and cm.ID = cm.maxID
LEFT JOIN (
select GroupObjectID, ID, max(ID) over (partition by ObjectID) as maxID
from tblGroup_ExplicitMember
) em ON refMem.ObjectUID = em.GroupObjectID and em.ID = em.maxID
where cm.ID = cm.MaxID
What about using LEFT join in your last two joins?
LEFT JOIN (
SELECT MAX([ID]) MaxId
FROM [OmadaReporting].[dbo].tblGroup_ComputedMember
GROUP BY ObjectID
) MostRecentCM ON MostRecentCM.MaxId = cm.Id
And then in Where clause filter values as:
WHERE MostRecentCM.MaxId IS NOT NULL
OR
MostRecentEM.MaxId IS NOT NULL

Inner join that ignore singlets

I have to do an self join on a table. I am trying to return a list of several columns to see how many of each type of drug test was performed on same day (MM/DD/YYYY) in which there were at least two tests done and at least one of which resulted in a result code of 'UN'.
I am joining other tables to get the information as below. The problem is I do not quite understand how to exclude someone who has a single result row in which they did have a 'UN' result on a day but did not have any other tests that day.
Query Results (Columns)
County, DrugTestID, ID, Name, CollectionDate, DrugTestType, Results, Count(DrugTestType)
I have several rows for ID 12345 which are correct. But ID 12346 is a single row of which is showing they had a row result of count (1). They had a result of 'UN' on this day but they did not have any other tests that day. I want to exclude this.
I tried the following query
select
c.desc as 'County',
dt.pid as 'PID',
dt.id as 'DrugTestID',
p.id as 'ID',
bio.FullName as 'Participant',
CONVERT(varchar, dt.CollectionDate, 101) as 'CollectionDate',
dtt.desc as 'Drug Test Type',
dt.result as Result,
COUNT(dt.dru_drug_test_type) as 'Count Of Test Type'
from
dbo.Test as dt with (nolock)
join dbo.History as h on dt.pid = h.id
join dbo.Participant as p on h.pid = p.id
join BioData as bio on bio.id = p.id
join County as c with (nolock) on p.CountyCode = c.code
join DrugTestType as dtt with (nolock) on dt.DrugTestType = dtt.code
inner join
(
select distinct
dt2.pid,
CONVERT(varchar, dt2.CollectionDate, 101) as 'CollectionDate'
from
dbo.DrugTest as dt2 with (nolock)
join dbo.History as h2 on dt2.pid = h2.id
join dbo.Participant as p2 on h2.pid = p2.id
where
dt2.result = 'UN'
and dt2.CollectionDate between '11-01-2011' and '10-31-2012'
and p2.DrugCourtType = 'AD'
) as derived
on dt.pid = derived.pid
and convert(varchar, dt.CollectionDate, 101) = convert(varchar, derived.CollectionDate, 101)
group by
c.desc, dt.pid, p.id, dt.id, bio.fullname, dt.CollectionDate, dtt.desc, dt.result
order by
c.desc ASC, Participant ASC, dt.CollectionDate ASC
This is a little complicated because the your query has a separate row for each test. You need to use window/analytic functions to get the information you want. These allow you to do calculate aggregation functions, but to put the values on each line.
The following query starts with your query. It then calculates the number of UN results on each date for each participant and the total number of tests. It applies the appropriate filter to get what you want:
with base as (<your query here>)
select b.*
from (select b.*,
sum(isUN) over (partition by Participant, CollectionDate) as NumUNs,
count(*) over (partition by Partitipant, CollectionDate) as NumTests
from (select b.*,
(case when result = 'UN' then 1 else 0 end) as IsUN
from base
) b
) b
where NumUNs <> 1 or NumTests <> 1
Without the with clause or window functions, you can create a particularly ugly query to do the same thing:
select b.*
from (<your query>) b join
(select Participant, CollectionDate, count(*) as NumTests,
sum(case when result = 'UN' then 1 else 0 end) as NumUNs
from (<your query>) b
group by Participant, CollectionDate
) bsum
on b.Participant = bsum.Participant and
b.CollectionDate = bsum.CollectionDate
where NumUNs <> 1 or NumTests <> 1
If I understand the problem, the basic pattern for this sort of query is simply to include negating or exclusionary conditions in your join. I.E., self-join where columnA matches, but columns B and C do not:
select
[columns]
from
table t1
join table t2 on (
t1.NonPkId = t2.NonPkId
and t1.PkId != t2.PkId
and t1.category != t2.category
)
Put the conditions in the WHERE clause if it benchmarks better:
select
[columns]
from
table t1
join table t2 on (
t1.NonPkId = t2.NonPkId
)
where
t1.PkId != t2.PkId
and t1.category != t2.category
And it's often easiest to start with the self-join, treating it as a "base table" on which to join all related information:
select
[columns]
from
(select
[columns]
from
table t1
join table t2 on (
t1.NonPkId = t2.NonPkId
)
where
t1.PkId != t2.PkId
and t1.category != t2.category
) bt
join [othertable] on (<whatever>)
join [othertable] on (<whatever>)
join [othertable] on (<whatever>)
This can allow you to focus on getting that self-join right, without interference from other tables.

Using MAX for date but adding column to group on 'breaks' the query - sub query?

I Have a table which holds date but I need to know the latest date where a condition is true per location, only issue is once I add a column called 'notes' it breaks the query and returns too many rows, current query is ...
SELECT
Location,
MAX(date) AS date,
type,
notes
FROM NotesTable a
INNER JOIN Location b on a.LocationID = b.LocationID
INNER JOIN Type c on a.typeid = c.typeid
WHERE typeid <> 8
GROUP BY Location, type, notes
If I comment out the notes column then it works fine but as soon as I add that to the grouping it then returns more rows than required.
Have tried using a subquery but still cant get it working, subquery below
SELECT
r.location,
r.date,
r.type,
t.notes
FROM (SELECT Location, MAX(date), type
FROM NotesTable a INNER JOIN Location b on a.LocationID = b.LocationID
INNER JOIN Type c on a.typeid = c.typeid
WHERE typeid <> 8
GROUP BY location,type
) r
INNER JOIN NotesTable t ON t.date = r.date
Anyone got any other suggestions?
select * from
(
SELECT Location,Date, Type, Notes, Row_Number() Over (Partition By Location, Type order by date desc) RN
FROM
NotesTable a
INNER JOIN Location b on a.LocationID = b.LocationID
INNER JOIN Type c on a.typeid = c.typeid
WHERE typeid <> 8
) v
WHERE rn = 1
Your query is almost correct, you need to add this additional condition in ON clause
AND
t.location = r.location AND
t.type = r.type
full query,
SELECT r.location
, r.DATE
, r.type
, t.notes
FROM (
SELECT Location
, MAX(DATE) maxDate
, type
FROM NotesTable a
INNER JOIN Location b
ON a.LocationID = b.LocationID
INNER JOIN Type c
ON a.typeid = c.typeid
WHERE typeid <> 8
GROUP BY location
, type
) r
INNER JOIN NotesTable t
ON t.DATE = r.maxDate AND
t.location = r.location AND
t.type = r.type

selecting the max values based on a count

How can i retrieve the max of each ValueCount based on the firmid. I need the data to be output like so.
My code is below
SELECT
F.FirmID,
F.Name,
DL.ValueId,
DL.ValueName,
count(DL.ValueName) AS ValueCount
FROM
dbo.Jobs AS J
INNER JOIN DimensionValues AS DV ON
DV.CrossRef = J.JobId
INNER JOIN dbo.DimensionLists AS DL ON
DV.ValueId = DL.ValueId
INNER JOIN Firms AS F ON
F.FirmId = J.ClientFirmId
WHERE
DL.DimensionId = 4
GROUP BY
F.FirmID,
F.Name,
DL.ValueName,
DL.ValueId
this produces something like
firmid | value | count
1 1 5
1 2 10
2 3 1
2 1 6
i need to return back the records with 10 and 6.
EDIT : SQL 2005 answer deleted.
Then you could push your results into a temporary table (or table variable) and do something like this...
SELECT
*
FROM
TempTable
WHERE
ValueCount = (SELECT MAX(ValueCount) FROM TempTable AS Lookup WHERE FirmID = TempTable.FirmID)
Or...
SELECT
*
FROM
TempTable
INNER JOIN
(SELECT FirmID, MAX(ValueCount) AS ValueCount FROM TempTable GROUP BY FirmID) AS lookup
ON lookup.FirmID = TempTable.FirmID
AND lookup.ValueCount = TempTable.ValueCount
These will give multiple records if any ValueCount is tied with another for the same FirmID. As such, you could try this...
SELECT
*
FROM
TempTable
WHERE
value = (
SELECT TOP 1
value
FROM
TempTable as lookup
WHERE
FirmID = TempTable.FirmID
ORDER BY
ValueCount DESC
)
For this problem you need to produce the result set of the query in order to determine the Max ValueCount, then you need to do the query again to pull just the records with Max ValueCount. You can do this many way, like repeating the main query as subqueries, and in SQL Server 2005/2008 by using a CTE. I think using the subqueries gets a little messy and would prefer the CTE, but for SQL Server 2000 you don't have that as an option. So, I've used a temp table instead of a CTE. I run it once to get the MaxValueCount and save that into a temp table, then run the query again and join against the temp table to get just the record with MaxValueCount.
create table #tempMax
(
FirmID int,
MaxValueCount int
)
insert #tempMax
SELECT t.FirmID, MAX(t.ValueCount) AS MaxValueCount
FROM (
SELECT F.FirmID, F.Name, DL.ValueId, DL.ValueName
, count(DL.ValueName) AS ValueCount
FROM dbo.Jobs AS J
INNER JOIN DimensionValues AS DV ON DV.CrossRef = J.JobId
INNER JOIN dbo.DimensionLists AS DL ON DV.ValueId = DL.ValueId
INNER JOIN Firms AS F ON F.FirmId = J.ClientFirmId
WHERE DL.DimensionId = 4
GROUP BY F.FirmID, F.Name, DL.ValueName, DL.ValueId) t
SELECT t.FirmID, t.Name, t.ValueID, t.ValueName, t.ValueCount
FROM (
SELECT F.FirmID, F.Name, DL.ValueId, DL.ValueName
, count(DL.ValueName) AS ValueCount
FROM dbo.Jobs AS J
INNER JOIN DimensionValues AS DV ON DV.CrossRef = J.JobId
INNER JOIN dbo.DimensionLists AS DL ON DV.ValueId = DL.ValueId
INNER JOIN Firms AS F ON F.FirmId = J.ClientFirmId
WHERE DL.DimensionId = 4
GROUP BY F.FirmID, F.Name, DL.ValueName, DL.ValueId) t
INNER JOIN #tempMax m ON t.FirmID = m.FirmID and t.ValueCount = m.MaxValueCount
DROP TABLE #tempMax
You should be able to use a derived table for this:
SELECT F.FirmID,
F.Name,
DL.ValueId,
DL.ValueName,
T.ValueCount
FROM Jobs J
INNER JOIN DimensionValues DV
ON DV.Crossref = J.JobID
INNER JOIN DimensionList DL
ON DV.ValueID = DL.ValueID
INNER JOIN Firms F
ON F.FirmID = J.ClientFirmID
--derived table
INNER JOIN (SELECT FirmID, MAX(ValueName) ValueCount FROM DimensionList GROUP BY FirmID) T
ON T.FirmID = F.FirmID
WHERE DL.DimensionId = 4
TBL1 and TBL2 is your query:
SELECT *
FROM TBL1
WHERE
TBL1.ValueCount = (SELECT MAX(TBL2.ValueCount) FROM TBL2 WHERE TBL2.FIRMID = TBL1.FIRMID)