SQL query to conditionally select a field value - sql

I have an SQL query that joins 3 tables to return me the required data. The query is as follows:
SELECT (s.user_created,
u.first_name,
u.last_name,
u.email,
s.school_name,
p.plan_name,
substring( a.message, 11), u.phone1)
FROM cb_school s
inner join ugrp_user u
on s.user_created = u.user_id
inner join cb_plan p
on s.current_plan = p.plan_id
inner join audit a
on u.user_id = a.user_id
where s.type = 'sample'
and a.module_short = 'sample-user'
and s.created_time > current_timestamp - interval '10 day';
The query works fine if all the attributes are present. Now for few of my rows, the following value would be a.module_short = 'sample-user' missing. But since I have included it as an AND condition, those rows will not be returned. I am trying to return an empty string for that field if it is present, else the value as per my current query. Is there any way to achieve this.

Think you could possibly use a CASE WHEN statement, like this:
SELECT CASE WHEN a.module_short = 'sample-user' THEN a.module_short
ELSE '' END AS a.module_short
FROM TableA

you can use COALESCE it returns the first not null.
SELECT COALESCE(a.module_short,'')
FROM TableA AS a

SELECT (s.user_created,
u.first_name,
u.last_name,
u.email,
s.school_name,
p.plan_name,
substring( a.message, 11), u.phone1)
FROM cb_school s
INNER JOIN ugrp_user u
ON s.user_created = u.user_id
INNER JOIN cb_plan p
ON s.current_plan = p.plan_id
INNER JOIN audit a
ON u.user_id = a.user_id
AND a.module_short = 'sample-user'
WHERE s.type = 'sample'
AND s.created_time > current_timestamp - interval '10 day';

You want to show all users that have at least one module_short.
If the module_short contains 'sample-user' then it should show it, else it should show NULL as module_short. You only want 1 row per user, even if it has multiple module_shorts.
You can use a CTE, ROW_NUMBER() and the CASE clause for this question.
Example Question
I have 3 tables.
Users: Users with an ID
Modules: Modules with an ID
UserModules: The link between users and modules. You user can have multiple models.
I need a query that returns me all users that have at least 1 module with 2 columns UserName and ModuleName.
I only one 1 row for each user. The ModuleName should only display SQL if the user has that module. Else it should display no module.
Example Tables:
Users:
id name
1 Manuel
2 John
3 Doe
Modules:
id module
1 StackOverflow
2 SQL
3 StackExchange
4 SomethingElse
UserModules:
id module_id user_id
1 1 2
2 1 3
4 2 2
5 2 3
6 3 1
7 3 3
8 4 1
9 4 3
Example Query:
with CTE as (
select
u.name as UserName
, CASE
WHEN m.module = 'SQL' THEN 'SQL' ELSE NULL END as ModuleName
, ROW_NUMBER() OVER(PARTITION BY u.id
ORDER BY (CASE
WHEN m.module = 'SQL' THEN 'Ja' ELSE NULL END) DESC) as rn
from UserModules as um
inner join Users as u
on um.user_id = u.id
inner join Modules as m
on um.module_id = m.id
)
select UserName, ModuleName from CTE
where rn = 1
Example Result:
UserName ModuleName
Manuel NULL
John SQL
Doe SQL
Your query would look like this:
with UsersWithRownumbersBasedOnModule_short as (
SELECT s.user_created,
u.first_name,
u.last_name,
u.email,
s.school_name,
p.plan_name,
substring( a.message, 11),
u.phone1)
CASE
WHEN a.module_short = 'sample-user'
THEN a.module_short
ELSE NULL
END AS ModuleShort
ROW_NUMBER() OVER(PARTITION BY u.user_id ORDER BY (
CASE
WHEN a.module_short = 'sample-user'
THEN a.module_short
ELSE NULL
END) DESC) as rn
FROM cb_school s
inner join ugrp_user u
on s.user_created = u.user_id
inner join cb_plan p
on s.current_plan = p.plan_id
inner join audit a
on u.user_id = a.user_id
where s.type = 'sample'
and s.created_time > current_timestamp - interval '10 day';)
select * from UsersWithRownumbersBasedOnModule_short
where rn = 1
PS: I removed a lose bracket after SELECT and your SUBSTRING() is missing 1 parameter, it needs 3.

Related

How to optimize multiple subqueries to the same data set

Imagine I have a query like the following one:
SELECT
u.ID,
( SELECT
COUNT(*)
FROM
POSTS p
WHERE
p.USER_ID = u.ID
AND p.TYPE = 1
) AS interesting_posts,
( SELECT
COUNT(*)
FROM
POSTS p
WHERE
p.USER_ID = u.ID
AND p.TYPE = 2
) AS boring_posts,
( SELECT
COUNT(*)
FROM
COMMENTS c
WHERE
c.USER_ID = u.ID
AND c.TYPE = 1
) AS interesting_comments,
( SELECT
COUNT(*)
FROM
COMMENTS c
WHERE
c.USER_ID = u.ID
AND c.TYPE = 2
) AS boring_comments
FROM
USERS u;
( Hopefully it's correct because I just came up with it and didn't test it )
where I try to calculate the number of interesting and boring posts and comments that the user has.
Now, the problem with this query is that we have 2 sequential scans on both the posts and comments table and I wonder if there is a way to avoid that?
I could probably LEFT JOIN both posts and comments to the users table and do some aggregation but it's gonna generate a lot of rows before aggregation and I am not sure if that's a good way to go.
Aggregate posts and comments and outer join them to the users table.
select
u.id as user_id,
coaleasce(p.interesting, 0) as interesting_posts,
coaleasce(p.boring, 0) as boring_posts,
coaleasce(c.interesting, 0) as interesting_comments,
coaleasce(c.boring, 0) as boring_comments
from users u
left join
(
select
user_id,
count(case when type = 1 then 1 end) as interesting,
count(case when type = 2 then 1 end) as boring
from posts
group by user_id
) p on p.user_id = u.id
left join
(
select
user_id,
count(case when type = 1 then 1 end) as interesting,
count(case when type = 2 then 1 end) as boring
from comments
group by user_id
) c on c.user_id = u.id;
compare results and execution plan (here you scan posts once):
with c as (
select distinct
count(1) filter (where TYPE = 1) over (partition by USER_ID) interesting_posts
, count(1) filter (where TYPE = 2) over (partition by USER_ID) boring_posts
, USER_ID
)
, p as (select USER_ID,max(interesting_posts) interesting_posts, max(boring_posts) boring_posts from c)
SELECT
u.ID, interesting_posts,boring_posts
, ( SELECT
COUNT(*)
FROM
COMMENTS c
WHERE
c.USER_ID = u.ID
) AS comments
FROM
USERS u
JOIN p on p.USER_ID = u.ID

How to optimize SQL Server query

I am copying data from one table to another table. While copying I am doing some calculation to modify one column.
SQL Server query:
INSERT INTO rat_proj_duration_map_2
SELECT
r.*,
r.hour_val / (CASE
WHEN week_val = 1 AND
(SELECT TOP 1
hrswk
FROM UserProfileRATinterface_view us
INNER JOIN users u
ON u.username = us.username
WHERE calwk = 2
AND r.uid = u.uid
AND yr = 2016)
> 0 THEN (SELECT TOP 1
hrswk
FROM UserProfileRATinterface_view us
INNER JOIN users u
ON u.username = us.username
WHERE calwk = 2
AND r.uid = u.uid
AND yr = 2016)
WHEN (SELECT
hrswk
FROM UserProfileRATinterface_view us
INNER JOIN users u
ON u.username = us.username
WHERE r.week_val = us.calwk
AND r.uid = u.uid
AND yr = 2016)
< 1 AND
(SELECT
MAX(hrswk)
FROM UserProfileRATinterface_view us
INNER JOIN users u
ON u.username = us.username
WHERE r.uid = u.uid
AND yr = 2016)
> 0 THEN (SELECT
MAX(hrswk)
FROM UserProfileRATinterface_view us
INNER JOIN users u
ON u.username = us.username
WHERE r.uid = u.uid
AND yr = 2016)
WHEN (SELECT
COUNT(*)
FROM UserProfileRATinterface_view us
INNER JOIN users u
ON u.username = us.username
WHERE r.uid = u.uid
AND yr = 2016)
<= 0 THEN 1
ELSE (SELECT
hrswk
FROM UserProfileRATinterface_view us
INNER JOIN users u
ON u.username = us.username
WHERE r.week_val = us.calwk
AND r.uid = u.uid
AND yr = 2016)
END) * 100 AS percentage_val
FROM rat_proj_duration_map r
When I run this query I getting time out issue.
TCP Provider: Timeout error [258]
SQL Server is not in my hand to increase time out value.
Is it possible to optimize my SQL query?
Are you sure this query is logically correct? You have several TOP 1s without specific ORDER BY, scalar comparison of subselect without TOP (which, I assume, may return more than one row if you are using top in other subselects with same source).
And yes - this query can be optimized. You can obtain all the values you need with a single subselect statement and avoid multiple execution of same subselects for each row of rat_proj_duration_map which you are having now:
INSERT INTO rat_proj_duration_map_2
SELECT
r.*,
r.hour_val / (CASE
WHEN week_val = 1 AND us.min_hrswk_2 > 0
THEN us.min_hrswk_2
WHEN us.min_hrswk_week_val <1
AND max_hrswk > 0
THEN max_hrswk
WHEN us.cnt <= 0
THEN 1
ELSE min_hrswk_week_val
END) * 100 as percentage_val
FROM
rat_proj_duration_map r
OUTER APPLY
(
SELECT
count(*) as cnt,
MIN(CASE WHEN calcw = 2 THEN hrswk END) as min_hrswk_2,
MIN(CASE WHEN calcw = r.week_val THEN hrswk END) as min_hrswk_week_val,
MAX(hrswk) as max_hrswk
FROM UserProfileRATinterface_view us
inner join users u on u.username=us.username
WHERE r.uid=u.uid and yr=2016
) us
But I can't be sure if original logic is correct. And the idea of that case to me looks like this:
...
r.hour_val / COALESCE(NULLIF(us.min_hrswk_2, 0),
NULLIF(us.min_hrswk_week_val, 0), NULLIF(max_hrswk, 0), 1)
...
The subqueries in your case clause seem to be essentially the same. You could simplify the whole command by defining a grouped version (... where yr=2016 group by u.uid) of this subquery (preferrably as a common table expression) and then work with that. This could potentially save a lot of redundant operations.
The following might work (have not tested it):
;WITH usrall as (
SELECT u.uid ui, hrswk hw, r.week wk, us.calwk cw
FROM UserProfileRATinterface_view us
INNER JOIN users u on u.username=us.username
WHERE r.uid=u.uid and yr=2016
), usrgrp as (
SELECT ui gui, MAX(hrswk) ghw, count(*) gcnt FROM usrall group by ui
), denom as (
SELECT gui dui, COALESCE( MAX(w2.hw), MAX(wkwc.hw), MAX(gwh) ) dnm
FROM usrgrp
LEFT JOIN usrall w2 ON w2.ui=gui AND w2.cw=2 AND w2.hw>0
LEFT JOIN usrall wkcw ON wkcw.ui=gui AND wkcw.wk=wkcw.cw AND wkwc.hw<1
GROUP BY gui
)
SELECT r.*, r.hour_val / d.dnm
FROM rat_proj_duration_map r
INNER JOIN denom d ON d.dui=u.uid
Essentially I have tried (I hope it works :-/) to replace the case construct by a COALESCE() function that checks the three possible calculated values one after the other. The first non-null value is accepted.
As I said: I have not tested it. Good luck

SQL Query to check if joining record exists

I have 2 tables - User table (tblUsers) and Logs table (tblLogs).
The User table contains - UserID and UserName.
The Logs table contains - UserID, ApplicationID, ApplicationName, LogonDateTime
I want to select all the Users and find if they have accessed a particular application (for example ApplicationID = 3)
Output expected
UserID, UserName, Status
Status - If he has visited the applicationID = 3 or not.
Since you are missing a third table which contain all the applications available ( i guess it's [tblApplications] ), the sql would be:
SELECT
U.UserID
, U.UserName
, (
CASE WHEN L.ApplicationID IS NOT NULL
THEN '1'
ELSE '0'
END
) AS Status
FROM tblUsers AS U, tblApplications AS A
LEFT OUTER JOIN tblLogs AS L
ON A.ID = L.ApplicationID
WHERE A.ID = '3'
Use a CASE expression to check the status.
Query
select t1.UserId, t1.UserName,
max(case when t1.UserId in
(select UserId from tblLogs where ApplicationID = 3) then 'Visited'
else 'Not Visited' end) as `status`
from tblUsers t1
left join tblLogs t2
on t1.UserID = t2.UserID
group by t1.UserId, t1.UserName;
SQl Fiddle Demo
I hope this query give you the direction of thought as to what you exact requirement is:
SELECT u.UserID,
u.UserName,
(SELECT (CASE
WHEN COUNT(*) > 0 THEN
'VISITED'
ELSE
NULL
END)
FROM tblLogs l
WHERE u.UserId = l.UserId
AND l.ApplicationID = '3')
AS status
FROM tblUsers u
Note that based on the DB that you use, the group by on UserName and ApplicationName might be optional.

SQL count number of users hava a value > 1

I need a select which brings two lines, one with the number of people with the " number of hits " > 0 and the other line with the number of people with the " number of hits " = 0
SELECT u.name as 'Usuário',u.full_name as 'Nome do Usuário',count(l.referer) as 'Número de Acessos'
FROM mmp_user u
LEFT JOIN MMP_MMPUBLISH_LOG l
on u.id=l.user_id
AND l.event_date between '2015-08-01' and '2015-08-08'
group by u.name,u.full_name
order by count(l.referer) desc
I have,
151 Users
9 accessed and
142 not accessed.
But i don't return this values in select, help me please.
Table mmp_user fields (ID,CREATED_BY,AVATAR_ID,CREATION_DATE,EMAIL,FULL_NAME,LAST_EDITED_BY,LAST_EDITION_DATE,NAME,OBSERVATION,USER_PASSWORD,PASSWORD_REMINDER,SIGNATURE,STATUS,ADMINISTRATOR,DESIGNER,SECURITY_OFFICE,PUBLISHER,BRANCH_ID,DEPARTMENT_ID,EXTENSION,PHONE,COMPANY_ID,POSITION,ADMISSION_DATE,PASSWORD_LAST_EDITION_DATE,DISMISSED_DATE,NEWSLETTER,EXPIRE_DATE,COMPANY,BRANCH,DEPARTMENT,AREA_ID,SITE,USER_NUMBER,PREFIX_HOME_PHONE,PREFIX_MOBILE_PHONE,ADDRESS,ADDRESS_COMPLEMENT,ADDRESS_TYPE,CITY,NEIGHBORHOOD,STATE,ZIP_CODE,BIRTHDATE,GENDER,HOME_PHONE,MOBILE_PHONE,CPF,MARIAGE_STATUS,NATIONALITY,RG,EDUCATION,URL_SITE,FIRST_NAME,LAST_NAME,ID_SAP,PASSWORD_GAFISA,NICKNAME,CODE_POSITION,CREATION_USER_ORIGIN,LEVEL_POSITION,BIRTH_DATE_VISIBILITY,HOME_PHONE_COUNTRY_PREFIX,HOME_PHONE_VISIBILITY,MOBILE_PHONE_COUNTRY_PREFIX,MOBILE_PHONE_VISIBILITY,AREA_PREFIX,COUNTRY_PREFIX,PHONE_OBSERVATION,RESPONSIBLE,RESOURCE_ID,AVATAR_RF_ID,RESOURCE_AVATAR_ID,AVATAR_URL_LUCENE,avatarurl,PASSWORD_EXCHANGE,USER_NAME_EXCHANGE,DOMAIN_EXCHANGE,I18N,LAST_IMPORT_FILE,HIERARCHY_POSITION,SECRET_NICKNAME,PROFILE_TYPE,NOT_VIEW_USER,CHANGE_POSITION_DATE,DISTINGUISHED_NAME,OU_USER,AUTH_TOKEN,AUTH_TOKEN_EXPIRATION)
TableMMP_MMPUBLISH_LOG fields (ID,MMPUBLISH_LOG_TYPE,EVENT_DATE,USER_ID,TRANSACTION_NAME,USER_IP,USER_LOGIN,USER_NAME,SESSION_ID,REFERER,PUBLISHING_OBJECT_ID,PUBLISHING_OBJECT_NAME,PHASE_ID,PHASE_NAME,PHASE_COMMENT,ACCESS_URL,HOME_PAGE_ID,HOMEPAGE_ID,phaseComment,phaseId,phaseName,PO_VERSION_NUMBER)
Thanks
You could wrap this query with another query and apply a case expression to the count:
SELECT access_code, COUNT(*)
FROM (SELECT u.name,
u.full_name,
CASE WHEN COUNT(l.referer) > 0 THEN 'access'
ELSE 'no access'
END as access_code
FROM mmp_user u
LEFT JOIN mmp_mmpupluish_log l ON
u.id=l.user_id AND
l.event_date BETWEEN '2015-08-01' AND '2015-08-08'
GROUP BY u.name, u.full_name) t
GROUP BY access_code
ORDER BY access_code ASC
SELECT u.name Usuário, u.full_name [Nome do Usuário],
count(l.referer) [Número de Acessos],
Sum(case when NumberOfHits = 0 then 1 else 0 end) ZeroHitsCount,
Sum(case when NumberOfHits > 0 then 1 else 0 end) HasSomeHitsCount
FROM mmp_user u
LEFT JOIN MMP_MMPUBLISH_LOG l
on u.id=l.user_id
AND l.event_date between '2015-08-01' and '2015-08-08'
group by u.name, u.full_name
order by count(l.referer) desc
Use a case statement:
SELECT (case when l.referer is null then 'Not Accessed'
else 'Accessed'
end) as which,
count(*) as 'Número de Acessos'
FROM mmp_user u LEFT JOIN
MMP_MMPUBLISH_LOG l
on u.id = l.user_id AND
l.event_date between '2015-08-01' and '2015-08-08'
group by (case when l.referer is null then 'Not Accessed'
else 'Accessed'
end)
order by count(l.referer) desc;
Actually, the above counts the number of accesses. One way to get the number of users is to use count(distinct u.id). Another way uses a subquery:
select AccessType, count(*)
from (select u.*,
(case when exists (select 1
from MMP_MMPUBLISH_LOG l
where u.id = l.user_id AND
l.event_date between '2015-08-01' and '2015-08-08'
)
then 'Accessed' else 'Not Accessed'
end) as AccessType
from mmp_user u
) u
group by AccessType;

Remove grouped data set when total of count is zero with subquery

I'm generating a data set that looks like this
category user total
1 jonesa 0
2 jonesa 0
3 jonesa 0
1 smithb 0
2 smithb 0
3 smithb 5
1 brownc 2
2 brownc 3
3 brownc 4
Where a particular user has 0 records in all categories is it possible to remove their rows form the set? If a user has some activity like smithb does, I'd like to keep all of their records. Even the zeroes rows. Not sure how to go about that, I thought a CASE statement may be of some help but I'm not sure, this is pretty complicated for me. Here is my query
SELECT DISTINCT c.category,
u.user_name,
CASE WHEN (
SELECT COUNT(e.entry_id)
FROM category c1
INNER JOIN entry e1
ON c1.category_id = e1.category_id
WHERE c1.category_id = c.category_id
AND e.user_name = u.user_name
AND e1.entered_date >= TO_DATE ('20140625','YYYYMMDD')
AND e1.entered_date <= TO_DATE ('20140731', 'YYYYMMDD')) > 0 -- I know this won't work
THEN 'Yes'
ELSE NULL
END AS TOTAL
FROM user u
INNER JOIN role r
ON u.id = r.user_id
AND r.id IN (1,2),
category c
LEFT JOIN entry e
ON c.category_id = e.category_id
WHERE c.category_id NOT IN (19,20)
I realise the case statement won't work, but it was an attempt on how this might be possible. I'm really not sure if it's possible or the best direction. Appreciate any guidance.
Try this:
delete from t1
where user in (
select user
from t1
group by user
having count(distinct category) = sum(case when total=0 then 1 else 0 end) )
The sub query can get all the users fit your removal requirement.
count(distinct category) get how many category a user have.
sum(case when total=0 then 1 else 0 end) get how many rows with activities a user have.
There are a number of ways to do this, but the less verbose the SQL is, the harder it may be for you to follow along with the logic. For that reason, I think that using multiple Common Table Expressions will avoid the need to use redundant joins, while being the most readable.
-- assuming user_name and category_name are unique on [user] and [category] respectively.
WITH valid_categories (category_id, category_name) AS
(
-- get set of valid categories
SELECT c.category_id, c.category AS category_name
FROM category c
WHERE c.category_id NOT IN (19,20)
),
valid_users ([user_name]) AS
(
-- get set of users who belong to valid roles
SELECT u.[user_name]
FROM [user] u
WHERE EXISTS (
SELECT *
FROM [role] r
WHERE u.id = r.[user_id] AND r.id IN (1,2)
)
),
valid_entries (entry_id, [user_name], category_id, entry_count) AS
(
-- provides a flag of 1 for easier aggregation
SELECT e.[entry_id], e.[user_name], e.category_id, CAST( 1 AS INT) AS entry_count
FROM [entry] e
WHERE e.entered_date BETWEEN TO_DATE('20140625','YYYYMMDD') AND TO_DATE('20140731', 'YYYYMMDD')
-- determines if entry is within date range
),
user_categories ([user_name], category_id, category_name) AS
( SELECT u.[user_name], c.category_id, c.category_name
FROM valid_users u
-- get the cartesian product of users and categories
CROSS JOIN valid_categories c
-- get only users with a valid entry
WHERE EXISTS (
SELECT *
FROM valid_entries e
WHERE e.[user_name] = u.[user_name]
)
)
/*
You can use these for testing.
SELECT COUNT(*) AS valid_categories_count
FROM valid_categories
SELECT COUNT(*) AS valid_users_count
FROM valid_users
SELECT COUNT(*) AS valid_entries_count
FROM valid_entries
SELECT COUNT(*) AS users_with_entries_count
FROM valid_users u
WHERE EXISTS (
SELECT *
FROM user_categories uc
WHERE uc.user_name = u.user_name
)
SELECT COUNT(*) AS users_without_entries_count
FROM valid_users u
WHERE NOT EXISTS (
SELECT *
FROM user_categories uc
WHERE uc.user_name = u.user_name
)
SELECT uc.[user_name], uc.[category_name], e.[entry_count]
FROM user_categories uc
INNER JOIN valid_entries e ON (uc.[user_name] = e.[user_name] AND uc.[category_id] = e.[category_id])
*/
-- Finally, the results:
SELECT uc.[user_name], uc.[category_name], SUM(NVL(e.[entry_count],0)) AS [entry_count]
FROM user_categories uc
LEFT OUTER JOIN valid_entries e ON (uc.[user_name] = e.[user_name] AND uc.[category_id] = e.[category_id])
Here's another method:
WITH totals AS (
SELECT
c.category,
u.user_name,
COUNT(e.entry_id) AS total,
SUM(COUNT(e.entry_id)) OVER (PARTITION BY u.user_name) AS user_total
FROM
user u
INNER JOIN
role r ON u.id = r.user_id
CROSS JOIN
category c
LEFT JOIN
entry e ON c.category_id = e.category_id
AND u.user_name = e.user_name
AND e1.entered_date >= TO_DATE ('20140625', 'YYYYMMDD')
AND e1.entered_date <= TO_DATE ('20140731', 'YYYYMMDD')
WHERE
r.id IN (1, 2)
AND c.category_id IN (19, 20)
GROUP BY
c.category,
u.user_name
)
SELECT
category,
user_name,
total
FROM
totals
WHERE
user_total > 0
;
The totals derived table calculates the totals per user and category as well as totals across all categories per user (using SUM() OVER ...). The main query returns only rows where the user total is greater than zero.