combine two sql queries having each a with clause - sql

I have got two queries who are doing the job for me but I would like to combine them to have the results in one table instead of copy past the results into excel.
First query gives me the number of users whose at least one subscription have expired per month in 2018:
WITH UniqueUsers AS
(
SELECT DISTINCT MONTH(ValidTo) ExpireMonth, UserId
FROM UserInAppPurchase
WHERE YEAR(ValidTo) = 2018
)
SELECT ExpireMonth, COUNT(UserId) UserCount
FROM UniqueUsers
GROUP BY ExpireMonth order by ExpireMonth;
Second query gives me the number of users whose at least made one subscription purchase per month in 2018:
WITH UniqueUsers AS
(
SELECT DISTINCT MONTH(PurchaseDate) PurchaseMonth, UserId
FROM UserInAppPurchase
WHERE YEAR(PurchaseDate) = 2018
)
SELECT PurchaseMonth, COUNT(UserId) UserCount
FROM UniqueUsers
GROUP BY PurchaseMonth order by PurchaseMonth;
Actually the PurchaseMonth and ExpireMonth are the same.
My expected output is: 1st column: months of 2018
2nd column: results from first query
3nd column: results from second query
It is not a big harm to just copy the two results and combine them manually, but I am curious how to do it directly in SQL.
Thanks for the help

I would unpivot the dates and just do aggregation:
SELECT MONTH(dte) as mon,
COUNT(DISTINCT ExpireUserId) as numExpiredUsers,
COUNT(DISTINCT PurchaseUserId) as numPurchaseUsers
FROM UserInAppPurchase uiap CROSS APPLY
(VALUES (ValidTo, UserId, NULL),
(PurchaseDate, NULL, UserId)
) v(dte, ExpireUserId, PurchaseUserId)
WHERE dte >= '2018-01-01' AND dte < '2019-01-01'
GROUP BY MONTH(dte)
ORDER BY MONTH(dte);
No subqueries, explicit JOINs, or CTEs are really needed for this logic.

You can use smth like this:
with months as
(
SELECT MONTH(ValidTo) as month_
FROM UniqueUsers
WHERE YEAR(ValidTo) = 2018
union
SELECT MONTH(PurchaseDate)
FROM UniqueUsers
WHERE YEAR(PurchaseDate) = 2018
),
UniqueUsers AS
(
SELECT MONTH(PurchaseDate) as ExpireMonth,
COUNT(UserId) UserCount
FROM UniqueUsers
WHERE YEAR(ValidTo) = 2018
GROUP BY MONTH(PurchaseDate)
),
UniqueUsers1 AS
(
SELECT MONTH(PurchaseDate) as PurchaseMonth,
COUNT(UserId) UserCount1
FROM UniqueUsers
WHERE YEAR(PurchaseDate) = 2018
GROUP BY MONTH(PurchaseDate)
)
select m.month_,
u.UserCount,
u1.UserCount1
from months m
left join UniqueUsers u
on m.month_ = u.ExpireMonth
left join UniqueUsers1 u1
on m.month_ = u1.PurchaseMonth
order by m.month_;
But I'm not sure your order by clause is what you want. Your months will be sorted as 1, 11, 12. Maybe you wanted another order, I just rewrote your queries in order to combine them.

Here you are, you can join CTEs
;WITH UniqueUsers1
AS (
SELECT DISTINCT MONTH(ValidTo) ExpireMonth
,UserId
FROM UserInAppPurchase
WHERE YEAR(ValidTo) = 2018
)
,UniqueUsers2
AS (
SELECT DISTINCT MONTH(PurchaseDate) PurchaseMonth
,UserId
FROM UserInAppPurchase
WHERE YEAR(PurchaseDate) = 2018
)
SELECT *
FROM (
SELECT ExpireMonth
,COUNT(UserId) UserCount
FROM UniqueUsers1 c1
GROUP BY ExpireMonth
) First
INNER JOIN (
SELECT PurchaseMonth
,COUNT(UserId) UserCount
FROM UniqueUsers2
GROUP BY PurchaseMonth
) Sec ON First.ExpireMonth = Sec.PurchaseMonth

You can store each result in separate temporary table, for example create two temporary tables like this:
CREATE Table #ExpiryDates (ExpireMonth int, TotalUsers int)
CREATE Table #PurchaseDates (PurchaseMonth int, TotalUsers int)
And then store first query result into the temporary table #ExpirayDates
;WITH UniqueUsers AS
(
SELECT DISTINCT MONTH(ValidTo) ExpireMonth, UserId
FROM UserInAppPurchase
WHERE YEAR(ValidTo) = 2018
)
INSERT INTO #ExpiryDates (ExpireMonth, TotalUsers)
SELECT ExpireMonth, COUNT(UserId) UserCount
FROM UniqueUsers
GROUP BY ExpireMonth order by ExpireMonth;
Second Query will be like this:
;WITH UniqueUsers AS
(
SELECT DISTINCT MONTH(PurchaseDate) PurchaseMonth, UserId
FROM UserInAppPurchase
WHERE YEAR(PurchaseDate) = 2018
)
INSERT INTO #PurchaseDates (PurchaseMonth, TotalUsers)
SELECT PurchaseMonth, COUNT(UserId) UserCount
FROM UniqueUsers
GROUP BY PurchaseMonth order by PurchaseMonth;
Finally, compine these into one query
SELECT p.PurchaseMonth MonthNumber, p.TotalUsers As UsersMadePurchases,
e.ExpireMonth, e.TotalUsers As UsersExpiredSubs
FROM #PurchaseDates p
LEFT JOIN #ExpiryDates e
ON e.ExpireMonth = p.PurchaseMonth
Result will be:
MonthNumber UsersMadePurchases ExpireMonth UsersExpiredSubs
4 1 NULL NULL
5 2 5 2
6 2 6 2
7 1 7 1
8 1 8 1
Let me know if this solves your problem.

Related

SQL : How to select the most recent value by country

I have a table with 3 columns : day, country, value. There are many values by country with different dates. For example :
DAY COUNTRY VALUE
04-SEP-19 BELGIUM 2124
15-MAR-19 BELGIUM 2135
21-MAY-19 SPAIN 1825
18-JUL-19 SPAIN 1724
26-MAR-19 ITALY 4141
I want to select the most recent value by country. For example :
DAY COUNTRY VALUE
04-SEP-19 BELGIUM 2124
18-JUL-19 SPAIN 1724
26-MAR-19 ITALY 4141
What is the sql query I can use?
Thank you for your help.
You can use the row_number() window function (if your DBMS supports it)).
SELECT x.day,
x.country,
x.value
FROM (SELECT t.day,
t.country,
t.value,
row_number() OVER (PARTITION BY t.country
ORDER BY t.day DESC) rn
FROM elbat t) x
WHERE x.rn = 1;
Another way of doing this is using a window function (SQL Server, MySQL8 etc)
e.g.
ROW_NUMBER() OVER ( PARTITION BY COUNTRY ORDER BY CONVERT(DATE, [Day]) DESC )
Then just filter to where this function returns 1
full example:
WITH TestData
AS ( SELECT '04-SEP-19' AS [Day], 'BELGIUM' AS [COUNTRY], 2124 AS [VALUE]
UNION
SELECT '15-MAR-19' AS [Day], 'BELGIUM' AS [COUNTRY], 2135 AS [VALUE]
UNION
SELECT '21-MAY-19' AS [Day], 'SPAIN' AS [COUNTRY], 1825 AS [VALUE]
UNION
SELECT '18-JUL-19' AS [Day], 'SPAIN' AS [COUNTRY], 1724 AS [VALUE]
UNION
SELECT '26-MAR-19' AS [Day], 'ITALY' AS [COUNTRY], 4141 AS [VALUE] ),
TestDataRanked
AS ( SELECT *,
ROW_NUMBER() OVER ( PARTITION BY COUNTRY ORDER BY CONVERT(DATE, [Day]) DESC ) AS SelectionRank
FROM TestData )
SELECT [Day],
COUNTRY,
[VALUE]
FROM TestDataRanked
WHERE SelectionRank = 1;
I understand the problem as you want the most recent value for all countries, as the country can repeat in the table(?):
select distinct t1.DAY, t1.COUNTRY, t1.VALUE
FROM day_test t1
inner join day_test t2 on t1.day in
(select max(day) from day_test t3 where t1.country = t3.country )
and t1.country = t2.country
I tested it and it works.
Let's suppose that the type of day column is date.
In the subquery, you can find the tuple of (country, max date) and to add the value, you can join as mentioned in the comments or use IN
SELECT DISTINCT day, country, value
FROM yourTable
WHERE (country, day)
in (
SELECT country, MAX(day) as day
FROM yourTable
GROUP BY country, value
)
You can use the following query:
Just replace the TABLE_NAME with the name of your table.
SELECT
COUNTRY,
VALUE,
MAX(DATE) AS "MostRecent"
FROM TABLE_NAME
GROUP BY COUNTRY;

Group by join date + add month in where sql

I have the following table:
PERSON
ID
Name
date_created
date_left
What I want is a list of all months and the amount of users joined and the amount of users that left.
I already have the following query: it returns the amount of new users that joined in the month that I pass:
select MONTH(date_created) 'Month', YEAR(date_created) 'Year', count(*) as 'New Users'
from person p
where YEAR(date_created) = 2018 and MONTH( p.date_created) = 5
group by MONTH(date_created), YEAR(date_created)
It returns what I want:
How would I edit this to include a year report and add the column 'Users left' next to the 'new users' one?
My result would be:
MONTH YEAR NEW USERS USERS LEFT
1 2019 10 5
I would "unpivot" the data using cross apply:
select v.[year], v.[month], sum(v.iscreated) as num_created,
sum(v.isleft) as num_left
from person p cross apply
(values (year(p.date_created), month(p.date_created), 1, 0),
(year(p.date_left), month(p.date_left), 0, 1)
) v([year], [month], iscreated, isleft)
group by v.[year], v.[month]
order by v.[year], v.[month];
The straight-forward approach is probably to full outer join all entries and all leaves. SQL Server makes this a bit awkward by not featuring USING, so we must use ON and COALESCE on month and year instead.
select
coalesce(pin.year, pout.year) as year,
coalesce(pin.month, pout.month) as month,
coalesce(pin.cnt, 0) as count_in,
coalesce(pout.cnt, 0) as count_out
from
(
select year(date_created) as year, month(date_created) as month, count(*) as cnt
from person
group by year(date_created), month(date_created)
) pin
full outer join
(
select year(date_left) as year, month(date_left) as month, count(*) as cnt
from person
group by month(date_left), year(date_left)
) pout on pout.year = pin.year and pout.month = pin.month
order by year, month;
Maybe you could do it with a SubSelect? Tried it right now with ORACLE Syntax, I'm not sure if it works in SQL-Server.
SELECT * FROM
(
select MONTH(date_created) 'Month_C', YEAR(date_created) 'Year_C', count(*) as 'New Users'
from person p
where YEAR(date_created) = 2018 and MONTH( p.date_created) = 5
group by MONTH(date_created), YEAR(date_created)
)created_user,
(
select MONTH(date_left) 'Month_L', YEAR(date_left) 'Year_L', count(*) as 'New Users'
from person p
where YEAR(date_left) = 2018 and MONTH( p.date_left) = 5
group by MONTH(date_left), YEAR(date_left)
) left_user
where created_user.Year_C = left_user.Year_L
and created_user.Month_C = left_user.Month_L

Group by in columns and rows, counts and percentages per day

I have a table that has data like following.
attr |time
----------------|--------------------------
abc |2018-08-06 10:17:25.282546
def |2018-08-06 10:17:25.325676
pqr |2018-08-05 10:17:25.366823
abc |2018-08-06 10:17:25.407941
def |2018-08-05 10:17:25.449249
I want to group them and count by attr column row wise and also create additional columns in to show their counts per day and percentages as shown below.
attr |day1_count| day1_%| day2_count| day2_%
----------------|----------|-------|-----------|-------
abc |2 |66.6% | 0 | 0.0%
def |1 |33.3% | 1 | 50.0%
pqr |0 |0.0% | 1 | 50.0%
I'm able to display one count by using group by but unable to find out how to even seperate them to multiple columns. I tried to generate day1 percentage with
SELECT attr, count(attr), count(attr) / sum(sub.day1_count) * 100 as percentage from (
SELECT attr, count(*) as day1_count FROM my_table WHERE DATEPART(week, time) = DATEPART(day, GETDate()) GROUP BY attr) as sub
GROUP BY attr;
But this also is not giving me correct answer, I'm getting all zeroes for percentage and count as 1. Any help is appreciated. I'm trying to do this in Redshift which follows postgresql syntax.
Let's nail the logic before presenting:
with CTE1 as
(
select attr, DATEPART(day, time) as theday, count(*) as thecount
from MyTable
)
, CTE2 as
(
select theday, sum(thecount) as daytotal
from CTE1
group by theday
)
select t1.attr, t1.theday, t1.thecount, t1.thecount/t2.daytotal as percentofday
from CTE1 t1
inner join CTE2 t2
on t1.theday = t2.theday
From here you can pivot to create a day by day if you feel the need
I am trying to enhance the query #johnHC btw if you needs for 7days then you have to those days in case when
with CTE1 as
(
select attr, time::date as theday, count(*) as thecount
from t group by attr,time::date
)
, CTE2 as
(
select theday, sum(thecount) as daytotal
from CTE1
group by theday
)
,
CTE3 as
(
select t1.attr, EXTRACT(DOW FROM t1.theday) as day_nmbr,t1.theday, t1.thecount, t1.thecount/t2.daytotal as percentofday
from CTE1 t1
inner join CTE2 t2
on t1.theday = t2.theday
)
select CTE3.attr,
max(case when day_nmbr=0 then CTE3.thecount end) as day1Cnt,
max(case when day_nmbr=0 then percentofday end) as day1,
max(case when day_nmbr=1 then CTE3.thecount end) as day2Cnt,
max( case when day_nmbr=1 then percentofday end) day2
from CTE3 group by CTE3.attr
http://sqlfiddle.com/#!17/54ace/20
In case that you have only 2 days:
http://sqlfiddle.com/#!17/3bdad/3 (days descending as in your example from left to right)
http://sqlfiddle.com/#!17/3bdad/5 (days ascending)
The main idea is already mentioned in the other answers. Instead of joining the CTEs for calculating the values I am using window functions which is a bit shorter and more readable I think. The pivot is done the same way.
SELECT
attr,
COALESCE(max(count) FILTER (WHERE day_number = 0), 0) as day1_count, -- D
COALESCE(max(percent) FILTER (WHERE day_number = 0), 0) as day1_percent,
COALESCE(max(count) FILTER (WHERE day_number = 1), 0) as day2_count,
COALESCE(max(percent) FILTER (WHERE day_number = 1), 0) as day2_percent
/*
Add more days here
*/
FROM(
SELECT *, (count::float/count_per_day)::decimal(5, 2) as percent -- C
FROM (
SELECT DISTINCT
attr,
MAX(time::date) OVER () - time::date as day_number, -- B
count(*) OVER (partition by time::date, attr) as count, -- A
count(*) OVER (partition by time::date) as count_per_day
FROM test_table
)s
)s
GROUP BY attr
ORDER BY attr
A counting the rows per day and counting the rows per day AND attr
B for more readability I convert the date into numbers. Here I take the difference between current date of the row and the maximum date available in the table. So I get a counter from 0 (first day) up to n - 1 (last day)
C calculating the percentage and rounding
D pivot by filter the day numbers. The COALESCE avoids the NULL values and switched them into 0. To add more days you can multiply these columns.
Edit: Made the day counter more flexible for more days; new SQL Fiddle
Basically, I see this as conditional aggregation. But you need to get an enumerator for the date for the pivoting. So:
SELECT attr,
COUNT(*) FILTER (WHERE day_number = 1) as day1_count,
COUNT(*) FILTER (WHERE day_number = 1) / cnt as day1_percent,
COUNT(*) FILTER (WHERE day_number = 2) as day2_count,
COUNT(*) FILTER (WHERE day_number = 2) / cnt as day2_percent
FROM (SELECT attr,
DENSE_RANK() OVER (ORDER BY time::date DESC) as day_number,
1.0 * COUNT(*) OVER (PARTITION BY attr) as cnt
FROM test_table
) s
GROUP BY attr, cnt
ORDER BY attr;
Here is a SQL Fiddle.

sql query - find distinct user from table

I am trying to solve a problem using SQL query and need some expert's advice.
I have below transaction table.
-- UserID, ProductId, TransactionDate
-- 1 , 2 , 2014-01-01
-- 1 , 3 , 2014-01-05
-- 2 , 2 , 2014-01-02
-- 2 , 3 , 2014-05-07
.
.
.
What I am trying to achieve is to find all user who purchased more than one product WITHIN 30 DAYS .
My query so far is like
select UserID, COUNT(distinct ProductID)
from tableA
GROUP BY UserID HAVING COUNT(distinct ProductID) > 1
I am not sure where to apply "WITH IN 30 DAYS" logic in the query .
The outcome should be :
1, 2
2, 1
Thanks in advance for your help.
Edit: Within 30 Days
SQL Fiddle
SELECT
a.UserID,
COUNT(DISTINCT ProductID)
FROM TableA a
INNER JOIN (
SELECT UserID, TransactionDate = MAX(TransactionDate)
FROM TableA
GROUP BY UserID
) AS t
ON t.UserID = a.UserID
AND a.TransactionDate >= DATEADD(DAY, -30, t.TransactionDate)
AND a.TransactionDate <= t.TransactionDate
GROUP BY a.UserID
You can use GROUP BY YEAR(TransactionDate), MONTH(TransactionDate)
SELECT
UserID,
COUNT(DISTINCT ProductID)
FROM TableA
GROUP BY
UserID, YEAR(TransactionDate), MONTH(TransactionDate)
HAVING
COUNT(DISTINCT ProductID) > 1
Just add a where clause.
SELECT UserID, COUNT(DISTINCT ProductID) cnt
FROM tableA
WHERE TransactionDate >= CAST(DATEADD(DAY,-30,GETDATE()) AS DATE)
GROUP BY UserID
HAVING COUNT(DISTINCT ProductID) > 1
This works because the where clause is performed BEFORE the Group By and Having. So first it filters out all transactions over 30 days old and then returns only people who bought two distinct products.
Query Processing Order:
http://blog.sqlauthority.com/2009/04/06/sql-server-logical-query-processing-phases-order-of-statement-execution/

How to count open records, grouped by hour and day in SQL-server-2008-r2

I have hospital patient admission data in Microsoft SQL Server r2 that looks something like this:
PatientID, AdmitDate, DischargeDate
Jones. 1-jan-13 01:37. 1-jan-13 17:45
Smith 1-jan-13 02:12. 2-jan-13 02:14
Brooks. 4-jan-13 13:54. 5-jan-13 06:14
I would like count the number of patients in the hospital day by day and hour by hour (ie at
1-jan-13 00:00. 0
1-jan-13 01:00. 0
1-jan-13 02:00. 1
1-jan-13 03:00. 2
And I need to include the hours when there are no patients admitted in the result.
I can't create tables so making a reference table listing all the hours and days is out, though.
Any suggestions?
To solve this problem, you need a list of date-hours. The following gets this from the admit date cross joined to a table with 24 hours. The table of 24 hours is calculating from information_schema.columns -- a trick for getting small sequences of numbers in SQL Server.
The rest is just a join between this table and the hours. This version counts the patients at the hour, so someone admitted and discharged in the same hour, for instance is not counted. And in general someone is not counted until the next hour after they are admitted:
with dh as (
select DATEADD(hour, seqnum - 1, thedatehour ) as DateHour
from (select distinct cast(cast(AdmitDate as DATE) as datetime) as thedatehour
from Admission a
) a cross join
(select ROW_NUMBER() over (order by (select NULL)) as seqnum
from INFORMATION_SCHEMA.COLUMNS
) hours
where hours <= 24
)
select dh.DateHour, COUNT(*) as NumPatients
from dh join
Admissions a
on dh.DateHour between a.AdmitDate and a.DischargeDate
group by dh.DateHour
order by 1
This also assumes that there are admissions on every day. That seems like a reasonable assumption. If not, a calendar table would be a big help.
Here is one (ugly) way:
;WITH DayHours AS
(
SELECT 0 DayHour
UNION ALL
SELECT DayHour+1
FROM DayHours
WHERE DayHour+1 <= 23
)
SELECT B.AdmitDate, A.DayHour, COUNT(DISTINCT PatientID) Patients
FROM DayHours A
CROSS JOIN (SELECT DISTINCT CONVERT(DATE,AdmitDate) AdmitDate
FROM YourTable) B
LEFT JOIN YourTable C
ON B.AdmitDate = CONVERT(DATE,C.AdmitDate)
AND A.DayHour = DATEPART(HOUR,C.AdmitDate)
GROUP BY B.AdmitDate, A.DayHour
This is a bit messy and includes a temp table with the test data you provided but
CREATE TABLE #HospitalPatientData (PatientId NVARCHAR(MAX), AdmitDate DATETIME, DischargeDate DATETIME)
INSERT INTO #HospitalPatientData
SELECT 'Jones.', '1-jan-13 01:37:00.000', '1-jan-13 17:45:00.000' UNION
SELECT 'Smith', '1-jan-13 02:12:00.000', '2-jan-13 02:14:00.000' UNION
SELECT 'Brooks.', '4-jan-13 13:54:00.000', '5-jan-13 06:14:00.000'
;WITH DayHours AS
(
SELECT 0 DayHour
UNION ALL
SELECT DayHour+1
FROM DayHours
WHERE DayHour+1 <= 23
),
HospitalPatientData AS
(
SELECT CONVERT(nvarchar(max),AdmitDate,103) as AdmitDate ,DATEPART(hour,(AdmitDate)) as AdmitHour, COUNT(PatientID) as CountOfPatients
FROM #HospitalPatientData
GROUP BY CONVERT(nvarchar(max),AdmitDate,103), DATEPART(hour,(AdmitDate))
),
Results AS
(
SELECT MAX(h.AdmitDate) as Date, d.DayHour
FROM HospitalPatientData h
INNER JOIN DayHours d ON d.DayHour=d.DayHour
GROUP BY AdmitDate, CountOfPatients, DayHour
)
SELECT r.*, COUNT(h.PatientId) as CountOfPatients
FROM Results r
LEFT JOIN #HospitalPatientData h ON CONVERT(nvarchar(max),AdmitDate,103)=r.Date AND DATEPART(HOUR,h.AdmitDate)=r.DayHour
GROUP BY r.Date, r.DayHour
ORDER BY r.Date, r.DayHour
DROP TABLE #HospitalPatientData
This may get you started:
BEGIN TRAN
DECLARE #pt TABLE
(
PatientID VARCHAR(10)
, AdmitDate DATETIME
, DischargeDate DATETIME
)
INSERT INTO #pt
( PatientID, AdmitDate, DischargeDate )
VALUES ( 'Jones', '1-jan-13 01:37', '1-jan-13 17:45' ),
( 'Smith', '1-jan-13 02:12', '2-jan-13 02:14' )
, ( 'Brooks', '4-jan-13 13:54', '5-jan-13 06:14' )
DECLARE #StartDate DATETIME = '20130101'
, #FutureDays INT = 7
;
WITH dy
AS ( SELECT TOP (#FutureDays)
ROW_NUMBER() OVER ( ORDER BY name ) dy
FROM sys.columns c
) ,
hr
AS ( SELECT TOP 24
ROW_NUMBER() OVER ( ORDER BY name ) hr
FROM sys.columns c
)
SELECT refDate, COUNT(p.PatientID) AS PtCount
FROM ( SELECT DATEADD(HOUR, hr.hr - 1,
DATEADD(DAY, dy.dy - 1, #StartDate)) AS refDate
FROM dy
CROSS JOIN hr
) ref
LEFT JOIN #pt p ON ref.refDate BETWEEN p.AdmitDate AND p.DischargeDate
GROUP BY refDate
ORDER BY refDate
ROLLBACK