SQL Server select with multiple groupings - sql

I have two tables describing users and their payments:
-- Users who make payments.
CREATE TABLE test_users
(
    id   int IDENTITY NOT NULL,
    name varchar(25),
    PRIMARY KEY (id)
);

-- One row per payment; user_id holds the id of the paying test_users row
-- (no foreign key is declared).
CREATE TABLE test_payments
(
    id      int IDENTITY NOT NULL,
    user_id int NOT NULL,
    money   money NOT NULL,
    date    datetime NOT NULL,
    PRIMARY KEY (id)
);

-- Separate INSERT statements keep the IDENTITY values in a known order
-- (john = 1, peter = 2).
INSERT INTO test_users (name)
VALUES ('john');
INSERT INTO test_users (name)
VALUES ('peter');

-- Style 104 tells CONVERT to read the literal as dd.mm.yyyy, so the script does
-- not depend on the session language / DATEFORMAT setting.
INSERT INTO test_payments (user_id, money, date)
VALUES (1, $1, CONVERT(datetime, '15.12.2012', 104));
INSERT INTO test_payments (user_id, money, date)
VALUES (1, $2, CONVERT(datetime, '16.12.2012', 104));
INSERT INTO test_payments (user_id, money, date)
VALUES (2, $1, CONVERT(datetime, '16.12.2012', 104));
INSERT INTO test_payments (user_id, money, date)
VALUES (2, $3, CONVERT(datetime, '17.12.2012', 104));
INSERT INTO test_payments (user_id, money, date)
VALUES (1, $1, CONVERT(datetime, '19.12.2012', 104));
Table test_users:
id name
-------------
1 john
2 peter
Table test_payments:
id user_id money date
---------------------------------------
1 1 1.0000 2012-12-15
2 1 2.0000 2012-12-16
3 2 1.0000 2012-12-16
4 2 3.0000 2012-12-17
5 1 1.0000 2012-12-19
I need to make a users statistic which will show me :
username
total fee for a period of time
the date of the user's last
activity (overall, not limited to the time period).
For example taking the period 15-18.12.12 I expect the following results:
name total last_activity
--------------------------------
peter $4 2012-12-17
john $3 2012-12-19
I've tried the following query:
-- Attempted query: per-user total and last activity.
-- The WHERE clause removes out-of-range payments before grouping, so both
-- SUM(p.money) and MAX(p.date) are computed over the date range only.
SELECT u.*, SUM(p.money) total, MAX(p.date) last_activity
FROM test_users u
JOIN test_payments p
ON u.id= p.user_id
WHERE p.date BETWEEN CONVERT(datetime, '15.12.2012') AND CONVERT(datetime, '18.12.2012')
GROUP BY u.id, u.name
ORDER BY total DESC;
but I get the wrong result for last_activity, because it is also restricted to the date range:
id name total last_activity
--------------------------------
2 peter 4.0000 2012-12-17
1 john 3.0000 2012-12-16
Please suggest a solution.

Looks like a couple of other answers popped up while I worked on mine, but here it is anyhow. There is a working sql fiddle here: http://sqlfiddle.com/#!3/14808/6
Basically, you need a query to pull the max date regardless of the date range. I chose to do this as a correlated subquery.
-- Per user: total paid inside the date range, plus the date of the latest
-- payment overall.
-- The range filter lives in the ON clause of the LEFT JOIN. A user whose payments
-- all fall outside the range is therefore still returned (with a total of 0)
-- instead of being removed by a WHERE filter on p.date.
SELECT
    u.id,
    u.name,
    COALESCE(SUM(p.money), 0) AS TotalMoneyInRange,
    -- Correlated subquery: deliberately ignores the date range.
    (
        SELECT MAX(lp.date)
        FROM test_payments AS lp
        WHERE lp.user_id = u.id
    ) AS LastPaymentOverAll
FROM test_users AS u
LEFT JOIN test_payments AS p
    ON p.user_id = u.id
   -- YYYYMMDD literals are read the same way under every language setting.
   AND p.date BETWEEN CAST('20121211' AS datetime)  -- range begin
                  AND CAST('20121216' AS datetime)  -- range end
GROUP BY u.id, u.name

You need to move the condition from the where clause to a case statement:
-- Conditional aggregation: every payment row takes part in MAX(p.date), but only
-- rows inside the date range contribute to the total (other rows yield NULL,
-- which SUM skips).
SELECT
    u.id,
    u.name,
    SUM(
        CASE
            WHEN p.date BETWEEN CONVERT(datetime, '15.12.2012')
                            AND CONVERT(datetime, '18.12.2012')
                THEN p.money
        END
    ) AS total,
    MAX(p.date) AS last_activity
FROM test_users AS u
INNER JOIN test_payments AS p
    ON p.user_id = u.id
GROUP BY
    u.id,
    u.name
ORDER BY total DESC;
If you only want users who had a payment in that period, then you can include:
-- SQL Server evaluates HAVING before the select list, so the alias "total" is not
-- visible here; the aggregate expression has to be repeated.
HAVING SUM(CASE WHEN p.date BETWEEN CONVERT(datetime, '15.12.2012') AND CONVERT(datetime, '18.12.2012')
                THEN p.money
           END) IS NOT NULL
If you want the NULL values to appear as 0 instead of NULL, then include else 0 in the case statement.

You can also use subqueries to get the result:
-- Two independent per-user aggregates joined back to the user row:
--   in_range : sum of payments inside the date range
--   latest   : most recent payment date over all payments
SELECT
    u.*,
    in_range.total,
    latest.last_activity
FROM test_users AS u
INNER JOIN (
    SELECT
        user_id,
        SUM(money) AS total
    FROM test_payments
    WHERE date BETWEEN CONVERT(datetime, '2012-12-15')
                   AND CONVERT(datetime, '2012-12-18')
    GROUP BY user_id
) AS in_range
    ON in_range.user_id = u.id
INNER JOIN (
    SELECT
        user_id,
        MAX(date) AS last_activity
    FROM test_payments
    GROUP BY user_id
) AS latest
    ON latest.user_id = in_range.user_id
ORDER BY in_range.total DESC;
See SQL Fiddle with Demo

You could add a sub query for the MAX date that doesn't have the WHERE clause like so:
-- The range filter in WHERE limits only the rows of p that are summed; the latest
-- payment date comes from a separate derived table that has no date filter.
SELECT
    u.*,
    SUM(p.money) AS total,
    latest.max_date AS last_activity
FROM test_users AS u
INNER JOIN test_payments AS p
    ON p.user_id = u.id
INNER JOIN (
    SELECT
        user_id,
        MAX(date) AS max_date
    FROM test_payments
    GROUP BY user_id
) AS latest
    ON latest.user_id = u.id
WHERE p.date BETWEEN CONVERT(datetime, '15.12.2012') AND CONVERT(datetime, '18.12.2012')
GROUP BY
    u.id,
    u.name,
    latest.max_date
ORDER BY total DESC;

Related

How to get 1 record on the basis of two column values in a single table?

The query is
-- Asker's query: one row per (UserID, entry date) with the number of matching
-- personcookie rows for that user on that date.
-- DISTINCT is redundant here because the GROUP BY already yields one row per
-- (date, UserID) combination.
select distinct b.UserID , cast(b.entrytime as date) ,count(*) as UserCount
from [dbo].[person] as a
join [dbo].[personcookie] as b
on a.UserID = b.UserID
where cast (b.entrytime as date) >= '08/21/2020'
and cast (b.leavetime as date) <= '08/27/2020' and a.distinction = 99
group by cast(b.entrytime as date), b.UserID
If the same UserID has a count of more than 1 for the same date, it should be counted as 1. As shown in the image, UserID 10 currently has count 1 for 2020-08-26 and count 2 for 2020-08-27. The result should instead show that UserID 10 has a total count of 2 for 2020-08-26 and 2020-08-27 (because the count for 2020-08-27 should be treated as 1), as per the requirement.
I have added the image of tables and what output i want
It seems you want one result row per user, so group by user, not by user and date. You want to count dates per user, but each day only once. This is a distinct count.
-- One row per user: the number of distinct calendar days on which the user has
-- an entry, for users with distinction = 99.
-- The range matches the question (entries from 2020-08-21, leaving on or before
-- 2020-08-27). The upper bound is written as "< next day" so the columns are
-- compared directly, without CAST, and YYYYMMDD literals are used because they
-- are interpreted identically under every language setting.
SELECT
    p.userid,
    COUNT(DISTINCT CAST(pc.entrytime AS date)) AS date_count
FROM dbo.person AS p
INNER JOIN dbo.personcookie AS pc
    ON pc.userid = p.userid
WHERE p.distinction = 99
  AND pc.entrytime >= '20200821'
  AND pc.leavetime <  '20200828'
GROUP BY p.userid
ORDER BY p.userid;
You seem to want dense_rank():
-- One row per (user, entry date); usercount numbers that user's dates 1, 2, 3, ...
-- in chronological order, so the highest usercount of a user equals the number
-- of distinct days.
-- The distinction = 99 filter from the original query is kept, and the date
-- column is given a name.
SELECT
    p.UserID,
    CAST(pc.entrytime AS date) AS entry_date,
    DENSE_RANK() OVER (PARTITION BY p.UserID ORDER BY MIN(pc.entrytime)) AS usercount
FROM [dbo].[person] AS p
INNER JOIN [dbo].[personcookie] AS pc
    ON pc.UserID = p.UserID
WHERE p.distinction = 99
  AND CAST(pc.entrytime AS date) >= '2020-08-21'
  AND CAST(pc.leavetime AS date) <= '2020-08-27'
GROUP BY
    CAST(pc.entrytime AS date),
    p.UserID;
Notes:
The only "real" change is using dense_rank(), which enumerates the days for a given user.
Use meaningful table aliases, rather than arbitrary letters.
Use standard date/time constants. In SQL Server, that is either YYYYMMDD or YYYY-MM-DD.

SQL find average time difference between rows for a given category

I browsed SO but could not quite find the exact answer or maybe it was for a different language.
Let's say I have a table, where each row is a record of a trade:
trade_id customer trade_date
1 A 2013-05-01 00:00:00
2 B 2013-05-01 10:00:00
3 A 2013-05-02 00:00:00
4 A 2013-05-05 00:00:00
5 B 2013-05-06 12:00:00
I would like to have the average time between trades, in days or fraction of days, for each customer, and the number of days since last trade. So for instance for customer A, time between trades 1 and 3 is 1 day and between trades 3 and 4 is 3 days, for an average of 2. So the end table would look like something like this (assuming today it's the 2013-05-10):
customer avg_time_btw_trades time_since_last_trade
A 2.0 5.0
B 5.08 3.5
If a customer has only got 1 trade I guess NULL is fine as output.
Not even sure SQL is the best way to do this (I am working with SQL server), but any help is appreciated!
-- Average gap between consecutive trades = (last trade - first trade) / (trades - 1).
-- The span is converted to days (numeric) BEFORE dividing by the gap count, so no
-- integer division truncates the result.
-- NULLIF turns a single-trade customer's divisor (0) into NULL, giving NULL
-- instead of a divide-by-zero error.
SELECT
    customer,
    DATEDIFF(second, MIN(trade_date), MAX(trade_date)) / 86400.0
        / NULLIF(COUNT(*) - 1, 0)                               AS avg_time_btw_trades,
    DATEDIFF(second, MAX(trade_date), GETDATE()) / 86400.0      AS time_since_last_trade
FROM
    yourTable
GROUP BY
    customer
http://sqlfiddle.com/#!6/eb46e/7
EDIT: Added final field that I didn't notice, apologies.
The following SQL script uses your data and gives the expected results.
-- Sample data in a table variable (T-SQL variables start with @).
DECLARE @temp TABLE
(
    trade_id   INT,
    customer   CHAR(1),
    trade_date DATETIME
);
INSERT INTO @temp VALUES (1, 'A', '20130501');
INSERT INTO @temp VALUES (2, 'B', '20130501 10:00');
INSERT INTO @temp VALUES (3, 'A', '20130502');
INSERT INTO @temp VALUES (4, 'A', '20130505');
INSERT INTO @temp VALUES (5, 'B', '20130506 12:00');

-- "Today" is pinned to 2013-05-10 so the output matches the question's example;
-- use GETDATE() instead for live data.
DECLARE @getdate DATETIME;
SET @getdate = '20130510';

SELECT
    s.customer,
    -- AVG skips the NULL gap of each customer's first trade.
    AVG(s.days_btw_trades) AS avg_time_between_trades,
    CAST(DATEDIFF(hour, MAX(s.trade_date), @getdate) AS float) / 24.0 AS time_since_last_trade
FROM (
    -- Pair every trade with the same customer's immediately preceding trade
    -- (the latest trade_date strictly before it); the gap is expressed in days.
    SELECT
        CAST(DATEDIFF(hour, t2.trade_date, t.trade_date) AS float) / 24.0 AS days_btw_trades,
        t.customer,
        t.trade_date
    FROM @temp AS t
    LEFT JOIN @temp AS t2
        ON t2.customer = t.customer
       AND t2.trade_date = (
               SELECT MAX(t3.trade_date)
               FROM @temp AS t3
               WHERE t3.customer = t.customer
                 AND t3.trade_date < t.trade_date
           )
) AS s
GROUP BY s.customer
You need a date difference between every trade and average them.
-- MySQL dialect (two-argument DATEDIFF, NOW()).
-- Each trade "a" is paired with "b", the same customer's immediately preceding
-- trade; the day differences are then averaged per customer.
select
    a.customer
    ,avg(datediff(a.trade_date, b.trade_date))
    ,datediff(now(),max(a.trade_date))
from yourTable a
inner join yourTable b
    on b.customer = a.customer
where b.trade_date = (
        -- latest trade of this customer strictly before trade "a"
        select max(c.trade_date)
        from yourTable c
        where c.customer = a.customer
          and c.trade_date < a.trade_date
    )
group by a.customer
Just for grins I added a solution that uses CTEs. You could probably use a temp table if the first query is too large. I used @MatBailie's creation script for the table:
CREATE TABLE customer_trades (
    id          INT IDENTITY(1,1),
    customer_id INT,
    trade_date  DATETIME,
    PRIMARY KEY (id),
    INDEX ix_user_trades (customer_id, trade_date)
);

INSERT INTO customer_trades (customer_id, trade_date)
VALUES
    (1, '2013-05-01 00:00:00'),
    (2, '2013-05-01 10:00:00'),
    (1, '2013-05-02 00:00:00'),
    (1, '2013-05-05 00:00:00'),
    (2, '2013-05-06 12:00:00')
;

-- trade_gaps: hours from each trade to the same customer's next trade.
-- LEAD returns NULL for a customer's latest trade, so that row adds nothing to
-- the average (the open interval up to "now" is not a gap between trades).
;WITH trade_gaps AS (
    SELECT
        customer_id,
        trade_date,
        DATEDIFF(hour,
                 trade_date,
                 LEAD(trade_date, 1) OVER (PARTITION BY customer_id ORDER BY trade_date)
        ) AS trade_diff
    FROM customer_trades
)
SELECT
    customer_id,
    -- Cast to float so AVG does not perform integer division. Value is in hours.
    AVG(CAST(trade_diff AS float)) AS AV,
    -- Hours since the customer's most recent trade. Computed from MAX(trade_date)
    -- rather than LAST_VALUE, whose default window frame ends at the current row.
    DATEDIFF(hour, MAX(trade_date), GETDATE()) AS Curr_Trade
FROM trade_gaps
GROUP BY customer_id

Query with subquery-count and groupby

Below is the ERD
I want to count number of gender ('Male' and 'Female') for each month irrespective of year.
What I have tried so far is that I can count number of males and females for each month separately like below
Query
-- Asker's working query: one row per (month name, gender) with the number of
-- patient rows in that combination. Years are not distinguished because only
-- the month name is grouped on.
Select u.Gender, datename(month, p.EntryDate) month, COUNT(p.User_Id) count
from [HospitalManagement].[dbo].[Patients] p,[HospitalManagement].[dbo].[Users] u
where u.Id = p.User_Id
group by datename(month, p.EntryDate), u.Gender
Result
I want it like below
Expected Result
Month | MaleCount | FemaleCount
June | 0 | 2
November | 1 | 1
To achieve above I try following query
Query
-- Asker's failing attempt at one row per month with separate gender counts.
-- Both scalar subqueries reference p.User_Id from the outer query, but the outer
-- query groups only by the month name; p.User_Id is neither grouped nor
-- aggregated, which is what the error message reports.
Select datename(month, p.EntryDate) month,
(select count(u.gender) from [HospitalManagement].[dbo].[Users] u
where u.Id = p.User_Id and u.Gender = 'Female'
group by u.Gender) female,
(select count(u.gender) from [HospitalManagement].[dbo].[Users] u
where u.Id = p.User_Id and u.Gender = 'Male'
group by u.Gender) male
from [HospitalManagement].[dbo].[Patients] p
group by datename(month, p.EntryDate)
Error
Column 'HospitalManagement.dbo.Patients.User_Id' is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause.
Below are the create statements of tables (I am using MSSql)
-- Creating table 'Users'
-- Table 'Users': account and demographic data; [Gender] is the column the
-- report queries filter on.
CREATE TABLE [dbo].[Users]
(
    [Id]        bigint IDENTITY(1,1) NOT NULL,
    [Email]     nvarchar(max)        NULL,
    [Password]  nvarchar(max)        NULL,
    [UserName]  nvarchar(max)        NULL,
    [Age]       bigint               NULL,
    [Gender]    nvarchar(max)        NULL,
    [NRIC]      nvarchar(max)        NULL,
    [Comments]  nvarchar(max)        NULL,
    [Address]   nvarchar(max)        NULL,
    [ContactNo] nvarchar(max)        NULL,
    [FullName]  nvarchar(max)        NULL
);
GO
-- Table 'Patients': one row per patient entry; [User_Id] is joined to
-- [Users].[Id] by the queries (no foreign key is declared here).
CREATE TABLE [dbo].[Patients]
(
    [Id]         bigint IDENTITY(1,1) NOT NULL,
    [Disease]    nvarchar(max)        NULL,
    [Occupation] nvarchar(max)        NULL,
    [EntryDate]  datetime             NULL,
    [EntryTime]  time                 NULL,
    [User_Id]    bigint               NOT NULL
);
GO
As the error says, you are directly using User_Id column in select clause which is not present in the GROUP BY.
You can change the correlated subqueries to faster LEFT JOINs (assuming the User_Id is unique in the Patients table).
-- One row per month name. COUNT only counts non-NULL values, so each CASE
-- yields 1 for the wanted gender and NULL otherwise.
SELECT
    DATENAME(month, p.EntryDate)                    AS mon,
    COUNT(CASE WHEN u.Gender = 'Female' THEN 1 END) AS female_cnt,
    COUNT(CASE WHEN u.Gender = 'Male'   THEN 1 END) AS male_cnt
FROM [Patients] AS p
LEFT JOIN [Users] AS u
    ON u.Id = p.User_Id
GROUP BY DATENAME(month, p.EntryDate);
EDIT:
you can use a lookup CTE to generate all months and then do LEFT JOIN with it like this:
-- months: recursive CTE producing the 12 month names, starting with the current
-- month and stepping forward one month at a time (mn runs 1..12).
;WITH months (mn, mon) AS
(
    SELECT
        1,
        DATENAME(MONTH, DATEADD(MONTH, 0, GETDATE()))
    UNION ALL
    SELECT
        mn + 1,
        DATENAME(MONTH, DATEADD(MONTH, mn, GETDATE()))
    FROM months
    WHERE mn < 12
)
-- LEFT JOINs from the month list keep months that have no patients; their counts
-- are 0 because COUNT ignores the NULLs produced by the outer joins.
SELECT
    m.mon                                           AS mon,
    COUNT(CASE WHEN u.Gender = 'Female' THEN 1 END) AS female_cnt,
    COUNT(CASE WHEN u.Gender = 'Male'   THEN 1 END) AS male_cnt
FROM months AS m
LEFT JOIN [Patients] AS p
    ON DATENAME(MONTH, p.EntryDate) = m.mon
LEFT JOIN [Users] AS u
    ON u.Id = p.User_Id
GROUP BY m.mon;
You can try the following query:
-- One row per month name with separate male / female counts.
-- CASE is used instead of IF(): it is standard SQL and works in SQL Server,
-- MySQL and Oracle alike. COUNT skips the NULL produced for the other gender.
SELECT
    DATENAME(month, p.EntryDate)                    AS month,
    COUNT(CASE WHEN u.Gender = 'Male'   THEN 1 END) AS MaleCount,
    COUNT(CASE WHEN u.Gender = 'Female' THEN 1 END) AS FemaleCount
FROM [HospitalManagement].[dbo].[Patients] AS p
INNER JOIN [HospitalManagement].[dbo].[Users] AS u
    ON u.Id = p.User_Id
GROUP BY
    DATENAME(month, p.EntryDate)
This is how we do in MySQL or Oracle. Have not tried in MSSQL though. But as these are standard SQL function (and is available in MSSQL), this should work in MSSQL as well.
-- One row per month name; each SUM adds 1 for rows of the wanted gender and 0
-- for all other rows. Every CASE is closed with END and the select list has no
-- trailing comma.
SELECT
    DATENAME(month, p.EntryDate)                          AS month,
    SUM(CASE WHEN u.Gender = 'Male'   THEN 1 ELSE 0 END)  AS MaleCount,
    SUM(CASE WHEN u.Gender = 'Female' THEN 1 ELSE 0 END)  AS FemaleCount
FROM [HospitalManagement].[dbo].[Patients] AS p
INNER JOIN [HospitalManagement].[dbo].[Users] AS u
    ON u.Id = p.User_Id
GROUP BY DATENAME(month, p.EntryDate)
Typically I am adding 1 in case of the specific gender to get the result.
Note: If there are any syntax errors please let me know with the error, I can correct it.

SQL query resultset, I need SUM(quantity) COLUMN grouped by DAY

This is the Query I am using on the product table LEFT JOIN on the page table ON the productid column in page being the id in product.. pretty straightforward.
-- Asker's query (MySQL).
-- Grouping by both `day` and `p`.`id` produces one row per product, so
-- COUNT(DISTINCT `p`.`id`) is 1 on every row; the HAVING clause keeps products
-- with at least 10 joined page rows.
SELECT
COUNT(DISTINCT `p`.`id`) as `quantity`,
DATE_FORMAT(`p`.`created_time`,'%Y-%m-%d') AS `day`
FROM
`product` AS `p`
LEFT JOIN
`page` AS `pg` ON `p`.`id` = `pg`.`productid`
WHERE
`p`.`created_time` BETWEEN '2013-07-03 00:00:00' AND '2013-07-10 23:59:59'
AND
`p`.`group` = '101'
GROUP BY `day`, `p`.`id` HAVING COUNT(`pg`.`productid`)>=10
ORDER BY `p`.`created_time`
The two example tables concerned:
**product**
id created_time
32 2013-07-09
33 2013-07-09
**page**
id productid
1 33
2 33
.. ..
20 33
21 32
22 32
.. ..
54 32
Now my resultset looks like this:
quantity day
1 2013-07-09
1 2013-07-09
1 2013-07-10
But I would like the following output without UNION and without using temp tables:
quantity day
2 2013-07-09
1 2013-07-10
The two tables have now been added to my code example above. I need the number of products with ten or more pages, grouped by day.
I think that is because you are leaving p.id in the group by clause. Try this:
-- Number of products per creation day, counting only products that have at least
-- 10 pages.
-- The "10 or more pages" test is applied per product in a correlated subquery.
-- A HAVING COUNT(...) on the per-day group would compare the day's total page
-- count instead, which also lets through products with fewer than 10 pages.
SELECT
    COUNT(*) AS `quantity`,
    DATE_FORMAT(`p`.`created_time`, '%Y-%m-%d') AS `day`
FROM `product` AS `p`
WHERE `p`.`created_time` BETWEEN '2013-07-03 00:00:00' AND '2013-07-10 23:59:59'
  AND `p`.`group` = '101'
  AND (
        SELECT COUNT(*)
        FROM `page` AS `pg`
        WHERE `pg`.`productid` = `p`.`id`
      ) >= 10
GROUP BY DATE_FORMAT(`p`.`created_time`, '%Y-%m-%d')
-- `day` is formatted as YYYY-MM-DD, so ordering by it is chronological.
ORDER BY `day`
Don't GROUP BY the id value and also the ORDER BY can be on day too
Note that day is not available in the GROUP BY in standard SQL or when sql_mode is using "only_full_group_by". MySQL allows it as an extension but it is misleading
-- Inner query: one row per product (and its creation day) that has at least 10
-- pages. Outer query: number of such products per day.
-- GROUP BY repeats the DATE_FORMAT expression; an "AS alias" is not allowed
-- inside a GROUP BY clause.
SELECT
    COUNT(*) AS `quantity`,
    `qualifying`.`day`
FROM (
    SELECT
        `pg`.`productid`,
        DATE_FORMAT(`p`.`created_time`, '%Y-%m-%d') AS `day`
    FROM `product` AS `p`
    INNER JOIN `page` AS `pg`
        ON `p`.`id` = `pg`.`productid`
    WHERE `p`.`created_time` BETWEEN '2013-07-03 00:00:00' AND '2013-07-10 23:59:59'
      AND `p`.`group` = '101'
    GROUP BY
        `pg`.`productid`,
        DATE_FORMAT(`p`.`created_time`, '%Y-%m-%d')
    HAVING COUNT(*) >= 10
) AS `qualifying`
GROUP BY `qualifying`.`day`
ORDER BY `qualifying`.`day`;
I found the solution to my query:
-- Number of products per creation day, restricted to products with at least
-- 10 pages.
SELECT
    COUNT(`p`.`id`) AS `quantity`,
    DATE_FORMAT(`p`.`created_time`, '%Y-%m-%d') AS `day`
FROM `product` AS `p`
INNER JOIN (
    -- Ids of products that have 10 or more page rows. The column is exposed as
    -- `id` so the join can be written with USING.
    SELECT `productid` AS `id`
    FROM `page`
    GROUP BY `productid`
    HAVING COUNT(*) >= 10
) AS `pg` USING (`id`)
WHERE `p`.`created_time` BETWEEN '2013-07-03 00:00:00' AND '2013-07-10 23:59:59'
  AND `p`.`group` = '101'
GROUP BY `day`
-- Order by the grouped value: `created_time` itself is not part of the GROUP BY,
-- so sorting on it is rejected under ONLY_FULL_GROUP_BY and is otherwise
-- arbitrary within a day.
ORDER BY `day`
Thanks to a friend of my co-worker, Daniël Versteeg.

How to count open records, grouped by hour and day in SQL-server-2008-r2

I have hospital patient admission data in Microsoft SQL Server r2 that looks something like this:
PatientID, AdmitDate, DischargeDate
Jones. 1-jan-13 01:37. 1-jan-13 17:45
Smith 1-jan-13 02:12. 2-jan-13 02:14
Brooks. 4-jan-13 13:54. 5-jan-13 06:14
I would like to count the number of patients in the hospital day by day and hour by hour, i.e. at:
1-jan-13 00:00. 0
1-jan-13 01:00. 0
1-jan-13 02:00. 1
1-jan-13 03:00. 2
And I need to include the hours when there are no patients admitted in the result.
I can't create tables so making a reference table listing all the hours and days is out, though.
Any suggestions?
To solve this problem, you need a list of date-hours. The following gets this from the admit date cross joined to a table with 24 hours. The table of 24 hours is calculated from information_schema.columns -- a trick for getting small sequences of numbers in SQL Server.
The rest is just a join between this table and the hours. This version counts the patients at the hour, so someone admitted and discharged in the same hour, for instance is not counted. And in general someone is not counted until the next hour after they are admitted:
-- dh: every hour (00:00 .. 23:00) of every calendar day on which at least one
-- admission occurred.
WITH dh AS (
    SELECT DATEADD(hour, hours.seqnum - 1, a.thedatehour) AS DateHour
    FROM (
        -- midnight of each distinct admission day
        SELECT DISTINCT CAST(CAST(AdmitDate AS date) AS datetime) AS thedatehour
        FROM Admissions
    ) AS a
    CROSS JOIN (
        -- Row numbers 1, 2, 3, ... taken from a catalog view.
        -- NOTE(review): assumes INFORMATION_SCHEMA.COLUMNS has at least 24 rows.
        SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS seqnum
        FROM INFORMATION_SCHEMA.COLUMNS
    ) AS hours
    WHERE hours.seqnum <= 24
)
-- LEFT JOIN plus COUNT(a.PatientID) keeps hours with no patients and reports
-- them as 0. A patient is counted at an hour mark when that instant lies between
-- admission and discharge.
SELECT
    dh.DateHour,
    COUNT(a.PatientID) AS NumPatients
FROM dh
LEFT JOIN Admissions AS a
    ON dh.DateHour BETWEEN a.AdmitDate AND a.DischargeDate
GROUP BY dh.DateHour
ORDER BY dh.DateHour
This also assumes that there are admissions on every day. That seems like a reasonable assumption. If not, a calendar table would be a big help.
Here is one (ugly) way:
-- DayHours: the hour numbers 0..23, generated recursively.
;WITH DayHours AS
(
    SELECT 0 AS DayHour
    UNION ALL
    SELECT DayHour + 1
    FROM DayHours
    WHERE DayHour + 1 <= 23
)
-- One row per (admission day, hour). A patient is counted when the instant
-- "day + hour" lies between admission and discharge, i.e. the patient is in the
-- hospital at that hour mark, not merely admitted during that hour.
-- Hours with no patients are kept by the LEFT JOIN and show 0.
SELECT
    B.AdmitDate,
    A.DayHour,
    COUNT(DISTINCT C.PatientID) AS Patients
FROM DayHours AS A
CROSS JOIN (
    SELECT DISTINCT CONVERT(DATE, AdmitDate) AS AdmitDate
    FROM YourTable
) AS B
LEFT JOIN YourTable AS C
    -- DATEADD(HOUR, ...) is not allowed on the DATE type, hence the cast.
    ON DATEADD(HOUR, A.DayHour, CAST(B.AdmitDate AS DATETIME))
       BETWEEN C.AdmitDate AND C.DischargeDate
GROUP BY B.AdmitDate, A.DayHour
ORDER BY B.AdmitDate, A.DayHour
This is a bit messy and includes a temp table with the test data you provided, but here it is:
-- Test data from the question, loaded into a session-local temp table.
CREATE TABLE #HospitalPatientData (PatientId NVARCHAR(MAX), AdmitDate DATETIME, DischargeDate DATETIME)
INSERT INTO #HospitalPatientData
SELECT 'Jones.', '1-jan-13 01:37:00.000', '1-jan-13 17:45:00.000' UNION
SELECT 'Smith', '1-jan-13 02:12:00.000', '2-jan-13 02:14:00.000' UNION
SELECT 'Brooks.', '4-jan-13 13:54:00.000', '5-jan-13 06:14:00.000'
-- DayHours: the hour numbers 0..23, generated recursively.
;WITH DayHours AS
(
SELECT 0 DayHour
UNION ALL
SELECT DayHour+1
FROM DayHours
WHERE DayHour+1 <= 23
),
-- HospitalPatientData: admissions counted per (admit date as dd/mm/yyyy text, admit hour).
HospitalPatientData AS
(
SELECT CONVERT(nvarchar(max),AdmitDate,103) as AdmitDate ,DATEPART(hour,(AdmitDate)) as AdmitHour, COUNT(PatientID) as CountOfPatients
FROM #HospitalPatientData
GROUP BY CONVERT(nvarchar(max),AdmitDate,103), DATEPART(hour,(AdmitDate))
),
-- Results: every admit date combined with every hour 0..23.
-- The join condition d.DayHour=d.DayHour compares the column with itself, so it
-- holds for every row and the join pairs each date row with all 24 hours.
Results AS
(
SELECT MAX(h.AdmitDate) as Date, d.DayHour
FROM HospitalPatientData h
INNER JOIN DayHours d ON d.DayHour=d.DayHour
GROUP BY AdmitDate, CountOfPatients, DayHour
)
-- Final result: per (date, hour), the number of patients whose admission falls
-- on that date and in that hour; COUNT(h.PatientId) is 0 where the LEFT JOIN
-- finds no admission. Date is text in dd/mm/yyyy form, so ORDER BY r.Date sorts
-- it as a string.
SELECT r.*, COUNT(h.PatientId) as CountOfPatients
FROM Results r
LEFT JOIN #HospitalPatientData h ON CONVERT(nvarchar(max),AdmitDate,103)=r.Date AND DATEPART(HOUR,h.AdmitDate)=r.DayHour
GROUP BY r.Date, r.DayHour
ORDER BY r.Date, r.DayHour
DROP TABLE #HospitalPatientData
This may get you started:
-- Self-contained demo: sample data lives in a table variable and the reference
-- hours are generated on the fly, so no permanent table is created.
-- T-SQL variables and table variables start with @.
BEGIN TRAN
DECLARE @pt TABLE
(
    PatientID     VARCHAR(10),
    AdmitDate     DATETIME,
    DischargeDate DATETIME
)
INSERT INTO @pt (PatientID, AdmitDate, DischargeDate)
VALUES
    ('Jones',  '1-jan-13 01:37', '1-jan-13 17:45'),
    ('Smith',  '1-jan-13 02:12', '2-jan-13 02:14'),
    ('Brooks', '4-jan-13 13:54', '5-jan-13 06:14')

-- First day of the report and the number of days to cover.
DECLARE @StartDate  DATETIME = '20130101',
        @FutureDays INT      = 7
;
WITH dy AS (
    -- day offsets 1..@FutureDays; sys.columns only serves as a row source
    SELECT TOP (@FutureDays)
        ROW_NUMBER() OVER (ORDER BY name) AS dy
    FROM sys.columns AS c
),
hr AS (
    -- hour offsets 1..24
    SELECT TOP 24
        ROW_NUMBER() OVER (ORDER BY name) AS hr
    FROM sys.columns AS c
)
-- One row per reference hour. COUNT(p.PatientID) is 0 for hours without patients
-- because the LEFT JOIN yields NULL there.
SELECT
    ref.refDate,
    COUNT(p.PatientID) AS PtCount
FROM (
    SELECT DATEADD(HOUR, hr.hr - 1,
                   DATEADD(DAY, dy.dy - 1, @StartDate)) AS refDate
    FROM dy
    CROSS JOIN hr
) AS ref
LEFT JOIN @pt AS p
    ON ref.refDate BETWEEN p.AdmitDate AND p.DischargeDate
GROUP BY ref.refDate
ORDER BY ref.refDate
ROLLBACK