SQL Addition Formula - sql

Noob alert...
I have an example table as followed.
I am trying to create a column in SQL that shows the what percentage each customer had of size S per year.
So output should be something like:
(Correction: the customer C for 2019 Percentage should be 1)

Window functions will get you there.
DECLARE #TestData TABLE
(
[Customer] NVARCHAR(2)
, [CustomerYear] INT
, [CustomerCount] INT
, [CustomerSize] NVARCHAR(2)
);
INSERT INTO #TestData (
[Customer]
, [CustomerYear]
, [CustomerCount]
, [CustomerSize]
)
VALUES ( 'A', 2017, 1, 'S' )
, ( 'A', 2017, 1, 'S' )
, ( 'B', 2017, 1, 'S' )
, ( 'B', 2017, 1, 'S' )
, ( 'B', 2018, 1, 'S' )
, ( 'A', 2018, 1, 'S' )
, ( 'C', 2017, 1, 'S' )
, ( 'C', 2019, 1, 'S' );
SELECT DISTINCT [Customer]
, [CustomerYear]
, SUM([CustomerCount]) OVER ( PARTITION BY [Customer]
, [CustomerYear]
) AS [CustomerCount]
, SUM([CustomerCount]) OVER ( PARTITION BY [CustomerYear] ) AS [TotalCount]
, SUM([CustomerCount]) OVER ( PARTITION BY [Customer]
, [CustomerYear]
) * 1.0 / SUM([CustomerCount]) OVER ( PARTITION BY [CustomerYear] ) AS [CustomerPercentage]
FROM #TestData
ORDER BY [CustomerYear]
, [Customer];
Will give you
Customer CustomerYear CustomerCount TotalCount CustomerPercentage
-------- ------------ ------------- ----------- ---------------------------------------
A 2017 2 5 0.400000000000
B 2017 2 5 0.400000000000
C 2017 1 5 0.200000000000
A 2018 1 2 0.500000000000
B 2018 1 2 0.500000000000
C 2019 1 1 1.000000000000

Assuming there are no duplicate rows for a customer in a year, you can use window functions:
select t.*,
sum(count) over (partition by year) as year_cnt,
count * 1.0 / sum(count) over (partition by year) as ratio
from t;

Break it apart into tasks - that's probably the best rule to follow when it comes to SQL. So, I created a variable table #tmp which I populated with your sample data, and started out with this query:
select
customer,
year
from #tmp
where size = 'S'
group by customer, year
... this gets a row for each customer/year combo for 'S' entries.
Next, I want the total count for that customer/year combo:
select
customer,
year,
SUM(itemCount) as customerItemCount
from #tmp
where size = 'S'
group by customer, year
... now, how do we get the count for all customers for a specific year? We need a subquery - and we need that subquery to reference the year from the main query.
select
customer,
year,
SUM(itemCount) as customerItemCount,
(select SUM(itemCount) from #tmp t2 where year=t.year) as FullTotalForYear
from #tmp t
where size = 'S'
GROUP BY customer, year
... that make sense? That new line in the ()'s is a subquery - and it's hitting the table again - but this time, its just getting a SUM() over the particular year that matches the main table.
Finally, we just need to divide one of those columns by the other to get the actual percent (making sure not to make it int/int - which will always be an int), and we'll have our final answer:
select
customer,
year,
cast(SUM(itemCount) as float) /
(select SUM(itemCount) from #tmp t2 where year=t.year)
as PercentageOfYear
from #tmp t
where size = 'S'
GROUP BY customer, year
Make sense?

With a join of 2 groupings:
the 1st by size, year, customer and
the 2nd by size, year.
select
t.customer, t.year, t.count, t.size,
ty.total_count, 1.0 * t.count / ty.total_count percentage
from (
select t.customer, t.year, sum(t.count) count, t.size
from tablename t
group by t.size, t.year, t.customer
) t inner join (
select t.year, sum(t.count) total_count, t.size
from tablename t
group by t.size, t.year
) ty
on ty.size = t.size and ty.year = t.year
order by t.size, t.year, t.customer;
See the demo

Related

Efficient Multiple Group-bys

I have the following table:
Year
Week
Day_1
Day_2
Day_3
2020
1
Walk
Jump
Swim
2020
3
Walk
Swim
Walk
2020
1
Jump
Walk
Swim
I want to group by YEAR, WEEK and Event (Walk, jump, Swim) and count the number of times each event occurs in Day_1, Day_2, Day_3. I.e.
Year
Week
Event
Count_Day_1
Count_Day_2
Count_Day_3
2020
1
Walk
1
1
0
2020
3
Walk
1
0
1
2020
1
Jump
1
1
0
2020
3
Jump
0
0
0
2020
1
Swim
0
0
2
2020
3
Swim
. 0
1
0
How can I do this efficiently?
In BigQuery, I would unpivot using arrays and then aggregate:
with t as (
select 2020 as year, 1 as week, 'Walk' as day_1, 'Jump' as day_2, 'Swim' as day_3 union all
select 2020, 3, 'Walk', 'Swim', 'Walk' union all
select 2020, 1, 'Jump', 'Walk', 'Swim'
)
select t.year, t.week, s.event,
countif(day = 1) as day_1, countif(day = 2) as day_2, countif(day = 3) as day_3
from t cross join
unnest([struct(t.day_1 as event, 1 as day),
struct(t.day_2 as event, 2 as day),
struct(t.day_3 as event, 3 as day)
]) s
group by t.year, t.week, s.event;
Consider this less verbose option
select year, week, event,
countif(offset = 0) as day_1,
countif(offset = 1) as day_2,
countif(offset = 2) as day_3
from `project.dataset.table`,
unnest([day_1, day_2, day_3]) event with offset
where not event is null
group by year, week, event
If applied to sample data in your question - output is
Demo code is MS SQL!
If you want to generate a full grid for every week and every year for every event then there are two pre-aggregates required, one for event and another one for every year and week.
Like:
DECLARE
#OriginalData
TABLE
(
numYear smallint,
numWeek tinyint,
dscDay1 nvarchar(20),
dscDay2 nvarchar(20),
dscDay3 nvarchar(20)
)
;
INSERT INTO
#OriginalData
(
numYear, numWeek, dscDay1, dscDay2, dscDay3
)
VALUES
( 2020, 1, N'Walk', N'Jump', N'Swim' ),
( 2020, 3, N'Walk', N'Swim', N'Walk' ),
( 2020, 1, N'Jump', N'Walk', N'Swim' )
;
SELECT
numYear, numWeek, dscDay1, dscDay2, dscDay3
FROM
#OriginalData
;
WITH
cteNormalise
(
dscActivity
)
AS
(
SELECT
dscDay1
FROM
#OriginalData
GROUP BY
dscDay1
UNION
SELECT
dscDay2
FROM
#OriginalData
GROUP BY
dscDay2
UNION
SELECT
dscDay3
FROM
#OriginalData
GROUP BY
dscDay3
),
cteGrid
(
numYear,
numWeek
)
AS
(
SELECT
numYear,
numWeek
FROM
#OriginalData
GROUP BY
numYear,
numWeek
)
SELECT
--/* Debug output */ *
YearWeek.numYear,
YearWeek.numWeek,
Normalised.dscActivity,
Count( Day1.dscDay1 ) AS CountDay1,
Count( Day2.dscDay2 ) AS CountDay2,
Count( Day3.dscDay3 ) AS CountDay3
FROM
cteNormalise AS Normalised
CROSS JOIN cteGrid AS YearWeek
LEFT OUTER JOIN #OriginalData AS Day1
ON Day1.dscDay1 = Normalised.dscActivity
AND Day1.numYear = YearWeek.numYear
AND Day1.numWeek = YearWeek.numWeek
LEFT OUTER JOIN #OriginalData AS Day2
ON Day2.dscDay2 = Normalised.dscActivity
AND Day2.numYear = YearWeek.numYear
AND Day2.numWeek = YearWeek.numWeek
LEFT OUTER JOIN #OriginalData AS Day3
ON Day3.dscDay3 = Normalised.dscActivity
AND Day3.numYear = YearWeek.numYear
AND Day3.numWeek = YearWeek.numWeek
GROUP BY
YearWeek.numYear,
YearWeek.numWeek,
Normalised.dscActivity
ORDER BY
YearWeek.numYear,
Normalised.dscActivity,
YearWeek.numWeek
;
This will work, however efficiency is questionable due to the steps to normalise the data before the actual aggregation happens.
If possible I suggest converting the table first into a 3NF with just key columns of Year, Week, Event and Day. Then a fairly efficient summary can be produced. At the cost of the normalisation beforehand. Otherwise the cost of transformation is required in the query.
You need to find distinct event, do cross join with your table and use conditional aggregation as follows:
select t.year, t.week, e.event,
count(case when t.day_1 = e.event then 1 end) as count_day_1,
count(case when t.day_2 = e.event then 1 end) as count_day_2,
count(case when t.day_3 = e.event then 1 end) as count_day_3
from your_Table t
cross join (select distinct day_1 as event from your_table
union all select day_2 from your_table
union all select day_3 from your_table) e
group by t.year, t.week, e.event

Group by in columns and rows, counts and percentages per day

I have a table that has data like following.
attr |time
----------------|--------------------------
abc |2018-08-06 10:17:25.282546
def |2018-08-06 10:17:25.325676
pqr |2018-08-05 10:17:25.366823
abc |2018-08-06 10:17:25.407941
def |2018-08-05 10:17:25.449249
I want to group them and count by attr column row wise and also create additional columns in to show their counts per day and percentages as shown below.
attr |day1_count| day1_%| day2_count| day2_%
----------------|----------|-------|-----------|-------
abc |2 |66.6% | 0 | 0.0%
def |1 |33.3% | 1 | 50.0%
pqr |0 |0.0% | 1 | 50.0%
I'm able to display one count by using group by but unable to find out how to even seperate them to multiple columns. I tried to generate day1 percentage with
SELECT attr, count(attr), count(attr) / sum(sub.day1_count) * 100 as percentage from (
SELECT attr, count(*) as day1_count FROM my_table WHERE DATEPART(week, time) = DATEPART(day, GETDate()) GROUP BY attr) as sub
GROUP BY attr;
But this also is not giving me correct answer, I'm getting all zeroes for percentage and count as 1. Any help is appreciated. I'm trying to do this in Redshift which follows postgresql syntax.
Let's nail the logic before presenting:
with CTE1 as
(
select attr, DATEPART(day, time) as theday, count(*) as thecount
from MyTable
)
, CTE2 as
(
select theday, sum(thecount) as daytotal
from CTE1
group by theday
)
select t1.attr, t1.theday, t1.thecount, t1.thecount/t2.daytotal as percentofday
from CTE1 t1
inner join CTE2 t2
on t1.theday = t2.theday
From here you can pivot to create a day by day if you feel the need
I am trying to enhance the query #johnHC btw if you needs for 7days then you have to those days in case when
with CTE1 as
(
select attr, time::date as theday, count(*) as thecount
from t group by attr,time::date
)
, CTE2 as
(
select theday, sum(thecount) as daytotal
from CTE1
group by theday
)
,
CTE3 as
(
select t1.attr, EXTRACT(DOW FROM t1.theday) as day_nmbr,t1.theday, t1.thecount, t1.thecount/t2.daytotal as percentofday
from CTE1 t1
inner join CTE2 t2
on t1.theday = t2.theday
)
select CTE3.attr,
max(case when day_nmbr=0 then CTE3.thecount end) as day1Cnt,
max(case when day_nmbr=0 then percentofday end) as day1,
max(case when day_nmbr=1 then CTE3.thecount end) as day2Cnt,
max( case when day_nmbr=1 then percentofday end) day2
from CTE3 group by CTE3.attr
http://sqlfiddle.com/#!17/54ace/20
In case that you have only 2 days:
http://sqlfiddle.com/#!17/3bdad/3 (days descending as in your example from left to right)
http://sqlfiddle.com/#!17/3bdad/5 (days ascending)
The main idea is already mentioned in the other answers. Instead of joining the CTEs for calculating the values I am using window functions which is a bit shorter and more readable I think. The pivot is done the same way.
SELECT
attr,
COALESCE(max(count) FILTER (WHERE day_number = 0), 0) as day1_count, -- D
COALESCE(max(percent) FILTER (WHERE day_number = 0), 0) as day1_percent,
COALESCE(max(count) FILTER (WHERE day_number = 1), 0) as day2_count,
COALESCE(max(percent) FILTER (WHERE day_number = 1), 0) as day2_percent
/*
Add more days here
*/
FROM(
SELECT *, (count::float/count_per_day)::decimal(5, 2) as percent -- C
FROM (
SELECT DISTINCT
attr,
MAX(time::date) OVER () - time::date as day_number, -- B
count(*) OVER (partition by time::date, attr) as count, -- A
count(*) OVER (partition by time::date) as count_per_day
FROM test_table
)s
)s
GROUP BY attr
ORDER BY attr
A counting the rows per day and counting the rows per day AND attr
B for more readability I convert the date into numbers. Here I take the difference between current date of the row and the maximum date available in the table. So I get a counter from 0 (first day) up to n - 1 (last day)
C calculating the percentage and rounding
D pivot by filter the day numbers. The COALESCE avoids the NULL values and switched them into 0. To add more days you can multiply these columns.
Edit: Made the day counter more flexible for more days; new SQL Fiddle
Basically, I see this as conditional aggregation. But you need to get an enumerator for the date for the pivoting. So:
SELECT attr,
COUNT(*) FILTER (WHERE day_number = 1) as day1_count,
COUNT(*) FILTER (WHERE day_number = 1) / cnt as day1_percent,
COUNT(*) FILTER (WHERE day_number = 2) as day2_count,
COUNT(*) FILTER (WHERE day_number = 2) / cnt as day2_percent
FROM (SELECT attr,
DENSE_RANK() OVER (ORDER BY time::date DESC) as day_number,
1.0 * COUNT(*) OVER (PARTITION BY attr) as cnt
FROM test_table
) s
GROUP BY attr, cnt
ORDER BY attr;
Here is a SQL Fiddle.

How To Get Running Subtotal with Group By in SQL Server

How can I get a running sub-total of amounts for a group in SQL 2014?
I have a table with transaction amounts. I need to summarize to get a row for each project and quarter for which there is data, and need a running subtotal within each project. The running total would need to reset to zero for each new project.
Here is what I have so far:
SELECT [ProjectId]
, SUM( ActualAmount) AS PeriodAmount
, SUM( ActualAmount) OVER (PARTITION BY ProjectId ORDER BY ProjectId,YearQuarter)
AS FairMarketValue
FROM GLSnapshot
GROUP BY [ProjectId] , [YearQuarter]
I currently get this error:
Msg 8120, Level 16, State 1, Line 3
Column 'GLSnapshot.ActualAmount' is invalid in the
select list because it is not contained in either an
aggregate function or the GROUP BY clause.
Sample Data: Assuming I have the following data for table GLSnapshot:
ProjectId, YearQuarter, ActualAmount
'A', '2015Q1' , 9000.00
'A', '2015Q1' , 100.00
'A', '2015Q2' , 50.00
'A', '2015Q3' , 50.00
'A', '2015Q3' , 200.00
'B', '2015Q1' ,80000.00
I should get the following result for
ProjectId, YearQuarter, PeriodAmount, FairMarketValue (Running Subtotal):
'A', '2015Q1' , 9100.00 , 9100.00
'A', '2015Q2' , 50.00 , 9150.00
'A', '2015Q3' , 250.00 , 9400.00
'B', '2015Q1' ,80000.00 , 80000.00
OLAP functions are calculated after aggregation, you can't use ActualAmount, must be SUM( ActualAmount). And there's no need to order by ProjectId because it's already in PARTITION BY. Finally use ROWS UNBOUNDED PRECEDING otherwise it defaults to RANGE UNBOUNDED PRECEDING which is more expensive and might not return the expected result:
SELECT [ProjectId]
, [YearQuarter]
, SUM( ActualAmount) AS PeriodAmount
, SUM( SUM( ActualAmount))
OVER (PARTITION BY ProjectId
ORDER BY YearQuarter
ROWS UNBOUNDED PRECEDING) AS FairMarketValue
FROM GLSnapshot
GROUP BY [ProjectId] , [YearQuarter]
Try this:
CREATE TABLE #GLSnapshot (ProjectId VARCHAR(5), YearQuarter VARCHAR(6), ActualAmount NUMERIC(18,2))
INSERT INTO #GLSnapshot
SELECT 'A', '2015Q1' , 9000.00 UNION ALL
SELECT 'A', '2015Q1' , 100.00 UNION ALL
SELECT 'A', '2015Q2' , 50.00 UNION ALL
SELECT 'A', '2015Q3' , 50.00 UNION ALL
SELECT 'A', '2015Q3' , 200.00 UNION ALL
SELECT 'B', '2015Q1' ,80000.00
;WITH T
AS
(
SELECT ROW_NUMBER() over(partition by ProjectId ORDER by YearQuarter) RN,
ProjectId,YearQuarter,sum(ActualAmount) PeriodAmount
FROM #GLSnapshot
GROUP BY ProjectId,YearQuarter
)
SELECT T1.ProjectId,T1.YearQuarter,T1.PeriodAmount, SUM(T2.PeriodAmount) FairMarketValue
FROM T T1
INNER JOIN T T2 ON T1.ProjectId = T2.ProjectId and T1.RN >= T2.RN
GROUP BY T1.ProjectId,T1.YearQuarter,T1.PeriodAmount
You can try this query using ROWS UNBOUNDED PRECEDING
;with cte as (
select ProjectID, YearQuarter, ActualAmount
from GLSnapshot
) , cte2 as (
select ProjectID, YearQuarter, sum(ActualAmount) SumActualAmount from cte Group by ProjectID, YearQuarter
) Select *, sum(SumActualAmount) over(partition by projectid order by projectid, yearquarter rows unbounded preceding) as RunningTotal from cte2
You can use rows unbounded preceding which will give running total
;with cte as (
select ProjectID, YearQuarter, ActualAmount
from GLSnapshot
) , cte2 as (
select ProjectID, YearQuarter, sum(ActualAmount) SumActualAmount from cte Group by ProjectID, YearQuarter
) Select *, sum(SumActualAmount) over(partition by projectid order by projectid, yearquarter rows unbounded preceding) as RunningTotal from cte2

How do I select the most frequent value for a specific month and display this value as well as the amount of times it occurs?

I am struggling with a TSQL query and I'm all out of googling, so naturally I figured I might as well ask on SO.
Please keep in mind that I just began trying to learn SQL a few weeks back and I'm not really sure what rules there are and how you can and can not write your queries / sub-queries.
This is what I have so far:
Edit: Updated with DDL that should help create an example, also commented out unnecessary "Client"-column.
CREATE TABLE NumberTable
(
Number varchar(20),
Date date
);
INSERT INTO NumberTable (Number, Date)
VALUES
('55512345', '2015-01-01'),
('55512345', '2015-01-01'),
('55512345', '2015-01-01'),
('55545678', '2015-01-01'),
('55512345', '2015-02-01'),
('55523456', '2015-02-01'),
('55523456', '2015-02-01'),
('55534567', '2015-03-01'),
('55534567', '2015-03-01'),
('55534567', '2015-03-01'),
('55534567', '2015-03-01'),
('55545678', '2015-03-01'),
('55545678', '2015-04-01')
DECLARE
--#ClientNr AS int,
#FromDate AS date,
#ToDate AS date
--SET #ClientNr = 11111
SET #FromDate = '2015-01-01'
SET #ToDate = DATEADD(yy, 1, #FromDate)
SELECT
YEAR(Date) AS [Year],
MONTH(Date) AS [Month],
COUNT(Number) AS [Total Count]
FROM
NumberTable
WHERE
--Client = #ClientNr
Date BETWEEN #FromDate AND #ToDate
AND Number IS NOT NULL
AND Number NOT IN ('888', '144')
GROUP BY MONTH(Date), YEAR(Date)
ORDER BY [Year], [Month]
With this I am getting the Year, Month and Total Count.
I'm happy with only getting the top 1 most called number and count each month, but showing top 5 is preferable.
Heres an example of how I would like the table to look in the end (having the months formatted as JAN, FEB etc instead of numbers is not really important, but would be a nice bonus):
╔══════╦═══════╦═════════════╦═══════════╦══════════╦═══════════╦══════════╗
║ Year ║ Month ║ Total Count ║ #1 Called ║ #1 Count ║ #2 Called ║ #2 Count ║
╠══════╬═══════╬═════════════╬═══════════╬══════════╬═══════════╬══════════╣
║ 2016 ║ JAN ║ 80431 ║ 555-12345 ║ 45442 ║ 555-94564 ║ 17866 ║
╚══════╩═══════╩═════════════╩═══════════╩══════════╩═══════════╩══════════╝
I was told this was "easily" done with a sub-query, but I'm not so sure...
Interesting one this, I believe you can do it with a CTE and PIVOT but this is off the top of my head... This may not work verbatim
WITH Rollup_CTE
AS
(
SELECT Client,MONTH(Date) as Month, YEAR(Date) as Year, Number, Count(0) as Calls, ROW_NUMBER() OVER (PARTITION BY Client,MONTH(Date) as SqNo, YEAR(Date), Number ORDER BY COUNT(0) DESC)
from NumberTable
WHERE Number IS NOT NULL AND Number NOT IN ('888', '144')
GROUP BY Client,MONTH(Date), YEAR(Date), Number
)
SELECT * FROM Rollup_CTE Where SqNo <=5
You may then be able to pivot the data as you wish using PIVOT
artm's query corrected (PARTITION) and the last step (pivoting) simplified.
with data AS
(select '2016-01-01' as called, '111' as number
union all select '2016-01-01', '111'
union all select '2016-01-01', '111'
union all select '2016-01-01', '222'
union all select '2016-01-01', '222'
union all select '2016-01-05', '111'
union all select '2016-01-05', '222'
union all select '2016-01-05', '222')
, ordered AS (
select called
, number
, count(*) cnt
, ROW_NUMBER() OVER (PARTITION BY called ORDER BY COUNT(*) DESC) rnk
from data
group by called, number)
select called, total = sum(cnt)
, n1= max(case rnk when 1 then number end)
, cnt1=max(case rnk when 1 then cnt end)
, n2= max(case rnk when 2 then number end)
, cnt2=max(case rnk when 2 then cnt end)
from ordered
group by called
EDIT Using setup provided by OP
WITH ordered AS(
-- compute order
SELECT
[Year] = YEAR(Date)
, [Month] = MONTH(Date)
, number
, COUNT(*) cnt
, ROW_NUMBER() OVER (PARTITION BY YEAR(Date), MONTH(Date) ORDER BY COUNT(*) DESC) rnk
FROM NumberTable
WHERE Date BETWEEN #FromDate AND #ToDate
AND Number IS NOT NULL
AND Number NOT IN ('888', '144')
GROUP BY YEAR(Date), MONTH(Date), number
)
-- pivot by order
SELECT [Year], [Month]
, total = sum(cnt)
, n1 = MAX(case rnk when 1 then number end)
, cnt1 = MAX(case rnk when 1 then cnt end)
, n2 = MAX(case rnk when 2 then number end)
, cnt2 = MAX(case rnk when 2 then cnt end)
-- n3, cnt3, ....
FROM ordered
GROUP BY [Year], [Month];
This query help you:
IF OBJECT_ID('tempdb..#Test','U') IS NOT NULL DROP TABLE #Test;
CREATE TABLE #Test(Number INT NOT NULL)
INSERT INTO #Test(Number)
VALUES(1),(2),(3),(1)
SELECT TOP 1 WITH TIES
Number
FROM (
SELECT DISTINCT
Number
, COUNT(*) OVER(PARTITION BY Number) AS cnt
FROM #Test) AS T
ORDER BY cnt DESC
I have used TOP 1 WITH TIES for case when max count exists for several values.
Try this, doesn't have to be CTE but I used it to populate data, you can extend it to include 3rd, 4th etc.
;with data AS
(select '2016-01-01' as called, '111' as number
union all select '2016-01-01', '111'
union all select '2016-01-01', '111'
union all select '2016-01-01', '222'
union all select '2016-01-01', '222')
, ordered AS (
select called
, number
, count(*) cnt
, ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC) rnk
from data
group by called, number)
SELECT distinct *
FROM (SELECT DATENAME(month, called) mnth FROM ordered) AS mnth,
(SELECT number MostCalledNumber FROM ordered WHERE rnk = 1) AS MostCalledNumber,
(SELECT cnt MostCalledTimes FROM ordered WHERE rnk = 1) AS MostCalledTimes,
(SELECT number SecondMostCalledNumber FROM ordered WHERE rnk = 2) AS SecondMostCalledNumber,
(SELECT cnt SecondMostCalledTimes FROM ordered WHERE rnk = 2) AS SecondMostCalledTimes

How to count open records, grouped by hour and day in SQL-server-2008-r2

I have hospital patient admission data in Microsoft SQL Server r2 that looks something like this:
PatientID, AdmitDate, DischargeDate
Jones. 1-jan-13 01:37. 1-jan-13 17:45
Smith 1-jan-13 02:12. 2-jan-13 02:14
Brooks. 4-jan-13 13:54. 5-jan-13 06:14
I would like count the number of patients in the hospital day by day and hour by hour (ie at
1-jan-13 00:00. 0
1-jan-13 01:00. 0
1-jan-13 02:00. 1
1-jan-13 03:00. 2
And I need to include the hours when there are no patients admitted in the result.
I can't create tables so making a reference table listing all the hours and days is out, though.
Any suggestions?
To solve this problem, you need a list of date-hours. The following gets this from the admit date cross joined to a table with 24 hours. The table of 24 hours is calculating from information_schema.columns -- a trick for getting small sequences of numbers in SQL Server.
The rest is just a join between this table and the hours. This version counts the patients at the hour, so someone admitted and discharged in the same hour, for instance is not counted. And in general someone is not counted until the next hour after they are admitted:
with dh as (
select DATEADD(hour, seqnum - 1, thedatehour ) as DateHour
from (select distinct cast(cast(AdmitDate as DATE) as datetime) as thedatehour
from Admission a
) a cross join
(select ROW_NUMBER() over (order by (select NULL)) as seqnum
from INFORMATION_SCHEMA.COLUMNS
) hours
where hours <= 24
)
select dh.DateHour, COUNT(*) as NumPatients
from dh join
Admissions a
on dh.DateHour between a.AdmitDate and a.DischargeDate
group by dh.DateHour
order by 1
This also assumes that there are admissions on every day. That seems like a reasonable assumption. If not, a calendar table would be a big help.
Here is one (ugly) way:
;WITH DayHours AS
(
SELECT 0 DayHour
UNION ALL
SELECT DayHour+1
FROM DayHours
WHERE DayHour+1 <= 23
)
SELECT B.AdmitDate, A.DayHour, COUNT(DISTINCT PatientID) Patients
FROM DayHours A
CROSS JOIN (SELECT DISTINCT CONVERT(DATE,AdmitDate) AdmitDate
FROM YourTable) B
LEFT JOIN YourTable C
ON B.AdmitDate = CONVERT(DATE,C.AdmitDate)
AND A.DayHour = DATEPART(HOUR,C.AdmitDate)
GROUP BY B.AdmitDate, A.DayHour
This is a bit messy and includes a temp table with the test data you provided but
CREATE TABLE #HospitalPatientData (PatientId NVARCHAR(MAX), AdmitDate DATETIME, DischargeDate DATETIME)
INSERT INTO #HospitalPatientData
SELECT 'Jones.', '1-jan-13 01:37:00.000', '1-jan-13 17:45:00.000' UNION
SELECT 'Smith', '1-jan-13 02:12:00.000', '2-jan-13 02:14:00.000' UNION
SELECT 'Brooks.', '4-jan-13 13:54:00.000', '5-jan-13 06:14:00.000'
;WITH DayHours AS
(
SELECT 0 DayHour
UNION ALL
SELECT DayHour+1
FROM DayHours
WHERE DayHour+1 <= 23
),
HospitalPatientData AS
(
SELECT CONVERT(nvarchar(max),AdmitDate,103) as AdmitDate ,DATEPART(hour,(AdmitDate)) as AdmitHour, COUNT(PatientID) as CountOfPatients
FROM #HospitalPatientData
GROUP BY CONVERT(nvarchar(max),AdmitDate,103), DATEPART(hour,(AdmitDate))
),
Results AS
(
SELECT MAX(h.AdmitDate) as Date, d.DayHour
FROM HospitalPatientData h
INNER JOIN DayHours d ON d.DayHour=d.DayHour
GROUP BY AdmitDate, CountOfPatients, DayHour
)
SELECT r.*, COUNT(h.PatientId) as CountOfPatients
FROM Results r
LEFT JOIN #HospitalPatientData h ON CONVERT(nvarchar(max),AdmitDate,103)=r.Date AND DATEPART(HOUR,h.AdmitDate)=r.DayHour
GROUP BY r.Date, r.DayHour
ORDER BY r.Date, r.DayHour
DROP TABLE #HospitalPatientData
This may get you started:
BEGIN TRAN
DECLARE #pt TABLE
(
PatientID VARCHAR(10)
, AdmitDate DATETIME
, DischargeDate DATETIME
)
INSERT INTO #pt
( PatientID, AdmitDate, DischargeDate )
VALUES ( 'Jones', '1-jan-13 01:37', '1-jan-13 17:45' ),
( 'Smith', '1-jan-13 02:12', '2-jan-13 02:14' )
, ( 'Brooks', '4-jan-13 13:54', '5-jan-13 06:14' )
DECLARE #StartDate DATETIME = '20130101'
, #FutureDays INT = 7
;
WITH dy
AS ( SELECT TOP (#FutureDays)
ROW_NUMBER() OVER ( ORDER BY name ) dy
FROM sys.columns c
) ,
hr
AS ( SELECT TOP 24
ROW_NUMBER() OVER ( ORDER BY name ) hr
FROM sys.columns c
)
SELECT refDate, COUNT(p.PatientID) AS PtCount
FROM ( SELECT DATEADD(HOUR, hr.hr - 1,
DATEADD(DAY, dy.dy - 1, #StartDate)) AS refDate
FROM dy
CROSS JOIN hr
) ref
LEFT JOIN #pt p ON ref.refDate BETWEEN p.AdmitDate AND p.DischargeDate
GROUP BY refDate
ORDER BY refDate
ROLLBACK