How do I transpose a result set and group by week? - sql

I have a view based on query:
-- One row per result in the 2019 season; the event date is rendered as dd/mm/yyyy text (CONVERT style 103).
SELECT
    CONVERT(VARCHAR(10), r.[date], 103) AS [date],
    r.eventid,
    r.name,
    r.[time],
    r.pts
FROM results AS r
-- Half-open range instead of DATEPART(yy, date) = 2019: same rows, but the column is not wrapped
-- in a function, so an index on [date] (if there is one) can be used for a seek.
WHERE r.[date] >= '20190101'
  AND r.[date] <  '20200101';
This provides a data set such as this:
Date EventID Name Time Points
24/04/2019 10538 Fred Flintstone 22:27 10
24/04/2019 10538 Barney Rubble 22:50 9
24/04/2019 10538 Micky Mouse 23:17 8
24/04/2019 10538 Yogi Bear 23:54 7
24/04/2019 10538 Donald Duck 24:07 6
01/05/2019 10541 Barney Rubble 21:58 10
01/05/2019 10541 Fred Flintstone 22:00 9
01/05/2019 10541 Donald Duck 23:39 8
01/05/2019 10541 Yogi Bear 23:43 7
12/06/2019 10569 Fred Flintstone 22:06 10
12/06/2019 10569 Barney Rubble 22:22 9
12/06/2019 10569 Micky Mouse 23:05 8
12/06/2019 10569 Donald Duck 23:55 7
I need an output row for each name listing the pts per round and a total in the form:
Name 24/04/2019 01/05/2019 12/06/2019 total
Fred Flintstone 10 9 10 29
Barney Rubble 9 10 9 28
Yogi Bear 7 7 7 21
Micky Mouse 8 8 16
Donald Duck 6 8 14
There could be up to 16 non-consecutive event dates for the year.

Nothing wrong with PIVOT but, for me, the easiest and most performant way to do this would be to perform a Cross Tab. The syntax is less verbose, more portable, and easier to understand.
First, some DDL and easily consumable sample data. <<< Learn how to do this; it will get you better answers more quickly.
SET NOCOUNT ON;
SET DATEFORMAT dmy; -- the sample literals are dd/mm/yyyy; a us_english session would otherwise read them as mm/dd/yyyy

-- DDL and easily consumable sample data.
-- A local temp table is used so that #Results, the name the queries select from, is a valid object
-- name (DECLARE ... TABLE only accepts an @-prefixed variable name).
IF OBJECT_ID('tempdb..#Results') IS NOT NULL
    DROP TABLE #Results;

CREATE TABLE #Results
(
    [Date]  DATE,
    EventId INT,
    [Name]  VARCHAR(40), -- if indexed, go as narrow as possible
    [Time]  TIME,
    Points  INT,
    -- An index that covers the query is vital for a cross tab over many rows.
    -- (Name, EventId) is a bad clustered index candidate; it is used here only for simplicity.
    INDEX uq_poc_results CLUSTERED ([Name], [EventId])
);

INSERT #Results ([Date], EventId, [Name], [Time], Points) VALUES
('24/04/2019', 10538, 'Fred Flintstone', '22:27', 10),
('24/04/2019', 10538, 'Barney Rubble',   '22:50', 9),
('24/04/2019', 10538, 'Micky Mouse',     '23:17', 8),
('24/04/2019', 10538, 'Yogi Bear',       '23:54', 7),
('24/04/2019', 10538, 'Donald Duck',     '23:07', 6), -- the question shows 24:07, which TIME cannot hold; presumably 23:07 was the intended stand-in
('01/05/2019', 10541, 'Barney Rubble',   '21:58', 10),
('01/05/2019', 10541, 'Fred Flintstone', '22:00', 9),
('01/05/2019', 10541, 'Donald Duck',     '23:39', 8),
('01/05/2019', 10541, 'Yogi Bear',       '23:43', 7),
('12/06/2019', 10569, 'Fred Flintstone', '22:06', 10),
('12/06/2019', 10569, 'Barney Rubble',   '22:22', 9),
('12/06/2019', 10569, 'Micky Mouse',     '23:05', 8),
('12/06/2019', 10569, 'Donald Duck',     '23:55', 7);
Note that I created a clustered index on (Name,EventId) - I would use a non-clustered index that covered the columns you need in the real world. If you have a lot of rows then you will want that index.
Basic Cross-Tab
-- Basic cross tab: one output row per competitor, one column per event date.
-- MAX(CASE ...) picks the Points scored on that date; ELSE 0 reports 0 when the competitor has no row for it.
-- The dates are written as yyyymmdd literals so the comparison does not depend on the session's DATEFORMAT.
SELECT [Name]       = r.[Name],
       [24/04/2019] = MAX(CASE WHEN r.[Date] = '20190424' THEN r.Points ELSE 0 END),
       [01/05/2019] = MAX(CASE WHEN r.[Date] = '20190501' THEN r.Points ELSE 0 END),
       [12/06/2019] = MAX(CASE WHEN r.[Date] = '20190612' THEN r.Points ELSE 0 END)
FROM   #Results AS r
GROUP BY r.[Name];
Results:
Name 24/04/2019 01/05/2019 12/06/2019
-------------------- ------------ ------------ ------------
Barney Rubble 9 10 9
Donald Duck 6 8 7
Fred Flintstone 10 9 10
Micky Mouse 8 0 8
Yogi Bear 7 7 0
To get the total we can wrap this logic in a subquery and add the columns like this:
-- Cross tab plus a per-competitor total.
-- The derived table piv does the pivot (one row per name, columns N, D1..D3);
-- the outer query only names the columns and adds them up.
-- Dates are yyyymmdd literals so the match does not depend on the session's DATEFORMAT.
SELECT
    [Name]       = piv.N,
    [24/04/2019] = piv.D1,
    [01/05/2019] = piv.D2,
    [12/06/2019] = piv.D3,
    Total        = piv.D1 + piv.D2 + piv.D3
FROM
(
    SELECT r.[Name],
           MAX(CASE WHEN r.[Date] = '20190424' THEN r.Points ELSE 0 END),
           MAX(CASE WHEN r.[Date] = '20190501' THEN r.Points ELSE 0 END),
           MAX(CASE WHEN r.[Date] = '20190612' THEN r.Points ELSE 0 END)
    FROM   #Results AS r
    GROUP BY r.[Name]
) AS piv(N,D1,D2,D3);
Returns:
Name 24/04/2019 01/05/2019 12/06/2019 Total
------------------- ----------- ----------- ----------- -------
Barney Rubble 9 10 9 28
Donald Duck 6 8 7 21
Fred Flintstone 10 9 10 29
Micky Mouse 8 0 8 16
Yogi Bear 7 7 0 14
Not only does this get you what you need with very little SQL, you also benefit from pre-aggregation inside the subquery. A huge benefit of this approach over PIVOT is that you can do multiple aggregations in one query. Below are two examples of how to use this approach for multiple aggregations: the first uses a standard GROUP BY twice, the other uses window aggregate functions (... OVER (PARTITION BY ... ORDER BY ...)):
--==== Traditional Approach
-- Inner query: pivot to one row per name (N, D1..D3).
-- CROSS APPLY (VALUES ...) names the row total once (f.Ttl) so it can be reused below.
-- Outer GROUP BY: a second aggregation pass over the pivoted rows. piv already has one row per
-- name, so every aggregate here is computed over a single row.
-- Dates are yyyymmdd literals so the match does not depend on the session's DATEFORMAT.
SELECT
    [Name]       = piv.N,
    [24/04/2019] = MAX(piv.D1),
    [01/05/2019] = MAX(piv.D2),
    [12/06/2019] = MAX(piv.D3),
    Total        = MAX(f.Ttl),
    Avg1         = AVG(piv.D1), -- 1st date (24/04/2019)
    Avg2         = AVG(piv.D2), -- 2nd date...
    Avg3         = AVG(piv.D3), -- 3rd date...
    TotalAvg     = AVG(f.Ttl),
    Mn           = MIN(f.Ttl),
    Mx           = MAX(f.Ttl)
FROM
(
    SELECT r.[Name],
           MAX(CASE WHEN r.[Date] = '20190424' THEN r.Points ELSE 0 END),
           MAX(CASE WHEN r.[Date] = '20190501' THEN r.Points ELSE 0 END),
           MAX(CASE WHEN r.[Date] = '20190612' THEN r.Points ELSE 0 END)
    FROM   #Results AS r
    GROUP BY r.[Name]
) AS piv(N,D1,D2,D3)
CROSS APPLY (VALUES(piv.D1+piv.D2+piv.D3)) AS f(Ttl)
GROUP BY piv.N;
--==== Leveraging Window Aggregates
-- Same pivot as the GROUP BY version, but the extra aggregates are window functions, so no second
-- GROUP BY is needed. piv already has one row per name, so each PARTITION BY piv.N window holds
-- a single row.
-- Dates are yyyymmdd literals so the match does not depend on the session's DATEFORMAT.
SELECT
    [Name]       = piv.N,
    [24/04/2019] = piv.D1,
    [01/05/2019] = piv.D2,
    [12/06/2019] = piv.D3,
    Total        = f.Ttl,
    Avg1         = AVG(piv.D1) OVER(PARTITION BY piv.N ORDER BY (SELECT NULL)), -- 1st date (24/04/2019)
    Avg2         = AVG(piv.D2) OVER(PARTITION BY piv.N ORDER BY (SELECT NULL)), -- 2nd date...
    Avg3         = AVG(piv.D3) OVER(PARTITION BY piv.N ORDER BY (SELECT NULL)), -- 3rd date...
    TotalAvg     = AVG(f.Ttl)  OVER(PARTITION BY piv.N ORDER BY (SELECT NULL)),
    Mn           = MIN(f.Ttl)  OVER(PARTITION BY piv.N ORDER BY (SELECT NULL)),
    Mx           = MAX(f.Ttl)  OVER(PARTITION BY piv.N ORDER BY (SELECT NULL))
FROM
(
    SELECT r.[Name],
           MAX(CASE WHEN r.[Date] = '20190424' THEN r.Points ELSE 0 END),
           MAX(CASE WHEN r.[Date] = '20190501' THEN r.Points ELSE 0 END),
           MAX(CASE WHEN r.[Date] = '20190612' THEN r.Points ELSE 0 END)
    FROM   #Results AS r
    GROUP BY r.[Name]
) AS piv(N,D1,D2,D3)
CROSS APPLY (VALUES(piv.D1+piv.D2+piv.D3)) AS f(Ttl);
Both Return:
Name 24/04/2019 01/05/2019 12/06/2019 Total Avg1 Avg2 Avg3 TotalAvg Mn Mx
----------------- ----------- ----------- ----------- ------ ------ ------ ------ ---------- ------ ------
Barney Rubble 9 10 9 28 9 10 9 28 28 28
Donald Duck 6 8 7 21 6 8 7 21 21 21
Fred Flintstone 10 9 10 29 10 9 10 29 29 29
Micky Mouse 8 0 8 16 8 0 8 16 16 16
Yogi Bear 7 7 0 14 7 7 0 14 14 14
To handle the columns dynamically you need to have a look at:
Cross Tabs and Pivots, Part 2 - Dynamic Cross Tabs by Jeff Moden.

Related

How to split the column values after a certain number

I have a dataset that looks like this:
ID HoursWorked TotalHours
23 1 1
23 1 2
23 1 3
23 0.5 3.5
23 1 4.5
23 1 5.5
23 1 6.5
23 1 7.5
23 1 8.5
61 1 1
61 1 2
What I want to do is if the total hours hits 8 hours, I want to split that row (e.g. 8.5 in the sample data above) so that an employee always has the total hours of 8. If someone works over 8 hours it should continue after hitting 8 in the totalhours column. For example, I want something like this as my final result.
ID HoursWorked TotalHours
23 1 1
23 1 2
23 1 3
23 0.5 3.5
23 1 4.5
23 1 5.5
23 1 6.5
23 1 7.5
23 0.5 8 *
23 0.5 8.5 *
61 1 1
61 1 2
As you can see the row which originally had 8.5 for its totalhours got broken down into two different rows.
I couldn't think of any way to do this in SQL Server. I'd appreciate any help on this.
see if this works.
-- TotalHours is the running total including the row's own HoursWorked (see the sample data),
-- so TotalHours - HoursWorked is the total before the row.
-- UNION ALL: the four branches are disjoint, and UNION would silently drop legitimate duplicate rows.

-- 1) rows that end at or before the 8-hour mark: unchanged
select ID, HoursWorked, TotalHours
from table_name
where TotalHours <= 8
union all
-- 2) the row that crosses 8 hours: the part that brings the total up to exactly 8
select ID, HoursWorked - (TotalHours - 8) as HoursWorked, 8 as TotalHours
from table_name
where TotalHours > 8 and TotalHours - HoursWorked < 8
union all
-- 3) the same crossing row: the remainder past 8 hours
select ID, (TotalHours - 8) as HoursWorked, TotalHours
from table_name
where TotalHours > 8 and TotalHours - HoursWorked < 8
union all
-- 4) rows that start at or after the 8-hour mark: unchanged
select ID, HoursWorked, TotalHours
from table_name
where TotalHours > 8 and TotalHours - HoursWorked >= 8
order by ID, TotalHours
This seems rather complicated. This approach takes all the rows before 8 hours. It then finds the row that first passes 8 hours and splits that one as needed:
-- Rows up to and including 8 total hours pass through unchanged.
select id, hoursworked, totalhours
from t
where totalhours <= 8
union all
-- Rows past 8 hours: seqnum = 1 marks the first such row per id, which is the one to split.
select t.id, v.hoursworked, v.totalhours
from (select t.*, row_number() over (partition by id order by totalhours) as seqnum
      from t
      where totalhours > 8
     ) t cross apply
     (values -- piece 1 (first row only): the hours that bring the running total up to exactly 8;
             -- NULL for every later row, which the filter below removes
             (case when t.seqnum = 1 then t.hoursworked - (t.totalhours - 8) end,
              case when t.seqnum = 1 then 8 end
             ),
             -- piece 2: the remainder past 8 for the first row; later rows are kept as they are
             (case when t.seqnum = 1 then t.totalhours - 8 else t.hoursworked end,
              t.totalhours
             )
     ) v(hoursworked, totalhours)
-- drops the NULL pieces and a zero-length piece 1 (when the previous row already ended at exactly 8)
where v.hoursworked > 0
order by id, totalhours;
Here is a db<>fiddle.

Fill in gaps in year sequence in SQL Server

I have a table with the columns Age, Period and Year. The column Age always starts with 0 and doesn't have a fixed maximum value (I used 'Age' 0 to 30 in this example but the range could also be 0 to 100 etc.), the values Period and Year only appear in certain rows at certain ages.
However at what Age the values for Period and Year appear, changes and the solution should therefore be dynamic. What is the best way to fill in the NULL values with correct Period and Year?
I am using SQL Server.
Age Period Year
-----------------
0 NULL NULL
1 NULL NULL
2 NULL NULL
3 NULL NULL
4 NULL NULL
5 NULL NULL
6 NULL NULL
7 NULL NULL
8 NULL NULL
9 NULL NULL
10 NULL NULL
11 NULL NULL
12 NULL NULL
13 NULL NULL
14 NULL NULL
15 NULL NULL
16 NULL NULL
17 NULL NULL
18 NULL NULL
19 NULL NULL
20 NULL NULL
21 46 2065
22 NULL NULL
23 NULL NULL
24 NULL NULL
25 NULL NULL
26 51 2070
27 NULL NULL
28 NULL NULL
29 NULL NULL
30 NULL NULL
The result should look like this, the numbers for Period and Year should be increased and/or decrease from the last known values for Period and Year.
Age Period Year
-----------------
0 25 2044
1 26 2045
2 27 2046
3 28 2047
4 29 2048
5 30 2049
6 31 2050
7 32 2051
8 33 2052
9 34 2053
10 35 2054
11 36 2055
12 37 2056
13 38 2057
14 39 2058
15 40 2059
16 41 2060
17 42 2061
18 43 2062
19 44 2063
20 45 2064
21 46 2065
22 47 2066
23 48 2067
24 49 2068
25 50 2069
26 51 2070
27 52 2071
28 53 2072
29 54 2073
30 55 2074
Here is an UPDATE to my question as I didn't specify my requirement detailed enough:
The solution should be able to handle different combinations of Age, Period and Year. My start point will always be a known Age, Period and Year combination. However, the combination Age = 21, Period = 46 and Year = 2065 (or 26|51|2070 as the second combination) in my example is not static. The value at Age = 21 could be anything e.g. Period = 2 and Year = 2021. Whatever the combination (Age, Period, Year) is, the solution should fill in the gaps and finish the sequence counting up and down from the known values for Period and Year. If a Period value sequence becomes negative the solutions should return NULL values, if possible.
It seems you always have the same offset between age and period, and between age and year,
so
-- Fill the gaps with fixed offsets taken from the sample data: period = age + 25, year = age + 2044
-- (age 21 -> period 46, year 2065). Rows that already have a value keep it.
select age,
       isnull(period, age + 25)   as Period,
       isnull(year,   age + 2044) as year
from yourtable
or the standard function coalesce (as suggested by Gordon Linoff)
-- Same fill using the standard COALESCE: period = age + 25, year = age + 2044
-- (age 21 -> period 46, year 2065). Rows that already have a value keep it.
select age,
       coalesce(period, age + 25)   as Period,
       coalesce(year,   age + 2044) as year
from yourtable
Table creation code
-- Sample table from the question: ages 0..30, with Period/Year known only at ages 21 and 26.
create table yourtable ( AGE int , Period int, [Year] int );

insert into yourtable (AGE, Period, [Year])
values
    ( 0, null, null), ( 1, null, null), ( 2, null, null), ( 3, null, null), ( 4, null, null),
    ( 5, null, null), ( 6, null, null), ( 7, null, null), ( 8, null, null), ( 9, null, null),
    (10, null, null), (11, null, null), (12, null, null), (13, null, null), (14, null, null),
    (15, null, null), (16, null, null), (17, null, null), (18, null, null), (19, null, null),
    (20, null, null),
    (21,   46, 2065),  -- first known row
    (22, null, null), (23, null, null), (24, null, null), (25, null, null),
    (26,   51, 2070),  -- second known row (age 26 in the question, consistent with the first: +5 / +5 / +5)
    (27, null, null), (28, null, null), (29, null, null), (30, null, null);
**Steps **
We need to get one row with non null value for Period and year.
Using age get first value for both the column .
Now just add respective age column value and fill full table .
Code to fix the serial
-- tmp: one known (age, Period, year) row used as the anchor. ORDER BY makes the TOP (1) pick deterministic.
;with tmp as
(
    select top (1) age, Period, [year]
    from yourtable
    where Period is not null and [year] is not null
    order by age
)
-- Every row is rewritten on purpose (no WHERE): value = anchor value - anchor age + row age.
update y
set Period = case when (tmp.Period - tmp.age) + y.age >= 0   -- a negative period is reported as NULL, as the question asks
                  then (tmp.Period - tmp.age) + y.age
             end
  , [year] = (tmp.[year] - tmp.age) + y.age
from yourtable as y
cross join tmp   -- tmp has at most one row; if there is no known row, nothing is updated
OR
-- Offsets between age and Year / Period, read from one known row.
-- T-SQL variables take the @ prefix.
Declare @Year int, @Period int

-- TOP (1) ... ORDER BY age: assign from exactly one row instead of from whichever row happens to be read last.
-- The offset is value - age (not value - (age + 1)), so that offset + age reproduces the known value.
select top (1)
       @Year   = [year] - age
     , @Period = Period - age
from yourtable
where Period is not null and [year] is not null
order by age

-- Every row is rewritten on purpose (no WHERE). If no known row exists, both variables stay NULL.
update yourtable
set Period = @Period + age
  , [Year] = @Year + age
You finally want three sequences with different start values. Then you simply need to calculate an offset and add it to age:
-- cte: (value - age) is the same constant on every populated row and NULL on the gap rows,
-- so MAX(...) OVER () picks that constant up for the whole table; adding age back fills every row.
with cte as
 (
   select age
         ,max(period - age) over () + age as period -- adjusted period
         ,max(yr - age) over () + age as yr -- adjusted yr
   from #yourtable
 )
select age
       -- If a Period value sequence becomes negative the solution should return NULL.
       -- 0 is not negative, so the test is >= 0.
      ,case when period >= 0 then period end as period
      ,yr
from cte
See fiddle
-- Row-by-row fill: take one known (age, period, year) row as the origin, then walk away from it
-- one age at a time in both directions, shifting period and year by the same distance.
-- NOTE(review): a single set-based UPDATE gives the same result; the loops are kept to show the idea step by step.
declare @knownperiod int;
declare @knownyear int;
declare @knownperiodage int;
declare @agetop int;      -- smallest age in the table (first row of the listing)
declare @agebottom int;   -- largest age in the table (last row of the listing)
declare @offset int;      -- signed distance from the known row

select top (1)
       @knownperiodage = age,
       @knownperiod    = period,
       @knownyear      = [year]
from table1
where period is not null and [year] is not null
order by age;

select @agetop = min(age), @agebottom = max(age) from table1;

-- walk up the listing: ages below the known row.
-- If no known row exists the variables are NULL and neither loop runs.
set @offset = -1;
while (@knownperiodage + @offset >= @agetop)
begin
    update table1
    set period = case when @knownperiod + @offset >= 0 then @knownperiod + @offset end, -- negative period -> NULL
        [year] = @knownyear + @offset
    where age = @knownperiodage + @offset;

    set @offset = @offset - 1;
end

-- now for bottom age: ages above the known row
set @offset = 1;
while (@knownperiodage + @offset <= @agebottom)
begin
    update table1
    set period = case when @knownperiod + @offset >= 0 then @knownperiod + @offset end,
        [year] = @knownyear + @offset
    where age = @knownperiodage + @offset;

    set @offset = @offset + 1;
end
Is the process to first calculate the increments (age -> period and age -> year) then simply add those increments to the age values?
This assumes the differences between age and period, and age and year, are consistent across rows (just not filled in sometimes).
As such, you could use the following to first calculate the increments (PeriodInc, YrInc) and then select the values with the increments added (noting that if period goes negative, it gets NULL).
-- PeriodInc / YrInc: the constant offset from Age to Period and from Age to Yr,
-- each read from one row where the value is present.
;WITH PeriodInc AS
(
    SELECT TOP 1 Period - Age AS PeriodInc
    FROM #yourtable
    WHERE Period IS NOT NULL
),
YrInc AS
(
    SELECT TOP 1 Yr - Age AS YrInc
    FROM #yourtable
    WHERE Yr IS NOT NULL
)
SELECT
    t.Age,
    -- a negative period is reported as NULL
    CASE WHEN (t.Age + p.PeriodInc) < 0 THEN NULL ELSE (t.Age + p.PeriodInc) END AS Period,
    t.Age + y.YrInc AS Yr
FROM #yourtable AS t
CROSS JOIN PeriodInc AS p   -- single-row CTEs: the cross joins just attach the two offsets to every row
CROSS JOIN YrInc AS y
Here is a DB_Fiddle with the code
This solution takes 4 inputs:
@list_length -- (integer) the number of rows to generate (up to 12^5=248,832)
@start_age -- (integer) beginning age
@start_period -- (integer) beginning period
@start_year -- (integer) beginning year
For any combination of inputs this code generates the requested output. If either the Period or the Year is calculated to be negative then it is converted to NULL. The current limit on the list length could be increased to whatever is necessary. The technique of creating a row number from cross-joined rows is known to be very fast when generating large sequences. Above about 500 rows it's always faster than a recursion-based CTE. For small row counts there's little to no performance difference between the two techniques.
Here are the code and output to match the example data.
Inputs
declare
    @list_length  int = 31,    -- number of rows to generate
    @start_age    int = 21,    -- age of the known row
    @start_period int = 46,    -- period at @start_age
    @start_year   int = 2065;  -- year at @start_age

-- Code
with
-- n: twelve rows; cross joined five times below it yields up to 12^5 rows
n(n) as (select v.n from (values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12)) v(n)),
-- tally_cte: the numbers 1, 2, 3, ... generated without touching any table
tally_cte(n) as (
    select row_number() over (order by (select null))
    from n n1 cross join n n2 cross join n n3 cross join n n4 cross join n n5)
select p.Age,
       -- negative values are reported as NULL
       case when p.[Period]<0 then null else p.[Period] end [Period],
       case when p.[Year]<0 then null else p.[Year] end [Year]
from tally_cte t
cross apply
    -- Age runs from 0; Period and Year are Age shifted by their distance from @start_age
    (select (t.n-1) [Age],
            (t.n-1)+(@start_period-@start_age) [Period],
            (t.n-1)+(@start_year-@start_age) [Year]) p
where t.n<=@list_length;
Output
Age Period Year
0 25 2044
1 26 2045
2 27 2046
3 28 2047
4 29 2048
5 30 2049
6 31 2050
7 32 2051
8 33 2052
9 34 2053
10 35 2054
11 36 2055
12 37 2056
13 38 2057
14 39 2058
15 40 2059
16 41 2060
17 42 2061
18 43 2062
19 44 2063
20 45 2064
21 46 2065
22 47 2066
23 48 2067
24 49 2068
25 50 2069
26 51 2070
27 52 2071
28 53 2072
29 54 2073
30 55 2074
Suppose both the Period and the Year are less than the start Age. When the calculated values are negative the value is replaced with a NULL.
Inputs
-- Second example: the known period and year are both smaller than the known age,
-- so the computed values go negative for the lowest ages. T-SQL variables take the @ prefix.
declare
    @list_length  int = 100,
    @start_age    int = 10,
    @start_period int = 5,
    @start_year   int = 8;
Output
Age Period Year
0 NULL NULL
1 NULL NULL
2 NULL 0
3 NULL 1
4 NULL 2
5 0 3
6 1 4
7 2 5
8 3 6
9 4 7
10 5 8
11 6 9
12 7 10
...
99 94 97
Imo this is a flexible and efficient way to meet all of the requirements. Please let me know if there are any issues.
This reads like a gaps-and-islands problem, where "empty" rows are the gaps and non-empty rows are the islands.
You want to fill the gaps. Your question is a bit tricky, because you do not clearly describe how to proceed when a gap row has both preceding and following islands - and what to do if they are not consistent.
Let me assume that you want to derive the value from the following island if there is one available, and fall back on the preceding island.
Here is an approach using lateral joins to retrieve the next and preceding non-empty row:
-- For each row, look up the nearest later and the nearest earlier row that has both period and year,
-- then shift that row's values by the age distance. A row's own values win; the later neighbour is
-- preferred over the earlier one.
select
    cur.age,
    coalesce(cur.period, nxt.period - nxt.diff, prv.period - prv.diff) as period,
    coalesce(cur.year,   nxt.year   - nxt.diff, prv.year   - prv.diff) as year
from mytable as cur
outer apply (
    -- closest populated row above the current age (diff is positive)
    select top (1) up.period, up.year, up.age - cur.age as diff
    from mytable as up
    where up.age > cur.age
      and up.period is not null
      and up.year is not null
    order by up.age asc
) as nxt
outer apply (
    -- closest populated row below the current age (diff is negative, so subtracting it adds)
    select top (1) dn.period, dn.year, dn.age - cur.age as diff
    from mytable as dn
    where dn.age < cur.age
      and dn.period is not null
      and dn.year is not null
    order by dn.age desc
) as prv
order by cur.age
Actually, this would probably be more efficiently performed with window functions. We can implement the very same logic by building groups of records with window counts, then doing the computation within the groups:
-- grouped: two running counts of populated rows.
--   grp1 counts upwards by age, so a populated row shares its grp1 with the gap rows that follow it.
--   grp2 counts downwards by age, so a populated row shares its grp2 with the gap rows that precede it.
;with grouped as (
    select src.*,
           count(src.period) over(order by src.age)      as grp1,
           count(src.period) over(order by src.age desc) as grp2
    from mytable as src
)
select
    g.age,
    coalesce(
        g.period,
        -- from the following populated row: it is the highest age in the grp2 group
        max(g.period) over(partition by g.grp2) - max(g.age) over(partition by g.grp2) + g.age,
        -- otherwise from the preceding populated row: it is the lowest age in the grp1 group
        max(g.period) over(partition by g.grp1) - min(g.age) over(partition by g.grp1) + g.age
    ) as period,
    coalesce(
        g.year,
        max(g.year) over(partition by g.grp2) - max(g.age) over(partition by g.grp2) + g.age,
        max(g.year) over(partition by g.grp1) - min(g.age) over(partition by g.grp1) + g.age
    ) as year
from grouped as g
order by g.age
Demo on DB Fiddle - both queries yield:
age | period | year
--: | -----: | ---:
0 | 25 | 2044
1 | 26 | 2045
2 | 27 | 2046
3 | 28 | 2047
4 | 29 | 2048
5 | 30 | 2049
6 | 31 | 2050
7 | 32 | 2051
8 | 33 | 2052
9 | 34 | 2053
10 | 35 | 2054
11 | 36 | 2055
12 | 37 | 2056
13 | 38 | 2057
14 | 39 | 2058
15 | 40 | 2059
16 | 41 | 2060
17 | 42 | 2061
18 | 43 | 2062
19 | 44 | 2063
20 | 45 | 2064
21 | 46 | 2065
22 | 47 | 2066
23 | 48 | 2067
24 | 49 | 2068
25 | 50 | 2069
26 | 51 | 2070
27 | 52 | 2071
28 | 53 | 2072
29 | 54 | 2073
30 | 55 | 2074
Also you can use recursive CTE (it can handle any variation of data in the table except only one that has no populated period and year at all):
WITH known AS (
    -- any one populated row, reduced to its offsets from age
    SELECT TOP 1
           period - age AS period_delta,
           [year] - age AS year_delta
    FROM tablename
    WHERE period IS NOT NULL
      AND [year] IS NOT NULL
),
age_range AS (
    -- lowest and highest age present in the table
    SELECT MIN(age) AS min_age,
           MAX(age) AS max_age
    FROM tablename
),
filled AS (
    -- anchor member: the row for the lowest age
    SELECT r.min_age                AS age,
           r.min_age + k.period_delta AS period,
           r.min_age + k.year_delta   AS year,
           r.max_age
    FROM age_range AS r
    CROSS JOIN known AS k

    UNION ALL

    -- recursive member: step age, period and year by one until the highest age is reached
    SELECT f.age + 1,
           f.period + 1,
           f.year + 1,
           f.max_age
    FROM filled AS f
    WHERE f.age < f.max_age
)
SELECT age,
       period,
       [year]
FROM filled
OPTION (MAXRECURSION 0)
-- 0 removes the recursion limit. If the number of rows in this kind of table
-- has a known upper bound, use that row count here instead of 0.

Using the earliest date of a partition to determine what other dates belong to that partition

Assume this is my table:
ID DATE
--------------
1 2018-11-12
2 2018-11-13
3 2018-11-14
4 2018-11-15
5 2018-11-16
6 2019-03-05
7 2019-05-07
8 2019-05-08
9 2019-05-08
I need to have partitions be determined by the first date in the partition. Where, any date that is within 2 days of the first date, belongs in the same partition.
The table would end up looking like this if each partition was ranked
PARTITION ID DATE
------------------------
1 1 2018-11-12
1 2 2018-11-13
1 3 2018-11-14
2 4 2018-11-15
2 5 2018-11-16
3 6 2019-03-05
4 7 2019-05-07
4 8 2019-05-08
4 9 2019-05-08
I've tried using datediff with lag to compare to the previous date but that would allow a partition to be inappropriately sized based on spacing, for example all of these dates would be included in the same partition:
ID DATE
--------------
1 2018-11-12
2 2018-11-14
3 2018-11-16
4 2018-11-18
5 2018-11-20
6 2018-11-22
Previous flawed attempt:
Mark when a date is more than 2 days past the previous date:
(case when datediff(day, lag(event_time, 1) over (partition by user_id, stage order by event_time), event_time) > 2 then 1 else 0 end)
You need to use a recursive CTE for this, so the operation is expensive.
with numbered as (
      -- Add an incrementing column with no gaps. The CTE has its own name (not t) so that
      -- "from t" means the base table rather than a self-reference; id breaks ties between equal dates.
      select t.id, t.date, row_number() over (order by t.date, t.id) as seqnum
      from t
     ),
     cte as (
      -- anchor: the earliest row opens the first partition
      select id, date, date as mindate, seqnum
      from numbered
      where seqnum = 1
      union all
      -- step to the next row: it stays in the current partition while it is within 2 days of the
      -- partition's first date (mindate); otherwise it opens a new partition
      select n.id, n.date,
             (case when n.date <= dateadd(day, 2, cte.mindate)
                   then cte.mindate else n.date
              end) as mindate,
             n.seqnum
      from cte join
           numbered n
           on n.seqnum = cte.seqnum + 1
     )
-- number the partitions in order of their first date
select cte.*, dense_rank() over (order by mindate) as partition_num
from cte
-- one recursion step per row: lift the default limit of 100 levels
option (maxrecursion 0);

SQL: How do I modify this query to select unique by hour

(Looking for a better title)
Hello I have the query below
-- Reporting window (both bounds are included by BETWEEN below). T-SQL variables take the @ prefix.
Declare @cdt varchar(23)
Declare @cdt2 varchar(23)
set @cdt = '2016-01-18 00:00:00.000'
set @cdt2 = '2016-01-26 00:00:00.000'

-- One row per hour number taken from master..spt_values (type 'p', numbers 0..11);
-- each column counts the distinct agents with a call in that hour for one project.
SELECT
    spt.number AS [Hour of Day],
    (SELECT COUNT(DISTINCT AgentId)
     FROM history t2
     WHERE DATEPART(HOUR, t2.calldatetime)=spt.number
       AND projectid IN (5) and calldatetime between @cdt and @cdt2) AS [Project 5 ],
    (SELECT COUNT(DISTINCT AgentId)
     FROM history t2
     WHERE DATEPART(HOUR, t2.calldatetime)=spt.number
       AND projectid IN (124) and calldatetime between @cdt and @cdt2) AS [Project 124],
    (SELECT COUNT(DISTINCT AgentId)
     FROM history t2
     WHERE DATEPART(HOUR, t2.calldatetime)=spt.number
       AND projectid IN (576) and calldatetime between @cdt and @cdt2) AS [Project 576]
FROM master..spt_values spt
WHERE spt.number BETWEEN 0 AND 11 AND spt.type = 'p'
GROUP BY spt.number
ORDER BY spt.number
I now need to select a unique number per hour rather than a distinct amount overall.
For instance, if I run this with "select distinct(Agentid), rest of query here", it will give me a count of agentids, independent of the cases. How do I express "WHEN AGENTID is unique"?
I copied examples from the original question
Project id Datetime Agentid
---------- ----------------------- ---------
5 11-23-2015 09:00:00.000 12
5 11-23-2015 10:00:00.000 12
6 11-23-2015 11:00:00.000 12
1 11-23-2015 12:00:00.000 3
3 11-23-2015 13:00:00.000 4
124 11-23-2015 14:00:00.000 7
124 11-23-2015 15:00:00.000 9
124 11-23-2015 16:00:00.000 10
576 11-23-2015 17:00:00.000 10
576 11-23-2015 18:00:00.000 44
576 11-23-2015 19:00:00.000 69
etc 11-23-2015 20:00:00.000 23
Expected output (Ignore the incorrect counts, assume they are correct from above^):
Datetime 5 124 576
------------- --- --- ---
09:00 - 09:59 0 4 5
10:00 - 10:59 4 3 1
11:00 - 11:59 5 2 1
12:00 - 12:59 1 1 1
13:00 - 13:59 6 1 1
14:00 - 14:59 6 1 1
15:00 - 15:59 7 1 2
16:00 - 16:59 8 1 3
17:00 - 17:59 9 1 3
18:00 - 18:59 1 1 2
19:00 - 19:59 12 1 0
20:00 - 20:59 0 0 0
so far
Hour of Day Project 5 Project 124 Project 576
0 0 0 0
1 0 0 0
2 0 0 0
3 0 0 0
4 0 0 0
5 0 0 0
6 0 0 0
7 0 0 0
8 0 0 0
9 0 0 0
10 0 0 0
11 0 0 0
I'm pretty sure you need to do this with subqueries:
-- One row per hour of the day; each column counts the distinct agents seen in that hour for one project.
-- master..spt_values (type 'p') is used only as a ready-made list of numbers.
SELECT
    spt.number AS [Hour of Day],
    (SELECT COUNT(DISTINCT AgentId)
     FROM YourTable t2
     WHERE DATEPART(HOUR, t2.yourdatetime)=spt.number
       AND projectId IN (5)) AS [Project 5 ],
    (SELECT COUNT(DISTINCT AgentId)
     FROM YourTable t2
     WHERE DATEPART(HOUR, t2.yourdatetime)=spt.number
       AND projectId IN (124)) AS [Project 124],
    (SELECT COUNT(DISTINCT AgentId)
     FROM YourTable t2
     WHERE DATEPART(HOUR, t2.yourdatetime)=spt.number
       AND projectId IN (576)) AS [Project 576]
FROM master..spt_values spt
-- DATEPART(HOUR, ...) returns 0..23; stopping at 11 would leave out every afternoon and evening hour
WHERE spt.number BETWEEN 0 AND 23 AND spt.type = 'p'
GROUP BY spt.number
ORDER BY spt.number
Here is the table used by these queries:
-- Working table for the queries that select from #wt. A local temp table is used because
-- DECLARE ... TABLE only accepts an @-prefixed variable name.
CREATE TABLE #wt (
    projectid varchar(4) not null,   -- text, because the sample data contains a non-numeric value ('etc')
    edate     datetime   not null,
    agentid   int        not null );
If you want to get the counts by time and project, use this query:
-- Number of entries for every (timestamp, project) combination present in #wt.
SELECT
    w.edate,
    w.projectid,
    nentries = COUNT(*)
FROM #wt AS w
GROUP BY
    w.edate,
    w.projectid;
I haven't dealt with bucketing the dates by hour; that is a separate issue.
To get a tabular result set as you have shown:
-- One row per edate with the number of entries for each of the three listed projects.
SELECT edate, [5] AS [Project 5], [124] AS [Project 124], [576] AS [Project 576]
FROM (
    -- TRY_CAST returns NULL for anything that is not a valid int, so non-numeric project ids
    -- (such as 'etc') are skipped without a conversion error. ISNUMERIC also accepts strings
    -- like '.' or '1e5' that CAST(... AS int) rejects.
    SELECT edate, agentid, TRY_CAST(projectid AS int) AS projectid
    FROM #wt
    WHERE TRY_CAST(projectid AS int) IS NOT NULL ) AS s
PIVOT (
    -- Count a column other than the one being pivoted; agentid is NOT NULL, so this counts rows.
    COUNT(agentid)
    FOR projectid IN ([5], [124], [576])) AS p;
Here is the result set for the PIVOT query using the above data:
However, you have to specify the projects of interest in the query. If you want to have an arbitrary number of projects and get columns for each one, that is going to require dynamic SQL to construct the PIVOT query.
@Tab Alleman: I added some data to illustrate the conditions that will test your scenario. Here is the result set with the same PIVOT query:

Count parts of total value as columns per row (pivot table)

I'm stuck with a seemingly easy query, but couldn't manage to get it working the last hours.
I have a table files that holds file names and some values like records in this file, DATE of creation (create_date), DATE of processing (processing_date) and so on. There can be multiple files for a create date in different hours and it is likely that they will not get processed on the same day of creation; in fact it can even take up to three days or longer for them to get processed.
So let's assume I have these rows, as an example:
create_date | processing_date
------------------------------
2012-09-10 11:10:55.0 | 2012-09-11 18:00:18.0
2012-09-10 15:20:18.0 | 2012-09-11 13:38:19.0
2012-09-10 19:30:48.0 | 2012-09-12 10:59:00.0
2012-09-11 08:19:11.0 | 2012-09-11 18:14:44.0
2012-09-11 22:31:42.0 | 2012-09-21 03:51:09.0
What I want in a single query is to get a grouped column truncated to the day create_date with 11 additional columns for the differences between the processing_date and the create_date, so that the result should roughly look like this:
create_date | diff0days | diff1days | diff2days | ... | diff10days
------------------------------------------------------------------------
2012-09-10 | 0 2 1 ... 0
2012-09-11 | 1 0 0 ... 1
and so on, I hope you get the point :)
I have tried this and so far it works getting a single aggregated column for a create_date with a difference of - for example - 3:
-- Per creation day: the number of files whose processing day is exactly 3 days after the creation day.
SELECT
    TRUNC(f.create_date, 'DD') AS created,
    COUNT(1)
FROM files f
WHERE TRUNC(f.process_date, 'DD') - TRUNC(f.create_date, 'DD') = 3
GROUP BY
    TRUNC(f.create_date, 'DD')
I tried combining the single queries and I tried sub-queries, but that didn't help or at least my knowledge about SQL is not sufficient.
What I need is a hint so that I can include the various differences as columns, like shown above. How could I possibly achieve this?
That's basically the pivoting problem:
-- One row per creation day; each diffNdays column counts the files processed N days after creation.
-- ELSE 0 makes a column show 0 (as in the expected output) rather than NULL when no file matches.
SELECT TRUNC(f.create_date, 'DD') as created
, sum(case TRUNC(f.process_date, 'DD') - trunc(f.create_date, 'DD')
when 0 then 1 else 0 end) as diff0days
, sum(case TRUNC(f.process_date, 'DD') - trunc(f.create_date, 'DD')
when 1 then 1 else 0 end) as diff1days
, sum(case TRUNC(f.process_date, 'DD') - trunc(f.create_date, 'DD')
when 2 then 1 else 0 end) as diff2days
, ...
FROM files f
GROUP BY
TRUNC(f.create_date, 'DD')
-- Same idea with DATEDIFF. The grouping key is the creation timestamp cut down to its day, so all
-- files created on one day land in the same row. Diff0 counts files processed on the creation day.
SELECT CAST(CreateDate AS date) AS CreateDate,
sum(CASE WHEN DateDiff(day, CreateDate, ProcessDate) = 0 THEN 1 ELSE 0 END) AS Diff0,
sum(CASE WHEN DateDiff(day, CreateDate, ProcessDate) = 1 THEN 1 ELSE 0 END) AS Diff1,
sum(CASE WHEN DateDiff(day, CreateDate, ProcessDate) = 2 THEN 1 ELSE 0 END) AS Diff2,
...
FROM table
GROUP BY CAST(CreateDate AS date)
ORDER BY CAST(CreateDate AS date)
As you are using Oracle 11g you can also get desired result by using pivot query.
Here is an example:
-- Sample of data from your question, reduced to the calendar day and stored as 'YYYY-MM-DD' text
-- (the query further down converts the strings with TO_DATE).
SQL> create table Your_table(create_date, processing_date) as
2 (
3 select '2012-09-10', '2012-09-11' from dual union all
4 select '2012-09-10', '2012-09-11' from dual union all
5 select '2012-09-10', '2012-09-12' from dual union all
6 select '2012-09-11', '2012-09-11' from dual union all
7 select '2012-09-11', '2012-09-21' from dual
8 )
9 ;
Table created
-- t2 adds dif = processing day minus creation day (both converted from 'YYYY-MM-DD' text).
-- PIVOT counts the rows for each dif value 0..10 into the columns diff0..diff10.
-- NOTE(review): t2 still carries processing_date, so the pivot presumably returns one row per
-- (create_date, processing_date); the outer MAX ... GROUP BY create_date folds those into one row
-- per creation day.
SQL> with t2 as(
2 select create_date
3 , processing_date
4 , to_date(processing_date, 'YYYY-MM-DD')
- To_Date(create_date, 'YYYY-MM-DD') dif
5 from your_table
6 )
7 select create_date
8 , max(diff0) diff0
9 , max(diff1) diff1
10 , max(diff2) diff2
11 , max(diff3) diff3
12 , max(diff4) diff4
13 , max(diff5) diff5
14 , max(diff6) diff6
15 , max(diff7) diff7
16 , max(diff8) diff8
17 , max(diff9) diff9
18 , max(diff10) diff10
19 from (select *
20 from t2
21 pivot(
22 count(dif)
23 for dif in ( 0 diff0
24 , 1 diff1
25 , 2 diff2
26 , 3 diff3
27 , 4 diff4
28 , 5 diff5
29 , 6 diff6
30 , 7 diff7
31 , 8 diff8
32 , 9 diff9
33 , 10 diff10
34 )
35 ) pd
36 ) res
37 group by create_date
38 ;
Result:
Create_Date Diff0 Diff1 Diff2 Diff3 Diff4 Diff5 Diff6 Diff7 Diff8 Diff9 Diff10
--------------------------------------------------------------------------------
2012-09-10 0 2 1 0 0 0 0 0 0 0 0
2012-09-11 1 0 0 0 0 0 0 0 0 0 1