SQL COUNT by each date with multiple date fields

I have the following table and a query which is not giving the correct result.
The JOBS table has open, finish and close dates.
I need to pull the counts of opened, finished and closed jobs between the selected dates, grouped by each date and Location.
Please help me get a result like the expected result below.
+--------+-----------+------------+-----------+----------+
| JOB_id | DateOpen  | DateFinish | DateClose | Location |
+--------+-----------+------------+-----------+----------+
| 100    | 16-Dec-18 | 18-Dec-18  | 19-Dec-18 | A        |
| 101    | 16-Dec-18 | 18-Dec-18  | 19-Dec-18 | A        |
| 102    | 17-Dec-18 | 19-Dec-18  | 20-Dec-18 | C        |
| 103    | 10-Dec-18 | 11-Dec-18  | 16-Dec-18 | D        |
| 104    | 17-Dec-18 | 19-Dec-18  | 18-Dec-18 | E        |
+--------+-----------+------------+-----------+----------+
Query:
SELECT count(DateOpen) as Opened,
       count(DateFinish) as Finished,
       count(DateClose) as Closed,
       DateOpen as Date
FROM JOBS
WHERE DateOpen BETWEEN '12/16/2018' AND DATEADD(DAY, 1, '12/17/2018')
group by DateOpen
Expected Result:
+-----------+------+----------+--------+----------+
| Date      | Open | Finished | Closed | Location |
+-----------+------+----------+--------+----------+
| 16-Dec-18 | 2    | 0        | 0      | A        |
| 16-Dec-18 | 0    | 0        | 1      | D        |
| 17-Dec-18 | 1    | 0        | 0      | C        |
| 17-Dec-18 | 1    | 0        | 0      | E        |
+-----------+------+----------+--------+----------+

You could pull all open, finish and close dates into a single column and left join your jobs table with it:
DECLARE @date1 AS DATE = '2018-12-16';
DECLARE @date2 AS DATE = '2018-12-17';

WITH dates(date) AS (
    SELECT DateOpen FROM jobs
    UNION
    SELECT DateFinish FROM jobs
    UNION
    SELECT DateClose FROM jobs
)
SELECT dates.date
     , Location
     , COUNT(CASE WHEN dates.date = DateOpen   THEN 1 END) AS Opened
     , COUNT(CASE WHEN dates.date = DateFinish THEN 1 END) AS Finished
     , COUNT(CASE WHEN dates.date = DateClose  THEN 1 END) AS Closed
FROM dates
LEFT JOIN jobs ON dates.date IN (DateOpen, DateFinish, DateClose)
WHERE dates.date BETWEEN @date1 AND @date2
GROUP BY dates.date
       , Location
Result:
| date       | Location | Opened | Finished | Closed |
|------------|----------|--------|----------|--------|
| 16/12/2018 | A        | 2      | 0        | 0      |
| 16/12/2018 | D        | 0      | 0        | 1      |
| 17/12/2018 | C        | 1      | 0        | 0      |
| 17/12/2018 | E        | 1      | 0        | 0      |
Demo on DB Fiddle

You can use the query below for the desired result set.
select coalesce(t1.date, t2.date, t3.date) as date,
       t1.Opened, t2.Finished, t3.Closed,
       coalesce(t1.location, t2.location, t3.location) as location
from
(SELECT Convert(date, DateOpen) as Date, count(JobID) as Opened, location
 FROM JOBS
 WHERE DateOpen BETWEEN '12/16/2018' AND DATEADD(DAY, 1, '12/17/2018')
 group by Convert(date, DateOpen), location
) t1
FULL JOIN
(SELECT Convert(date, DateFinish) as Date, count(JobID) as Finished, location
 FROM JOBS
 WHERE DateFinish BETWEEN '12/16/2018' AND DATEADD(DAY, 1, '12/17/2018')
 group by Convert(date, DateFinish), location
) t2 ON (t1.date = t2.date and t1.location = t2.location)
FULL JOIN
(SELECT Convert(date, DateClose) as Date, count(JobID) as Closed, location
 FROM JOBS
 WHERE DateClose BETWEEN '12/16/2018' AND DATEADD(DAY, 1, '12/17/2018')
 group by Convert(date, DateClose), location
) t3 ON (t2.date = t3.date and t2.location = t3.location)

First, I would recommend that you stop storing dates as text and use a proper date data type.
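If the columns are varchar today, they can be converted in place. A minimal sketch against the question's JOBS table, assuming every stored value actually parses as a date (verify with TRY_CONVERT and take a backup first):

-- values like '16-Dec-18' convert to date under the default language settings
ALTER TABLE JOBS ALTER COLUMN DateOpen   date;
ALTER TABLE JOBS ALTER COLUMN DateFinish date;
ALTER TABLE JOBS ALTER COLUMN DateClose  date;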
To do what you need, build a list of all dates and locations by selecting each of the date columns together with the location and unioning them into one result (first CTE, allDates). Then take a distinct list of date/location pairs (second CTE, aggregated) to select from, and count the rows in your table that match the current date and location.
Here is the whole solution:
declare @JOBS table(JOB_ID int, DateOpen varchar(10), DateFinish varchar(10), DateClose varchar(10), Location varchar(5))
insert into @JOBS values
( 100, '16-Dec-18', '18-Dec-18', '19-Dec-18', 'A'),
( 101, '16-Dec-18', '18-Dec-18', '19-Dec-18', 'A'),
( 102, '17-Dec-18', '19-Dec-18', '20-Dec-18', 'C'),
( 103, '10-Dec-18', '11-Dec-18', '16-Dec-18', 'D'),
( 104, '17-Dec-18', '19-Dec-18', '18-Dec-18', 'E')

;with allDates as (
    select convert(date, DateOpen) as [Date], Location from @JOBS
    union
    select convert(date, DateFinish), Location from @JOBS
    union
    select convert(date, DateClose), Location from @JOBS
),
aggregated as (
    select [Date], Location
    from allDates
    group by [Date], Location
)
select
    a.[Date]
    , (select count(*) from @JOBS where a.[Date] = DateOpen   and a.Location = Location) [Open]
    , (select count(*) from @JOBS where a.[Date] = DateFinish and a.Location = Location) Finished
    , (select count(*) from @JOBS where a.[Date] = DateClose  and a.Location = Location) Closed
    , Location
from aggregated a
where a.[Date] between '20181216' and '20181217'

You can use a CASE expression with SUM:
SELECT Convert(date, DateOpen) as Date,
       sum(case when DateOpen   = DateOpen then 1 else 0 end) as Opened,
       sum(case when DateFinish = DateOpen then 1 else 0 end) as Finished,
       sum(case when DateClose  = DateOpen then 1 else 0 end) as Closed,
       Location
FROM JOBS
WHERE DateOpen BETWEEN '12/16/2018' AND DATEADD(DAY, 1, '12/17/2018')
group by Convert(date, DateOpen), Location
UNION
SELECT Convert(date, DateClose) as Date,
       sum(case when DateOpen   = DateClose then 1 else 0 end) as Opened,
       sum(case when DateFinish = DateClose then 1 else 0 end) as Finished,
       sum(case when DateClose  = DateClose then 1 else 0 end) as Closed,
       Location
FROM JOBS
WHERE DateClose BETWEEN '12/16/2018' AND DATEADD(DAY, 1, '12/16/2018')
group by Convert(date, DateClose), Location

DECLARE @startDate DATETIME = '12/16/2018'
DECLARE @endDate DATETIME = '12/17/2018'

SELECT
    count(CASE when DateOpen   BETWEEN @startDate AND @endDate THEN 1 end) as Opened,
    count(CASE when DateFinish BETWEEN @startDate AND @endDate THEN 1 end) as Finished,
    count(CASE when DateClose  BETWEEN @startDate AND @endDate THEN 1 end) as Closed,
    DateOpen as Date,
    Location
FROM JOBS
WHERE DateOpen BETWEEN @startDate AND @endDate
group by DateOpen, Location

You can try the following code, which uses the SUM function to count the state of the processes on the input dates:
DECLARE @date1 date = '2018-12-16'  -- input parameters, see note below
DECLARE @date2 date = '2018-12-17'

SELECT @date1 as Date,
       sum(case
               when DateOpen = @date1 then 1
               else 0
           end) as [Open],
       sum(case
               when DateFinish = @date1 then 1
               else 0
           end) as Finished,
       sum(case
               when DateClose = @date1 then 1
               else 0
           end) as Closed,
       Location
FROM JOBS
group by Location
UNION ALL
SELECT @date2 as Date,
       sum(case
               when DateOpen = @date2 then 1
               else 0
           end) as [Open],
       sum(case
               when DateFinish = @date2 then 1
               else 0
           end) as Finished,
       sum(case
               when DateClose = @date2 then 1
               else 0
           end) as Closed,
       Location
FROM JOBS
group by Location;
EDITED: @date1 and @date2 are the input parameters.

You can give unpivot a try and see how the performance is:
with cols_to_rows as (
    select *
    from t
    unpivot(col for val in (dateopen, datefinish, dateclose)) m
)
select col
      ,location
      ,count(case when val = 'dateopen'   then 1 end) as open1
      ,count(case when val = 'datefinish' then 1 end) as finish
      ,count(case when val = 'dateclose'  then 1 end) as close1
from cols_to_rows
where col between cast('2018-12-16' as date)
              and cast('2018-12-17' as date)
group by col, location, val
order by col, location
| col                 | location | open1 | finish | close1 |
|---------------------|----------|-------|--------|--------|
| 10/12/2018 00:00:00 | D        | 1     | 0      | 0      |
| 11/12/2018 00:00:00 | D        | 0     | 1      | 0      |
| 16/12/2018 00:00:00 | A        | 2     | 0      | 0      |
| 16/12/2018 00:00:00 | D        | 0     | 0      | 1      |
| 17/12/2018 00:00:00 | C        | 1     | 0      | 0      |
| 17/12/2018 00:00:00 | E        | 1     | 0      | 0      |
| 18/12/2018 00:00:00 | A        | 0     | 2      | 0      |
| 18/12/2018 00:00:00 | E        | 0     | 0      | 1      |
| 19/12/2018 00:00:00 | A        | 0     | 0      | 2      |
| 19/12/2018 00:00:00 | C        | 0     | 1      | 0      |
| 19/12/2018 00:00:00 | E        | 0     | 1      | 0      |
| 20/12/2018 00:00:00 | C        | 0     | 0      | 1      |
Here is a dbfiddle link
https://dbfiddle.uk/?rdbms=sqlserver_2012&fiddle=1ca0c0180a4d31d6e112e9f3b1b99715

Try this:
DECLARE @MinDate DATE = '12/16/2018',
        @MaxDate DATE = '12/17/2018'

DECLARE @DateTable TABLE (DateOpen DATETIME)
INSERT INTO @DateTable
SELECT TOP (DATEDIFF(DAY, @MinDate, @MaxDate) + 1)
       Date = DATEADD(DAY, ROW_NUMBER() OVER(ORDER BY a.object_id) - 1, @MinDate)
FROM sys.all_objects a
CROSS JOIN sys.all_objects b;

;with cte As
(
    SELECT DISTINCT d.DateOpen
          ,SUM(CASE WHEN j.DateOpen   = d.DateOpen THEN 1 ELSE 0 END) OVER(partition by d.DateOpen, Location) As [Open]
          ,SUM(CASE WHEN j.DateFinish = d.DateOpen THEN 1 ELSE 0 END) OVER(partition by d.DateOpen, Location) As [Finished]
          ,SUM(CASE WHEN j.DateClose  = d.DateOpen THEN 1 ELSE 0 END) OVER(partition by d.DateOpen, Location) As [Closed]
          ,Location
    FROM @DateTable d
    CROSS JOIN JOBS j
)
Select * from cte where [Open] > 0 or Finished > 0 or Closed > 0
Order by DateOpen, Location

Try this one:
WITH CTE AS
(
SELECT *
FROM
(
SELECT Location
FROM T
GROUP BY Location
) L CROSS APPLY
(
SELECT DateOpen
FROM T
WHERE DateOpen BETWEEN '2018-12-16' AND '2018-12-18'
) D
GROUP BY Location, DateOpen
),
F AS
(
SELECT *,
(SELECT COUNT(1) FROM T WHERE Location = CTE.Location AND DateOpen = CTE.DateOpen) [Open],
(SELECT COUNT(1) FROM T WHERE Location = CTE.Location AND DateFinish = CTE.DateOpen)[Finish],
(SELECT COUNT(1) FROM T WHERE Location = CTE.Location AND DateClose = CTE.DateOpen) [Close]
FROM CTE
)
SELECT DateOpen,
[Open],
[Finish],
[Close],
Location
FROM F
WHERE [Open] > 0
OR
[Finish] > 0
OR
[Close] > 0
ORDER BY DateOpen
Returns:
+---------------------+------+--------+-------+----------+
| DateOpen            | Open | Finish | Close | Location |
+---------------------+------+--------+-------+----------+
| 16/12/2018 00:00:00 | 2    | 0      | 0     | A        |
| 16/12/2018 00:00:00 | 0    | 0      | 1     | D        |
| 17/12/2018 00:00:00 | 1    | 0      | 0     | C        |
| 17/12/2018 00:00:00 | 1    | 0      | 0     | E        |
+---------------------+------+--------+-------+----------+
Demo

Related

Overlap SQL with group by

I have a question regarding overlaps in SQL. I have the following structure and data in my database:
Table A (Id = uniqueidentifier)
| Name | StartDate  | EndDate    | DaysToReceive |
-------------------------------------------------
| A    | 2019-08-26 | 2020-04-13 | 232           |
| A    | 2019-12-15 | 2020-04-11 | 119           |
| A    | 2020-03-06 | 2020-03-31 | 26            |
| B    | 2020-01-07 | 2020-01-31 | 25            |
| B    | 2020-02-11 | 2020-02-29 | 19            |
I need to get the days to receive per name: if the date ranges overlap, I need the difference between the earliest start date and the latest end date; otherwise I need the sum of the DaysToReceive column. For A the three ranges overlap, so the answer is the span from 2019-08-26 to 2020-04-13 inclusive, i.e. 232 days; for B they do not overlap, so it is 25 + 19 = 44.
I'm trying to get the result to look like this:
| Name | DaysToReceive |
------------------------
| A    | 232           |
| B    | 44            |
I have managed to put together this query, but it only works when the days overlap.
select DATEDIFF(d, MIN(t1.dt), MAX(t1.enddt)) + 1 as DaysToReceive
from (
    select distinct cp1.dt, min(cp2.dt) enddt
    from ( select StartDate as dt, Id from TableA ) cp1,
         ( select EndDate as dt, Id from TableA ) cp2
    where cp2.dt > cp1.dt and cp1.Id = cp2.Id
    group by cp1.dt
) t1, TableA t2
where t2.StartDate between t1.dt and t1.enddt
group by t1.dt, t1.enddt
Thanks in advance.
Cheers
Check this
Select [name], Case when [InRange] = 1
                    then Max(DateDiff(dd, MinStartdate, MaxEnddate) + 1)
                    Else Sum(DateDiff(dd, Startdate, Enddate) + 1)
               End as [Days]
from
(
    Select Distinct a.[name], StartDate, EndDate, MinStartdate, MaxEnddate,
           Case when (StartDate > MinStartdate and EndDate < MaxEnddate) or
                     (StartDate = MinStartdate and EndDate = MaxEnddate)
                then 1 Else 0
           End as [InRange]
    from
    (
        SELECT [name],
               Min(StartDate) AS MinStartdate, Max(EndDate) AS MaxEnddate
        FROM A
        Group By [Name]
    ) Q
    inner join A a
        on a.[name] = q.[name]
) QQ
Group By [name], [InRange]
and here is the fiddle

Psql - generate series with running total

I have the following table:
create table account_info(
    id int not null unique,
    creation_date date,
    deletion_date date,
    gather boolean)
Adding sample data to it:
insert into account_info(id,creation_date,deletion_date,gather)
values(1,'2019-09-10',null,true),
(2,'2019-09-12',null,true),
(3,'2019-09-14','2019-10-08',true),
(4,'2019-09-15','2019-09-18',true),
(5,'2019-09-22',null,false),
(6,'2019-09-27','2019-09-29',true),
(7,'2019-10-04','2019-10-17',false),
(8,null,'2019-10-20',true),
(9,'2019-10-12',null,true),
(10,'2019-10-18',null,true)
I would like to see how many accounts have been added grouped by week and how many accounts have been deleted grouped by week.
I have tried the following:
select dd, count(distinct ai.id) as created ,count(distinct ai2.id) as deleted
from generate_series('2019-09-01'::timestamp,
'2019-10-21'::timestamp, '1 week'::interval) dd
left join account_info ai on ai.creation_date::DATE <= dd::DATE
left join account_info ai2 on ai2.deletion_date::DATE <=dd::DATE
where ai.gather is true
and ai2.gather is true
group by dd
order by dd asc
This produces the following output:
+------------+---------+---------+
| dd         | Created | Deleted |
+------------+---------+---------+
| 2019-09-22 | 4       | 1       |
| 2019-09-29 | 5       | 2       |
| 2019-10-06 | 5       | 2       |
| 2019-10-13 | 6       | 3       |
| 2019-10-20 | 7       | 4       |
+------------+---------+---------+
This output shows me the running total of how many have been created and how many have been deleted.
I would like to see however something like this:
+------------+---------+---------+-------------------+-------------------+
| dd         | Created | Deleted | Total Sum Created | Total Sum Deleted |
+------------+---------+---------+-------------------+-------------------+
| 2019-09-22 | 4       | 1       | 4                 | 1                 |
| 2019-09-29 | 1       | 1       | 5                 | 2                 |
| 2019-10-06 | NULL    | NULL    | 5                 | 2                 |
| 2019-10-13 | 1       | 1       | 6                 | 3                 |
| 2019-10-20 | 1       | 1       | 7                 | 4                 |
+------------+---------+---------+-------------------+-------------------+
I get an error message when trying to sum up the created and deleted columns in psql, as I cannot nest aggregate functions.
You could just turn your existing query into a subquery and use lag() to compute the difference between consecutive records:
select
dd,
created - coalesce(lag(created) over(order by dd), 0) created,
deleted - coalesce(lag(deleted) over(order by dd), 0) deleted,
created total_sum_created,
deleted total_sum_deleted
from (
select
dd,
count(distinct ai.id) as created ,
count(distinct ai2.id) as deleted
from
generate_series(
'2019-09-01'::timestamp,
'2019-10-21'::timestamp,
'1 week'::interval
) dd
left join account_info ai
on ai.creation_date::DATE <= dd::DATE and ai.gather is true
left join account_info ai2
on ai2.deletion_date::DATE <=dd::DATE and ai2.gather is true
group by dd
) x
order by dd asc
I moved the conditions ai.gather is true and ai2.gather is true to the on side of the joins: putting these conditions in the where clause basically turns your left joins into inner joins (see the sketch after the result below).
Demo on DB Fiddle:
| dd                       | created | deleted | total_sum_created | total_sum_deleted |
| ------------------------ | ------- | ------- | ----------------- | ----------------- |
| 2019-09-01T00:00:00.000Z | 0       | 0       | 0                 | 0                 |
| 2019-09-08T00:00:00.000Z | 0       | 0       | 0                 | 0                 |
| 2019-09-15T00:00:00.000Z | 4       | 0       | 4                 | 0                 |
| 2019-09-22T00:00:00.000Z | 0       | 1       | 4                 | 1                 |
| 2019-09-29T00:00:00.000Z | 1       | 1       | 5                 | 2                 |
| 2019-10-06T00:00:00.000Z | 0       | 0       | 5                 | 2                 |
| 2019-10-13T00:00:00.000Z | 1       | 1       | 6                 | 3                 |
| 2019-10-20T00:00:00.000Z | 1       | 1       | 7                 | 4                 |
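To see the effect of moving the gather condition, compare these two minimal variants (a sketch; only the placement of the condition differs):

-- condition in ON: every generated week survives, with a zero count where
-- nothing matches
select dd, count(ai.id) as created
from generate_series('2019-09-01'::timestamp,
                     '2019-10-21'::timestamp, '1 week'::interval) dd
left join account_info ai
       on ai.creation_date::date <= dd::date and ai.gather is true
group by dd
order by dd;

-- condition in WHERE: unmatched weeks have ai.gather = NULL and are filtered
-- out, so the early empty weeks disappear (with both joins filtered this way,
-- the question's output only started at 2019-09-22)
select dd, count(ai.id) as created
from generate_series('2019-09-01'::timestamp,
                     '2019-10-21'::timestamp, '1 week'::interval) dd
left join account_info ai
       on ai.creation_date::date <= dd::date
where ai.gather is true
group by dd
order by dd;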
Another option would be to use lag() in combination with generate_series() to generate a list of date ranges. Then you can do just one join on the original table, and do conditional aggregation in the outer query:
select
dd,
count(distinct case
when ai.creation_date::date <= dd::date and ai.creation_date::date > lag_dd::date
then ai.id
end) created,
count(distinct case
when ai.deletion_date::date <= dd::date and ai.deletion_date::date > lag_dd::date
then ai.id
end) deleted,
count(distinct case
when ai.creation_date::date <= dd::date
then ai.id
end) total_sum_created,
count(distinct case
when ai.deletion_date::date <= dd::date
then ai.id
end) total_sum_deleted
from
(
select dd, lag(dd) over(order by dd) lag_dd
from generate_series(
'2019-09-01'::timestamp,
'2019-10-21'::timestamp,
'1 week'::interval
) dd
) dd
left join account_info ai on ai.gather is true
group by dd
order by dd
Demo on DB Fiddle
A lateral join and aggregation are so well suited to this problem. If you are content with the weeks in the data:
select date_trunc('week', dte) as week,
sum(is_create) as creates_in_week,
sum(is_delete) as deletes_in_week,
sum(sum(is_create)) over (order by min(v.dte)) as running_creates,
sum(sum(is_delete)) over (order by min(v.dte)) as running_deletes
from account_info ai cross join lateral
(values (ai.creation_date, 1, 0), (ai.deletion_date, 0, 1)
) v(dte, is_create, is_delete)
where v.dte is not null and ai.gather
group by week
order by week;
If you want it for a specified set of weeks:
select gs.wk,
sum(v.is_create) as creates_in_week,
sum(v.is_delete) as deletes_in_week,
sum(sum(v.is_create)) over (order by min(v.dte)) as running_creates,
sum(sum(v.is_delete)) over (order by min(v.dte)) as running_deletes
from generate_series('2019-09-01'::timestamp,
'2019-10-21'::timestamp, '1 week'::interval) gs(wk) left join
( account_info ai cross join lateral
(values (ai.creation_date, 1, 0), (ai.deletion_date, 0, 1)
) v(dte, is_create, is_delete)
)
on v.dte >= gs.wk and
v.dte < gs.wk + interval '1 week'
where dte is not null and ai.gather
group by gs.wk
order by gs.wk;
Here is a db<>fiddle.
You can generate the results you want using a series of CTEs to build up the data tables:
with dd as
(select *
from generate_series('2019-09-01'::timestamp,
'2019-10-21'::timestamp, '1 week'::interval) d),
ddl as
(select d, coalesce(lag(d) over (order by d), '1970-01-01'::timestamp) as pd
from dd),
counts as
(select d, count(distinct ai.id) as created, count(distinct ai2.id) as deleted
from ddl
left join account_info ai on ai.creation_date::DATE > ddl.pd::DATE AND ai.creation_date::DATE <= ddl.d::DATE AND ai.gather is true
left join account_info ai2 on ai2.deletion_date::DATE > ddl.pd::DATE AND ai2.deletion_date::DATE <= ddl.d::DATE AND ai2.gather is true
group by d)
select d, created, deleted,
sum(created) over (rows unbounded preceding) as "total created",
sum(deleted) over (rows unbounded preceding) as "total deleted"
from counts
order by d asc
Note that the gather condition needs to be part of the left join to avoid turning those into inner joins.
Output:
d                    created  deleted  total created  total deleted
2019-09-01 00:00:00  0        0        0              0
2019-09-08 00:00:00  0        0        0              0
2019-09-15 00:00:00  4        0        4              0
2019-09-22 00:00:00  0        1        4              1
2019-09-29 00:00:00  1        1        5              2
2019-10-06 00:00:00  0        0        5              2
2019-10-13 00:00:00  1        1        6              3
2019-10-20 00:00:00  1        1        7              4
Note this query gives the results for the week ending with d. If you want results for the week starting with d, the lag can be changed to lead. You can see this in my demo.
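A sketch of that lead() variant (only the second CTE and the join bounds change; the far-future timestamp is just a sentinel for the last week):

with dd as
  (select *
   from generate_series('2019-09-01'::timestamp,
                        '2019-10-21'::timestamp, '1 week'::interval) d),
ddl as
  (select d, coalesce(lead(d) over (order by d), '2999-01-01'::timestamp) as nd
   from dd),
counts as
  (select d, count(distinct ai.id) as created, count(distinct ai2.id) as deleted
   from ddl
   left join account_info ai  on ai.creation_date::DATE >= ddl.d::DATE AND ai.creation_date::DATE < ddl.nd::DATE AND ai.gather is true
   left join account_info ai2 on ai2.deletion_date::DATE >= ddl.d::DATE AND ai2.deletion_date::DATE < ddl.nd::DATE AND ai2.gather is true
   group by d)
select d, created, deleted,
       sum(created) over (rows unbounded preceding) as "total created",
       sum(deleted) over (rows unbounded preceding) as "total deleted"
from counts
order by d asc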
Demo on dbfiddle

Select Latest 3 records

Using SQL Server 2014. I have data that lists a Unique Identifier, a Sale Date and a Sale Price. I would like to extract into a VIEW the last 3 sales for each unique Id.
Example of data:
+------+-----------+------------+-----------+
| ID   | UNIQUE_ID | SaleDate   | SalePrice |
+------+-----------+------------+-----------+
| 8210 | 1-5       | 2015-09-29 | 0         |
| 8211 | 1-6       | 2016-11-01 | 485672    |
| 8212 | 1-7       | 1994-06-24 | 120000    |
| 8213 | 1-1       | 1996-09-06 | 170000    |
| 8214 | 1-1       | 2000-01-28 | 265000    |
| 8215 | 1-1       | 2013-10-02 | 305000    |
| 8216 | 1-1       | 2015-11-20 | 1425000   |
| 8217 | 1-3       | 1994-01-12 | 1         |
| 8218 | 1-3       | 2001-04-30 | 1         |
| 8219 | 1-3       | 2004-09-30 | 0         |
+------+-----------+------------+-----------+
The result in the view would list each Unique ID and then 6 fields:
SaleDate1
SalePrice1
SaleDate2
SalePrice2
SaleDate3
SalePrice3
Any hints appreciated.
You can use row_number():
SELECT t.*
FROM (SELECT t.*,
ROW_NUMBER() OVER (PARTITION BY UNIQUE_ID ORDER BY SaleDate DESC, SalePrice DESC) AS Seq
FROM table t
) t
WHERE Seq <= 3;
You can use a window function to filter data and then conditional aggregation to get the 6 columns you need:
declare @tmp table(ID int, UNIQUE_ID varchar(50), SaleDate date, SalePrice int)
insert into @tmp values
 (8210, '1-5', '2015-09-29', 0      )
,(8211, '1-6', '2016-11-01', 485672 )
,(8212, '1-7', '1994-06-24', 120000 )
,(8213, '1-1', '1996-09-06', 170000 )
,(8214, '1-1', '2000-01-28', 265000 )
,(8215, '1-1', '2013-10-02', 305000 )
,(8216, '1-1', '2015-11-20', 1425000)
,(8217, '1-3', '1994-01-12', 1      )
,(8218, '1-3', '2001-04-30', 1      )
,(8219, '1-3', '2004-09-30', 0      )

SELECT t.UNIQUE_ID
      ,max(case when t.Seq = 1 then SaleDate else null end) as SaleDate1
      ,sum(case when t.Seq = 1 then SalePrice else null end) as SalePrice1
      ,max(case when t.Seq = 2 then SaleDate else null end) as SaleDate2
      ,sum(case when t.Seq = 2 then SalePrice else null end) as SalePrice2
      ,max(case when t.Seq = 3 then SaleDate else null end) as SaleDate3
      ,sum(case when t.Seq = 3 then SalePrice else null end) as SalePrice3
FROM (SELECT x.*,
             ROW_NUMBER() OVER (PARTITION BY UNIQUE_ID
                                ORDER BY SaleDate DESC, SalePrice DESC) AS Seq
      FROM @tmp x
     ) t
WHERE t.Seq < 4
group by t.UNIQUE_ID
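Since the question asks for a VIEW, the same conditional aggregation can be wrapped up directly. A sketch, where the base table name SalesTable and the view name LastThreeSales are placeholders for your real objects:

CREATE VIEW LastThreeSales
AS
SELECT t.UNIQUE_ID
      ,max(case when t.Seq = 1 then SaleDate  end) as SaleDate1
      ,sum(case when t.Seq = 1 then SalePrice end) as SalePrice1
      ,max(case when t.Seq = 2 then SaleDate  end) as SaleDate2
      ,sum(case when t.Seq = 2 then SalePrice end) as SalePrice2
      ,max(case when t.Seq = 3 then SaleDate  end) as SaleDate3
      ,sum(case when t.Seq = 3 then SalePrice end) as SalePrice3
FROM (SELECT x.*,
             ROW_NUMBER() OVER (PARTITION BY UNIQUE_ID
                                ORDER BY SaleDate DESC, SalePrice DESC) AS Seq
      FROM SalesTable x  -- placeholder table name
     ) t
WHERE t.Seq < 4
GROUP BY t.UNIQUE_ID;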
The following query returns the 3 most recent sold rows of each item:
select * from
(
    select UNIQUE_ID, SaleDate, SalePrice,
           rank() over (partition by UNIQUE_ID order by SaleDate desc) as rnk
    from salestable
) t
where rnk < 4

Select records from database in specific date period

Ok I have this example table:
+-------+--------+-----------+
| users | groups | startDate |
+-------+--------+-----------+
| Foo   | A      | 1 Aug 18  |
| Foo   | B      | 1 Jan 18  |
| Boo   | C      | 1 Jan 18  |
| Doo   | B      | 1 Jan 18  |
| Loo   | B      | 1 Sep 18  |
+-------+--------+-----------+
and I want to select (group B) users whose startDate is not later than today and who have no record for any other group with a more recent startDate that is also not later than today, so the correct result should be:
+-------+--------+-----------+
| users | groups | startDate |
+-------+--------+-----------+
| Doo   | B      | 1 Jan 18  |
+-------+--------+-----------+
I tried the following code but didn't get what I need:
DECLARE @StartDate date = '2018-08-01'
DECLARE @GroupID varchar(1) = 'B';

WITH CurrentUsers AS (
    SELECT users, groups, startDate,
           ROW_NUMBER() OVER(PARTITION BY users
                             ORDER BY CASE WHEN startDate > @StartDate THEN 0 ELSE 1 END,
                                      ABS(DATEDIFF(DAY, @StartDate, startDate)) ASC) AS RowNum
    FROM usersTable
)
SELECT users
FROM CurrentUsers
WHERE groups = @GroupID AND RowNum = 1
If I understand correctly, you seem to want:
select users
from currentusers cu
group by users
having sum(case when groups = @GroupID then 1 else 0 end) > 0 and -- in Group B
       max(startdate) < @StartDate;
EDIT:
The above is based on a misunderstanding. You want people whose active group as of today is the given group. I think you want:
WITH CurrentUsers AS (
    SELECT users, groups, startDate,
           ROW_NUMBER() OVER (PARTITION BY users
                              ORDER BY startDate DESC
                             ) as seqnum
    FROM usersTable
    WHERE startDate <= @StartDate
)
SELECT users
FROM CurrentUsers
WHERE groups = @GroupID AND seqnum = 1;

doing a simple pivot on year

I have a table:
+----+-------+------+
| id | times | year |
+----+-------+------+
| 5  | 2     | 2008 |
| 6  | 76    | 2008 |
| 2  | 43    | 2009 |
| 4  | 5     | 2009 |
| 1  | 3     | 2010 |
| 9  | 6     | 2010 |
| 7  | 444   | 2011 |
| 8  | 3     | 2011 |
| 3  | 65    | 2012 |
+----+-------+------+
I would like to create a pivot out of this table which buckets times per year:
+--------+------+------+------+------+------+
|        | 2008 | 2009 | 2010 | 2011 | 2012 |
+--------+------+------+------+------+------+
| 0      |      |      |      |      |      |
| 1-30   | 1    | 1    | 2    | 1    |      |
| 31-60  |      | 1    |      |      |      |
| 61-90  | 1    |      |      |      | 1    |
| 91-120 |      |      |      |      |      |
| 121+   |      |      |      | 1    |      |
+--------+------+------+------+------+------+
How do I start to tackle this challenge with SQL? Thank you so much for your guidance.
You can use the SQL Server PIVOT function for this. If you know all of the values for the years as well as the buckets then you can hard-code the query:
select *
from
(
select
case
when times = 0 then '0'
when times >= 1 and times <=30 then '1-30'
when times >= 31 and times <=60 then '31-60'
when times >= 61 and times <=90 then '61-90'
when times >= 91 and times <=120 then '91-120'
else '121+' end bucket,
year
from yourtable
) src
pivot
(
count(year)
for year in ([2008], [2009], [2010], [2011], [2012])
) piv;
See SQL Fiddle with Demo
If you don't have access to the PIVOT function then you can use an aggregate function with a CASE:
select bucket,
sum(case when year = 2008 then 1 else 0 end) [2008],
sum(case when year = 2009 then 1 else 0 end) [2009],
sum(case when year = 2010 then 1 else 0 end) [2010],
sum(case when year = 2011 then 1 else 0 end) [2011],
sum(case when year = 2012 then 1 else 0 end) [2012]
from
(
select
case
when times = 0 then '0'
when times >= 1 and times <=30 then '1-30'
when times >= 31 and times <=60 then '31-60'
when times >= 61 and times <=90 then '61-90'
when times >= 91 and times <=120 then '91-120'
else '121+' end bucket,
year
from yourtable
) src
group by bucket
See SQL Fiddle with Demo
If you need all of the buckets to be listed, then you will want to have the bucket ranges stored in either a table or a CTE; then you can use the following:
;with buckets(startbucket, endbucket, rnk) as
(
select 0, 0, 1
union all
select 1, 30, 2
union all
select 31, 60, 3
union all
select 61, 90, 4
union all
select 91, 120, 5
union all
select 121, null, 6
)
select
case when startbucket = 0 then '0'
when endbucket is null then cast(startbucket as varchar(50)) + '+'
else cast(startbucket as varchar(50)) + '-'+cast(endbucket as varchar(50)) end buckets,
[2008], [2009], [2010], [2011], [2012]
from
(
select rnk,
year,
startbucket,
endbucket
from buckets b
left join yourtable t
on t.times >= b.startbucket and t.times <= coalesce(b.endbucket, 100000)
) src
pivot
(
count(year)
for year in ([2008], [2009], [2010], [2011], [2012])
) piv;
See SQL Fiddle with Demo
Result:
| BUCKETS | 2008 | 2009 | 2010 | 2011 | 2012 |
----------------------------------------------
| 0       | 0    | 0    | 0    | 0    | 0    |
| 1-30    | 1    | 1    | 2    | 1    | 0    |
| 31-60   | 0    | 1    | 0    | 0    | 0    |
| 61-90   | 1    | 0    | 0    | 0    | 1    |
| 91-120  | 0    | 0    | 0    | 0    | 0    |
| 121+    | 0    | 0    | 0    | 1    | 0    |
The above will work great if you have a known number of values (years) that you need to transpose. If you have an unknown number then you will want to implement dynamic sql, similar to this:
DECLARE @cols AS NVARCHAR(MAX),
        @query AS NVARCHAR(MAX)

select @cols = STUFF((SELECT distinct ',' + QUOTENAME(year)
                      from yourtable
                      FOR XML PATH(''), TYPE
                     ).value('.', 'NVARCHAR(MAX)')
                     , 1, 1, '')
set @query = 'with buckets(startbucket, endbucket, rnk) as
(
  select 0, 0, 1
  union all
  select 1, 30, 2
  union all
  select 31, 60, 3
  union all
  select 61, 90, 4
  union all
  select 91, 120, 5
  union all
  select 121, null, 6
)
select
  case when startbucket = 0 then ''0''
       when endbucket is null then cast(startbucket as varchar(50)) + ''+''
       else cast(startbucket as varchar(50)) + ''-'' + cast(endbucket as varchar(50)) end buckets,
  ' + @cols + '
from
(
  select rnk,
         year,
         startbucket,
         endbucket
  from buckets b
  left join yourtable t
    on t.times >= b.startbucket and t.times <= coalesce(b.endbucket, 100000)
) src
pivot
(
  count(year)
  for year in (' + @cols + ')
) piv;'

execute(@query)
See SQL Fiddle with Demo
The result will be the same for both the static (hard-coded) version and the dynamic version.
Darn it! Bluefeet beat me to it. My attempt is similar but uses a table to configure the buckets.
CREATE TABLE Bucket
(
id int,
minbound int,
maxbound int
)
INSERT INTO Bucket VALUES(1, 0, 30)
,(2, 31, 60)
,(3, 61, 90)
,(4, 91, 120)
,(5, 121, null)
Then one can calculate the bucket for each record in a CTE like so....
;WITH RecordBucket
AS
(
SELECT
r.*,
b.id as bucketid
FROM
Record r
INNER JOIN Bucket b ON r.times BETWEEN b.minbound and ISNULL(b.maxbound, 20000000)
)
...and outer join back to the buckets for the final query to allow ordering and empty buckets to be included:
select
b.id as BucketId,
CASE
WHEN b.maxbound IS NULL THEN CONVERT(VARCHAR(16), b.minbound) + '+'
ELSE CONVERT(VARCHAR(16), b.minbound) + ' - ' + CONVERT(VARCHAR(16), b.maxbound)
END as BucketName,
[2008],[2009],[2010],[2011]
from
Bucket b
LEFT JOIN
(
SELECT
bucketid,
times,
year
from
RecordBucket
) rb
pivot (count(times) for year in ([2008],[2009],[2010],[2011]))
as pvt ON b.id = pvt.bucketid
order by
bucketid