SQL self join pairwise - sql

Suppose I have a table consisting of entries like
ID Arrival Date Arrival City Departure Date Departure City
1 Jun 27 2015 Berlin Jun 20 2015 Paris
1 Jul 1 2015 Rome Jun 29 2015 Berlin
1 Jul 30 2015 Vienna Jul 15 2015 Rome
2 Jun 28 2015 Prague Jun 23 2015 Vienna
2 Jul 1 2015 Rome Jun 29 2015 Prague
2 Jul 30 2015 Vienna Jul 15 2015 Moscow
...
and for each ID I want to join this data on itself such that observations with subsequent Departure Date and Arrival Date are grouped pairwise - i.e. a departure is paired with the previous arrival for each ID.
In the example above (where the observations are sorted for convenience) the 2nd row would be appended to the 1st, the 3rd to the 2nd, the 5th to the 4th and the 6th to the 5th (thus producing 4 rows with fields ID Arrival Date Arrival City Departure Date Departure City Arrival Date2 Arrival City2 Departure Date2 Departure City2).
There could potentially be more than three departures for each ID so a general approach is required. Also please note that there can be holes in the data where Arrival City and Departure City does not match - e.g. the Arrival City of the 5th row is not the Departure City of the 6th row but they should still be merged. In fact a major goal is to get a better view of how many holes there are in the data.

A solution is to use a CTE and consider that the difference between two consecutive rows (identified by the rowno) is 1 all the time (and also consider the dates):
-- Pair each trip with the trip that directly follows it for the same ID.
-- Rows are numbered once over the whole table (by ID, then arrival date), so
-- two consecutive trips of one traveller always have row numbers that differ by 1.
;WITH numbered_trips AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY src.ID, src.arrivalDate) AS rownum,
        src.ID,
        src.arrivalDate,
        src.arrivalCity,
        src.departureDate,
        src.departureCity
    FROM #test AS src
)
SELECT *
FROM numbered_trips AS prev_trip
INNER JOIN numbered_trips AS next_trip
    ON next_trip.ID = prev_trip.ID
   AND next_trip.rownum = prev_trip.rownum + 1
   -- only keep pairs where the follow-up departure is after the earlier arrival
   AND prev_trip.arrivalDate < next_trip.departureDate
GO
-- structure of the #test table
-- Temp table holding one row per trip leg: where/when the traveller arrived
-- and where/when they had departed from.
CREATE TABLE #test
(
    ID            INT,
    arrivalDate   DATE,
    arrivalCity   VARCHAR(30),
    departureDate DATE,
    departureCity VARCHAR(30)
)
SQL fiddle here: SQLFiddle

Try this:
-- For every trip b, attach the trip a of the same id whose departure date is the
-- earliest one strictly after b's arrival date.
-- The un-suffixed columns come from the later trip (a); the *_2 columns come from
-- the earlier trip (b).
-- Fix: the lookup subquery now reads the same table (triptable) as the outer query;
-- it previously referenced a differently named table.
SELECT a.id
    ,a.arrival_date
    ,a.arrival_city
    ,a.departure_date
    ,a.departure_city
    ,b.arrival_date arrival_date_2
    ,b.arrival_city arrival_city_2
    ,b.departure_date departure_date_2
    ,b.departure_city departure_city_2
FROM triptable a
JOIN triptable b ON a.id = b.id
    AND a.departure_date = (
        SELECT min(x.departure_date)
        FROM triptable x
        WHERE x.departure_date > b.arrival_date
          AND x.id = b.id)
Edited based on your comment to:
find the record with the earliest departure date after the previous record's
arrival date, and
ignore the fact that the sample data's 6th record has a different
departure city than the 5th record's arrival city.

Not entirely sure what result set you're looking for, but I thought I'd give this a shot and see if any of this helps you out.
-- Build the sample data (#t1) and a flagged copy (#t2).
-- The drops are guarded so the script works on the first run (when #t1 does not
-- exist yet) and on reruns (when #t2 is still around from SELECT ... INTO).
if object_id('tempdb..#t1') is not null drop table #t1
if object_id('tempdb..#t2') is not null drop table #t2
create table #t1 (id int, ArrivalDate datetime, ArrivalCity varchar(50), Departuredate datetime, DepartureCity varchar(50))
insert into #t1 (id, ArrivalDate, ArrivalCity, Departuredate, DepartureCity)
values (1, 'Jun 27 2015', 'Berlin', 'Jun 20 2015','Paris'),
(1, 'Jul 1 2015', 'Rome','Jun 29 2015','Berlin'),
(1, 'Jul 30 2015', 'Vienna','Jul 15 2015','Rome'),
(2, 'Jun 28 2015','Prague','Jun 23 2015','Vienna'),
(2, 'Jul 1 2015','Rome','Jun 29 2015','Prague'),
(2, 'Jul 30 2015','Vienna','Jul 15 2015','Moscow')
-- PairID starts as 1 when the next row (same id, by ArrivalDate) departs from this
-- row's arrival city, or when there is no next row; otherwise 0 (a gap).
select *,
    case when lead(departurecity) over (partition by id order by Arrivaldate) = ArrivalCity
           or lead(departurecity) over (partition by id order by Arrivaldate) is null
         then 1 else 0 end as PairID
into #t2
from #t1
-- Replace the 1 flag with the row's id; rows flagged 0 are left untouched.
update #t2
set PairID = id
where pairid != id
and pairid != 0
This is the code to start up..
select * from #t2
will result in:
id ArrivalDate ArrivalCity Departuredate DepartureCity PairID
1 2015-06-27 Berlin 2015-06-20 Paris 1
1 2015-07-01 Rome 2015-06-29 Berlin 1
1 2015-07-30 Vienna 2015-07-15 Rome 1
2 2015-06-28 Prague 2015-06-23 Vienna 2
2 2015-07-01 Rome 2015-06-29 Prague 0
2 2015-07-30 Vienna 2015-07-15 Moscow 2
Any location where the pair id = 0 ... you have a gap/baddata however you want to put it..
You could also:
-- Adds to each #t2 row the departure city and date of the FOLLOWING row of the same ID
-- (rows ordered by ArrivalDate). LEAD() looks ahead, so despite the "Previous..." alias
-- names these values come from the next row, and the NULLs fall on the last row per ID.
select *, lead(departurecity) over (partition by ID order by ArrivalDate) as PreviousDepartureCity, lead(Departuredate) over (partition by ID order by ArrivalDate) as PreviousDepartureDate from #t2
This will add previous departure city and date.. and you can do what you want with the nulls.. they will signify the first flight.. or a gap if the subsequent pair id = 0 ...
The select options become endless.... if null and lag(pairid) = 0 then you have the row with the gap.. if null and pair id = id.. and lag(pairid) = id then you have your first flight..
I mean I can keep going and give you more details, but I'm not sure this is what you're looking for. Hope it helped anyway.
Good luck!
P.S Didn't see why you needed to join the table to itself .. maybe I missed the whole point..lol..sorry if that's the case..

It sounds to me like you're wanting to pivot the results and put the results in additional columns. I used ROW_NUMBER() for the ordering. I concatenated the columns in a row prior to the pivot, pivoted, then used a function to reverse the concatenation.
-- Pivots each ID's trips into columns. Only the first trip ([1]) is unpacked into
-- named columns here; the trailing * also returns ID and the raw packed strings [1]..[5].
SELECT
p.ID,
-- dbo.SplitString(value, delimiter, n) returns the n-th delimited piece of the packed string
dbo.SplitString(p.[1], CHAR(13), 1) AS arrivalDate1,
dbo.SplitString(p.[1], CHAR(13), 2) AS arrivalCity1,
dbo.SplitString(p.[1], CHAR(13), 3) AS departureDate1,
dbo.SplitString(p.[1], CHAR(13), 4) AS departureCity1,
*
FROM
(
SELECT *
FROM
(
-- One row per trip: its position within the ID (by arrivalDate) and the four trip
-- fields packed into a single CHAR(13)-separated string so PIVOT can carry them together.
SELECT
ID,
ROW_NUMBER() OVER (PARTITION BY ID ORDER BY arrivalDate) RowNum,
CAST(arrivalDate AS VARCHAR(MAX)) + CHAR(13)
+ arrivalCity + CHAR(13)
+ CAST(departureDate AS VARCHAR(MAX)) + CHAR(13)
+ departureCity TripDetails
FROM trip t
) t
-- Each RowNum has exactly one TripDetails per ID, so MIN just passes that value through.
-- The IN list is fixed: extend it to support more than five trips per ID.
PIVOT (MIN(t.TripDetails) FOR t.RowNum IN ([1], [2], [3], [4], [5] /* , ... */)) p
) p;
using this SplitString function
-- Returns the @occurence-th (1-based) piece of @stringToSplit when split on @delim,
-- or NULL when the string has fewer pieces than requested.
--   @stringToSplit  text to split
--   @delim          delimiter; may be longer than one character
--   @occurence      1-based index of the piece to return
-- Fixes: parameters and variables use the @ sigil required by T-SQL; pieces are no
-- longer truncated to 255 characters; the scan skips the full delimiter length
-- instead of assuming a one-character delimiter.
CREATE FUNCTION dbo.SplitString (
    @stringToSplit VARCHAR(MAX),
    @delim VARCHAR(255),
    @occurence INT )
RETURNS VARCHAR(MAX) AS
BEGIN
    -- DATALENGTH (not LEN) so a delimiter made of or ending in spaces is measured correctly
    DECLARE @delimLen INT = DATALENGTH(@delim);
    DECLARE @pos INT;
    DECLARE @orderNum INT = 0;

    SET @pos = CHARINDEX(@delim, @stringToSplit);
    WHILE @pos > 0
    BEGIN
        SET @orderNum = @orderNum + 1;
        IF @orderNum = @occurence
        BEGIN
            RETURN SUBSTRING(@stringToSplit, 1, @pos - 1);
        END
        -- Drop the consumed piece and its delimiter; an over-long length simply means "to the end"
        SET @stringToSplit = SUBSTRING(@stringToSplit, @pos + @delimLen, DATALENGTH(@stringToSplit));
        SET @pos = CHARINDEX(@delim, @stringToSplit);
    END

    -- Whatever is left after the last delimiter is the final piece
    SET @orderNum = @orderNum + 1;
    IF @orderNum = @occurence
    BEGIN
        RETURN @stringToSplit;
    END
    RETURN NULL;
END

This should work:
-- Number the rows of each id by date, then join every row to the row numbered one higher.
-- (`table` and `date` are the placeholders used by the original snippet.)
WITH cte AS (
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY date) AS rn
    FROM table
)
SELECT *
FROM cte AS c1
INNER JOIN cte AS c2
    ON c2.id = c1.id
   AND c2.rn = c1.rn + 1

try this,
-- Sample data in a table variable (table variables need the @ sigil).
declare @t table(ID int,ArrivalDate datetime, ArrivalCity varchar(50)
,DepartureDate datetime,DepartureCity varchar(50))
insert into @t (ID, ArrivalDate, ArrivalCity, DepartureDate, DepartureCity) values
(1, 'Jun 27 2015', 'Berlin', 'Jun 20 2015', 'Paris ')
,(1, 'Jul 1 2015 ', 'Rome ', 'Jun 29 2015', 'Berlin ')
,(1, 'Jul 30 2015', 'Vienna', 'Jul 15 2015', 'Rome ')
,(2, 'Jun 28 2015', 'Prague', 'Jun 23 2015', 'Vienna ')
,(2, 'Jul 1 2015 ', 'Rome ', 'Jun 29 2015', 'Prague ')
,(2 , 'Jul 30 2015', 'Vienna', 'Jul 15 2015', 'Moscow ')
-- Number each ID's trips by arrival date, then attach the next trip of the SAME ID.
-- Fix: the join now also matches on ID; without it the last trip of one ID was paired
-- with the first trip of the next ID.
;WITH CTE
AS (
    SELECT *
        ,ROW_NUMBER() OVER (
            PARTITION BY ID
            ORDER BY arrivaldate
            ) rn
    FROM @t
    )
SELECT a.arrivaldate
    ,a.arrivalcity
    ,a.DepartureDate
    ,a.DepartureCity
    -- second trip columns get distinct names so the result set has no duplicate column names
    ,b.arrivaldate AS arrivaldate2
    ,b.arrivalcity AS arrivalcity2
    ,b.DepartureDate AS DepartureDate2
    ,b.DepartureCity AS DepartureCity2
FROM CTE a
-- LEFT JOIN keeps the last trip of each ID, with NULLs in the *2 columns
LEFT JOIN CTE b ON b.ID = a.ID
    AND b.rn = a.rn + 1

Related

Rolling total in SQL that Resets to 0 when going above 90

First time post. Learning SQL over the past 6 months so help is appreciated. I have data structured as below:
-- Sample visits: one row per visit, DateRank gives the chronological position.
-- Fix: a table variable must be declared with the @ sigil.
DECLARE @tmp4 as TABLE (
    AccountNumber int,
    Date date,
    DateRank int
)
INSERT INTO @tmp4 (AccountNumber, Date, DateRank)
VALUES (001, '11/13/2018' , 1)
, (002, '12/19/2018', 2)
, (003, '1/23/2019' , 3)
, (004, '2/5/2019' , 4)
, (005, '3/10/2019' , 5)
, (006, '3/20/2019' , 6)
, (007, '4/8/2019' , 7)
, (008, '5/20/2019' , 8)
What I need to do with this data is calculate a rolling total that resets to 0 once a threshold of 90 days is reached. I have used the DateDiff function to calculate the DateDiffs between consecutive dates and have tried multiple things using LAG and other window functions but can't make it reset. The goal is to find "index visits" which can only occur once every 90 days. So my plan is to have a field that reads 0 on the first visit and resets to 0 for the next stay after 90 days is up from the first visit then only pull visits with a value of 0.
One solution I tried was correct for most sets but did not return the right values for the above set (rows 4 and 8 should start over as "index visits").
The results I would expect for this query would be:
Account Date DateRank RollingTotal
001 |'11/13/2018' | 1 | 0
002 |'12/19/2018' | 2 | 35
003 |'1/23/2019' | 3 | 71
004 |'2/5/2019' | 4 | 84
005 |'3/10/2019' | 5 | 0 (not 117)
006 |'3/20/2019' | 6 | 10
007 |'4/8/2019' | 7 | 29
008 |'5/20/2019' | 8 | 71
Thanks for any help.
Here's the code I tried:
-- Asker's attempt (does not produce the wanted reset): stage per-visit day gaps and a
-- cumulative total, then try to derive RollingTotal from them.
-- NOTE(review): DECLARE takes an @-prefixed name; `#tmp2` is presumably a mangled
-- `@tmp2` table variable - confirm. `#tmp` (the source table) is not defined in this snippet.
DECLARE #tmp2 as TABLE
(EmrNumber varchar(255)
, AdmitDateTime datetime
, DateRank int
, LagDateDiff int
, RunningTotal int
)
INSERT INTO #tmp2
SELECT tmp1.EmrNumber
, tmp1.AdmitDateTime
, tmp1.DateRank
--, LAG(tmp1.AdmitDateTime) OVER(PARTITION BY tmp1.EmrNumber ORDER BY tmp1.DateRank) as NextAdmitDate
-- Days since the previous admit of the same EmrNumber (NULL on the first row);
-- DATEDIFF from current to previous is negative, hence the leading minus.
, -DATEDIFF(DAY, tmp1.AdmitDateTime, LAG(tmp1.AdmitDateTime) OVER(PARTITION BY tmp1.EmrNumber ORDER BY tmp1.DateRank)) LagDateDiff
-- Sum of those day gaps over all rows of the same EmrNumber up to and including the
-- current admit; IIF turns a NULL sum into 0. The subquery is written out twice.
, IIF((SELECT SUM(sumt.total)
FROM (
SELECT -DATEDIFF(DAY, tmpsum.AdmitDateTime, LAG(tmpsum.AdmitDateTime) OVER(PARTITION BY tmpsum.EmrNumber ORDER BY tmpsum.DateRank)) total
FROM #tmp tmpsum
WHERE tmp1.EmrNumber = tmpsum.EmrNumber
AND tmpsum.AdmitDateTime <= tmp1.AdmitDateTime
) sumt) IS NULL, 0, (SELECT SUM(sumt.total)
FROM (
SELECT -DATEDIFF(DAY, tmpsum.AdmitDateTime, LAG(tmpsum.AdmitDateTime) OVER(PARTITION BY tmpsum.EmrNumber ORDER BY tmpsum.DateRank)) total
FROM #tmp tmpsum
WHERE tmp1.EmrNumber = tmpsum.EmrNumber
AND tmpsum.AdmitDateTime <= tmp1.AdmitDateTime
) sumt) ) as RunningTotal
FROM #tmp tmp1
-- RollingTotal is 0 only when a single gap exceeds 90 days or the cumulative total is 0;
-- nothing here subtracts the days accumulated before an earlier reset.
SELECT *
, CASE WHEN LagDateDiff >90 THEN 0
WHEN RunningTotal = 0 THEN 0
ELSE LAG(LagDateDiff) OVER(PARTITION BY EmrNumber ORDER BY DateRank) + RunningTotal END AS RollingTotal
FROM #tmp2
You need a recursive query for this, because the running total has to be checked iteratively, row after row:
-- Rolling day count that restarts at 0 whenever it would exceed 90.
-- Reads the @tmp4 table variable (AccountNumber, Date, DateRank) and must run in
-- the same batch as its declaration.
-- Fixes: the column is AccountNumber (there is no Account column); the table
-- variable uses the @ sigil; the default recursion cap of 100 levels is lifted so
-- more than 100 visits can be processed.
with cte as (
    -- anchor: the first visit starts the count at 0
    select
        t.AccountNumber,
        t.Date,
        t.DateRank,
        0 as RollingTotal
    from @tmp4 t
    where t.DateRank = 1
    union all
    -- recursive step: add the days since the previous visit, or reset to 0 past 90
    select
        t.AccountNumber,
        t.Date,
        t.DateRank,
        case when c.RollingTotal + datediff(day, c.Date, t.Date) > 90
             then 0
             else c.RollingTotal + datediff(day, c.Date, t.Date)
        end
    from cte c
    inner join @tmp4 t on t.DateRank = c.DateRank + 1
)
select * from cte
option (maxrecursion 0)
The anchor of the CTE selects the first record (as indicated by DateRank). Then, the recursive part processes rows one by one, and resets the running count when it crosses 90.

Fill up date gap by month

I have table of products and their sales quantity in months.
Product Month Qty
A 2018-01-01 5
A 2018-02-01 3
A 2018-05-01 5
B 2018-08-01 10
B 2018-10-01 12
...
I'd like to first fill in the data gap between each product's min and max dates like below:
Product Month Qty
A 2018-01-01 5
A 2018-02-01 3
A 2018-03-01 0
A 2018-04-01 0
A 2018-05-01 5
B 2018-08-01 10
B 2018-09-01 0
B 2018-10-01 12
...
Then I would need to perform an accumulation of each product's sales quantity by month.
Product Month total_Qty
A 2018-01-01 5
A 2018-02-01 8
A 2018-03-01 8
A 2018-04-01 8
A 2018-05-01 13
B 2018-08-01 10
B 2018-09-01 10
B 2018-10-01 22
...
I fumbled over the "cross join" clause, however it seems to generate some unexpected results for me. Could someone help to give a hint how I can achieve this in SQL?
Thanks a lot in advance.
I think a recursive CTE is a simple way to do this. The code is just:
-- Generate every month between each product's first and last month, then
-- left join the real rows so missing months show a quantity of 0.
with cte as (
    -- anchor: one row per product at its first month, carrying its last month along
    select
        product,
        min(mon) as mon,
        max(mon) as end_mon
    from t
    group by product

    union all

    -- step forward one month at a time until the product's last month is reached
    select
        product,
        dateadd(month, 1, mon),
        end_mon
    from cte
    where mon < end_mon
)
select
    cte.product,
    cte.mon,
    coalesce(qty, 0) as qty
from cte
left join t
    on t.mon = cte.mon
   and t.product = cte.product;
Here is a db<>fiddle.
Hi, I think this example can help you and do what you expected:
-- Sample sales per product and month.
CREATE TABLE #MyTable
(
    Product      VARCHAR(10),
    ProductMonth DATETIME,
    Qty          INT
);
GO
-- Calendar of the months that should appear for every product.
CREATE TABLE #MyTableTempDate
(
    FullMonth DATETIME
);
GO
INSERT INTO #MyTable (Product, ProductMonth, Qty)
VALUES
    ('A', '2019-01-01', 214),
    ('A', '2019-02-01', 4),
    ('A', '2019-03-01', 50),
    ('B', '2019-01-01', 214),
    ('B', '2019-02-01', 10),
    ('C', '2019-04-01', 150)
INSERT INTO #MyTableTempDate (FullMonth)
VALUES
    ('2019-01-01'),
    ('2019-02-01'),
    ('2019-03-01'),
    ('2019-04-01'),
    ('2019-05-01'),
    ('2019-06-01'),
    ('2019-07-01');
------------- FOR NEWER SQL SERVER VERSION > 2005
-- (the windowed SUM with a ROWS frame needs SQL Server 2012 or later)
-- Combine the real sales rows with zero-quantity filler rows for every calendar
-- month a product has no sale in, then accumulate the quantity per product.
;WITH MyCTE AS
(
    SELECT T.Product, T.ProductMonth AS MMonth, T.Qty
    FROM #MyTable T
    -- UNION (not UNION ALL): the cross join below yields the same filler row once per
    -- existing row of the product, and the duplicates must collapse.
    UNION
    SELECT T.Product, TD.FullMonth AS MMonth, 0 AS Qty
    FROM #MyTable T
    CROSS JOIN #MyTableTempDate TD
    WHERE NOT EXISTS (SELECT 1 FROM #MyTable TT WHERE TT.Product = T.Product AND TD.FullMonth = TT.ProductMonth)
)
-- SELECT * FROM MyCTE;
-- Fix: the running total is ordered by month. Ordering by Product inside a
-- Product partition ties every row, which leaves the accumulation order undefined.
SELECT Product, MMonth, Qty,
    SUM(Qty) OVER (PARTITION BY Product ORDER BY MMonth
                   ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS TotalQty
FROM MyCTE
ORDER BY Product, MMonth ASC;
DROP TABLE #MyTable
DROP TABLE #MyTableTempDate
I have other way to perform this in lower SQL Server Version (like 2005 and lower)
It's a SELECT on SELECT if it's your case let me know and i provide some other example.
You can create the months with a recursive CTE
-- Sample data in a table variable. Table variables and scalar variables need the
-- @ sigil (they were written with # before, which is not valid in DECLARE).
DECLARE @MyTable TABLE
(
    ProductID CHAR(1),
    Date DATE,
    Amount INT
)
INSERT INTO @MyTable (ProductID, Date, Amount)
VALUES
('A','2018-01-01', 5),
('A','2018-02-01', 3),
('A','2018-05-01', 5),
('B','2018-08-01', 10),
('B','2018-10-01', 12)
DECLARE @StartDate DATE
DECLARE @EndDate DATE
-- overall month range across all products
SELECT @StartDate = MIN(Date), @EndDate = MAX(Date) FROM @MyTable
-- one row per month from @StartDate through @EndDate
;WITH dates AS (
    SELECT @StartDate AS Date
    UNION ALL
    SELECT DATEADD(Month, 1, Date)
    FROM dates
    WHERE Date < @EndDate
)
SELECT A.ProductID, d.Date, COALESCE(T.Amount,0) AS Amount,
    -- running total per product; months without a sale contribute nothing
    COALESCE(SUM(T.Amount) OVER(PARTITION BY A.ProductID ORDER BY d.Date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),0) AS Total
FROM
(
    SELECT ProductID, MIN(date) as DateStart, MAX(date) as DateEnd
    FROM @MyTable
    GROUP BY ProductID -- As I read in your comments that you need different min and max dates per product
) A
-- restrict the calendar to each product's own first..last month
JOIN dates d ON d.Date >= A.DateStart AND d.Date <= A.DateEnd
LEFT JOIN @MyTable T ON A.ProductID = T.ProductID AND T.Date = d.Date
ORDER BY A.ProductID, d.Date
-- lift the default cap of 100 recursion levels so ranges longer than 100 months work
OPTION (MAXRECURSION 0)
Try this below
-- Drop the work table left over from a previous run, if any.
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL
DROP TABLE #Temp
-- Inline sample data (Month is a string here, not a date).
;WITH CTE(Product,[Month],Qty)
AS
(
SELECT 'A','2018-01-01', 5 UNION ALL
SELECT 'A','2018-02-01', 3 UNION ALL
SELECT 'A','2018-05-01', 5 UNION ALL
SELECT 'B','2018-08-01', 10 UNION ALL
SELECT 'D','2018-10-01', 12
)
-- Keep every generated month (RIGHT JOIN); months with no sample row get NULL Product and Qty.
SELECT ct.Product,[MonthDays],ct.Qty
INTO #Temp
FROM
(
SELECT c.Product,[Month],
ISNULL(Qty,0) AS Qty
FROM CTE c
)ct
RIGHT JOIN
(
-- Builds the twelve strings '2018-01-01' .. '2018-12-01' from the numbers 0-11 stored
-- in master.dbo.spt_values (type 'P'); the year 2018 is hard-coded.
SELECT -- This code is to get month data
CONVERT(VARCHAR(10),'2018-'+ RIGHT('00'+CAST(MONTH(DATEADD(MM, s.number, CONVERT(DATETIME, 0)))AS VARCHAR),2) +'-01',120) AS [MonthDays]
FROM master.dbo.spt_values s
WHERE [type] = 'P' AND s.number BETWEEN 0 AND 11
)DT
ON dt.[MonthDays] = ct.[Month]
-- Neither window has a PARTITION BY: Product is filled with the running maximum of
-- Product over the months, and SumQty accumulates across ALL products, not per product.
SELECT
MAX(Product)OVER(ORDER BY [MonthDays])AS Product,
[MonthDays],
ISNULL(Qty,0) Qty,
SUM(ISNULL(Qty,0))OVER(ORDER BY [MonthDays]) As SumQty
FROM #Temp
Result
Product MonthDays Qty SumQty
------------------------------
A 2018-01-01 5 5
A 2018-02-01 3 8
A 2018-03-01 0 8
A 2018-04-01 0 8
A 2018-05-01 5 13
A 2018-06-01 0 13
A 2018-07-01 0 13
B 2018-08-01 10 23
B 2018-09-01 0 23
D 2018-10-01 12 35
D 2018-11-01 0 35
D 2018-12-01 0 35
First of all, i would divide month and year to get easier with statistics.
I will give you an example query, not based on your table but still helpful.
--here i create the table that will be used as calendar
Create Table MA_MonthYears (
    Month int not null,
    year int not null,
    PRIMARY KEY (month, year) )
--/////////////////
-- Fill MA_MonthYears with one row per month from January 2015 through December 2098.
-- Fix: local variables use the @ sigil (DECLARE does not accept # names).
declare @month as int
declare @year as int
set @month = 1
set @year = 2015
while ( @year != 2099 )
begin
    insert into MA_MonthYears(Month, year)
    select @month, @year
    if @month < 12
        set @month = @month + 1
    else
    begin
        -- December was just written: roll over to January of the next year
        set @month = 1
        set @year = @year + 1
    end
end
--/////////////////
--here you are the possible result you are looking for
-- Every calendar month is kept (left join); months without documents return NULL for Sold and item.
select SUM(MA_SaleDocDetail.taxableamount) as Sold, MA_MonthYears.month , MA_MonthYears.year , MA_SaleDocDetail.item
from MA_MonthYears left outer join MA_SaleDocDetail on year(MA_SaleDocDetail.DocumentDate) = MA_MonthYears.year
and Month(MA_SaleDocDetail.documentdate) = MA_MonthYears.Month
group by MA_SaleDocDetail.Item, MA_MonthYears.year , MA_MonthYears.month
order by MA_MonthYears.year , MA_MonthYears.month

Order By Month Name in Sql-Server

I have Below Table named session
SessionID SessionName
100 August
101 September
102 October
103 November
104 December
105 January
106 May
107 June
108 July
I executed the following query I got the output as below.
-- List every session; no ORDER BY, so the row order is whatever the engine returns.
SELECT
    s.SessionID,
    s.SessionName
FROM dbo.Session AS s
SessionID SessionName
100 August
101 September
102 October
103 November
104 December
105 January
106 May
107 June
108 July
the results get ordered by Session ID. But I need the output as below,
SessionID SessionName
106 May
107 June
108 July
100 August
101 September
102 October
103 November
104 December
105 January
How to achieve this in sql-server? thanks for the help
I'd use a case expression, like:
-- Sort sessions in application order: the session year starts in August (1)
-- and ends in July (12). Renumber the branches to start from a different month.
-- Fixes: 'Juty' corrected to 'July'; the elided months are spelled out so the
-- expression is complete.
order by case SessionName
    when 'August' then 1
    when 'September' then 2
    when 'October' then 3
    when 'November' then 4
    when 'December' then 5
    when 'January' then 6
    when 'February' then 7
    when 'March' then 8
    when 'April' then 9
    when 'May' then 10
    when 'June' then 11
    when 'July' then 12
end
August has 1 because "in the application logic a session started with august", easy to renumber if you want to start with January and end with December.
To avoid any hassle with culture dependencies, you might get the month's index out of sys.syslanguages with a query like this:
(I'd eventually create a TVF from this and pass in the langid as parameter)
-- Turn the comma-separated month list stored for language 0 in sys.syslanguages
-- into one row per month, numbered in the order the XML nodes are returned.
SELECT
    ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS MyMonthIndex,
    month_node.value('.', 'varchar(100)') AS MyMonthName
FROM
(
    -- 'a,b,c' becomes '<x>a</x><x>b</x><x>c</x>' so it can be shredded with nodes()
    SELECT CAST('<x>' + REPLACE(months, ',', '</x><x>') + '</x>' AS XML) AS XmlData
    FROM sys.syslanguages
    WHERE langid = 0
) AS lang
CROSS APPLY lang.XmlData.nodes('/x') AS month_list(month_node)
The result
1 January
2 February
3 March
4 April
5 May
6 June
7 July
8 August
9 September
10 October
11 November
12 December
EDIT: An UDF for direct usage (e.g. in an order by)
-- Returns the 1-based position of @MonthName in the month list that
-- sys.syslanguages stores for language @langId, or NULL when the name is not found.
--   @MonthName  month name to look up, spelled as in that language's list
--   @langId     langid value from sys.syslanguages
-- Fix: parameters use the @ sigil required by T-SQL.
CREATE FUNCTION dbo.GetMonthIndexFromMonthName(@MonthName VARCHAR(100),@langId INT)
RETURNS INT
AS
BEGIN
    RETURN
    (
        SELECT MyMonthIndex
        FROM
        (
            -- number the months in the order the XML nodes are returned
            SELECT CAST(ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS INT) MyMonthIndex
                  ,Mnth.value('.','varchar(100)') AS MyMonthName
            FROM
            (
                -- 'a,b,c' becomes '<x>a</x><x>b</x><x>c</x>' so it can be shredded with nodes()
                SELECT CAST('<x>' + REPLACE(months,',','</x><x>') + '</x>' AS XML) AS XmlData
                FROM sys.syslanguages
                WHERE langid=@langId
            ) AS DataSource
            CROSS APPLY DataSource.XmlData.nodes('/x') AS The(Mnth)
        ) AS tbl
        WHERE MyMonthName=@MonthName
    );
END
GO
-- usage example
SELECT dbo.GetMonthIndexFromMonthName('February',0)
I think you can do something like this:
-- Build a parseable date string from the month name (fixed day and year) and sort by its month number.
SELECT *
FROM session
ORDER BY
    MONTH(session.SessionName + ' 1 2014')
The part:
MONTH(session.SessionName + ' 1 2014')
This will return 3 if the month is March; you do not really need to care about the year in this case.
Try CAST your Month Name (SessionName) into DATETIME format, just like this
-- Sorts the rows by the month number obtained from casting the month name to DATETIME.
-- `table` is presumably a placeholder for the real table name.
-- NOTE(review): SessionName + '1900' concatenates without a separator (e.g. 'August1900');
-- confirm the engine parses that as a date, otherwise put a space before the year.
SELECT * FROM table
ORDER BY DATEPART(mm, CAST(SessionName + '1900' AS DATETIME)) asc
You can convert the month name into a number and sort by the numeric form of the month:
-- Sort sessions by calendar month number.
-- Fix: a bare month name such as 'August' is not a complete date string, so a fixed
-- day and year are appended before DATEPART converts it (only the month is used).
SELECT SessionID, SessionName, DATEPART(MM, SessionName + ' 1 2000') AS MonthNumber
FROM dbo.Session
ORDER BY DATEPART(MM, SessionName + ' 1 2000')
otherwise, if you have something like "DateCreation" column, you can do something like this
-- Variant for tables that carry a real date column: sort by that date's month number.
SELECT
    s.SessionID,
    s.SessionName,
    MONTH(s.DateCreation)
FROM dbo.Session AS s
ORDER BY
    MONTH(s.DateCreation)
Try with this .. I am assuming that you want next month of current Date is starting point so I am using "Month(GetDate()" in script otherwise you can hard-code to 4 to get desired result
-- Sample sessions in a table variable (fix: table variables need the @ sigil).
Declare @Session Table (SessionID INT, SessionName Varchar(20))
Insert Into @Session (SessionID, SessionName) Values (100,'August'),(101,'September'),(102,'October'),(103,'November'),(104,'December')
,(105,'January'),(106,'May'),(107,'June'),(108,'July')
-- Sort key = number of months from the current month to the session's month, in 1..12,
-- so the month after the current one sorts first and the current month last.
Select * From @Session
ORDER BY CASE WHEN Month(CAST('01-' + SessionName + ' 2016' AS DateTime)) - Month(GetDate()) <= 0
THEN 12 + Month(CAST('01-' + SessionName + ' 2016' AS DateTime)) - Month(GetDate())
ELSE Month(CAST('01-' + SessionName + ' 2016' AS DateTime)) - Month(GetDate()) END
For GERMAN Language ..use below script .. In this i used soundex for month comparision
-- With the session language set to German, DateName() returns German month names, so
-- the English names stored in the table are matched to them by SOUNDEX code
-- instead of by exact spelling.
SET LANGUAGE GERMAN;
-- Fix: table variables need the @ sigil.
Declare @Session Table (SessionID INT, SessionName Varchar(20))
Insert Into @Session (SessionID, SessionName) Values (100,'August'),(101,'September'),(102,'October'),(103,'November'),(104,'December')
,(105,'January'),(106,'May'),(107,'June'),(108,'July')
Select * From @Session S
INNER JOIN
-- months 1..12 with their names in the current language, generated from numbers 0..11
(SELECT Number + 1 as [MonthNumber],
DateName(mm,DATEADD(mm,Number,0)) as [MonthName]
FROM master..spt_values
WHERE Type = 'P' and Number < 12
)M ON SoundEx(M.MonthName)=SoundEx(S.SessionName)
-- same rotation as the English variant: months after the current month sort first
ORDER BY CASE WHEN [MonthNumber] - Month(GetDate()) <= 0
THEN 12 + [MonthNumber] - Month(GetDate())
ELSE [MonthNumber] - Month(GetDate()) END

Latest value for each timeperiod, person and category

I have tried browsing the problems & answers in this forum, but none of them fits my case sufficiently.
I have some people reporting in their status for 2 categories, which looks like this:
TimeStamp | PersonID | Category | Value
2015-07-02 01:25:00 | 2303 | CatA | 8.2
2015-07-02 01:25:00 | 2303 | CatB | 10.1
2015-07-02 03:35:00 | 2303 | CatA | 8.0
2015-07-02 03:35:00 | 2303 | CatB | 9.9
2015-07-02 02:30:00 | 4307 | CatA | 8.7
2015-07-02 02:30:00 | 4307 | CatB | 12.7
.
.
.
2015-07-31 22:15:00 | 9011 | CatA | 7.9
2015-07-31 22:15:00 | 9011 | CatB | 8.9
Some people report status several times per hour, but others only a couple of times per day.
I need to produce an output which shows the latest known status for each day, for each hour of the day, for each person and category. This should look like this:
Date |Hour| Person | Category | Value
2015-07-02 | 1 | 2307 | CatA | Null
2015-07-02 | 1 | 2307 | CatB | Null
2015-07-02 | 2 | 2307 | CatA | 8.2
2015-07-02 | 2 | 2307 | CatB | 10.2
2015-07-02 | 3 | 2307 | CatA | 8.2
2015-07-02 | 3 | 2307 | CatB | 10.2
2015-07-02 | 4 | 2307 | CatA | 8.0
2015-07-02 | 4 | 2307 | CatB | 9.9
.
.
.
2015-07-31 | 23 | 9011 | CatA | 7.9
2015-07-31 | 23 | 9011 | CatB | 8.9
The first row(s) for each person and category will probably be null as there will be no known values as this is "beginning of time"
I have tried using a sub query like this:
SELECT Date
,hour
,Person
,Category
,(SELECT TOP 1 status FROM readings WHERE (readings.Date<=structure.Date) AND readings.Hour<=structure.hour)....and so forth.... order by TimeStamp DESC
FROM structure
This works - except in terms of performance because I need to do this for a month, for 2.000 persons for 2 categories and that means that the sub query must run (30*24*2000*2=2,880,000) times, and given the fact that table containing the readings also contains hundreds of thousands of readings, this don't work.
I have also tried messing round with row_number(), but have not succeed in this.
Any suggestions?
Edit (19-10-2015 15:34): In my query example above I am referring to a "structure" table. This is actually just (for the time being) a view, with the following SQL:
-- Cartesian product of all days, hours, persons and categories:
-- one row for every combination that needs a status value.
SELECT
    cal.CalendarDay,
    hrs.Hour,
    per.Person,
    cat.Category
FROM Calendar AS cal
CROSS JOIN Hours AS hrs
CROSS JOIN Persons AS per
CROSS JOIN Categories AS cat
This in order to produce a table containing a row for each day, for each hour for each person and each category. This table then contains (30*24*2000*2=2,880,000) rows.
For each of these rows, I need to locate the latest status from the readings table. So for each Day, for each hour, for each person and each category I need to read the latest available status from the readings table.
Let me guess.
Based on the task "to produce an output, which shows latest know status for each day, for each hour of the day, for each person and category" you need to take three steps:
(1) Find latest records for every hour;
(2) Get a table of all date and hours to show;
(3) Multiply that date-hours-table by persons and categories and left join the result with latest-records-for-every-hour.
-- NOTE: every table variable and scalar variable below uses the @ sigil; DECLARE does
-- not accept # names. The whole script must run as a single batch.
-- Test data
declare @t table ([Timestamp] datetime2(0), PersonId int, Category varchar(4), Value decimal(3,1));
insert into @t ([Timestamp], PersonId, Category, Value) values
('2015-07-02 01:25:00', 2303, 'CatA', 8.2 ),
('2015-07-02 01:45:00', 2303, 'CatA', 9.9 ),
('2015-07-02 01:25:00', 2303, 'CatB', 10.1 ),
('2015-07-02 03:35:00', 2303, 'CatA', 8.0 ),
('2015-07-02 03:35:00', 2303, 'CatB', 9.9 ),
('2015-07-02 02:30:00', 4307, 'CatA', 8.7 ),
('2015-07-02 02:30:00', 4307, 'CatB', 12.7 );
-- Latest records for every hour
declare @Latest table (
    [Date] date,
    [Hour] tinyint,
    PersonId int,
    Category varchar(4),
    Value decimal(3,1),
    primary key ([Date], [Hour], PersonId, Category)
);
-- TOP 1 WITH TIES ordered by a per-group row number keeps every row numbered 1,
-- i.e. the newest reading of each (date, hour, person, category) group.
insert into @Latest ([Date], [Hour], PersonId, Category, Value)
select top 1 with ties
    [Date] = cast([Timestamp] as date),
    [Hour] = datepart(hour, [Timestamp]),
    PersonId ,
    Category ,
    Value
from
    @t
order by
    row_number() over(partition by cast([Timestamp] as date), datepart(hour, [Timestamp]), PersonId, Category order by [Timestamp] desc);
-- Date-hours table
declare @FromDateTime datetime2(0);
declare @ToDateTime datetime2(0);
select @FromDateTime = min([Timestamp]), @ToDateTime = max([Timestamp]) from @t;
declare @DateDiff int = datediff(day, @FromDateTime, @ToDateTime);
declare @FromDate date = cast(@FromDateTime as date);
declare @FromHour int = datepart(hour, @FromDateTime);
declare @ToHour int = datepart(hour, @ToDateTime);
declare @DayHours table ([Date] date, [Hour] tinyint, primary key clustered ([Date], [Hour]) );
with N as
(
    -- ten rows, used only as a row source for the generators below
    select n from (values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10)) t(n)
),
D as (
    -- day offsets 0..999 (10 x 10 x 10 rows)
    select
        row_number() over(order by (select 1))-1 as d
    from
        N n1 cross join N n2 cross join N n3
),
H as (
    -- hours 0..23
    select top 24
        row_number() over(order by (select 1)) - 1 as h
    from
        N n1 cross join N n2
)
insert into @DayHours ([Date], [Hour])
select dateadd(day, D.d, @FromDate), H.h
from
    D cross join H
where
    -- day * 100 + hour is a sortable key: keep first reading hour .. last reading hour
    @FromHour <= (D.d * 100 + H.h)
    and (D.d * 100 + H.h) <= (@DateDiff * 100 + @ToHour);
-- @PersonsIds & @Categories tables (just an imitation of the real tables)
declare @PersonsIds table (Id int primary key);
declare @Categories table (Category varchar(4) primary key);
insert into @PersonsIds select distinct PersonId from @t;
insert into @Categories select distinct Category from @t;
-- The result
-- every (person, category, date, hour) combination, with the reading of that exact hour if any
select
    dh.[Date],
    dh.[Hour],
    PersonId = p.Id,
    c.Category,
    l.Value
from
    @PersonsIds p cross join @Categories c cross join @DayHours dh
    left join @Latest l on l.[Date] = dh.[Date] and l.[Hour] = dh.[Hour] and l.PersonId = p.Id and l.Category = c.Category
order by
    [Date], [Hour], PersonId, Category;
Edit (1):
OK.
In order to bring over the previous values to empty spaces,
let's replace the last select statement with this one:
-- Replacement for the final select: for every (person, category, date, hour) slot,
-- return the most recent reading at or before that slot (NULL when there is none yet).
-- Fix: the table variables are referenced with the @ sigil.
select top 1 with ties
    dh.[Date],
    dh.[Hour],
    PersonId = p.Id,
    c.Category,
    l.Value
from
    @PersonsIds p cross join @Categories c cross join @DayHours dh
    -- all readings of the same person/category that are not later than the slot
    left join @Latest l
        on (l.[Date] = dh.[Date] and l.[Hour] <= dh.[Hour] or l.[Date] < dh.[Date])
        and l.PersonId = p.Id and l.Category = c.Category
order by
    -- row number 1 per slot = the newest of those readings; WITH TIES keeps one per slot
    row_number()
        over (partition by dh.[Date], dh.[Hour], p.Id, c.Category
              order by l.[Date] desc, l.[Hour] desc);
Edit (2):
Let's try to collect the Cartesian product in temporary table with clustered index: PersonId, Category, [Date], [Hour].
And then update the table dragging non-changed values:
-- Materialise the full person x category x date x hour grid, then carry the last
-- known value forward into the empty slots.
-- Fix: table variables and scalar variables use the @ sigil.
declare @Result table (
    [Date] date,
    [Hour] tinyint,
    PersonId int,
    Category varchar(4),
    Value decimal(3,1),
    primary key (PersonId, Category, [Date], [Hour]) -- Important !!!
)
insert into @Result ([Date], [Hour], PersonId, Category, Value)
select
    dh.[Date],
    dh.[Hour],
    PersonId = p.Id,
    c.Category,
    l.Value
from
    @PersonsIds p cross join @Categories c cross join @DayHours dh
    left join @Latest l on l.[Date] = dh.[Date] and l.[Hour] = dh.[Hour] and l.PersonId = p.Id and l.Category = c.Category
order by
    dh.[Date], dh.[Hour], p.Id, c.Category;
declare @PersonId int;
declare @Category varchar(4);
declare @Value decimal(3,1);
-- Running update: each row with a NULL Value takes the value remembered from the
-- previously processed row, but only while person and category stay the same.
-- NOTE(review): this depends on rows being processed in primary-key order, which the
-- UPDATE statement does not promise - verify the results on real data.
update @Result set
    @Value = Value = isnull(Value, case when @PersonId = PersonId and @Category = Category then @Value end),
    @PersonId = PersonId,
    @Category = Category;
For yet better performance consider changing table variables with temporary tables and applying indexes in accordance with query plan recommendations.
If i got it correctly ..it should give you desired result.
-- Returns each structure row with hour 1 shown as NULL, plus one status value from a subquery.
-- NOTE(review): the alias `st` is used below but the FROM clause declares no alias for `structure`.
-- NOTE(review): QUALIFY is not part of T-SQL (it exists in e.g. Teradata/Snowflake), and the
-- subquery is not correlated to the outer row - confirm the target engine and intent.
select st.Date,
case when hour =1 then NULL
else hour
end as hour
,st.Person,st.Category,
(select status from reading qualify row_number() over (partition by personid
order by status desc)=1)
from structure;
You can achieve this in SQL, but it will be quite slow, because for every person, category, day and hour you will have to look for the latest entry for the person and category until then. Just think of the process: Pick a record in your big table, find all statuses until then, order them and find the latest thus and pick its value. And this will be done for every record in your big table.
You might be better off simply retrieving all data with a program written in a programming language and collecting the data with a control-break algorithm.
However, let's see how it's done in SQL.
One problem is SQL Server's poor date/time functions. We want to compare date plus hour, which would be easiest with strings in 'yyyymmddhh' format, e.g. '2015101923' < '2015102001'. In your big table you have date and hour and in your status table you have datetimes. Let's see how we can get the desired strings:
convert(varchar(8), bigtable.calendarday, 112) +
right('0' + convert(varchar(2), bigtable.hour), 2)
and
convert(varchar(8), status.timestamp, 112) +
right('0' + convert(varchar(2), datepart(hour, status.timestamp)), 2)
As this is - along with person and category - our key criterion to find records, you may want to have it as computed columns and add indexes (person + category + dayhourkey) in both tables.
You'd select from your big table and get the status value in a subquery. In order to get the latest matching record, you'd order by timestamp and limit to 1 record.
-- For every row of the big table, look up the newest status value of the same person
-- and category whose 'yyyymmddhh' key is not later than the row's own day + hour key.
-- Fix: SQL Server has no LIMIT clause; TOP (1) with ORDER BY picks the latest record.
select
    b.personid,
    b.calendarday,
    b.hour,
    b.category,
    (
        select top (1) s.value
        from status s
        where s.personid = b.personid
        and s.category = b.category
        -- compare as fixed-width strings: date in style 112 (yyyymmdd) + zero-padded hour
        and convert(varchar(8), s.timestamp, 112) + right('0' + convert(varchar(2), datepart(hour, s.timestamp)), 2) <=
            convert(varchar(8), b.calendarday, 112) + right('0' + convert(varchar(2), b.hour), 2)
        order by s.timestamp desc
    ) as value
from bigtable b;

SQL query to calculate days worked per Month

Im stuck on a SQL query. Im using SQL Server.
Given a table that contains Jobs with a start and end date. These jobs can span days or months. I need to get the total combined number of days worked each month for all jobs that intersected those months.
Jobs
-----------------------------------
JobId | Start | End | DayRate |
-----------------------------------
1 | 1.1.13 | 2.2.13 | 2500 |
2 | 5.1.13 | 5.2.13 | 2000 |
3 | 3.3.13 | 2.4.13 | 3000 |
The results i need are:
Month | Days
--------------
Jan | 57
Feb | 7
Mar | 28
Apr | 2
Any idea how I would write such a query?
I would also like to work out the SUM for each month based on multiplying the dayrate by number of days worked for each job, how would i add this to the results ?
Thanks
You can use recursive CTE to extract all days from start to end for each JobID and then just group by month (and year I guess).
-- Expand every job into one row per calendar day it covers (start and end inclusive),
-- then count those day rows per month and year across all jobs.
;WITH CTE_TotalDays AS
(
    -- anchor: first day of each job
    SELECT
        [Start] AS DT,
        JobID
    FROM dbo.Jobs

    UNION ALL

    -- step: next day, as long as the current day is still before the job's end date
    SELECT
        DATEADD(DD, 1, prev_day.DT),
        prev_day.JobID
    FROM CTE_TotalDays prev_day
    WHERE prev_day.DT < (SELECT [End] FROM Jobs j2 WHERE j2.JobId = prev_day.JobID)
)
SELECT
    MONTH(DT) AS [Month],
    YEAR(DT) AS [Year],
    COUNT(*) AS [Days]
FROM CTE_TotalDays
GROUP BY
    MONTH(DT),
    YEAR(DT)
-- jobs may span more days than the default limit of 100 recursion levels
OPTION (MAXRECURSION 0)
SQLFiddle DEMO
PS: There are 58 days in Jan in your example and not 57 ;)
You can do it using following approach:
/* Your table with periods */
-- Fix: table variables and scalar variables use the @ sigil (DECLARE does not accept # names).
declare @table table(JobId int, Start date, [End] date, DayRate money)
INSERT INTO @table (JobId , Start, [End], DayRate)
VALUES
(1, '20130101','20130202', 2500),
(2,'20130105','20130205', 2000),
(3,'20130303','20130402' , 3000 )
/* create table where stored all possible dates
if this code are supposed to be executed often you can create
table with dates ones to avoid overhead of filling it */
declare @dates table(d date)
declare @d date='20000101'
-- one row per day from 2000-01-01 up to (not including) 2050-01-01
WHILE @d<'20500101'
BEGIN
    INSERT INTO @dates (d) VALUES (@d)
    SET @d=DATEADD(DAY,1,@d)
END;
/* and at last get desired output */
-- each calendar day is counted once for every job whose Start..End range contains it
SELECT YEAR(d.d) [YEAR], DATENAME(month,d.d) [MONTH], COUNT(*) [Days]
FROM @dates d
CROSS JOIN @table t
WHERE d.d BETWEEN t.Start AND t.[End]
GROUP BY YEAR(d.d), DATENAME(month,d.d)
This only has 1 recursive call instead of 1 for each row. I imagine this will perform better than the chosen answer when you have a large amount of data.
-- Sample jobs in a table variable (fix: table variables need the @ sigil).
declare @t table(JobId int, Start date, [End] date, DayRate int)
insert @t (JobId, Start, [End], DayRate) values
(1,'2013-01-01','2013-02-02', 2500),(2,'2013-01-05','2013-02-05', 2000),(3,'2013-03-03', '2013-04-02',3000)
;WITH a AS
(
    -- overall date range covered by any job
    SELECT min(Start) s, max([End]) e
    FROM @t
), b AS
(
    -- one row per calendar day from the earliest start to the latest end (single recursion)
    SELECT s, e from a
    UNION ALL
    SELECT dateadd(day, 1, s), e
    FROM b WHERE s <> e
)
SELECT
    MONTH(b.s) AS [Month]
    ,YEAR(b.s) AS [Year]
    ,COUNT(*) AS [Days]
    -- each worked day contributes its job's day rate
    ,SUM(DayRate) MonthDayRate
FROM b
-- a day is counted once for every job whose Start..End range contains it
join @t t
    on b.s between t.Start and t.[End]
GROUP BY MONTH(b.s),YEAR(b.s)
OPTION (MAXRECURSION 0)
Result:
Month Year Days MonthDayRate
1 2013 58 131500
2 2013 7 15000
3 2013 29 87000
4 2013 2 6000