Modify query sql - sql-server-express

I have a query to aggregate (compress) data from 1 min to any other time frame, and it works perfectly.
Use StockDataFromSella;
DECLARE #D1 DateTime
DECLARE #D2 DateTime
DECLARE #Interval FLOAT
SET #D1 = '2008-09-21T09:00:00.000'
SET #D2 = '2010-10-20T17:30:00.000'
SET #Interval = 15
;WITH
L0 AS (SELECT 1 AS c UNION ALL SELECT 1),
L1 AS (SELECT 1 AS c FROM L0 A CROSS JOIN L0 B),
L2 AS (SELECT 1 AS c FROM L1 A CROSS JOIN L1 B),
L3 AS (SELECT 1 AS c FROM L2 A CROSS JOIN L2 B),
L4 AS (SELECT 1 AS c FROM L3 A CROSS JOIN L3 B),
Nums AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS i FROM L4),
Ranges AS(
SELECT
DATEADD(MINUTE,#Interval*(i-1),#D1) AS StartRange,
DATEADD(MINUTE,#Interval*i,#D1) AS NextRange
FROM Nums where i <= 1+CEILING(DATEDIFF(MINUTE,#D1,#D2)/#Interval))
,cte AS (
SELECT
*
,ROW_NUMBER() OVER (PARTITION BY Simbolo,r.StartRange ORDER BY [DataOra]) AS RN_ASC
,ROW_NUMBER() OVER (PARTITION BY Simbolo,r.StartRange ORDER BY [DataOra] DESC) AS RN_DESC
FROM Ranges r
JOIN dbo.tbl1MinENI p ON p.[DataOra] >= r.StartRange and p.[DataOra] < r.NextRange )
SELECT
Simbolo,
MAX(CASE WHEN RN_ASC=1 THEN [DataOra] END) AS DataOra,
MAX(CASE WHEN RN_ASC=1 THEN [Apertura] END) AS [Apertura],
MAX(Massimo) Massimo,
MIN(Minimo) Minimo,
MAX(CASE WHEN RN_DESC=1 THEN [Chiusura] END) AS [Chiusura],
SUM(Volume) Volume
/*MAX(CASE WHEN RN_DESC=1 THEN [DataOra] END) AS ChiusuraDataOra*/
FROM cte
GROUP BY Simbolo,StartRange
ORDER BY DataOra
I would like to split second column DataOra(DateTime) into two diferent columns, one for Date (if possibile in dd/mm/yyyy format) and the second for Time only.
Any help very appreciated, thanks.
Alberto

The easiest way to get these values would be to use CONVERT. SQL Server has some built-in date formatting when you convert a Date.
CONVERT(VARCHAR, MAX(CASE WHEN RN_ASC=1 THEN [DataOra] END), 103) AS DataOraDate,
CONVERT(VARCHAR, MAX(CASE WHEN RN_ASC=1 THEN [DataOra] END), 114) AS DataOraTime,
The two codes (103 and 114) correspond to the formats that you want. See http://msdn.microsoft.com/en-us/library/ms187928.aspx for more details about those and the other formats you can convert to.

As it seems you just only need to change the select.
Instead of this
MAX(CASE WHEN RN_ASC=1 THEN [DataOra] END) AS DataOra,
Use this:
CONVERT(VARCHAR(8),MAX(CASE WHEN RN_ASC=1 THEN [DataOra] END),108) AS HourMinuteSecond,
CONVERT(VARCHAR(8),MAX(CASE WHEN RN_ASC=1 THEN [DataOra] END),101) AS DateOnly,
Hope it helps.

Related

SQL with while loop to DAX conversion

Trying to convert the SQL with while loop code into DAX. Trying to build this query without using temp tables as access is an issue on the database and only have views to work with. I believe best option for me is to code it in DAX. Could someone help with it.
DECLARE #sd DATETIME
DECLARE #ed DATETIME
SELECT #sd = CONVERT(DATETIME, '2021-01-31')
SELECT #ed = GETDATE()
DECLARE #date DATETIME = EOMONTH(#sd)
WHILE ( (#date) <= #ed )
BEGIN
SELECT MONTH(#date) as Month, YEAR(#date) as Year, DAY(#date) as Day, A.*
FROM [people] A
WHERE A.effective_date = (SELECT MAX(B.effective_date)
FROM [people] B
WHERE B.employee_id = A.employee_id
AND B.record_id = A.record_id
AND B.effective_date <= #date)
AND A.effective_sequence = (SELECT MAX(C.effective_sequence)
FROM [people] C
WHERE C.employee_id = A.employee_id
AND C.record_id = A.record_id
AND C.effective_date = A.effective_date)
ORDER BY A.employee_id;
SET #date = EOMONTH(DATEADD(MONTH,1,#date))
END
While you could do this as a view, you would either have to hard-code the start and end dates, or filter them afterwards (which is likely to be inefficient). Instead you can do this as an inline Table Valued Function.
We can use a virtual tally-table (generated with a couple cross-joins) to generate a row for each month
We can use row-numbering instead of the two subqueries
CREATE FUNCTION dbo.GetData (#sd DATETIME, #ed DATETIME)
RETURNS TABLE AS RETURN
WITH L0 AS (
SELECT *
FROM (VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) v(n)
),
L1 AS (
SELECT 1 n FROM L0 a CROSS JOIN L0 b
)
SELECT
MONTH(m.Month) as Month,
YEAR(m.Month) as Year,
DAY(m.Month) as Day,
p.* -- specify columns
FROM (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY m.Month, p.employee_id, p.record_id ORDER BY p.effective_date, p.effective_sequence) AS rn
FROM [people] p
CROSS JOIN (
SELECT TOP (DATEDIFF(month, #sd, #ed) + 1)
DATEADD(month, ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1, EOMONTH(#sd)) AS Month
FROM L1
) m
WHERE p.effective_date <= m.Month
) p
WHERE p.rn = 1
;
Then in PowerBI you can just do for example
SELECT *
FROM dbo.GetData ('2021-01-31', GETDATE()) d
ORDER BY
d.employee_id
Note that you cannot put the ORDER BY within the function, it doesn't work.

SQL - find row with closest date but different column value

i'm new to SQL and i would need an help.
I have a TAB and I need to find for any item B in the TAB the item A with the closest date. In this case the A with 02.09.2021 04:25:30
Date.
Item
07.09.2021 05:02:05
A
06.09.2021 05:01:02
A
05.09.2021 05:00:02
A
04.09.2021 04:59:01
A
03.09.2021 04:58:03
A
02.09.2021 04:56:55
A
02.09.2021 04:33:56
B
02.09.2021 04:25:30
A
WITH CTE(DATE,ITEM)AS
(
SELECT '20210907 05:02:05' , 'A'UNION ALL
SELECT '20210906 05:01:02' , 'A'UNION ALL
SELECT '20210905 05:00:02' , 'A'UNION ALL
SELECT'20210904 04:59:01' , 'A'UNION ALL
SELECT'20210903 04:58:03' , 'A'UNION ALL
SELECT'20210902 04:56:55' , 'A'UNION ALL
SELECT'20210902 04:33:56' , 'B'UNION ALL
SELECT'20210902 04:25:30' , 'A'
)
SELECT
CAST(C.DATE AS DATETIME)X_DATE,C.ITEM,Q.CLOSEST
FROM CTE AS C
OUTER APPLY
(
SELECT TOP 1 CAST(X.DATE AS DATETIME)CLOSEST
FROM CTE AS X
WHERE X.ITEM='A'AND CAST(X.DATE AS DATETIME)<CAST(C.DATE AS DATETIME)
ORDER BY CAST(X.DATE AS DATETIME) ASC
)Q
WHERE C.ITEM='B'
You can use OUTER APPLY-approach as in the above query.
Please also take a look that datetime-column (DATE)is written in the ISO-compliant form
Your data has only two columns. If you want the only the closest A timestamp, then the fastest way is probably window functions:
select t.*,
(case when prev_a_date is null then next_a_date
when next_a_date is null then prev_a_date
when datediff(second, prev_a_date, date) <= datediff(second, date, next_a_date) then prev_a_date
else next_a_date
end) as a_date
from (select t.*,
max(case when item = 'A' then date end) over (order by date) as prev_a_date,
min(case when item = 'A' then date end) over (order by date desc) as next_a_date
from t
) t
where item = 'B';
This uses seconds to measure the time difference, but you can use a smaller unit if appropriate.
You can also do this using apply if you have more columns from the "A" rows that you want:
select tb.*, ta.*
from t b outer apply
(select top (1) ta.*
from t ta
where item = 'A'
order by abs(datediff(second, a.date, b.date))
) t
where item = 'B';

Running totals with initial value then adding the totals as stated by the date

Imagine we have a table:
SELECT SUM(A) AS TOTALS,DATE,STUFF FROM TABLE WHERE DATE BETWEEN 'DATESTART' AND 'DATEEND'
GROUP BY DATE,STUFF
Normally this gets the totals as:
totals stuff date
23 x 01.01.1900
3 x 02.01.1900
44 x 06.01.1900
But what if we have the previous the data before the startdate,and i want to add those initial data to my startdate value; for example; from the begining of time i already have a sum value of x lets say 100
so i want my table to start from 123 and add the previous data such as:
123
126
126+44 and so on...
totals stuff date
123 x 01.01.1900
126 x 02.01.1900
170 x 06.01.1900
How can i achieve that?
Source data:
WITH Stocks
AS (
SELECT
Dep.Dept_No ,
SUM(DSL.Metre) AS Metre ,
CONVERT(VARCHAR(10), Date, 112) AS Date
FROM
DS (NOLOCK) DSL
JOIN TBL_Depts (NOLOCK) Dep ON Dep.Dept_No = DSL.Dept
WHERE
1 = 1 AND
DSL.Sil = 0 AND
DSL.Depo IN ( 5000, 5001, 5002, 5003, 5004, 5014, 5018, 5021, 5101, 5109, 5303 ) AND
Dep.Dept_No NOT IN ( 6002 ) AND
Dep.Dept_No IN ( 6000, 6001, 6003, 6004, 6005, 6011, 6024, 6030 ) AND
DSL.Date BETWEEN '2013-06-19' AND '2013-06-20'
GROUP BY
Dep.Dept_No ,
CONVERT(VARCHAR(10), Date, 112)
)
SELECT
Stocks.Metre ,
Dep.Dept AS Dept ,
Stocks.Date
FROM
Stocks
LEFT JOIN TBL_Depts (NOLOCK) Dep ON Stocks.Dept = Dep.Dept
ORDER BY
Stocks.Metre DESC
Any RDBMS with window and analytic functions (SQL Server 2012, PostgreSQL but not MySQL)
SELECT
SumA + SUM(SumARange) OVER (ORDER BY aDate ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS TOTALS,
other, aDate
FROM
(
SELECT
SUM(a) AS SumARange,
other, aDate
FROM
SomeTable
WHERE
aDate BETWEEN '20130101' AND '20130106'
GROUP BY
other, aDate
) X
CROSS JOIN
(
SELECT
SUM(a) AS SumA
FROM
SomeTable
WHERE
aDate < '20130101'
) Y
ORDER BY
aDate;
or
SELECT
SUM(SumA) OVER () + SUM(SumARange) OVER (ORDER BY aDate ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS TOTALS,
other, aDate
FROM
(
SELECT
SUM(CASE WHEN aDate < '20130101' THEN a ELSE 0 END) AS SumA,
SUM(CASE WHEN aDate BETWEEN '20130101' AND '20130106' THEN a ELSE 0 END) AS SumARange,
other, aDate
FROM
SomeTable
WHERE
aDate <= '20130106'
GROUP BY
other, aDate
) X
ORDER BY
aDate;
SQLFiddle example and another
Use option with APPLY operator to calculate the totals. You need also add additional CASE expression in the GROUP BY clause
;WITH cte AS
(
SELECT SUM(a) AS sumA, [stuff], MAX([Date]) AS [Date]
FROM SomeTable
WHERE [Date] <= '20130106'
GROUP BY [stuff], CASE WHEN [Date] <= '20130101' THEN 1 ELSE [Date] END
)
SELECT o.total, [stuff], [Date]
FROM cte c CROSS APPLY (
SELECT SUM(c2.sumA) AS total
FROM cte c2
WHERE c.[Date] >= c2.[Date]
) o
See example on SQLFiddle

How to merge time intervals in SQL Server

Suppose I have the following an event table with personId, startDate and endDate.
I want to know how much time the person X spent doing an event (the events can override each other).
If the person just has 1 event, its easy: datediff(dd, startDate, endDate)
If the person has 2 events it gets tricky.
I'll set some scenarios for the expected results.
Scenario 1
startDate endDate
1 4
3 5
This means he the results should be the datediff from 1 to 5
Scenario 2
startDate endDate
1 3
6 9
this means he the results should be the some of datediff(dd,1,3) and datediff(dd,6,9)
How can I get this result on an sql query? I can only think of a bunch of if statements, but the same person can have n events so the query will be really confusing.
Shredder Edit: I'd like to add a 3rd scenario:
startDate endDate
1 5
4 8
11 15
Desired result to Shredder scenario:
(1,5) and (4,8) merge in (1,8) since they overlap then we need to datediff(1,8) + datediff(11,15) => 7 + 4 => 11
You can use a recursive CTE to build a list of dates and then count the distinct dates.
declare #T table
(
startDate date,
endDate date
);
insert into #T values
('2011-01-01', '2011-01-05'),
('2011-01-04', '2011-01-08'),
('2011-01-11', '2011-01-15');
with C as
(
select startDate,
endDate
from #T
union all
select dateadd(day, 1, startDate),
endDate
from C
where dateadd(day, 1, startDate) < endDate
)
select count(distinct startDate) as DayCount
from C
option (MAXRECURSION 0)
Result:
DayCount
-----------
11
Or you can use a numbers table. Here I use master..spt_values:
declare #MinStartDate date
select #MinStartDate = min(startDate)
from #T
select count(distinct N.number)
from #T as T
inner join master..spt_values as N
on dateadd(day, N.Number, #MinStartDate) between T.startDate and dateadd(day, -1, T.endDate)
where N.type = 'P'
Here's a solution that uses the Tally table idea (which I first heard of in an article by Itzk Ben-Gan -- I still cut and paste his code whenver the subject comes up). The idea is to generate a list of ascending integers, join the source data by range against the numbers, and then count the number of distinct numbers, as follows. (This code uses syntax from SQL Server 2008, but with minor modifications would work in SQL 2005.)
First set up some testing data:
CREATE TABLE #EventTable
(
PersonId int not null
,startDate datetime not null
,endDate datetime not null
)
INSERT #EventTable
values (1, 'Jan 1, 2011', 'Jan 4, 2011')
,(1, 'Jan 3, 2011', 'Jan 5, 2011')
,(2, 'Jan 1, 2011', 'Jan 3, 2011')
,(2, 'Jan 6, 2011', 'Jan 9, 2011')
Determine some initial values
DECLARE
#Interval bigint
,#FirstDay datetime
,#PersonId int = 1 -- (or whatever)
Get the first day and the maximum possible number of dates (to keep the cte from generating extra values):
SELECT
#Interval = datediff(dd, min(startDate), max(endDate)) + 1
,#FirstDay = min(startDate)
from #EventTable
where PersonId = #PersonId
Cut and paste over the one routine and modify and test it to only return as many integers as we'll need:
/*
;WITH
Pass0 as (select 1 as C union all select 1), --2 rows
Pass1 as (select 1 as C from Pass0 as A, Pass0 as B),--4 rows
Pass2 as (select 1 as C from Pass1 as A, Pass1 as B),--16 rows
Pass3 as (select 1 as C from Pass2 as A, Pass2 as B),--256 rows
Pass4 as (select 1 as C from Pass3 as A, Pass3 as B),--65536 rows
Pass5 as (select 1 as C from Pass4 as A, Pass4 as B),--4,294,967,296 rows
Tally as (select row_number() over(order by C) as Number from Pass5)
select Number from Tally where Number <= #Interval
*/
And now revise it by first joining to the intervals defined in each source row, and then count each distinct value found:
;WITH
Pass0 as (select 1 as C union all select 1), --2 rows
Pass1 as (select 1 as C from Pass0 as A, Pass0 as B),--4 rows
Pass2 as (select 1 as C from Pass1 as A, Pass1 as B),--16 rows
Pass3 as (select 1 as C from Pass2 as A, Pass2 as B),--256 rows
Pass4 as (select 1 as C from Pass3 as A, Pass3 as B),--65536 rows
Pass5 as (select 1 as C from Pass4 as A, Pass4 as B),--4,294,967,296 rows
Tally as (select row_number() over(order by C) as Number from Pass5)
SELECT PersonId, count(distinct Number) EventDays
from #EventTable et
inner join Tally
on dateadd(dd, Tally.Number - 1, #FirstDay) between et.startDate and et.endDate
where et.PersonId = #PersonId
and Number <= #Interval
group by PersonId
Take out the #PersonId filter and you'd get it for all persons. And with minor modification you can do it for any time interval, not just days (which is why I set the Tally table to generate severely large numbers.)
The following SQL is for the three scenarios you've described
with sampleData
AS (
SELECT 1 personid,1 startDate,4 endDate
UNION SELECT 1,3,5
UNION SELECT 2,1,3
UNION SELECT 2,6,9
UNION SELECT 3,1,5
UNION SELECT 3,4,8
UNION SELECT 3,11, 15
),
cte
AS (SELECT personid,
startdate,
enddate,
Row_number() OVER(ORDER BY personid, startdate) AS rn
FROM sampledata),
overlaps
AS (SELECT a.personid,
a.startdate,
b.enddate,
a.rn id1,
b.rn id2
FROM cte a
INNER JOIN cte b
ON a.personid = b.personid
AND a.enddate > b.startdate
AND a.rn = b.rn - 1),
nooverlaps
AS (SELECT a.personid,
a.startdate,
a.enddate
FROM cte a
LEFT JOIN overlaps b
ON a.rn = b.id1
OR a.rn = b.id2
WHERE b.id1 IS NULL)
SELECT personid,
SUM(timespent) timespent
FROM (SELECT personid,
enddate - startdate timespent
FROM nooverlaps
UNION
SELECT personid,
enddate - startdate
FROM overlaps) t
GROUP BY personid
Produces this result
Personid timeSpent
----------- -----------
1 4
2 5
3 11
Notes: I used the simple integers but the DateDiffs should work too
Correctness issue There is a correctness issue if your data is allowed to have multiple overlaps as Cheran S noted, the results won't be correct and you should use one of the other answers instead. His example used [1,5],[4,8],[7,11] for the same person ID
Algebra. If B-n is the ending time of the nth event, and A-n is the starting time of the nth event, then the sum of the differences is the difference of the sums. So you can write
select everything else, sum(cast(endDate as int)) - sum(cast(startDate as int)) as daysSpent
If your dates have no time component, this works. Otherwise, you could use a real.
Try something like this
select
personId,
sum(DateDuration) as TotalDuration
from
(
select personId, datediff(dd, startDate, endDate) as DateDuration
from yourEventTable
) a
group by personId
;WITH cte(gap)
AS
(
SELECT sum(b-a) from xxx GROUP BY uid
)
SELECT * FROM cte
Edit 1: I have modified both solutions to get correct results.
Edit 2: I have done comparative tests using the solutions proposed by Mikael Eriksson, Conrad Frix, Philip Kelley and me. All tests use an EventTable with the following structure:
CREATE TABLE EventTable
(
EventID INT IDENTITY PRIMARY KEY
,PersonId INT NOT NULL
,StartDate DATETIME NOT NULL
,EndDate DATETIME NOT NULL
,CONSTRAINT CK_StartDate_Before_EndDate CHECK(StartDate < EndDate)
);
Also, all tests use warm buffer (no DBCC DROPCLEANBUFFERS) and cold [plan] cache (I have executed DBCC FREEPROCCACHE before every test). Because some solutions use a filter(PersonId = 1) and others not, I have inserted into EventTable rows for only one person (INSERT ...(PersonId,...) VALUES (1,...)).
These are the results:
My solutions use recursive CTEs.
Solution 1:
WITH BaseCTE
AS
(
SELECT e.StartDate
,e.EndDate
,e.PersonId
,ROW_NUMBER() OVER(PARTITION BY e.PersonId ORDER BY e.StartDate, e.EndDate) RowNumber
FROM EventTable e
), RecursiveCTE
AS
(
SELECT b.PersonId
,b.RowNumber
,b.StartDate
,b.EndDate
,b.EndDate AS MaxEndDate
,1 AS PseudoDenseRank
FROM BaseCTE b
WHERE b.RowNumber = 1
UNION ALL
SELECT crt.PersonId
,crt.RowNumber
,crt.StartDate
,crt.EndDate
,CASE WHEN crt.EndDate > prev.MaxEndDate THEN crt.EndDate ELSE prev.MaxEndDate END
,CASE WHEN crt.StartDate <= prev.MaxEndDate THEN prev.PseudoDenseRank ELSE prev.PseudoDenseRank + 1 END
FROM RecursiveCTE prev
INNER JOIN BaseCTE crt ON prev.PersonId = crt.PersonId
AND prev.RowNumber + 1 = crt.RowNumber
), SumDaysPerPersonAndInterval
AS
(
SELECT src.PersonId
,src.PseudoDenseRank --Interval ID
,DATEDIFF(DAY, MIN(src.StartDate), MAX(src.EndDate)) Days
FROM RecursiveCTE src
GROUP BY src.PersonId, src.PseudoDenseRank
)
SELECT x.PersonId, SUM( x.Days ) DaysPerPerson
FROM SumDaysPerPersonAndInterval x
GROUP BY x.PersonId
OPTION(MAXRECURSION 32767);
Solution 2:
DECLARE #Base TABLE --or a temporary table: CREATE TABLE #Base (...)
(
PersonID INT NOT NULL
,StartDate DATETIME NOT NULL
,EndDate DATETIME NOT NULL
,RowNumber INT NOT NULL
,PRIMARY KEY(PersonID, RowNumber)
);
INSERT #Base (PersonID, StartDate, EndDate, RowNumber)
SELECT e.PersonId
,e.StartDate
,e.EndDate
,ROW_NUMBER() OVER(PARTITION BY e.PersonID ORDER BY e.StartDate, e.EndDate) RowNumber
FROM EventTable e;
WITH RecursiveCTE
AS
(
SELECT b.PersonId
,b.RowNumber
,b.StartDate
,b.EndDate
,b.EndDate AS MaxEndDate
,1 AS PseudoDenseRank
FROM #Base b
WHERE b.RowNumber = 1
UNION ALL
SELECT crt.PersonId
,crt.RowNumber
,crt.StartDate
,crt.EndDate
,CASE WHEN crt.EndDate > prev.MaxEndDate THEN crt.EndDate ELSE prev.MaxEndDate END
,CASE WHEN crt.StartDate <= prev.MaxEndDate THEN prev.PseudoDenseRank ELSE prev.PseudoDenseRank + 1 END
FROM RecursiveCTE prev
INNER JOIN #Base crt ON prev.PersonId = crt.PersonId
AND prev.RowNumber + 1 = crt.RowNumber
), SumDaysPerPersonAndInterval
AS
(
SELECT src.PersonId
,src.PseudoDenseRank --Interval ID
,DATEDIFF(DAY, MIN(src.StartDate), MAX(src.EndDate)) Days
FROM RecursiveCTE src
GROUP BY src.PersonId, src.PseudoDenseRank
)
SELECT x.PersonId, SUM( x.Days ) DaysPerPerson
FROM SumDaysPerPersonAndInterval x
GROUP BY x.PersonId
OPTION(MAXRECURSION 32767);

Data Aggregation in SQL Server 2005

I need a query for SQl server 2005 (SQL server management studio express).
I have data stored as 1 minute time frame (1 minute each row), for each table columns are ID, Symbol, DateTime, Open, High, Low, Close, Volume.
I need to convert (compress) to every possibile multiple time frame, so let's say 10 minutes, 13, 15, and so on.
Provide full details if somebody could help.
Thanks
Alberto
Alberto, it looks like you need a "Group By" clause in SQL statements (as Leppie stated). So, you should better look for it.
First you should filter the rows that is subject for aggregation by using begin and end date/time and then group them by the mentioned clause.
Here is the first link when i search "sql group by" keywords via Google.
;WITH cte AS
(SELECT *,
(32 * CAST([DATETIME] AS INT)) + DATEPART(HOUR,[DATETIME]) + (DATEPART(MINUTE,[DATETIME])/15)/4.0 AS Seg
FROM prices
)
,cte1 AS
(
SELECT *,
ROW_NUMBER() OVER (PARTITION BY Symbol,Seg ORDER BY [DATETIME]) AS RN_ASC ,
ROW_NUMBER() OVER (PARTITION BY Symbol,Seg ORDER BY [DATETIME] DESC) AS RN_DESC
FROM cte
)
SELECT
Symbol,
Seg,
MAX(CASE WHEN RN_ASC=1 THEN [DATETIME] END) AS OpenDateTime,
MAX(CASE WHEN RN_ASC=1 THEN [OPEN] END) AS [OPEN],
MAX(High) High,
MIN(Low) Low,
SUM(Volume) Volume,
MAX(CASE WHEN RN_DESC=1 THEN [CLOSE] END) AS [CLOSE],
MAX(CASE WHEN RN_DESC=1 THEN [DATETIME] END) AS CloseDateTime
FROM cte1
GROUP BY Symbol,Seg
ORDER BY OpenDateTime
Or another approach that may be worth testing to see if it is any faster.
DECLARE #D1 DATETIME
DECLARE #D2 DATETIME
DECLARE #Interval FLOAT
SET #D1 = '2010-10-18 09:00:00.000'
SET #D2 = '2010-10-19 18:00:00.000'
SET #Interval = 15
;WITH
L0 AS (SELECT 1 AS c UNION ALL SELECT 1),
L1 AS (SELECT 1 AS c FROM L0 A CROSS JOIN L0 B),
L2 AS (SELECT 1 AS c FROM L1 A CROSS JOIN L1 B),
L3 AS (SELECT 1 AS c FROM L2 A CROSS JOIN L2 B),
L4 AS (SELECT 1 AS c FROM L3 A CROSS JOIN L3 B),
Nums AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS i FROM L4),
Ranges AS(
SELECT
DATEADD(MINUTE,#Interval*(i-1),#D1) AS StartRange,
DATEADD(MINUTE,#Interval*i,#D1) AS NextRange
FROM Nums where i <= 1+CEILING(DATEDIFF(MINUTE,#D1,#D2)/#Interval))
,cte AS (
SELECT
*
,ROW_NUMBER() OVER (PARTITION BY Symbol,r.StartRange ORDER BY [DateTime]) AS RN_ASC
,ROW_NUMBER() OVER (PARTITION BY Symbol,r.StartRange ORDER BY [DateTime] DESC) AS RN_DESC
FROM Ranges r
JOIN prices p ON p.[DateTime] >= r.StartRange and p.[DateTime] < r.NextRange )
SELECT
Symbol,
MAX(CASE WHEN RN_ASC=1 THEN [DateTime] END) AS OpenDateTime,
MAX(CASE WHEN RN_ASC=1 THEN [Open] END) AS [Open],
MAX(High) High,
MIN(Low) Low,
SUM(Volume) Volume,
MAX(CASE WHEN RN_DESC=1 THEN [Close] END) AS [Close],
MAX(CASE WHEN RN_DESC=1 THEN [DateTime] END) AS CloseDateTime
FROM cte
GROUP BY Symbol,StartRange
ORDER BY OpenDateTime
Not simple "Group By" - Open and Close values need taken for first and correspondingly last row in group. Or at least so is it for Forex data :)
Would be prettier with a stored proc to extract MIN(datetime) first, but here's a sketch:
WITH quarters(q) AS (
SELECT DISTINCT
15*CAST(DATEDIFF("n",'2000/01/01',dataora) / 15 as Int) AS primo
FROM
Prezzi
)
SELECT
simbolo, DATEADD("n",q,'2000/01/01') AS tick,
MIN(minimo) AS minimo, MAX(massimo) AS massimo,
(SELECT
TOP 1 apertura FROM Prezzi P
WHERE
P.simbolo = simbolo AND
P.dataora >= DATEADD("n",q,'2000/01/01')
ORDER BY
P.dataora ASC
) as primaapertura,
(SELECT
TOP 1 chiusura FROM Prezzi P
WHERE
P.simbolo = simbolo AND
P.dataora < DATEADD("s",14*60+59,DATEADD("n",q,'2000/01/01'))
ORDER BY
P.dataora DESC
) as ultimachiusara,
SUM(volume) / COUNT(*) AS volumemedio
FROM
quarters INNER JOIN Prezzi
ON dataora BETWEEN DATEADD("n",q,'2000/01/01')
AND DATEADD("s",14*60+59,DATEADD("n",q,'2000/01/01'))
GROUP BY
simbolo, DATEADD("n",q,'2000/01/01')
ORDER BY
1, 2
The WITH clause gets a list of 15 minute intervals, rounded down, in your dataset (let's assume nothing before 2000).
Then use those intervals to group by 14:59 interval.
For the volume, you'll have to decide if you want average or the total.
The syntax might be a tad off, but you should get the idea.
EDIT: Adjusted MIN(open), MIN(close) to pick up FIRST and LAST. In reality this won't change much, as the concept of Open and Close depend on knowing the time difference between the exchange where the quote originated and the clock of the computer collecting the data.
In addition, unless the OP has the privilege of a real-time feed from all the exchanges, all the quotes are delayed by 20 minutes anyway.
EDIT(2): Quite right, FIRST and LAST are carry-overs from my IBM days >;-)
Solution now selects first and last quotes during the interval using TOP with ASC/DESC.
Declare #tbl1MinENI Table
(ID int identity,
Simbolo char(3),
DataOra datetime,
Apertura numeric(15,4),
Massimo numeric(15,4),
Minimo numeric(15,4),
Chiusura numeric(15,4),
Volume int)
Insert Into #tbl1MinENI ( Simbolo, DataOra, Apertura, Massimo, Minimo, Chiusura, Volume)
Values
('ENI', '2010/10/18 09:00:00', 16.1100, 16.1800, 16.1100, 16.1400, 244015),
('ENI', '2010/10/18 09:01:00', 16.1400, 16.1400, 16.1300, 16.1400, 15692 ),
('ENI', '2010/10/18 09:02:00', 16.1400, 16.1500, 16.1400, 16.1500, 147035),
('ENI', '2010/10/18 09:03:00', 16.1500, 16.1600, 16.1500, 16.1600, 5181 ),
('ENI', '2010/10/18 09:04:00', 16.1600, 16.2000, 16.1600, 16.1900, 5134 ),
('ENI', '2010/10/18 09:05:00', 16.1900, 16.1900, 16.1800, 16.1800, 15040 ),
('ENI', '2010/10/18 09:06:00', 16.1900, 16.1900, 16.1600, 16.1600, 68867 ),
('ENI', '2010/10/18 09:07:00', 16.1600, 16.1600, 16.1600, 16.1600, 7606 ),
('ENI', '2010/10/18 09:08:00', 16.1500, 16.1500, 16.1500, 16.1500, 725 ),
('ENI', '2010/10/18 09:09:00', 16.1600, 16.1600, 16.1600, 16.1600, 81 ),
('ENI', '2010/10/18 09:10:00', 16.1700, 16.1800, 16.1700, 16.1700, 68594 ),
('ENI', '2010/10/18 09:11:00', 16.1800, 16.1800, 16.1800, 16.1800, 6619 )
Declare #nRowsPerGroup int = 3
;With Prepare as
(
Select datediff(minute, '2010/10/18 09:00:00', DataOra)/#nRowsPerGroup as Grp,
Row_Number() over (partition by datediff(minute, '2010/10/18 09:00:00', DataOra)/#nRowsPerGroup order by dataora) as rn,
*
From tbl1MinENI
), b as
(
Select a.Grp,
Min(a.DataOra) as GroupDataOra,
Min(ID) AperturaID,
max(a.Massimo) as Massimo,
Min(a.Minimo) as Minimo,
max(id) ChiusuraID,
sum(a.Volume) as Volume
From Prepare a
Group by Grp
)
Select b.grp,
b.GroupDataOra,
ta.Apertura,
b.Massimo,
b.Minimo,
tc.Chiusura,
b.Volume
From b
Inner Join tbl1MinENI ta on ta.ID=b.AperturaID
Inner Join tbl1MinENI tc on tc.ID=b.ChiusuraID
;