finding local maximums and local minimums in SQL - sql-server-2005

In order to find the max draw down of a stock price versus time graph, you first have to find all local maximums (peaks) and local minimums (valleys) for a given set of prices and days. How would you do this in SQL Server 2005?
edit:
There is a brute force way of doing this with cursors:
compare the high of the first day to the high of the next day.
if the high of the first day is higher than the high of the next day, the high of the first day is a local Max.
effectively, I need to find every point at which the trend of the price graph changes direction.
edit2: I should note that the database table to work from has the following columns:
stockid int
day date
hi int --this is in pennies
low int --also in pennies
so for a given date range, you'll see the same stockid every day for that date range.

OK, step by step here is what I am thinking:
1 - Find all your "peaks" which are max values with LOWER max values the next day:
-- Step 1: collect the local peaks of one stock, i.e. the days whose high is
-- greater than the high of the next recorded day for the same stock.
-- [table] and X are placeholders for the real price table and the stock id.
CREATE TABLE #HiTable (hi int, day date)

INSERT INTO #HiTable (hi, day)
SELECT t1.hi, t1.day
FROM [table] AS t1
WHERE t1.StockID = X
  AND EXISTS (
        SELECT 1
        FROM [table] AS t2
        WHERE t2.StockID = t1.StockID
          AND t2.hi < t1.hi
          -- Compare only with the immediately following day; matching ANY
          -- later day would flag almost every row as a peak.
          AND t2.day = (SELECT MIN(t3.day)
                        FROM [table] AS t3
                        WHERE t3.StockID = t1.StockID
                          AND t3.day > t1.day)
      )
2 - Find all your "valleys" which are the min values with HIGHER min values the next day:
-- Step 2: collect the local valleys of one stock, i.e. the days whose low is
-- smaller than the low of the next recorded day for the same stock.
-- [table] and X are placeholders for the real price table and the stock id.
CREATE TABLE #LowTable (low int, day date)

INSERT INTO #LowTable (low, day)
SELECT t1.low, t1.day
FROM [table] AS t1
WHERE t1.StockID = X
  AND EXISTS (
        SELECT 1
        FROM [table] AS t2
        WHERE t2.StockID = t1.StockID
          AND t2.low > t1.low
          -- Compare only with the immediately following day; matching ANY
          -- later day would flag almost every row as a valley.
          AND t2.day = (SELECT MIN(t3.day)
                        FROM [table] AS t3
                        WHERE t3.StockID = t1.StockID
                          AND t3.day > t1.day)
      )
3 - Combine these into a table ordered by date with a identity value to keep us in order
-- Step 3: merge valleys and peaks into one table. Valley rows carry a NULL hi
-- and peak rows carry a NULL low. The ORDER BY on the INSERT ... SELECT makes
-- the identity column (autoid) follow the chronological order of the rows.
CREATE TABLE #TableVar (low int, hi int, day date, autoid int IDENTITY(1,1))

INSERT INTO #TableVar (low, hi, day)
SELECT turning_points.low, turning_points.hi, turning_points.day
FROM (
    SELECT low, NULL AS hi, day FROM #LowTable
    UNION ALL
    SELECT NULL AS low, hi, day FROM #HiTable
) AS turning_points
ORDER BY turning_points.day
4 - Delete outliers
-- Step 4: trim both ends of the combined sequence. In #TableVar a row with
-- low IS NULL is a peak row and a row with hi IS NULL is a valley row.
-- The subqueries must read #TableVar itself (there is no table named #Table).

-- Remove every row that comes after the last peak row.
DELETE FROM #TableVar
WHERE autoid > (SELECT MAX(autoid) FROM #TableVar WHERE low IS NULL)

-- Remove every row that comes before the first valley row.
DELETE FROM #TableVar
WHERE autoid < (SELECT MIN(autoid) FROM #TableVar WHERE hi IS NULL)

Admittedly not thoroughly tested, but how about using a CTE and ROW_NUMBER() to do this in two steps:
1) Identify the next subsequent hi for each row.
2) For any row whose immediately following row has a subsequent high less than the current row's, the current row must be a local max.
or something like that:
begin
    -- First attempt at flagging local highs. The author notes right after this
    -- snippet that the approach is wrong; only the variable syntax is repaired
    -- here (T-SQL variables are prefixed with @, not #).
    declare @highTable as table (high bigint, day date)  -- declared but never used below
    declare @securityid int,
            @start datetime,
            @end datetime

    -- Unseparated yyyymmdd literals are read the same way under every
    -- DATEFORMAT setting. NOTE(review): '2-1-2010' is assumed to have meant
    -- 1 February 2010; the range is not applied by this query anyway.
    set @start = '20100101'
    set @end = '20100201'

    select @securityid = id from security where riccode = 'MSFT.OQ';

    with highsandlows_cte as (
        select
            row_number() over (order by today.day) as i
            , today.high
            , today.day
            -- earliest day, on or after this row's day, with a strictly higher high
            , (select top 1 nextHi.day
               from quotes nextHi
               where nextHi.high > today.high
                 and nextHi.day >= today.day
                 and nextHi.securityId = today.securityId
               order by nextHi.day asc) as nextHighestDay
        from
            quotes today
        where
            today.securityid = @securityid
    )
    select
        t1.*
        -- 1 when the following row's nextHighestDay is earlier than this row's
        , (coalesce((select 1
                     from highsandlows_cte t2
                     where t1.i + 1 = t2.i
                       and t1.nextHighestDay > t2.nextHighestDay), 0)) as isHigh
    from
        highsandlows_cte t1
    order by
        t1.day
end
ok the above is wrong - this appears to be more on track:
begin
    -- Flags local maxima of `high` and local minima of `low` for one security
    -- by comparing every row with its previous and next row (by day).
    declare @securityid int,
            @start datetime,
            @end datetime

    -- Unseparated yyyymmdd literals are read the same way under every
    -- DATEFORMAT setting. NOTE(review): '2-1-2010' is assumed to have meant
    -- 1 February 2010 (month-day-year) - confirm.
    set @start = '20100101'
    set @end = '20100201'

    select @securityid = id from security where riccode = 'MSFT.OQ';

    with highsandlows_cte as (
        select
            row_number() over (order by today.day) as i
            , today.high
            , today.day
            , today.low
        from
            quote today
        where
            today.securityid = @securityid
            -- Both bounds are exclusive, as before. The variables hold midnight
            -- values, so comparing them directly matches the earlier
            -- convert(varchar(10), ..., 111) round trip without going through text.
            and today.day > @start
            and today.day < @end
    )
    select
        cur.day
        , cur.high
        , cur.low
        -- A missing neighbour (first/last row) counts as "lower"/"higher".
        -- Strict comparisons mean that equal neighbouring values are never flagged.
        , case when ((cur.high > prv.high or prv.high is null)
                 and (cur.high > nxt.high or nxt.high is null)) then 1 else 0 end as isLocalMax
        , case when ((cur.low < prv.low or prv.low is null)
                 and (cur.low < nxt.low or nxt.low is null)) then 1 else 0 end as isLocalMin
    from
        highsandlows_cte cur
        left join highsandlows_cte nxt
            on cur.i + 1 = nxt.i
        left join highsandlows_cte prv
            on cur.i - 1 = prv.i
    order by
        cur.day
end
Get issues with duplicates (highs / lows) though...

Related

Selecting count of consecutives dates before and after a specified date based on start/end

I'm trying to determine the number of records with consecutive dates (previous record ends on the same date as the start date of the next record) before and after a specified date, and ignore any consecutive records as soon as there is a break in the chain.
If I have the following data:
-- declare vars (T-SQL local variables are prefixed with @; a # prefix denotes a temp table)
DECLARE @dateToCheck date = '2020-09-20'
DECLARE @numRecsBefore int = 0
DECLARE @numRecsAfter int = 0
DECLARE @tempID int  -- declared but not used in this script

-- temp table: one row per date range; prevEndDate is filled in by the loop below
CREATE TABLE #dates
(
    [idx] INT IDENTITY(1,1),
    [startDate] DATETIME,
    [endDate] DATETIME,
    [prevEndDate] DATETIME
)

-- insert temp table
INSERT INTO #dates
    ( [startDate], [endDate] )
VALUES  ( '2020-09-01', '2020-09-04' ),
        ( '2020-09-04', '2020-09-10' ),
        ( '2020-09-10', '2020-09-16' ),
        ( '2020-09-17', '2020-09-19' ),
        ( '2020-09-19', '2020-09-20' ),
        -- rows from here on start on or after the date being checked
        ( '2020-09-20', '2020-09-23' ),
        ( '2020-09-25', '2020-09-26' ),
        ( '2020-09-27', '2020-09-28' ),
        ( '2020-09-28', '2020-09-30' ),
        -- NOTE(review): this end date precedes its start date - possibly a typo in the sample data
        ( '2020-10-01', '2020-09-05' )

-- update with previous records endDate
DECLARE @maxRows int = (SELECT MAX(idx) FROM #dates)
DECLARE @intCount int = 0
-- idx starts at 1, so the iteration for 0 touches no row, and idx 1 receives
-- NULL because there is no row with idx 0.
WHILE @intCount <= @maxRows
BEGIN
    UPDATE #dates
    SET prevEndDate = (SELECT endDate FROM #dates WHERE idx = (@intCount - 1))
    WHERE idx = @intCount

    SET @intCount = @intCount + 1
END

-- clear any breaks in the chain?
-- These counts include every row whose start equals the previous row's end on
-- the given side of @dateToCheck; they do not stop at the first break in the chain.
-- number of consecutive records before this date
SET @numRecsBefore = (SELECT COUNT(idx) FROM #dates WHERE startDate = prevEndDate AND endDate <= @dateToCheck)
-- number of consecutive records after this date
SET @numRecsAfter = (SELECT COUNT(idx) FROM #dates WHERE startDate = prevEndDate AND endDate >= @dateToCheck)

-- return & clean up
SELECT * FROM #dates
SELECT @numRecsBefore AS numBefore, @numRecsAfter AS numAfter
DROP TABLE #dates
With the specified date being '2020-09-20', I would expect @numRecsBefore = 2 and @numRecsAfter = 1. That is not what I am getting, as it's counting all the consecutive records.
There has to be a better way to do this. I know the loop isn't optimal, but I couldn't get LAG() or LEAD() to work. I've spent all morning trying different methods and searching, but everything I find doesn't deal with two dates, or breaks in the chain.
This reads like a gaps-and-islands problem. Islands represent rows whose date ranges are adjacent, and you want to count how many records precede or follow the given date in the same island.
You could do:
-- Counts the rows that precede / follow @dateToCheck inside the same "island"
-- of adjacent date ranges (a range is adjacent when its startdate equals the
-- previous row's enddate). @dateToCheck is the variable declared by the caller.
;with with_prev as (
    -- attach the previous row's end date to every range
    select d.startdate,
           d.enddate,
           lag(d.enddate) over (order by d.startdate) as lag_enddate
    from #dates d
),
islands as (
    -- a running count of chain breaks gives every island its own grp number
    select p.startdate,
           p.enddate,
           sum(case when p.startdate = p.lag_enddate then 0 else 1 end)
               over (order by p.startdate) as grp
    from with_prev p
),
counted as (
    -- position of each row counted from the start and from the end of its island
    select i.startdate,
           i.enddate,
           count(*) over (partition by i.grp order by i.startdate) as numRecsBefore,
           count(*) over (partition by i.grp order by i.startdate desc) as numRecsAfter
    from islands i
)
select
    -- range that ends at or spans the date: rows up to and including it
    max(case when @dateToCheck > c.startdate and @dateToCheck <= c.enddate then c.numRecsBefore end) as numRecsBefore,
    -- range that starts at or spans the date: rows from it to the island's end
    max(case when @dateToCheck >= c.startdate and @dateToCheck < c.enddate then c.numRecsAfter end) as numRecsAfter
from counted c
This uses lag() and a cumulative sum() to define the islands. Then a window count gives the number of preceding and following records on the same island. The final step is conditional aggregation; extra care needs to be taken with the inequalities to take into account the various possibilities (typically, the date you search for might not always match a range bound).
Demo on DB Fiddle
I think this is what you are after; however, it does not give the results stated in your question; I suspect that is because those aren't actually the expected results? One of the conditional aggregates may also need to use >= or <=, but I don't know which:
-- Marks each range whose start equals the previous range's end (IsSame = 1,
-- otherwise NULL) and counts the marked rows on either side of @dateToCheck.
-- COUNT ignores NULLs, so unmarked rows are not counted.
-- @dateToCheck is the variable declared by the caller.
WITH CTE AS (
    SELECT startDate,
           endDate,
           CASE startDate
               WHEN LAG(endDate) OVER (ORDER BY startDate ASC) THEN 1
           END AS IsSame
    FROM #dates d
)
SELECT COUNT(CASE WHEN startDate < @dateToCheck THEN IsSame END) AS numBefore,
       COUNT(CASE WHEN startDate > @dateToCheck THEN IsSame END) AS numAfter
FROM CTE;

How to calculate MTD given daily account balance in SQL Server?

I have a table with columns [accountid], [DateEnding], and [AccountBalance].
I need to calculate MTD using the balance of the current month and subtracting the account balance from the last day of the previous month for each accountid.
So far I have this:
-- Asker's attempt: for every row, subtract the FIRST_VALUE of AccountBalance
-- taken over rows of the same account whose DateEnding, shifted back one
-- month, falls in the same year and month, ordered by DateEnding descending
-- (i.e. the latest balance within that partition).
SELECT
    t.[accountid],
    t.[DateEnding],
    t.[AccountBalance],
    t.[AccountBalance]
        - FIRST_VALUE(t.[AccountBalance]) OVER (
              PARTITION BY
                  t.[accountid],
                  YEAR(DATEADD(mm, -1, t.[DateEnding])),
                  MONTH(DATEADD(mm, -1, t.[DateEnding]))
              ORDER BY t.[DateEnding] DESC
          ) AS [AccountBalance MTD Last]
FROM [test] AS t
ORDER BY
    t.[accountid],
    t.[DateEnding];
Here, for each distinct account, we find the latest record available according to DateEnding
We then find the last day of the previous month by subtracting a number of days equal to the current day-of-month number, e.g. for 23rd April 2019 we subtract 23 days to get 31st March 2019.
we can then find the balance on that day.
Then put the calculation together in the SELECT
-- Month-to-date change per account: latest balance minus the balance recorded
-- on the last day of the month before the latest record.
SELECT
    acct.accountid,
    latest.DateEnding,
    prior_eom.EOMbalance,
    latest.LatestBalance,
    latest.LatestBalance - prior_eom.EOMbalance AS EOM
FROM (
        -- one row per account
        SELECT DISTINCT src.accountid
        FROM test AS src
     ) AS acct
CROSS APPLY (
        -- most recent record of the account
        SELECT TOP 1
            cur.AccountBalance AS LatestBalance,
            cur.[DateEnding]
        FROM test AS cur
        WHERE cur.[accountid] = acct.accountid
        ORDER BY cur.[DateEnding] DESC
     ) AS latest
CROSS APPLY (
        -- record dated exactly DAY(latest date) days before the latest date,
        -- which is the last day of the preceding month
        SELECT TOP 1
            eom.AccountBalance AS EOMbalance
        FROM test AS eom
        WHERE eom.[accountid] = acct.accountid
          AND eom.[DateEnding] = DATEADD(day, -DAY(latest.DateEnding), latest.DateEnding)
        ORDER BY eom.[DateEnding] DESC
     ) AS prior_eom
The first answer seems a little complicated for this problem (Cross Apply isn't necessary here).
The following may be easier for you:
I first look at the current day's account balances in subquery 'a'.
Then I look at the account balances from the last day of last month's data, in subquery 'b'.
Then it's just a matter of subtracting the two to show the MTD delta:
-- MTD delta per account as of today: today's balance minus the balance on the
-- last day of the previous month. Accounts without a prior month-end row are
-- kept (left join) and get NULL in the two derived columns.
select
    a.accountid,
    a.DateEnding,
    a.AccountBalance                     as [Current AccountBalance],
    b.AccountBalance                     as [EOM prior AccountBalance], --added for clarity
    a.AccountBalance - b.AccountBalance  as [AccountBalance MTD Last]
from (
        -- getdate() returns the current date and time, so this subquery is
        -- always evaluated with respect to today
        select accountid, DateEnding, AccountBalance
        from #test
        where DateEnding = cast(getdate() as date)
     ) a
left join (
        -- the DATEADD/DATEDIFF expression yields the last day of last month
        select accountid, AccountBalance
        from #test
        where DateEnding = DATEADD(MONTH, DATEDIFF(MONTH, -1, GETDATE()) - 1, -1)
     ) b
    on a.accountid = b.accountid
Here is the SQL that makes this sample data and #test table. Simply execute it to have your own '#test' table to run against:
/*drop table #test
drop table #dates */
-- Builds the sample data: #dates holds one row per calendar day from
-- 2018-01-01 onwards, #test holds a random balance per account and day.
create table #test ([accountid] varchar(255), [DateEnding] date, [AccountBalance] decimal(16,2))
create table #dates (rnk int, dt date)

-- T-SQL local variables are prefixed with @ (a # prefix denotes a temp table).
DECLARE
    @basedate DATE,
    @d INT
SELECT
    @basedate = '20180101',
    @d = 1

-- seed the calendar with the base date
insert into #dates (dt)
values (@basedate)

-- append one day per iteration; the bound is the number of days from the base
-- date to today, plus 2
WHILE @d < (select datediff(day, @basedate, getdate()) + 2)
BEGIN
    INSERT INTO #dates (dt)
    values (DATEADD(day, 1, (select max(dt) from #dates)))
    set @d = @d + 1
END

-- number the days chronologically (rnk = 1 for the base date)
update a
set a.rnk = b.rnk
from #dates a
left join (select rank() over (order by dt) rnk, dt from #dates) b on a.dt = b.dt

declare @a int
set @a = 1
declare @i int
set @i = 1

-- accounts 1..19; for each, one row per day number below the highest rnk
while @a < 20
begin
    while @i < (select max(rnk) from #dates)
    begin
        insert into #test ([accountid], [DateEnding], [AccountBalance])
        values (@a, (select dt from #dates where rnk = @i), cast(rand() * 1000.0 + @i as decimal(16,2)))
        set @i = @i + 1
    end
    set @a = @a + 1
    set @i = 1
end

sql server while date not weekend or a specific date

I'm trying to write an SQL while loop to increment a date until it doesn't match a date in two other tables and is not a Saturday or a Sunday.
Something like this
-- Moves @DueDate forward one day at a time until it is neither listed in tbl1
-- or tbl2 nor falls on a weekend. @ComplaintId must be declared by the caller.
DECLARE @DueDate datetime

SELECT @DueDate = [datetime] FROM tbl_status WHERE (parent_id = @ComplaintId)

-- Keep advancing while ANY blocking condition holds. (Joining the conditions
-- with AND could never be true: a date cannot be both a Saturday and a Sunday.)
-- DATENAME(weekday, ...) returns the day name; DATEPART(d, ...) would return
-- the day-of-month number. The names compared here assume an English session
-- language.
WHILE EXISTS (SELECT 1 FROM tbl1 WHERE [date] = @DueDate)
   OR EXISTS (SELECT 1 FROM tbl2 WHERE [date] = @DueDate)
   OR DATENAME(weekday, @DueDate) IN ('Saturday', 'Sunday')
BEGIN
    SET @DueDate = DATEADD(day, 1, @DueDate)
END
Can anyone help
thanks
As I mentioned in my comment, you are going about this in a very inefficient manner with your while loop.
If you don't have a table of dates to use in a lookup, you can create one with a derived table, otherwise known as a Common Table Expression:
-- Set up the test data (table variables are prefixed with @):
declare @t1 table (d date);
declare @t2 table (d date);
insert into @t1 (d) values ('20161230'),('20170111'),('20170110');
insert into @t2 (d) values ('20161225'),('20170105'),('20170106');

-- Declare your DueDate:
declare @DueDate date = '20170105';

-- Use a CTE to build a table of dates. You will want to set the Start and End dates automatically with SELECT statements:
declare @DatesStart date = '20161201';
declare @DatesEnd date = '20170225';

with Tally0 as
(
    -- ten rows used as the building block for the number sequence
    select x from (values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) as x(x)
)
,Tally1 as
(
    -- zero-based row numbers
    select row_number() over (order by (select null)) - 1 as rn
    from Tally0 t1              -- 10 rows   -- Add more CROSS APPLY joins
    cross apply Tally0 t2       -- 100 rows  -- to get enough rows to cover
    cross apply Tally0 t3       -- 1000 rows -- your date range.
)
,Dates as
(
    -- one row per day from @DatesStart to @DatesEnd inclusive
    select dateadd(d, t.rn, @DatesStart) as DateValue
    from Tally1 t
    where t.rn <= datediff(d, @DatesStart, @DatesEnd)
)
select min(d.DateValue) as NextDate     -- SELECT the next available Date
from Dates d
    left join @t1 t1
        on (d.DateValue = t1.d)
    left join @t2 t2
        on (d.DateValue = t2.d)
where t1.d is null                      -- that isn't in the first table,
    and t2.d is null                    -- isn't in the second table,
    and datename(weekday, d.DateValue) not in ('Saturday','Sunday')  -- isn't on a Saturday or Sunday,
    and d.DateValue > @DueDate          -- and is after the due date.

get best sales rep weekly SQL

I need a bit of help with a SQL Server issue.
I have 2 tables:
complete_sales_raw
(
Id int Identity(1,1) PK,
RepId int FK in sale_reps,
Revenue decimal(15,2),
Sale_date datetime2(7)
)
and
sale_reps
(
Id int Identity(1,1) PK,
RepName nvarchar(50)
)
What I need to do is get best sales rep based on the total revenue for each week, starting with 2014-06-01 and ending at current date.
Each week has 7 days and the first day is 2014-06-01.
So far I got to here:
-- Stores in #tmp1 the single rep with the highest total revenue among sales
-- dated 0 to 6 days after 2014-06-01 (the first week only).
SELECT TOP (1)
    sr.RepName       AS RepName,
    SUM(csr.Revenue) AS Revenue
INTO #tmp1
FROM complete_sales_raw AS csr
INNER JOIN sale_reps AS sr
    ON csr.RepId = sr.Id
WHERE DATEDIFF(d, '2014-06-01', Sale_date) BETWEEN 0 AND 6
GROUP BY sr.RepName
ORDER BY SUM(csr.Revenue) DESC
But this only returns the best sale rep for the first week and I need it for each week.
All help is appreciated.
ok so, I created a week table like so
-- Rebuilds dbo.tmp4 with one row per 7-day week, the first week starting on
-- 2014-06-01 and the last one starting on or before @end_date.
IF ( OBJECT_ID('dbo.tmp4') IS NOT NULL )
    DROP TABLE dbo.tmp4
GO
CREATE TABLE tmp4 (
    StartDate datetime,
    Enddate datetime,
    WeekNo varchar(20)
)

-- T-SQL local variables are prefixed with @ (a # prefix denotes a temp table).
DECLARE
    @week_start DATETIME,
    @end_date DATETIME

-- Unseparated yyyymmdd literals are read the same way under every DATEFORMAT setting.
SET @week_start = '20140601'
SET @end_date = '20150103'

-- Step in fixed 7-day increments instead of watching DATEPART(WEEK, ...) change:
-- the week boundaries then do not depend on the DATEFIRST setting, a week is
-- not split at the year boundary, and the final week is not dropped.
WHILE @week_start <= @end_date
BEGIN
    INSERT INTO tmp4 (StartDate, Enddate, WeekNo)
    VALUES (@week_start, DATEADD(day, 6, @week_start), DATEPART(WEEK, @week_start))

    SET @week_start = DATEADD(day, 7, @week_start)
END
GO
and then I used Gordon's answer and made this:
-- Ranks reps by total revenue within each week of tmp4 (a sale belongs to a
-- week when it is dated 0 to 6 days after the week's StartDate) and stores
-- the ranking in tmp1.
SELECT
    t.StartDate      AS StartDate,
    sr.RepName       AS RepName,
    SUM(csr.Revenue) AS Revenue,
    RANK() OVER (
        PARTITION BY t.StartDate
        ORDER BY SUM(csr.Revenue) DESC
    )                AS seqnum
INTO tmp1
FROM tmp4 AS t
CROSS JOIN complete_sales_raw AS csr
INNER JOIN sale_reps AS sr
    ON csr.RepId = sr.Id
WHERE DATEDIFF(d, t.StartDate, MAS_PostDate) BETWEEN 0 AND 6
GROUP BY
    sr.RepName,
    t.StartDate

-- The top-ranked rep(s) of every week; ties share rank 1.
SELECT
    StartDate,
    RepName,
    Revenue,
    seqnum
FROM tmp1
WHERE seqnum = 1
ORDER BY StartDate
which returns the best sales_rep for each week
You can do an aggregation to get the total sales by week. This requires some manipulation of the dates to calculate the number of weeks -- basically dividing the days by 7.
Then, use rank() (or row_number() if you only want one when there are ties) to get the top value:
-- Best sales rep (by total revenue) for every 7-day week counted from
-- 2014-06-01. Ties share rank 1, so a week can return more than one rep.
SELECT s.*
FROM (SELECT sr.RepName AS RepName,
             -- Integer division: days 0-6 -> week 0, days 7-13 -> week 1, ...
             DATEDIFF(day, '20140601', MAS_PostDate) / 7 AS weeknum,
             SUM(csr.Revenue) AS Revenue,
             -- DESC so that rank 1 is the highest revenue of the week
             RANK() OVER (PARTITION BY DATEDIFF(day, '20140601', MAS_PostDate) / 7
                          ORDER BY SUM(csr.Revenue) DESC) AS seqnum
      FROM complete_sales_raw csr
      INNER JOIN sale_reps sr
          ON csr.RepId = sr.Id
      -- Every sale from the first day onwards, not just the first week
      WHERE MAS_PostDate >= '20140601'
      GROUP BY sr.RepName,
               DATEDIFF(day, '20140601', MAS_PostDate) / 7
     ) s
WHERE s.seqnum = 1;

Best way to pairing & finding anomalies in SQL data

The problem is that it takes way too long in SQL and there must be a better way. I’ve picked out the slow part for the scenario below.
Scenario:
Two (temp) tables with event times for start and end for vehicles that have to be paired up to figure idle durations. The issue is that some of the event data is missing. I figured out a rudimentary way of going through and determining when the last end time is after the next start time and removing the invalid start. Again not elegant + very slow.
Tables :
-- Start events per vehicle. Per the poster's note, StartId is pre-filled with
-- a ROW_NUMBER() partitioned by VehicleIp and ordered by CurrentDate.
create table #start
(
    VehicleIp   int      null,
    CurrentDate datetime null,
    EventId     int      null,
    StartId     int      null
)

-- End events per vehicle; EndId is pre-filled the same way.
create table #end
(
    VehicleIp   int      null,
    CurrentDate datetime null,
    EventId     int      null,
    EndId       int      null
)
--//Note: StartId and EndId are both pre-filled with something like:
ROW_NUMBER() Over(Partition by VehicleIp order by VehicleIp, CurrentDate)
--//Slow SQL
-- Repeatedly removes one start event and renumbers the remaining start events
-- per vehicle. Table names are aligned with the #start / #end tables declared
-- for this scenario, and the variable uses the @ prefix.
-- NOTE(review): the LEFT JOIN has no filter on tOff, so the EXISTS is true
-- whenever #start contains a row; the poster describes this as an excerpt, so
-- a filtering predicate may have been left out - confirm before use.
while exists (
    select top 1 tOn.EventId
    from #start as tOn
    left join #end tOff
        on tOn.VehicleIp = tOff.VehicleIp
       and tOn.StartID = tOff.EndID + 1
)
begin
    declare @badEntry int

    -- earliest candidate start event
    select top 1 @badEntry = tOn.EventId
    from #start as tOn
    left join #end tOff
        on tOn.VehicleIp = tOff.VehicleIp
       and tOn.StartID = tOff.EndID + 1
    order by tOn.CurrentDate

    delete from #start where EventId = @badEntry

    -- renumber the remaining start events per vehicle in date order
    ;with _s as (
        select VehicleIp, CurrentDate, EventId,
               ROW_NUMBER() Over (Partition by VehicleIp
                                  order by VehicleIp, CurrentDate) StartID
        from #start
    )
    update #start
    set StartId = _s.StartId
    from #start
    join _s on #start.EventId = _s.EventId
end
Assuming you start with a table containing Vehicle and interval in which it was used, this query will identify gaps.
-- Lists idle periods per vehicle: from the day after a usage interval ends
-- until the vehicle's next usage interval starts.
select
    gaps.VehicleID,
    gaps.IdleStart,
    gaps.IdleEnd
from
(
    select
        cur.VehicleID,
        -- If EndDate is not inclusive, remove +1
        cur.EndDate + 1 as IdleStart,
        -- First start date after the current interval for this vehicle.
        -- If you don't want to show vehicles as unused up to the current date,
        -- remove the isnull part.
        isnull(
            (
                select top 1 nxt.StartDate
                from TableA nxt
                where nxt.VehicleID = cur.VehicleID
                  and nxt.StartDate > cur.StartDate
                order by nxt.StartDate
            ),
            getdate()
        ) as IdleEnd
    from TableA cur
) gaps
-- keep only intervals that are followed by a real gap
where gaps.IdleStart < gaps.IdleEnd
If dates have time portion they should be truncated to required precision, here is for day:
dateadd(dd, datediff(dd,0, getDate()), 0)
Replace dd with hh, mm or whatever precision is needed.
And here is Sql Fiddle with test