How do I take every 12th value in a SQL SELECT statement? - sql

I have a very large SQL database that I am pulling data to a web page. Instead of pulling every value, I want to take every 12th value. Is there a way to modify my current select statement?
-- Pivot one site's readings so that each timestamp becomes a row with one column per VariableID (9-30).
SELECT *
FROM (
SELECT CAST(DateTimeUTC as SmallDateTime) as [DateTime],
-- Readings equal to -9999 or below -60 are replaced with NULL before they are summed.
-- NOTE(review): DataValue is compared with string literals; presumably it is numeric and relies on implicit conversion - confirm.
CASE When DataValue = '-9999' Then null
When DataValue < '-60' Then null
Else DataValue
End DataValue, VariableID
FROM DataValues
WHERE SiteID = #siteID and VariableID IN(9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30)
) TableDate
PIVOT (SUM(DataValue) FOR VariableID IN ([9],[10],[11],[12],[13],[14],[15],[16],[17],[18],[19],[20],[21],[22],[23],[24],[25],[26],[27],[28],[29],[30])) PivotTable ORDER BY [DateTime]
-- NOTE(review): the END below has no matching BEGIN in the visible query; presumably left over from an enclosing block - confirm.
END
This works except the data is staggered from one column to the next. I am not sure why all the data points don't start at the same location.
See the screen shot below.
=

Using this theory (SQL Server) -
-- Number every row in date order, then keep only the rows whose position is a multiple of 12.
SELECT numbered.*
FROM (
    SELECT
        src.*,
        ROW_NUMBER() OVER (ORDER BY date) AS RowNum
    FROM testdb.dbo.testtable AS src
) AS numbered
WHERE numbered.RowNum % 12 = 0
Something like this -
-- Keep every 12th *timestamp* for one site, then pivot to one column per VariableID.
--
-- The site/variable filter is applied before the rows are numbered, and the numbering
-- is by timestamp rather than by row: every variable is therefore sampled at the same
-- instants, so the pivoted columns line up instead of being staggered.
-- #siteID is the same site placeholder used by the original query.
with dVal As (
    Select
        dv.DateTimeUTC,
        dv.DataValue,
        dv.VariableID,
        -- DENSE_RANK gives all rows that share a DateTimeUTC the same number.
        RowNum = dense_rank() over (order by dv.DateTimeUTC)
    from DataValues dv
    where dv.SiteID = #siteID
      and dv.VariableID IN (9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30)
)
SELECT *
FROM (
    SELECT
        CAST(DateTimeUTC as SmallDateTime) as [DateTime],
        -- Readings equal to -9999 or below -60 are treated as missing.
        CASE When DataValue = '-9999' Then null
             When DataValue < '-60' Then null
             Else DataValue
        End DataValue,
        VariableID
    FROM dVal
    WHERE
        /* timestamp number divides by 12 with no remainder */
        (RowNum % 12) = 0
) TableDate
PIVOT (SUM(DataValue) FOR VariableID IN ([9],[10],[11],[12],[13],[14],[15],[16],[17],[18],[19],[20],[21],[22],[23],[24],[25],[26],[27],[28],[29],[30])) PivotTable
ORDER BY [DateTime]

or,
-- Keep a row when the number of rows with an earlier DateTimeUTC is a multiple of 12.
-- The earliest row has 0 earlier rows, so it is kept
-- (presumably giving the 1st, 13th, 25th, ... rows when timestamps are unique - confirm).
Select * from DataValues d
where (Select count(*) from datavalues
where DateTimeUTC < d.DateTimeUTC) % 12 = 0
to start at the 12th row, instead of the first row,
-- Same idea, but the count uses <= and so includes the row itself:
-- a row is kept when the number of rows at or before its DateTimeUTC is a multiple of 12.
Select * from DataValues d
where (Select count(*) from datavalues
where DateTimeUTC <= d.DateTimeUTC) % 12 = 0
or
-- Alternative form: keep a row when the count of strictly earlier rows leaves remainder 11
-- when divided by 12 (i.e. 11, 23, 35, ... earlier rows).
Select * from DataValues d
where (Select count(*) from datavalues
where DateTimeUTC < d.DateTimeUTC) % 12 = 11

Related

Iterate value dynamically

I'm using the query below to calculate a budget value dynamically, that is, by iterating up to the selected date.
SUM(case when Name = 'Budget' then Value + ((Value/#TotaldaysinMonth) *
#DaysPastinMonth) end) as [Budget]
Here the variable #DaysPastinMonth should be dynamic. That is, if I select the date 03/31/2017, the query should run up to the previous month's value. As another example, if I select August, I need to run the query for Jan-Aug.
For Jan
SUM(case when Name = 'Budget' then Value + ((Value/#TotaldaysinMonth) *
#DaysPastinJanMonth) end) as [Budget]
For Feb
SUM(case when Name = 'Budget' then Value + ((Value/#TotaldaysinMonth) *
#DaysPastinFebMonth) end) as [Budget]
For Mar
SUM(case when Name = 'Budget' then Value + ((Value/#TotaldaysinMonth) *
#DaysPastinMarMonth) end) as [Budget]
Also I have created variables for all the 12 months which holds DaysPastinMonth.
Can anyone suggest how this can be achieved using a CASE expression?
You are thinking about this as a loop when you could do it with set-based operations.
----------------------------------------------------------
--Create a table of dates for testing
--One row per calendar day from #sdate to #edate (inclusive),
--with RN numbering the days in date order.
--NOTE(review): the variable names are kept exactly as the rest
--of this script spells them (#sdate, #edate, #numOfDays);
--presumably the leading # stands in for T-SQL's @ - confirm.
----------------------------------------------------------
if object_id('tempdb..#dates') is not null
drop table #dates
create table #dates(d date
,RN bigint)
declare #sdate datetime='2017-01-01 00:00'
declare #edate datetime='2017-07-31 00:00'
insert into #dates (d, RN)
select
DATEADD(d,number,#sdate)
--number the rows by day offset so RN is guaranteed to follow date order
,row_number() over (order by number) as RN
from
master..spt_values
where
--the type 'P' rows supply the integer offsets added to #sdate
type='P'
and number<=datediff(d,#sdate,#edate)
--row count of #dates, used later to size the random test data
declare #numOfDays int = (select count(*) from #dates)
----------------------------------------------------------
--Populate Test Data
--Builds #testTable with one 'Budget' row and one
--'Not The Budget' row per row of #dates, each carrying a
--generated random Value.
----------------------------------------------------------
if object_id('tempdb..#testTable') is not null
drop table #testTable
create table #testTable([Name] varchar(64),
[Value] decimal (16,4),
DT datetime)
insert into #testTable ([Name],[Value],DT)
select
'Budget'
,r.randomNumber
,d.d
from
#dates d
inner join
--derived table r: #numOfDays distinct random numbers, each given a row number RN
--so that it can be paired with the #dates row that has the same RN
(SELECT TOP (select #numOfDays)
randomNumber,
row_number() over (order by (select null)) as RN
FROM (
--one candidate value per sysobjects row, derived from NEWID() and RAND()
SELECT CAST(ABS(CAST(NEWID() AS binary(6)) %100000) + RAND() AS DECIMAL (16,4)) + 1 randomNumber
FROM sysobjects) sample
GROUP BY randomNumber
ORDER BY randomNumber DESC) r on r.RN = d.RN
union all
--second half: the same construction again, labelled 'Not The Budget'
select
'Not The Budget'
,r.randomNumber
,d.d
from
#dates d
inner join
(SELECT TOP (select #numOfDays)
randomNumber,
row_number() over (order by (select null)) as RN
FROM (
SELECT CAST(ABS(CAST(NEWID() AS binary(6)) %100000) + RAND() AS DECIMAL (16,4)) + 1 randomNumber
FROM sysobjects) sample
GROUP BY randomNumber
ORDER BY randomNumber DESC) r on r.RN = d.RN
----------------------------------------------------------
--Instead of making your variables "dynamic" which
--would likely consist of some loop, just pass in the
--month you care about and let SQL do the work
----------------------------------------------------------
--NOTE(review): the test data above covers 2017 while this date is in 2016 - confirm the intended year.
declare #month datetime = '2016-03-31'
select
DT
,[Value]
,[Name]
,sum(case when [Name] = 'Budget'
then [Value] +
--divisor: days between the first day of #month's month and #month itself
--NOTE(review): this is 0 when #month is the 1st of a month (divide by zero), and it is
--not the total number of days in the month - confirm the intended formula.
(([Value] / (DATEDIFF(day,DATEADD(month, DATEDIFF(month, 0, #month), 0),#month)))
*
--multiplier: days between the first day of the previous month and what appears to be
--the last day of the previous month - confirm
(DATEDIFF(DAY,DATEADD(MONTH, DATEDIFF(MONTH, 0, #month)-1, 0),DATEADD(MONTH, DATEDIFF(MONTH, -1, #month)-1, -1)))) end) as Budget
from
#testTable
where
DT >= DATEADD(yy, DATEDIFF(yy, 0, #month), 0) --this is Jan 1 of the year associated with your variable
--grouping by DT, [Name] and [Value] means the SUM is taken per distinct (DT, Name, Value) combination;
--rows whose [Name] is not 'Budget' get a NULL Budget because the CASE has no ELSE
group by
DT
,[Name]
,[Value]

SQL - Selecting rows with dates before and after column value change

I have a table called test.
In test I have An ID, a value and a date.
The dates are ordered for each ID.
I want to select rows for an ID, before and after a change of value, so the following example table.
RowNum--------ID------- Value -------- Date
1------------------001 ---------1----------- 01/01/2015
2------------------001 ---------1----------- 02/01/2015
3------------------001 ---------1----------- 04/01/2015
4------------------001 ---------1----------- 05/01/2015
5------------------001 ---------1----------- 06/01/2015
6------------------001 ---------1----------- 08/01/2015
7------------------001 ---------0----------- 09/01/2015
8------------------001 ---------0----------- 10/01/2015
9------------------001 ---------0----------- 11/01/2015
10-----------------001 ---------1----------- 12/01/2015
11-----------------001 ---------1----------- 14/01/2015
12------------------002 ---------1----------- 01/01/2015
13------------------002 ---------1----------- 04/01/2015
14------------------002 ---------0----------- 05/01/2015
15------------------002 ---------0----------- 07/01/2015
The result would return rows 6, 7, 9, 10, 13, 14
You could use analytic functions LAG() and LEAD() to access value in preceding and following rows, then check that it does not match value in current row.
-- Return the rows immediately before and after every change of Value within an ID.
SELECT *
FROM (
    SELECT RowNum,
           ID,
           Value,
           Date,
           -- PARTITION BY ID keeps the comparison inside one ID, so the last row of one ID
           -- is never compared with the first row of the next.
           -- The third argument defaults a missing neighbour to the row's own Value, so the
           -- first/last row of an ID is not reported just because it has no neighbour.
           LAG(Value, 1, Value) OVER (PARTITION BY ID ORDER BY RowNum) PrevValue,
           LEAD(Value, 1, Value) OVER (PARTITION BY ID ORDER BY RowNum) NextValue
    FROM test) t   -- a derived table must have an alias in SQL Server
WHERE PrevValue <> Value
   OR NextValue <> Value
The parameters passed to these functions are:
some scalar expression (column name in this case);
offset (1 row before or after);
default value (LAG() will return NULL for first row and LEAD() will return NULL for last row, but they don't seem special in your question, so I used column value as default).
See the version below for a solution that does not use LEAD and LAG:
-- Procedural alternative to LAG()/LEAD(): walk the rows of `test` in (ID, Date) order
-- and record the row before and the row after every change of Value within the same ID.
-- NOTE(review): id and value are declared INT here; adjust if test.ID / test.Value use other types.
DECLARE @i        INT = 1,
        @cnt      INT,
        @srcid    INT,   -- ID of the previous row
        @dstid    INT,   -- ID of the current row
        @srcvalue INT,   -- Value of the previous row
        @dstvalue INT    -- Value of the current row

-- Rows on either side of each change.
CREATE TABLE #result
  (
     id     INT,
     mydate DATE
  )

-- Working copy of the source rows.
CREATE TABLE #temp1
  (
     rn     INT IDENTITY(1, 1),
     id     INT,
     value  INT,
     mydate DATE
  )

-- The ORDER BY determines the order in which the identity values (rn) are assigned.
INSERT INTO #temp1
            (id,
             value,
             mydate)
SELECT ID,
       Value,
       [Date]
FROM   test
ORDER  BY ID,
          [Date]

SELECT @cnt = Count(*)
FROM   #temp1

-- Seed the "previous row" state from row 1, then compare from row 2 onwards.
SELECT @srcid = id,
       @srcvalue = value
FROM   #temp1
WHERE  rn = @i

SET @i = 2

WHILE ( @i <= @cnt )
  BEGIN
      SELECT @dstid = id,
             @dstvalue = value
      FROM   #temp1
      WHERE  rn = @i

      -- A change only counts when both rows belong to the same ID.
      IF ( @dstid = @srcid
           AND @dstvalue <> @srcvalue )
        BEGIN
            INSERT INTO #result
                        (id,
                         mydate)
            SELECT id,
                   mydate
            FROM   #temp1
            WHERE  rn IN ( @i - 1, @i )
        END

      -- The current row becomes the previous row for the next iteration.
      SELECT @srcid = @dstid,
             @srcvalue = @dstvalue

      SET @i = @i + 1
  END

-- A row that sits between two changes is inserted twice above; DISTINCT reports it once.
SELECT DISTINCT id,
                mydate
FROM   #result
ORDER  BY id,
          mydate
The answer using lag() and lead() is the right answer. If you are using a pre-SQL Server 2012 version, then you can do essentially the same thing using cross apply or a correlated subquery:
-- Pre-SQL Server 2012 equivalent of LAG()/LEAD(): look up the previous and next row
-- of the same ID with APPLY, then keep rows whose Value differs from either neighbour.
select t.*
from test t outer apply   -- OUTER APPLY keeps the first row of an ID, which has no previous row
     (select top 1 p.value
      from test p
      where p.ID = t.ID          -- only look at neighbours within the same ID
        and p.date < t.date
      order by p.date desc
     ) tprev outer apply  -- likewise keeps the last row of an ID, which has no next row
     (select top 1 n.value
      from test n
      where n.ID = t.ID
        and n.date > t.date
      order by n.date asc
     ) tnext
-- Compare each neighbour with the current row; a missing neighbour is NULL and never matches.
where tprev.value <> t.value
   or tnext.value <> t.value;

How can I update extreme columns within range fast?

I have 40 tables that look like following, and each table contains 30 million records.
Table RawData : PK(CategoryID, Time)
CategoryID Time IsSampled Value
-----------------------------------------------------------
1 2012-07-01 00:00:00.000 0 -> 1 65.36347
1 2012-07-01 00:00:11.000 0 80.16729
1 2012-07-01 00:00:14.000 0 29.19716
1 2012-07-01 00:00:25.000 0 -> 1 7.05847
1 2012-07-01 00:00:36.000 0 -> 1 98.08257
1 2012-07-01 00:00:57.000 0 75.35524
1 2012-07-01 00:00:59.000 0 35.35524
As of now, the IsSampled column is 0 for all records.
I need to update the records, so that for each CategoryID and for each minute range, the records with Max(Value), Min(Value), and the first record should have 1 for IsSampled.
Following is the procedural query I've created, but it takes too long to run. (approx. 2h 30m for each table)
-- Marks sampled rows one minute at a time: for each one-minute window between
-- #startRange and #endTime, sets IsSampled = 1 on the rows of each CategoryID that hold
-- the window's maximum Value, minimum Value, or earliest Time.
DECLARE #startRange datetime
DECLARE #endRange datetime
DECLARE #endTime datetime
SET #startRange = '2012-07-01 00:00:00.000'
SET #endTime = '2012-08-01 00:00:00.000'
-- One iteration (and one UPDATE) per minute of the range.
WHILE (#startRange < #endTime)
BEGIN
-- Current window is the half-open interval [#startRange, #endRange).
SET #endRange = DATEADD(MI, 1, #startRange)
UPDATE r1
SET IsSampled = 1
FROM RawData AS r1
JOIN
(
-- Per-category extremes and first timestamp inside the current window.
SELECT r2.CategoryID,
MAX(Value) as MaxValue,
MIN(Value) as MinValue,
MIN([Time]) AS FirstTime
FROM RawData AS r2
WHERE #startRange <= [Time] AND [Time] < #endRange
GROUP BY CategoryID
) as samples
ON r1.CategoryID = samples.CategoryID
AND (r1.Value = samples.MaxValue
OR r1.Value = samples.MinValue
OR r1.[Time] = samples.FirstTime)
-- Restrict the updated rows to the same window the extremes were computed for.
AND #startRange <= r1.[Time] AND r1.[Time] < #endRange
-- Advance to the next minute.
SET #startRange = DATEADD(MI, 1, #startRange)
END
Is there a way to update these tables faster(presumably in non-procedural way)? Thanks!
I'm not sure what the performance of this will be like, but it's a more set-based approach than your current one:
-- Self-contained demo: mark, for every CategoryID and one-minute bin, the rows holding the
-- minimum Value, the maximum Value and the earliest Time.
-- @T is a table variable standing in for RawData.
declare @T table (CategoryID int not null,Time datetime2 not null,IsSampled bit not null,Value decimal(10,5) not null)
insert into @T (CategoryID,Time,IsSampled,Value) values
(1,'2012-07-01T00:00:00.000',0,65.36347),
(1,'2012-07-01T00:00:11.000',0,80.16729),
(1,'2012-07-01T00:00:14.000',0,29.19716),
(1,'2012-07-01T00:00:25.000',0,7.05847),
(1,'2012-07-01T00:00:36.000',0,98.08257),
(1,'2012-07-01T00:00:57.000',0,75.35524),
(1,'2012-07-01T00:00:59.000',0,35.35524)
-- BinnedValues: each row plus TimeBin, its Time truncated to the start of the minute.
;with BinnedValues as (
select CategoryID,Time,IsSampled,Value,DATEADD(minute,DATEDIFF(minute,0,Time),0) as TimeBin
from @T
-- MinMax: position of each row within its (CategoryID, TimeBin) group when ordered by
-- ascending Value, descending Value and ascending Time.
), MinMax as (
select CategoryID,Time,IsSampled,Value,TimeBin,
ROW_NUMBER() OVER (PARTITION BY CategoryID, TimeBin ORDER BY Value) as MinPos,
ROW_NUMBER() OVER (PARTITION BY CategoryID, TimeBin ORDER BY Value desc) as MaxPos,
ROW_NUMBER() OVER (PARTITION BY CategoryID, TimeBin ORDER BY Time) as Earliest
from
BinnedValues
)
-- Updating through the CTE writes IsSampled back to the underlying rows of @T.
update MinMax set IsSampled = 1 where MinPos=1 or MaxPos=1 or Earliest=1
select * from @T
Result:
CategoryID Time IsSampled Value
----------- ---------------------- --------- ---------------------------------------
1 2012-07-01 00:00:00.00 1 65.36347
1 2012-07-01 00:00:11.00 0 80.16729
1 2012-07-01 00:00:14.00 0 29.19716
1 2012-07-01 00:00:25.00 1 7.05847
1 2012-07-01 00:00:36.00 1 98.08257
1 2012-07-01 00:00:57.00 0 75.35524
1 2012-07-01 00:00:59.00 0 35.35524
It could possibly be sped up if the TimeBin column could be added as a computed column to the table and added to appropriate indexes.
It should also be noted that this will mark a maximum of 3 rows as sampled - if the earliest is also the min or max value, it will only be marked once (obviously), but the next nearest min or max value will not be. Also, if multiple rows have the same Value, and that is the min or max value, one of the rows will be selected arbitrarily.
You could rewrite update in the loop to something like:
-- Replacement for the UPDATE inside the per-minute loop (uses the loop's #startRange / #endRange).
-- A row is marked when, within its CategoryID and the current window, it is the earliest row,
-- or holds the minimum Value, or holds the maximum Value. Each condition needs its own
-- NOT EXISTS: a single NOT EXISTS over "earlier OR smaller OR larger" would only mark a row
-- that is the earliest AND has the same Value as every other row in the window.
UPDATE r1
SET IsSampled = 1
FROM RawData r1
WHERE r1.Time >= #startRange and r1.Time < #endRange
AND (
    -- no earlier row: r1 is the first row of the window
    NOT EXISTS
    (
        select *
        from RawData r2
        where r2.CategoryID = r1.CategoryID
        and r2.Time >= #startRange and r2.Time < #endRange
        and r2.Time < r1.Time
    )
    -- no smaller value: r1 holds the minimum
    OR NOT EXISTS
    (
        select *
        from RawData r2
        where r2.CategoryID = r1.CategoryID
        and r2.Time >= #startRange and r2.Time < #endRange
        and r2.Value < r1.Value
    )
    -- no larger value: r1 holds the maximum
    OR NOT EXISTS
    (
        select *
        from RawData r2
        where r2.CategoryID = r1.CategoryID
        and r2.Time >= #startRange and r2.Time < #endRange
        and r2.Value > r1.Value
    )
)
To get actual performance improvement you need an index on Time column.
Hi try this query.
-- Demo on the sample rows: marks IsSampled on rows selected by their time of day.
declare #T table (CategoryID int not null,Time datetime2 not null,IsSampled bit not null,Value decimal(10,5) not null)
insert into #T (CategoryID,Time,IsSampled,Value) values
(1,'2012-07-01T00:00:00.000',0,65.36347),
(1,'2012-07-01T00:00:11.000',0,80.16729),
(1,'2012-07-01T00:00:14.000',0,29.19716),
(1,'2012-07-01T00:00:25.000',0,7.05847),
(1,'2012-07-01T00:00:36.000',0,98.08257),
(1,'2012-07-01T00:00:57.000',0,75.35524),
(1,'2012-07-01T00:00:59.000',0,35.35524)
-- CTE: every row with its Time reduced to the time-of-day part.
;WITH CTE as (SELECT CategoryID,CAST([Time] as Time) as time,IsSampled,Value FROM #T)
-- CTE2: per CategoryID, the latest (mx) and earliest (mn) time of day other than midnight, plus a fixed midnight value (start).
-- NOTE(review): mx/mn are extremes of the time column, not of Value, and there is no per-minute grouping - confirm this matches the question's requirement.
,CTE2 as (SELECT CategoryID,Max(time) mx,MIN(time) mn,'00:00:00.0000000' as start FROM CTE where time <> '00:00:00.0000000' Group by CategoryID)
-- Marks the rows whose time of day equals mx, mn or midnight.
update #T SET IsSampled=1
FROM CTE2 c inner join #T t on c.CategoryID = t.CategoryID and (CAST(t.[Time] as Time)=c.mx or CAST(t.[Time] as Time)=c.mn or CAST(t.[Time] as Time)=c.start)
select * from #T
Hi Here is the latest updated query.
Check the query for performance:
-- Self-contained demo: for every CategoryID and one-minute window, mark the rows that hold the
-- window's maximum Value, minimum Value, or earliest Time.
-- @T is a table variable standing in for RawData.
declare @T table (CategoryID int not null,Time datetime2 not null,IsSampled bit not null,Value decimal(10,5) not null)
insert into @T (CategoryID,Time,IsSampled,Value) values
(1,'2012-07-01T00:00:00.000',0,65.36347),
(1,'2012-07-01T00:00:11.000',0,80.16729),
(1,'2012-07-01T00:00:14.000',0,29.19716),
(1,'2012-07-01T00:00:25.000',0,7.05847),
(1,'2012-07-01T00:00:36.000',0,98.08257),
(1,'2012-07-01T00:00:57.000',0,75.35524),
(1,'2012-07-01T00:00:59.000',0,35.35524)
-- Binned: each row with the start of the minute it falls in.
;WITH Binned as (
    SELECT CategoryID, [Time], Value,
           DATEADD(minute, DATEDIFF(minute, 0, [Time]), 0) as MinuteStart
    FROM @T
)
-- Extremes: one row per (CategoryID, minute) with that window's max/min Value and first Time.
,Extremes as (
    SELECT CategoryID, MinuteStart,
           Max(Value) as mx,
           MIN(Value) as mn,
           MIN([Time]) as FirstTime
    FROM Binned
    Group by CategoryID, MinuteStart
)
-- Update via the alias so the joined instance of @T is the one being modified.
update t SET IsSampled=1
FROM @T t
inner join Extremes e
    on  t.CategoryID = e.CategoryID
    -- restrict each row to its own one-minute window
    and t.[Time] >= e.MinuteStart
    and t.[Time] <  DATEADD(minute, 1, e.MinuteStart)
    and (t.Value = e.mx or t.Value = e.mn or t.[Time] = e.FirstTime)
select * from @T

SQL cross tab pivot query

I have a table as below.
CaseID StatusID StageID CaseRegisterTime City
1 1 5 datetime XYZ
2 1 5 datetime ABC
Now I want its city-wise count, only for specific dates, and only where StatusID = 1 and StageID = 5.
Cities CurrentDate-1 CurrentDate-2 January2012-CurrentDate-3
XYZ 5 51 5008
JUS 0 0 125
ABC 1 0 48
I want my header to group cases for CaseRegisterTime as shown above.
Please help.
Use CASE WHEN to convert your dates of interest to 'CurrentDate-1' and 'CurrentDate-2', and then pivot the results, using these strings as the new columns.
Alternatively, you can do something like this:
-- Count cases per city for two specific days using conditional aggregation.
select City, sum(Date1) as Date1, sum(Date2) as Date2
from(
    select City,
           -- Half-open day ranges match rows at any time of day; 'YYYYMMDD' literals are
           -- read the same way regardless of language/DATEFORMAT settings.
           case when CaseRegisterTime >= '20120101' and CaseRegisterTime < '20120102' then 1 else 0 end as Date1,
           -- The second day is presumably 15 January 2012 - adjust to the date you need.
           case when CaseRegisterTime >= '20120115' and CaseRegisterTime < '20120116' then 1 else 0 end as Date2
    from sample
    -- only the status/stage combination asked for in the question
    where StatusID = 1
      and StageID = 5
) as T
group by City
You'd also have to filter out the records that don't have one of the desired dates.
Here's one of many ways to do it in SQL Server 2008 (using the Date datatype):
-- One row per city (for StatusID = 1 / StageID = 5), with a correlated count for each date bucket:
-- yesterday, the day before yesterday, and 1 Jan 2012 up to (not including) the day before yesterday.
SELECT DISTINCT
    a.City AS Cities,
    (
        SELECT COUNT(*)
        FROM MyTable AS c
        WHERE c.CaseRegisterTime >= CAST(GETDATE() - 1 AS date)
          AND c.CaseRegisterTime <  CAST(GETDATE() - 0 AS date)
          AND c.StatusID = a.StatusID
          AND c.StageID  = a.StageID
          AND c.City     = a.City
    ) AS [CurrentDate-1],
    (
        SELECT COUNT(*)
        FROM MyTable AS c
        WHERE c.CaseRegisterTime >= CAST(GETDATE() - 2 AS date)
          AND c.CaseRegisterTime <  CAST(GETDATE() - 1 AS date)
          AND c.StatusID = a.StatusID
          AND c.StageID  = a.StageID
          AND c.City     = a.City
    ) AS [CurrentDate-2],
    (
        SELECT COUNT(*)
        FROM MyTable AS c
        WHERE c.CaseRegisterTime >= CAST('20120101' AS date)
          AND c.CaseRegisterTime <  CAST(GETDATE() - 2 AS date)
          AND c.StatusID = a.StatusID
          AND c.StageID  = a.StageID
          AND c.City     = a.City
    ) AS [January2012-CurrentDate-3]
FROM MyTable AS a
WHERE a.StatusID = 1
  AND a.StageID = 5
Update
The case and sum method #JotaBe uses is about twice as fast on my box (with many less scans and reads), so here's what that could look like:
-- Per-city counts for the three date buckets, computed in one pass:
-- each SUM adds 1 for every row whose CaseRegisterTime falls inside that bucket.
SELECT
    t.City AS Cities,
    SUM(CASE WHEN t.CaseRegisterTime >= CAST(GETDATE() - 1 AS date)
              AND t.CaseRegisterTime <  CAST(GETDATE() - 0 AS date)
             THEN 1 ELSE 0 END) AS [CurrentDate-1],
    SUM(CASE WHEN t.CaseRegisterTime >= CAST(GETDATE() - 2 AS date)
              AND t.CaseRegisterTime <  CAST(GETDATE() - 1 AS date)
             THEN 1 ELSE 0 END) AS [CurrentDate-2],
    SUM(CASE WHEN t.CaseRegisterTime >= CAST('20120101' AS date)
              AND t.CaseRegisterTime <  CAST(GETDATE() - 2 AS date)
             THEN 1 ELSE 0 END) AS [January2012-CurrentDate-3]
FROM MyTable AS t
WHERE t.StatusID = 1
  AND t.StageID = 5
GROUP BY t.City
Something like this will do:
-- Self-contained PIVOT demo: count registrations per city for two dates.
-- Everything runs inside a transaction that is rolled back, so nothing is left behind.
begin tran;
go
create table #t1(
ID int identity,
-- varchar with no length means varchar(1) in a column definition, which would not hold real city names
City varchar(50),
RegisterDate datetime
);
declare @firstDate datetime, @secondDate datetime;
-- 'YYYYMMDD' literals are read the same way regardless of language/DATEFORMAT settings
set @firstDate = '20120101';
set @secondDate = '20120102';
-- explicit column list: ID is an identity column and is generated automatically
insert into #t1 (City, RegisterDate) values
('A', @firstDate),
('A', @firstDate),
('B', @firstDate),
('B', @firstDate),
('B', @firstDate),
('A', @secondDate),
('A', @secondDate),
('A', @secondDate),
('B', @secondDate),
('B', @secondDate);
select * from #t1;
-- One output row per City, one count column per listed RegisterDate.
select pvt.*
from(
select ID, City, RegisterDate
from #t1
) a
pivot(
count(a.ID)
for a.RegisterDate in ([2012-1-1], [2012-1-2])
) as pvt;
drop table #t1;
go
rollback tran;

SQL how to select date from 1 - many

In a SQL Server 2005 stored procedure I need to run a query that contains a one-to-many (1-M) join. I need to return only one row from the "many" table: the one with the earliest date.
I have looked at In SQL how do I write a query to return 1 record from a 1 to many relationship?
and SQL conundrum, how to select latest date for part, but only 1 row per part (unique)
But I am not sure what's the best solution in my case as I am also doing a Insert Into temp table and using dynamic sorting and paging.
Here is my SQL. What I want is to return many rows of Foo, but only the earliest b.CreatedDate between the start and end date parameters I pass in; there are normally about 5 rows in Bar for each Foo.
-- Asker's (abridged) procedure body: fill #Results from foo joined to bar, then return one
-- page of those rows in the order selected by #SortBy.
-- The lines starting with // are the asker's elision markers, not T-SQL comments.
DECLARE #StartDate datetime
DECLARE #EndDate datetime
INSERT INTO #Results
SELECT distinct
f.Name,
-- NOTE(review): there is no comma after f.Price and a dangling comma after b.CreatedDate;
-- presumably artefacts of trimming the column list - confirm.
f.Price
b.CreatedDate ,
// loads more columns removed for brevity
FROM
foo f
join bar b on f.Id = b.fooId
// loads more table removed for brevity
WHERE
-- Each filter is skipped when its parameter is NULL.
(#x is null OR f.Id = #x)
AND (#Deal is null OR f.IsDeal = #Deal)
-- NOTE(review): the sd alias is not defined in the visible FROM clause; presumably one of the removed joins.
AND (#StartDate is null OR sd.SailingDate >= #StartDate)
AND (#EndDate is null OR sd.SailingDate <= #EndDate)
// loads more filters removed for brevity
-- Paging window: row numbers from #firstResult up to, but not including, #lastResult.
declare #firstResult int, #lastResult int
set #firstResult = ((#PageNumber-1) * #ItemsPerPage) + 1;
set #lastResult = #firstResult + #ItemsPerPage;
select #TotalResults = count(1) from #Results;
-- Number the rows in the requested sort order; only the CASE matching #SortBy yields a
-- non-NULL sort key.
WITH ResultItems AS
(
SELECT *, ROW_NUMBER() OVER (
ORDER BY
CASE WHEN #SortBy = 'priceLow' THEN Price END ASC,
CASE WHEN #SortBy = 'Soonest' THEN CreatedDate END ASC,
CASE WHEN #SortBy = 'priceHigh' THEN Price END DESC
) As RowNumber
FROM #Results r
)
SELECT * from ResultItems
WHERE RowNumber >= #firstResult AND RowNumber < #lastResult
-- Order the returned page by a rank chosen by the same #SortBy value.
ORDER BY
CASE
WHEN #SortBy = 'priceHigh' THEN (RANK() OVER (ORDER BY Price desc))
WHEN #SortBy = 'priceLow' THEN (RANK() OVER (ORDER BY Price))
WHEN #SortBy = 'Soonest' THEN (RANK() OVER (ORDER BY CreatedDate ))
END
This query as is will return multiple 'b.CreatedDate' instead of just the earliest one between my Filters
Update
So I want to See
If my source data is:
Foo
___
1 , Hello
2 , There
Boo
___
1, 1, 2011-2-4
2, 1, 2011-3-6
3, 1, 2012-12-21
4, 2, 2012-11-2
The result would be
1, Hello,2011-2-4
2, There, 2012-11-2
I think I just got it working by adding a CTE to the top of my query
-- Earliest Bar.CreatedDate per FooId, limited to the optional #StartDate / #EndDate window
-- (a NULL bound disables that side of the filter).
;WITH cteMinDate AS (
    SELECT
        b.FooId,
        MIN(b.CreatedDate) AS CreatedDate
    FROM Bar AS b
    WHERE (b.CreatedDate >= #StartDate OR #StartDate IS NULL)
      AND (b.CreatedDate <= #EndDate OR #EndDate IS NULL)
    GROUP BY b.FooId
)
Same as shown here SQL conundrum, how to select latest date for part, but only 1 row per part (unique). Doing this allows me to remove the date query part from my main query and only do it once in the CTE