SQL Server window functions: building up a history string - sql

I have a table of longitudinal data that looks like this:
where id is the partition variable, period is the time dimension, and val is the observation value.
I want to build up a history of val for each panel of id, like this:
I'm trying to do this with SQL window functions and not a cursor, but the issue I keep running into is the self-referential nature of the hist column definition. It almost seems like I'd have to create one row/column per period. For example, the closest I could come was this:
IF OBJECT_ID('dbo.my_try', 'U') IS NOT NULL
DROP TABLE dbo.my_try;
GO
SELECT
id, period, val,
CASE
WHEN (
period = MIN(period)
OVER (PARTITION by id order by period ROWS
BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
) THEN CAST (val AS VARCHAR(60))
ELSE NULL
END AS hist
INTO my_try
FROM my_test
SELECT
id, period, val,
CASE
WHEN (
period = MIN(period) OVER
(PARTITION by id order by period ROWS
BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
) THEN hist
ELSE (
CONCAT(
val, ' | ', LAG(hist, 1) OVER (PARTITION by id order by period)
)
)
END AS hist2
FROM my_try
I would have to spool out the iteration and do a hist3, etc. for it to finally work.
Is it possible to accomplish this with SQL window functions, or is cursor the only route?
Sample Data
Here is some code to generate the original table:
CREATE TABLE my_test (
id INT,
period INT,
val INT
)
BEGIN
DECLARE #id INT = 1;
DECLARE #period INT = 1;
WHILE #id <= 3
BEGIN
SET #period = 1
WHILE #period <= 3
BEGIN
INSERT INTO my_test VALUES (#id, #period, #period * POWER(10, #id))
SET #period = #period + 1
END
SET #id = #id + 1
END
END

Actually you don't need recursion here. You can leverage STUFF pretty easily. Of course if you are on 2017 you can use string_agg as suggested above. But if you are like me and your company is not the fastest to adopt the latest and greatest you can use this.
select t1.id
, t1.period
, t1.val
, STUFF((select ' | ' + convert(varchar(10), val)
from my_test t2
where t2.id = t1.id
and t2.period <= t1.period
order by t1.period
FOR XML PATH('')), 1, 3,'')
from my_test t1
order by t1.id
, t1.period

As discussed in the comments try using recursive query
with cte as(
select id, [period], val, convert(varchar(max), val) as agg from my_try where [period] = 1
union all
select t.id, t.[period], t.val, CONCAT(c.agg, ' | ', t.val) from my_try t join cte c on c.[period] +1 = t.[period] and c.id = t.id
)
select * from cte order by id, [period]

Related

Multilevel CTE Expression slows the execution (Reposted with changes) [duplicate]

This question already exists:
Multilevel CTE Expression slows the execution [closed]
Closed 1 year ago.
The community reviewed whether to reopen this question 1 year ago and left it closed:
Duplicate This question has been answered, is not unique, and doesn’t differentiate itself from another question.
In SQL Server, I have a table-valued function which contains a multi-level CTE as shown in the code here:
CREATE FUNCTION [dbo].[GetDataset_Test]
(#XMLBlock XML, #Id INT)
RETURNS
#tempTable TABLE
(
methodID INT,
[Id] INT,
SavingMessage varchar(50)
)
AS
BEGIN
DECLARE #ObjectID INT = 2;
DECLARE #ExchangeRate INT = 2;
DECLARE #Cond INT = 1;
DECLARE #Param1 varchar(50) = 'name1';
DECLARE #Param2 varchar(50) = 'name2';
WITH CTE AS
(
SELECT
Col1,
con,
DiscountLine,
tempNumber,
ItemCostExVAT,
Quantity,
SomeValue,
methodID,
VATAmount,
SubTypeID,
Line,
VATMultiplier,
ROUND(dbo.GetAmountCustomFunction(COALESCE(Id, AnotherId), COALESCE(Price, PriceValue)), 2) As [Amount]
FROM
dbo.GetObject(#Id, #ObjectID, #ParameterBlock) AS BC
INNER JOIN
dbo.fnPrices(#ID, #CurrencyID) BPD ON BPD.Id = BC.productid
),
CTE1 AS
(
SELECT
*,
CASE WHEN con = 0 THEN Quantity ELSE 1 END AS Quantity
FROM
CTE
WHERE
1 = SomeValue
),
CTE2 AS
(
Select *,
MIN(CASE WHEN DiscountLine=1 THEN 1 ELSE 20 END) over (PARTITION by tempNumber) As StockControlled,
SUM(ItemCostExVAT * Quantity) OVER ( PARTITION BY tempNumber ) AS tempCost ,
ROUND(CASE
WHEN methodID = 8 THEN DiscValue
WHEN methodID = 2 THEN END ,
DicsValue,VATAmount),2) AS AmountExVAT
From CTE1
),
CTE3
(
SELECT
*,
ROUND(CASE
WHEN SubTypeID = 1 AND Line = 1 THEN -1 * AmountExVAT
ELSE 20
END, 2) PriceExVAT
FROM
CTE2
),
CTE4
(
SELECT
*,
ROUND(#ExchangeRate * CASE WHEN #Cond = 1 THEN AmountExVAT * VATMultiplier ELSE 20 END, 2) CashBack
FROM
CTE3
),
CTE5
(
SELECT
*,
dbo.FormatMessage(#Param1, #Param2) AS SavingMessage
FROM
CTE4
)
INSERT INTO #tempTable
SELECT
methodID, [Id], SavingMessage
FROM
CTE5
RETURN
END
In above query because of multi-level CTE and table value parameter I can think that its trying to query recursively and taking more execution time.
I know that we cannot use temporary table as function parameter, is there any alternative of this or can I use temporary table by any way in function?
Or can I make some changes in CTE to improve my T-SQL function query execution time?

Selecting data from table where sum of values in a column equal to the value in another column

Sample data:
create table #temp (id int, qty int, checkvalue int)
insert into #temp values (1,1,3)
insert into #temp values (2,2,3)
insert into #temp values (3,1,3)
insert into #temp values (4,1,3)
According to data above, I would like to show exact number of lines from top to bottom where sum(qty) = checkvalue. Note that checkvalue is same for all the records all the time. Regarding the sample data above, the desired output is:
Id Qty checkValue
1 1 3
2 2 3
Because 1+2=3 and no more data is needed to show. If checkvalue was 4, we would show the third record: Id:3 Qty:1 checkValue:4 as well.
This is the code I am handling this problem. The code is working very well.
declare #checkValue int = (select top 1 checkvalue from #temp);
declare #counter int = 0, #sumValue int = 0;
while #sumValue < #checkValue
begin
set #counter = #counter + 1;
set #sumValue = #sumValue + (
select t.qty from
(
SELECT * FROM (
SELECT
ROW_NUMBER() OVER (ORDER BY id ASC) AS rownumber,
id,qty,checkvalue
FROM #temp
) AS foo
WHERE rownumber = #counter
) t
)
end
declare #sql nvarchar(255) = 'select top '+cast(#counter as varchar(5))+' * from #temp'
EXECUTE sp_executesql #sql, N'#counter int', #counter = #counter;
However, I am not sure if this is the best way to deal with it and wonder if there is a better approach. There are many professionals here and I'd like to hear from them about what they think about my approach and how we can improve it. Any advice would be appreciated!
Try this:
select id, qty, checkvalue from (
select t1.*,
sum(t1.qty) over (partition by t2.id) [sum]
from #temp [t1] join #temp [t2] on t1.id <= t2.id
) a where checkvalue = [sum]
Smart self-join is all you need :)
For SQL Server 2012, and onwards, you can easily achieve this using ROWS BETWEEN in your OVER clause and the use of a CTE:
WITH Running AS(
SELECT *,
SUM(qty) OVER (ORDER BY id
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS RunningQty
FROM #temp t)
SELECT id, qty, checkvalue
FROM Running
WHERE RunningQty <= checkvalue;
One basic improvement is to try & reduce the no. of iterations. You're incrementing by 1, but if you repurpose the logic behind binary searching, you'd get something close to this:
DECLARE #RoughAverage int = 1 -- Some arbitrary value. The closer it is to the real average, the faster things should be.
DECLARE #CheckValue int = (SELECT TOP 1 checkvalue FROM #temp)
DECLARE #Sum int = 0
WHILE 1 = 1 -- Refer to BREAK below.
BEGIN
SELECT TOP (#RoughAverage) #Sum = SUM(qty) OVER(ORDER BY id)
FROM #temp
ORDER BY id
IF #Sum = #CheckValue
BREAK -- Indicating you reached your objective.
ELSE
SET #RoughAverage = #CheckValue - #Sum -- Most likely incomplete like this.
END
For SQL 2008 you can use recursive cte. Top 1 with ties limits result with first combination. Remove it to see all combinations
with cte as (
select
*, rn = row_number() over (order by id)
from
#temp
)
, rcte as (
select
i = id, id, qty, sumV = qty, checkvalue, rn
from
cte
union all
select
a.id, b.id, b.qty, a.sumV + b.qty, a.checkvalue, b.rn
from
rcte a
join cte b on a.rn + 1 = b.rn
where
a.sumV < b.checkvalue
)
select
top 1 with ties id, qty, checkvalue
from (
select
*, needed = max(case when sumV = checkvalue then 1 else 0 end) over (partition by i)
from
rcte
) t
where
needed = 1
order by dense_rank() over (order by i)

T_SQL Function with 'with' clause

I am wondering why I cannot create the following T-SQL function:
CREATE FUNCTION DaysIncarceratedInYear
--Requires a Patient ID and a year
-- will return the number of days the patient was incarcerated for the year
--incarcerated is stored in a flowsheet
-- the FlowdataID are:
-- Incarceration Start Date: 'ZZZZZ00071
-- Incarceration End Date : 'ZZZZZ00072'
(
#PID varchar(10),
#YEAR int
)
RETURNS int
--DECLARE #PID varchar(10)
--DECLARE #YEAR int
--set #PID = 'ZZZZZ000L6'
--set #YEAR =2012
AS
BEGIN
declare #R int
SELECT #R = Numdays from (
;with startdates as
(
SELECT
dbo.View_PatientFlowValue.FlowValue_Value as val,
ROW_NUMBER() OVER(ORDER BY dbo.View_PatientFlowValue.FlowValue_Value) AS RowNumber
FROM dbo.View_PatientFlowValue
WHERE dbo.View_PatientFlowValue.FlowData_ID = 'ZZZZZ00071'
AND dbo.View_PatientFlowValue.FlowValue_RecordState<>1
AND Patient_ID = #PID
)
,enddates as
(
SELECT
dbo.View_PatientFlowValue.FlowValue_Value val,
ROW_NUMBER() OVER(ORDER BY dbo.View_PatientFlowValue.FlowValue_Value) AS RowNumber
FROM dbo.View_PatientFlowValue
WHERE dbo.View_PatientFlowValue.FlowData_ID = 'ZZZZZ00072'
AND dbo.View_PatientFlowValue.FlowValue_RecordState<>1
AND Person_ID = #PID
)
Select sum(DATEDIFF(d,CalcStart, CalcEnd)) as NumDays
from (
select
case
when DATEDIFF(d, cast(str(#YEAR*10000+1*100+1) as date), s.val) < 1 then '1/1/' + str(#YEAR)
else s.val
end As CalcStart,
ISNULL(e.Val, cast(str((#YEAR+1)*10000+1*100+1) as date)) as CalcEnd,
s.val as realstart, e.val as realend
FROM StartDates s
LEFT OUTER JOIN EndDates e ON s.RowNumber = e.RowNumber
) accountforyear
)
return #R
END
GO
It states there is incorrect syntax near ";" , but if I take the ";" out, it tells me there is incorrect syntax near the keyword "with". What is the proper syntax here?
This query works fine standalone.
I've never seen a CTE as a subquery.
Try re-writing as
;
with startdates as
(
... copy everything
)
Select #R = sum(DATEDIFF(d,CalcStart, CalcEnd))
FROM ...
return #R
You need to put the with statement before the select:
with startdates as
(
SELECT
dbo.View_PatientFlowValue.FlowValue_Value as val,
ROW_NUMBER() OVER(ORDER BY dbo.View_PatientFlowValue.FlowValue_Value) AS RowNumber
FROM dbo.View_PatientFlowValue
WHERE dbo.View_PatientFlowValue.FlowData_ID = 'ZZZZZ00071'
AND dbo.View_PatientFlowValue.FlowValue_RecordState<>1
AND Patient_ID = #PID
)
,enddates as
(
SELECT
dbo.View_PatientFlowValue.FlowValue_Value val,
ROW_NUMBER() OVER(ORDER BY dbo.View_PatientFlowValue.FlowValue_Value) AS RowNumber
FROM dbo.View_PatientFlowValue
WHERE dbo.View_PatientFlowValue.FlowData_ID = 'ZZZZZ00072'
AND dbo.View_PatientFlowValue.FlowValue_RecordState<>1
AND Person_ID = #PID
)
Select #R = sum(DATEDIFF(d,CalcStart, CalcEnd)) as NumDays
from (
select
case
when DATEDIFF(d, cast(str(#YEAR*10000+1*100+1) as date), s.val) < 1 then '1/1/' + str(#YEAR)
else s.val
end As CalcStart,
ISNULL(e.Val, cast(str((#YEAR+1)*10000+1*100+1) as date)) as CalcEnd,
s.val as realstart, e.val as realend
FROM StartDates s
LEFT OUTER JOIN EndDates e ON s.RowNumber = e.RowNumber
) accountforyear;
So, the #R = goes in the select after the with. You don't need a subquery here, so I just added the assignment in.
Define your CTEs before the query:
;with CTE_A( ColA, ColB)
as
(
select
ColA
, ColB
from
SomeTable
)
,CTE_B( ColA, ColC )
as
(
select
ColA
, ColC
from
SomeTable2
)
select
*
from
CTE_A a
inner join CTE_B b
on a.ColA = b.ColB

How to get previous and next row's value effeciently in SQL server

Say I have these rows,
InstrumentID
547
698
708
InstrumentID is not autogenerated column.
Say if I pass the parameter in procedure as 698, I should get previous value as 547 and next value as 708. How do I do this efficiently in SQL?
I have this procedure but it is not efficient (and not correct).
Alter PROCEDURE GetNextAndPreviousInsturmentID
(
#InstrumentID varchar(14),
#PreviousInstrumentID varchar(14) OUT,
#NextInstrumentID varchar(14) OUT
)
AS
BEGIN
Declare #RowNum int = 0
Select #RowNum = ROW_NUMBER() Over (Order by Cast(#InstrumentID as decimal(18))) From Documents Where InstrumentID = #InstrumentID
;With normal As
(
Select ROW_NUMBER() Over (Order by Cast(#InstrumentID as decimal(18))) as RowNum, Cast(InstrumentID as decimal(18)) as InstrumentID
From Documents
)
Select #PreviousInstrumentID = InstrumentID From normal
Where RowNum = #RowNum - 1
Select #NextInstrumentID = InstrumentID From normal
Where RowNum = #RowNum + 1
END
GO
Here is a simpler solution, still it's more efficient
SELECT P.PreviousID, N.NextID
FROM
(SELECT MAX(D.InstrumentID) PreviousID
FROM Documents D
WHERE InstrumentID < #InstrumentID) P
CROSS JOIN
(SELECT MIN(D.InstrumentID) NextID
FROM Documents D
WHERE InstrumentID > #InstrumentID) N
Try this:
Alter PROCEDURE GetNextAndPreviousInsturmentID
(
#InstrumentID varchar(14),
#PreviousInstrumentID varchar(14) OUT,
#NextInstrumentID varchar(14) OUT
)
AS
BEGIN
Declare #Ids TABLE(Id varchar(14))
;With normal As
(
--Numerate our rows
Select ROW_NUMBER() Over (Order by Cast(Documents.InstrumentID as decimal(18)) as RowNumber,
Documents.InstrumentID
From Documents
)
--Insert three rows from our table with our id and previos/next id
INSERT INTO #Ids(Id)
SELECT TOP(3) normal.InstrumentID
FROM normal
WHERE RowNumber >=
(
SELECT RowNumber - 1
FROM normal
WHERE normal.InstrumentID = #InstrumentID
)
ORDER BY normal.RowNumber
--select next and previos ids
SELECT #PreviousInstrumentID = Min(CAST(Id as decimal(18))),
#NextInstrumentID = MAX(CAST(Id as decimal(18)))
FROM #Ids
END
GO
In MS SQL 2012 we have new window functions like FIRST_VALUE and LAST_VALUE, unfortunately in sql 2008 these functions are missing.
WITH CTE AS (
SELECT rownum = ROW_NUMBER() OVER (ORDER BY p.LogDate), p.LogDate
FROM DeviceLogs p
)
SELECT prev.logdate PreviousValue, CTE.logdate, nex.logdate NextValue
FROM CTE
LEFT JOIN CTE prev ON prev.rownum = CTE.rownum - 1
LEFT JOIN CTE nex ON nex.rownum = CTE.rownum + 1
GO
select
LAG(InstrumentID) OVER (ORDER BY InstrumentID) PreviousValue,
InstrumentID,
LEAD(InstrumentID) OVER (ORDER BY InstrumentID) NextValue
from documents
Hi I think this will be much more efficient:
Select Next :
select top 1 ID from mytable
where ID >'698'
order by ID asc
Select Prev:
select top 1 ID from mytable
where ID <'698'
order by ID desc

Subtract all values from data

Table data look like this
id val
1 4
2 2
3 1
I want result of subtract valu of val field in one sql statement.
like it should be like 4-2-1 = 1 if order by id asc, 1-2-4 = -5 if order by id desc.
You can try this
DECLARE #Table TABLE(
ID INT,
Val INT
)
INSERT INTO #Table (ID,Val) SELECT 1, 4
INSERT INTO #Table (ID,Val) SELECT 2, 2
INSERT INTO #Table (ID,Val) SELECT 3, 1
SELECT SUM(Val * CASE WHEN RowID = 1 THEN 1 ELSE -1 END)
FROM (
SELECT *,
ROW_NUMBER() OVER (ORDER BY ID) RowID
FROM #Table
) sub
You can declare a variable and increment it in the select statement:
declare #sum float
select #sum = case when #sum is null then value else #sum - value end
from YourTable
order by id
select #sum
To reverse the subtraction order, change order by id to order by id desc.
if you want to use just the sql without temp tables or variables:
select fromid.val - sumid.val
from (
select val
from t
where id = (
select min(id)
from t
)
) fromid cross join (
select sum(val) as val
from t
where id > (
select min(id)
from t
)
) sumid