SQL server query, sort on multiple columns - sql

We have a nested structure of tasks in which every task can contain other tasks. Order of tasks in a task is important and is defined by the Sequence field starting at zero.
Here is my table structure:
-- Schema for the nested-task sample; every object is created in database MyDB.
USE [MyDB]
GO
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- Relation: one row per edge between two entities. The queries in this post
-- read SourceEntityId as the parent task and TargetEntityId as the child task.
-- PK_ID is an identity surrogate key and the clustered primary key.
-- No foreign keys to the task tables are declared.
CREATE TABLE [dbo].[Relation](
[PK_ID] [int] IDENTITY(1,1) NOT NULL,
[SourceEntityId] [uniqueidentifier] NOT NULL,
[TargetEntityId] [uniqueidentifier] NOT NULL,
CONSTRAINT [PK_Relation] PRIMARY KEY CLUSTERED
(
[PK_ID] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
GO
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- TaskTable1: tasks keyed by Id; SequenceId is the task's position among its
-- siblings. No primary key or index is declared on this table.
CREATE TABLE [dbo].[TaskTable1](
[Id] [uniqueidentifier] NOT NULL,
[Title] [nvarchar](max) NULL,
[SequenceId] [int] NULL
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
GO
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- TaskTable2: same column list as TaskTable1; a task id appearing in Relation
-- may live in either table, which is why the queries look both up.
CREATE TABLE [dbo].[TaskTable2](
[Id] [uniqueidentifier] NOT NULL,
[Title] [nvarchar](max) NULL,
[SequenceId] [int] NULL
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
GO
-- Seed data for the nested-task sample.
-- Relation edges carry explicit PK_ID values, so IDENTITY_INSERT is switched
-- on for the insert and off again afterwards.
SET IDENTITY_INSERT [dbo].[Relation] ON
GO
INSERT [dbo].[Relation] ([PK_ID], [SourceEntityId], [TargetEntityId])
VALUES
    (1,  N'dab00c89-961c-84dd-bb43-cffd18e63594', N'5b266fd1-cbc8-c16a-91c4-5675a35c9ecf'),
    (2,  N'dab00c89-961c-84dd-bb43-cffd18e63594', N'e499ca68-8103-b8ec-06ba-110fa3f6eb5b'),
    (4,  N'dab00c89-961c-84dd-bb43-cffd18e63594', N'645ad2eb-df10-0d5b-0526-408aad45a145'),
    (5,  N'785227d1-393c-ae18-02e5-03ab08d577af', N'5655aeb7-b8b5-dca9-38af-37687c668c14'),
    (6,  N'dab00c89-961c-84dd-bb43-cffd18e63594', N'030cdefc-0e45-01e6-e2a5-a69e303bda4b'),
    (7,  N'dab00c89-961c-84dd-bb43-cffd18e63594', N'0375c7a1-8cc5-a4c8-151c-966e4af83f73'),
    (8,  N'dab00c89-961c-84dd-bb43-cffd18e63594', N'785227d1-393c-ae18-02e5-03ab08d577af'),
    (9,  N'030cdefc-0e45-01e6-e2a5-a69e303bda4b', N'8324bba9-252f-bef8-c018-8b86491e2361'),
    (10, N'030cdefc-0e45-01e6-e2a5-a69e303bda4b', N'f1cbe8a3-3285-4cf0-096d-aad0327bdb0b'),
    (11, N'dab00c89-961c-84dd-bb43-cffd18e63594', N'0189f0af-5045-a498-2d70-99187bf3f0ae'),
    (12, N'785227d1-393c-ae18-02e5-03ab08d577af', N'ffecd091-c17b-ee5f-a64d-54ea9ff65aa9')
GO
SET IDENTITY_INSERT [dbo].[Relation] OFF
GO
-- Tasks stored in TaskTable1 (four top-level tasks and the two children of 'Seventh').
INSERT [dbo].[TaskTable1] ([Id], [Title], [SequenceId])
VALUES
    (N'5b266fd1-cbc8-c16a-91c4-5675a35c9ecf', N'First', 0),
    (N'e499ca68-8103-b8ec-06ba-110fa3f6eb5b', N'Second', 1),
    (N'0189f0af-5045-a498-2d70-99187bf3f0ae', N'Fourth', 3),
    (N'0375c7a1-8cc5-a4c8-151c-966e4af83f73', N'Sixth', 5),
    (N'ffecd091-c17b-ee5f-a64d-54ea9ff65aa9', N'sub 1', 0),
    (N'5655aeb7-b8b5-dca9-38af-37687c668c14', N'sub 2', 1)
GO
-- Tasks stored in TaskTable2 (three top-level tasks and the two children of 'Fifth').
INSERT [dbo].[TaskTable2] ([Id], [Title], [SequenceId])
VALUES
    (N'030cdefc-0e45-01e6-e2a5-a69e303bda4b', N'Fifth', 4),
    (N'785227d1-393c-ae18-02e5-03ab08d577af', N'Seventh', 6),
    (N'645ad2eb-df10-0d5b-0526-408aad45a145', N'Third', 2),
    (N'8324bba9-252f-bef8-c018-8b86491e2361', N'sub1', 0),
    (N'f1cbe8a3-3285-4cf0-096d-aad0327bdb0b', N'sub2', 1)
GO
To get the tasks in order with their child tasks right beneath their parents, I tried the following query to no avail:
-- Asker's attempt: walk Relation from one root task and list every descendant
-- with its title and sequence. Shown as posted; it does not produce the
-- desired parent-then-children order (see the note on the final ORDER BY).
;With TaskCTE
AS
(
-- Anchor member: direct children of the root task, at Level 0.
select R.SourceEntityId AS ParentTask_Id, R.TargetEntityId AS Task_Id , cast(null as uniqueidentifier) AS ParentTask, 0 AS Level
-- x numbers the rows using a constant ORDER BY key, so the numbering is not tied to SequenceId.
, ROW_NUMBER() OVER (ORDER BY (SELECT 100)) / power(10.0,0) as x
from Relation R
where (R.SourceEntityId = 'DAB00C89-961C-84DD-BB43-CFFD18E63594')
UNION ALL
-- Recursive member: children of every task found so far, one level deeper.
select R1.SourceEntityId , R1.TargetEntityId, TaskCTE.Task_Id , Level + 1
, x + ROW_NUMBER() OVER (ORDER BY (SELECT 100)) / power(10.0,level+1)
from Relation R1
INNER JOIN TaskCTE
ON R1.SourceEntityId = TaskCTE.Task_Id
)
select ParentTask_Id, Task_Id, ParentTask, Level
-- A task id lives in exactly one of the two task tables in the sample data,
-- so COALESCE picks whichever LEFT JOIN matched.
, COALESCE(TT1.Title, TT2.Title) AS Title
, COALESCE(TT1.SequenceId, TT2.SequenceId) AS SequenceId
, x
from TaskCTE
LEFT OUTER JOIN TaskTable1 TT1
ON TaskCTE.Task_Id = TT1.Id
LEFT OUTER JOIN TaskTable2 TT2
ON TaskCTE.Task_Id = TT2.Id
-- Sorting by level first lists every Level 0 row before any Level 1 row, so
-- children are not placed directly beneath their parent.
order by level , SequenceId
If you follow the structure of the required output (shown in the image below), the **Sequence** column along with the **Level** column must determine the sort order.
Thanks in advance
Edit: My query output which is wrong:

If your problem is that the sequence field lives in the task tables rather than in the Relation table, why not join them before running the recursion? It will likely be slower than your initial query, though. Here's a sample:
-- cte: every Relation edge together with the SequenceId of its target task,
-- looked up in whichever task table holds that id.
with cte as (
select
r.SourceEntityId, r.TargetEntityId, t.SequenceId, 0 k
from
Relation r
join (
select Id, SequenceId from TaskTable1
union all
select Id, SequenceId from TaskTable2
) t on r.TargetEntityId = t.Id
---------------------------------------
-- k is always 0, so this branch never returns a row; it only makes cte
-- formally recursive. NOTE(review): presumably a trick to get the join result
-- spooled by the optimizer - confirm before removing.
union all select SourceEntityId, TargetEntityId, SequenceId, k from cte where k = 1
---------------------------------------
)
-- rcte: walks the tree from the root task and builds a sortable path in rn.
-- Each path segment is the sibling position (by SequenceId) left-padded with
-- zeros to 5 digits, so comparing rn as text agrees with numeric order
-- (unpadded, '10' would sort before '2'). Supports up to 99999 siblings.
-- Both members cast rn to varchar(8000) because the anchor and recursive
-- column types must match exactly.
, rcte as (
select
SourceEntityId, TargetEntityId, ParentTask = cast(null as uniqueidentifier)
, SequenceId
, rn = cast(right('00000' + cast(row_number() over (order by SequenceId) as varchar(10)), 5) as varchar(8000))
, 1 step
from
cte
where
SourceEntityId = 'DAB00C89-961C-84DD-BB43-CFFD18E63594'
union all
select
a.TargetEntityId, b.TargetEntityId, a.SourceEntityId, b.SequenceId
, cast(a.rn + '.' + right('00000' + cast(row_number() over (partition by b.SourceEntityId order by b.SequenceId) as varchar(10)), 5) as varchar(8000))
, step + 1
from
rcte a
join cte b on a.TargetEntityId = b.SourceEntityId
)
-- A parent's path is a prefix of its children's paths, so ordering by rn
-- lists each parent immediately before its own subtree.
select
SourceEntityId, TargetEntityId, ParentTask, SequenceId, rn, step
from
rcte
order by rn
I have not included your X column because I cannot tell what you are trying to calculate with it. Also, in your expected output the values of ParentTask and ParentTask_Id are the same. Is that intended?

I am using the same query as @Uzi with a minor correction. I have the same doubts as him. @Yasser should clearly show the desired output in a proper format and remove unnecessary columns.
If the only purpose of row_number is to order the records, then why convert it to varchar(8000)? You can also avoid the expensive row_number altogether.
Take advantage of PK_ID instead of the expensive row_number, even if PK_ID is not sequential in this case.
If performance is a big issue, then the asker should mention the number of rows in the three tables and what other filters are applied in the WHERE clause.
Why is the data type uniqueidentifier? Would it serve the purpose if it were INT?
Read this
Check this query,
-- cte: every relation edge with the SequenceId of its target task, looked up
-- in whichever task table holds that id.
WITH cte
AS (
SELECT r.PK_ID
,r.SourceEntityId
,r.TargetEntityId
,t.SequenceId,0 k
FROM #Relation r
JOIN (
SELECT id
,SequenceId
FROM #TaskTable1
UNION ALL
SELECT id
,SequenceId
FROM #TaskTable2
) t ON r.TargetEntityId = t.id
)
-- rcte: walks the tree from the root task. rn is a numeric sort key:
--   top-level task -> its 1-based position by SequenceId (1.0, 2.0, ...)
--   child task     -> parent's rn + SequenceId / 10 (5.0, 5.1, ...)
-- Limits of this encoding: it is only unambiguous for one level of children
-- and for child SequenceId values 0..9; deeper or wider trees produce keys
-- that collide with other branches.
,rcte
AS (
SELECT SourceEntityId
,TargetEntityId
,ParentTask = cast(NULL AS UNIQUEIDENTIFIER)
,SequenceId
-- decimal(9,1) instead of decimal(3,1): the narrower type overflows once
-- there are 100 or more top-level tasks. Both members must use the same type.
,rn = cast(row_number() over (order by SequenceId) as decimal(9,1))
,1 step
FROM cte
WHERE SourceEntityId = 'DAB00C89-961C-84DD-BB43-CFFD18E63594'
UNION ALL
SELECT a.TargetEntityId
,b.TargetEntityId
,a.SourceEntityId
,b.SequenceId
,cast((a.rn+(b.SequenceId/10.0)) as decimal(9,1))
,step + 1
FROM rcte a
JOIN cte b ON a.TargetEntityId = b.SourceEntityId
)
-- A child with SequenceId 0 gets exactly its parent's rn, so step is needed
-- as a tie-breaker to keep the parent (lower step) ahead of that child.
SELECT *
FROM rcte
ORDER BY rn, step
-- 2nd Edit,
I understand that there is no way of changing the database.
In that case it is logical to create an indexed view in which the task table id is the clustered index.
-- Ids and sequence numbers from both task tables, combined into one row set.
select id, SequenceId from #TaskTable1
union all
select id, SequenceId from #TaskTable2
-- Nonclustered indexes on the two Relation columns the queries above join and filter on.
Create nonclustered index NCI_Relation_SourceID on Relation([SourceEntityId])
Create nonclustered index NCI_Relation_TargetEntityId on Relation([TargetEntityId])
you can once try this combination,
Remove PK_ID as clustered index and make TargetEntityId as clustered index.
you can once try creating view on this query,
-- Candidate view body: each relation edge plus the SequenceId of its target
-- task, taken from whichever task table contains that id.
SELECT
    rel.PK_ID,
    rel.SourceEntityId,
    rel.TargetEntityId,
    task.SequenceId
FROM #Relation AS rel
INNER JOIN (
    SELECT id, SequenceId FROM #TaskTable1
    UNION ALL
    SELECT id, SequenceId FROM #TaskTable2
) AS task
    ON task.id = rel.TargetEntityId

Adding a new column named Hierarchy to the CTE expression and sorting the result by this value could solve your requirement.
Here is the modified CTE query
-- Variant of the asker's query that adds a Hierarchy path column
-- ('1', '1:1', '1:2', ...) and sorts the result by it.
;With TaskCTE AS
(
-- Anchor member: direct children of the root task, at Level 0.
select
R.SourceEntityId AS ParentTask_Id,
R.TargetEntityId AS Task_Id , cast(null as uniqueidentifier) AS ParentTask, 0 AS Level
, ROW_NUMBER() OVER (ORDER BY (SELECT 100)) / power(10.0,0) as x
-- The ORDER BY key is SourceEntityId, which the WHERE clause below fixes to a
-- single value, so the numbering of siblings does not follow SequenceId.
,CAST( ROW_NUMBER() OVER (ORDER BY R.SourceEntityId) as varchar(max)) Hierarchy
from Relation R
where (R.SourceEntityId = 'DAB00C89-961C-84DD-BB43-CFFD18E63594')
UNION ALL
-- Recursive member: children of every task found so far; the child's number
-- is appended to the parent's Hierarchy path after a ':' separator.
select R1.SourceEntityId , R1.TargetEntityId, TaskCTE.Task_Id , Level + 1
, x + ROW_NUMBER() OVER (ORDER BY (SELECT 100)) / power(10.0,level+1)
,CAST(Hierarchy + ':' + CAST(ROW_NUMBER() OVER (ORDER BY R1.SourceEntityId) as varchar(max)) as varchar(max)) as Hierarchy
from Relation R1
INNER JOIN TaskCTE
ON R1.SourceEntityId = TaskCTE.Task_Id
)
select ParentTask_Id, Task_Id, ParentTask, Level
, COALESCE(TT1.Title, TT2.Title) AS Title
, COALESCE(TT1.SequenceId, TT2.SequenceId) AS SequenceId
, x
,Hierarchy
from TaskCTE
LEFT OUTER JOIN TaskTable1 TT1
ON TaskCTE.Task_Id = TT1.Id
LEFT OUTER JOIN TaskTable2 TT2
ON TaskCTE.Task_Id = TT2.Id
-- Hierarchy is a string, so this is a text comparison: a segment '10' sorts
-- before a segment '2'.
order by Hierarchy
Please note that I have added a Hierarchy column whose value is calculated using the ROW_NUMBER() function, which creates a unique integer value for each task.
You can find an implementation of this hierarchy query with a SQL CTE in the referenced tutorial.
I hope it helps.
I am also adding the output as a screenshot here to show how the data is sorted according to Hierarchy.
Although children are listed after their parents, as far as I can see it does not match your desired outcome one-to-one.

Related

How to Use Exists in self join

I want those Id whose Orgorder never equal to 1.
-- Sample table for the "Id whose ORGORDER is never 1" question.
-- Id is not unique: several rows share one Id and differ in ORGORDER.
-- ORGORDER is nullable.
CREATE TABLE [dbo].[TEST](
[ORGORDER] [int] NULL,
[Id] [int] NOT NULL,
[ORGTYPE] [varchar](30) NULL,
ORGID INT NULL,
[LEAD] [decimal](19, 2) NULL
) ON [PRIMARY]
GO
-- Id 100 has one row with ORGORDER = 1, so it must be excluded from the result.
INSERT [dbo].[TEST] ([ORGORDER], [Id], [ORGTYPE] ,ORGID, [LEAD]) VALUES (1, 100, N'ABC',1, NULL)
GO
INSERT [dbo].[TEST] ([ORGORDER], [Id], [ORGTYPE],ORGID, [LEAD]) VALUES (0, 100, N'ABC',2, 0)
GO
INSERT [dbo].[TEST] ([ORGORDER], [Id], [ORGTYPE],ORGID, [LEAD]) VALUES (0, 100, N'ACD',1, NULL)
GO
-- Id 101 has ORGORDER values 0 and 2 only, so it is the expected result.
INSERT [dbo].[TEST] ([ORGORDER], [Id], [ORGTYPE],ORGID, [LEAD]) VALUES (0, 101, N'ABC',0, 0)
GO
INSERT [dbo].[TEST] ([ORGORDER], [Id], [ORGTYPE],ORGID, [LEAD]) VALUES (2, 101, N'ABC',4, NULL)
GO
I am using EXISTS but not getting my expected result.
Expected result is -
ID
101
You can do this with one pass of the data, and order all ORGORDER = 1 first, then if it's the first row and it has the ORGORDER value you want to exclude, you can just ignore it.
-- Returns each Id that has no row with ORGORDER = 1, in a single pass.
-- Within each Id, rows with ORGORDER = 1 are numbered first; if the row
-- numbered 1 is not an ORGORDER = 1 row, the Id has no such row at all.
;WITH x AS
(
    -- ORGORDER must be projected here: the outer query filters on it.
    SELECT Id, ORGORDER, rn = ROW_NUMBER() OVER
      (PARTITION BY Id ORDER BY CASE WHEN ORGORDER = 1 THEN 1 ELSE 2 END)
    FROM dbo.TEST
)
-- ORGORDER is nullable; a NULL is "not 1" for this purpose, but NULL <> 1
-- evaluates to UNKNOWN, so the NULL case is accepted explicitly.
SELECT Id FROM x WHERE rn = 1 AND (ORGORDER <> 1 OR ORGORDER IS NULL);
Example db<>fiddle
Use a subquery in a NOT EXISTS clause, linking the subquery table to the outer query table by ID:
-- Ids for which no row with the same Id has ORGORDER = 1.
SELECT DISTINCT candidate.ID
FROM dbo.TEST AS candidate
WHERE NOT EXISTS (
    SELECT 1
    FROM dbo.TEST AS hit
    WHERE hit.ORGORDER = 1
      AND hit.ID = candidate.ID
)
db<>fiddle
An option would be using an aggregation with a suitable HAVING clause such as
-- One row per Id; keep the Id only when none of its rows has ORGORDER = 1
-- (the flag is 1 for such a row and 0 otherwise, so a maximum of 0 means none).
SELECT t.[Id]
FROM [dbo].[TEST] AS t
GROUP BY t.[Id]
HAVING MAX(CASE WHEN t.[ORGORDER] = 1 THEN 1 ELSE 0 END) = 0
where if there's at least one value equals to 1 for the concerned column([ORGORDER]), then that [Id] column won't be listed as result.
Demo

Write SQL to identify multiple subgroupings within a grouping

I have a program that summarizes non-normalized data in one table and moves it to another and we frequently get a duplicate key violation on the insert due to bad data. I want to create a report for the users to help them identify the cause of the error.
For example, consider the following contrived simple SQL which summarizes data in the table Companies and inserts it into CompanySum, which has a primary key of State/Zone. In order for the INSERT not to fail, there cannot be more than one distinct combinations of Company/Code for every unique primary key State/Zone combination. If there is, we want the insert to fail so that the data can be corrected.
-- Summarise COMPANIES into CompanySum.
-- State/Zone is the target's primary key. Company/Code are grouped as well,
-- so a State/Zone holding more than one Company/Code combination yields
-- several rows for the same key and the insert fails, as intended.
INSERT INTO CompanySum ([State], [Zone], [Company], [Code], [Revenue])
SELECT
    src.[State],          -- target key, part 1
    src.[Zone],           -- target key, part 2
    src.[Company],        -- expected to be single-valued per State/Zone
    src.[Code],           -- expected to be single-valued per State/Zone
    SUM(src.[Revenue])    -- aggregated revenue
FROM COMPANIES AS src
GROUP BY
    src.[State],
    src.[Zone],
    src.[Company],
    src.[Code]
I would like to create a report to help the users easily identify and correct the data so that there is only one distinct Company/Code combination within a State/Zone. For each distinct State/Zone value, I would like to identify the distinct Company/Code combinations within the State/Zone. If there are more than one Company/Code combinations within a State/Zone, I would like all of the records in the State/Zone to be displayed in the output. For example, here is the sample input and desired output:
Data:
RecordNumber State Zone Company Code Revenue
------------ ----- ---- ------- ---- --------
1 CT B State of CT 65453 10
2 CT B State of CT 65453 3
3 CT B Travelers 33443 20
4 CT C Cigna 45678 24
5 CT C Cigna 45678 234
6 MI A GM 48089 100
7 MI A GM 54555 200
8 MI B Chrysler 43434 44
Desired Output:
RecordNumber State Zone Company Code Revenue
------------ ----- ---- ------- ---- --------
1 CT B State of CT 65453 10
2 CT B State of CT 65453 3
3 CT B Travelers 33443 20
6 MI A GM 48089 100
7 MI A GM 54555 200
Here is the DDL and DML needed to create this test scenario
-- Source table: non-normalized company rows; no key is declared, so the same
-- State/Zone/Company/Code combination can appear on several rows.
CREATE TABLE [dbo].[Companies](
[RecordNumber] [int] NULL,
[State] [char](2) NOT NULL,
[Zone] [varchar](30) NOT NULL,
[Company] [varchar](30) NOT NULL,
[Code] [varchar](30) NOT NULL,
[Revenue] [numeric](9, 1) NULL
) ON [PRIMARY]
-- Target table: one row per State/Zone, enforced by the primary key.
CREATE TABLE [dbo].[CompanySum](
[State] [char](2) NOT NULL,
[Zone] [varchar](30) NOT NULL,
[Company] [varchar](30) NOT NULL,
[Code] [varchar](30) NOT NULL,
[Revenue] [numeric](9, 1) NULL,
CONSTRAINT [PK_CompanySum] PRIMARY KEY CLUSTERED
(
[State] ASC,
[Zone] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
-- Intentionally unfiltered: empties the whole source table before reloading the sample rows.
DELETE FROM [dbo].[Companies]
GO
-- CT/B: two different Company/Code combinations (records 1-3) -> conflicting group.
INSERT [dbo].[Companies] ([RecordNumber], [State], [Zone], [Company], [Code], [Revenue]) VALUES (1, N'CT', N'B', N'State of CT', N'65453', CAST(10.0 AS Numeric(9, 1)))
GO
INSERT [dbo].[Companies] ([RecordNumber], [State], [Zone], [Company], [Code], [Revenue]) VALUES (2, N'CT', N'B', N'State of CT', N'65453', CAST(3.0 AS Numeric(9, 1)))
GO
INSERT [dbo].[Companies] ([RecordNumber], [State], [Zone], [Company], [Code], [Revenue]) VALUES (3, N'CT', N'B', N'Travelers', N'33443', CAST(20.0 AS Numeric(9, 1)))
GO
-- CT/C: a single Company/Code combination (records 4-5) -> clean group.
INSERT [dbo].[Companies] ([RecordNumber], [State], [Zone], [Company], [Code], [Revenue]) VALUES (4, N'CT', N'C', N'Cigna', N'45678', CAST(24.0 AS Numeric(9, 1)))
INSERT [dbo].[Companies] ([RecordNumber], [State], [Zone], [Company], [Code], [Revenue]) VALUES (5, N'CT', N'C', N'Cigna', N'45678', CAST(234.0 AS Numeric(9, 1)))
GO
-- MI/A: same Company but two different Codes (records 6-7) -> conflicting group.
INSERT [dbo].[Companies] ([RecordNumber], [State], [Zone], [Company], [Code], [Revenue]) VALUES (6, N'MI', N'A', N'GM', N'48089', CAST(100.0 AS Numeric(9, 1)))
GO
INSERT [dbo].[Companies] ([RecordNumber], [State], [Zone], [Company], [Code], [Revenue]) VALUES (7, N'MI', N'A', N'GM', N'54555', CAST(200.0 AS Numeric(9, 1)))
GO
-- MI/B: a single row (record 8) -> clean group.
INSERT [dbo].[Companies] ([RecordNumber], [State], [Zone], [Company], [Code], [Revenue]) VALUES (8, N'MI', N'B', N'Chrysler', N'43434', CAST(44.0 AS Numeric(9, 1)))
GO
This is a hopefully better re-construction of a previous post of mine SQL to return unique combinations of non key columns within a set of key columns where I am trying to help clarify the question and provide a simple working example that readers can use.
Please see this SQL Fiddle:
http://sqlfiddle.com/#!18/d0141/1
Is this a solution?
Fiddle: http://sqlfiddle.com/#!18/12e9a0/9
-- Lists every Companies row that belongs to a State/Zone group containing
-- more than one distinct Company/Code combination.
select
    c.RecordNumber,
    c.State,
    c.Zone,
    c.Company,
    c.Code,
    c.Revenue
from
Companies c
inner join (
    -- A group has two or more distinct (Company, Code) pairs exactly when
    -- Company or Code takes more than one value inside it. Comparing min and
    -- max per column avoids concatenating the columns, which would treat
    -- different pairs such as ('AB', 'C') and ('A', 'BC') as the same value.
    -- Both columns are NOT NULL, so the comparisons are never UNKNOWN.
    select State, Zone
    from Companies
    group by State, Zone
    having min(Company) <> max(Company)
        or min(Code) <> max(Code)
) as dup_state_zone
on(
    c.State = dup_state_zone.State
    and c.Zone = dup_state_zone.Zone
)
order by c.RecordNumber
Edited - Fix the having clause, with a little cheat...
I used windows ranking function to rank the records by state ordering by zone ascending, to get the desired output.
Suggestion: the INSERT into CompanySum will fail due to your primary key constraint, because you select records with duplicate keys. In this case you need to change your primary key constraint a little.
CONSTRAINT [PK_CompanySum] PRIMARY KEY CLUSTERED
(
[State] ASC,
[Zone] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
Since both State and Zone contain duplicate values, this insert will fail. It is better to add an auto-increment primary key, or to include RecordNumber in the primary key constraint rather than relying on State and Zone alone to make it unique, as there are duplicate values in your desired output.
-- Lists every Companies row whose State/Zone group contains more than one
-- distinct Company/Code combination.
-- Ranking zones within a state only selects the alphabetically first Zone of
-- each State; it matches the sample output by coincidence. The conflict test
-- below looks at the Company/Code values inside each State/Zone group instead.
SELECT
A.[RecordNumber]
,A.[State]
,A.[Zone]
,A.[Company]
,A.[Code]
,A.[Revenue]
FROM
(
SELECT [RecordNumber], [State], [Zone], [Company], [Code], [Revenue]
-- 1 when Company or Code takes more than one value within the State/Zone group.
,CASE
    WHEN MIN([Company]) OVER (PARTITION BY [State], [Zone]) <> MAX([Company]) OVER (PARTITION BY [State], [Zone])
      OR MIN([Code]) OVER (PARTITION BY [State], [Zone]) <> MAX([Code]) OVER (PARTITION BY [State], [Zone])
    THEN 1
    ELSE 0
 END AS [has_conflict]
FROM Companies
) AS A
WHERE A.[has_conflict] = 1
ORDER BY A.[RecordNumber]
Highlighted are duplicates which will make your insert fail.

Trying to accomplish without dynamic SQL (sql server)

All,
I'm trying to pull off an insert from one table to another without using dynamic sql. However, the only solutions I'm coming up with at the moment use dynamic sql. It's been tricky to search for any similar scenarios.
Here are the details:
My starting point is the following legacy table:
-- Legacy source table: one row per (attribute, possible value).
CREATE TABLE [dbo].[_Combinations](
    [AttributeID] [int] NULL,
    [Value] [varchar](50) NULL
) ON [PRIMARY]
GO
-- First dataset: attribute 16 has 2 values, 28 has 7, 8 has 11.
INSERT [dbo].[_Combinations] ([AttributeID], [Value])
VALUES
    (16, N'1'), (16, N'2'),
    (28, N'Red'), (28, N'Orange'), (28, N'Yellow'), (28, N'Green'),
    (28, N'Blue'), (28, N'Indigo'), (28, N'Violet'),
    (8, N'A'), (8, N'B'), (8, N'C'), (8, N'D'), (8, N'E'), (8, N'F'),
    (8, N'G'), (8, N'H'), (8, N'I'), (8, N'J'), (8, N'K')
SELECT AttributeID, Value FROM _Combinations
The _Combinations table contains a key for different types of attributes (AttributeID) and the possible values for each attribute (Value).
In this case, there are 3 different attributes with multiple possible values, however there can be many more (up to 10).
The requirement is then to create every possible combination of each value and store it normalized, as there will be other data stored with each possible combination. I need to store both the attribute keys and values that make up each combination, so it's not just a simple cross join to display each combination. The target table for storing each combination of attributes is this:
-- Target table: one row per (combination, attribute). GroupKey identifies the
-- combination; AttributeID/Value name one member of it. No key is declared.
CREATE TABLE [dbo].[_CombinedAttributes](
[GroupKey] [int] NULL,
[AttributeID] [int] NULL,
[Value] [varchar](50) NULL
) ON [PRIMARY]
So attribute combination records using the above data would look like this in the target table:
GroupKey AttributeID Value
1 8 A
1 16 1
1 28 Red
2 8 B
2 16 1
2 28 Red
This gives me what I need. Each group has an identifier and I can track the attributeIDs and values that make up each group. I'm using two scripts to get from the _Combinations table to the format of the _CombinedAttributes table:
-- SCRIPT #1
-- Builds #Test with one row per combination of values of attributes 8, 16 and
-- 28 (one derived table per attribute, cross joined), and numbers the rows
-- with an identity column. The attribute ids and the number of attributes are
-- hard-coded here.
SELECT Identity(int) AS RowNumber, * INTO #Test
FROM (
SELECT AttributeID AS Attribute1, Value AS Value1 FROM _Combinations WHERE AttributeID = 8) C1
CROSS JOIN
(
SELECT AttributeID AS Attribute2, Value AS Value2 FROM _Combinations WHERE AttributeID = 16) C2
CROSS JOIN
(
SELECT AttributeID AS Attribute3, Value AS Value3 FROM _Combinations WHERE AttributeID = 28) C3
-- SCRIPT #2
-- Unpivots #Test into _CombinedAttributes: each wide combination row becomes
-- three rows (one per attribute) that share the combination's RowNumber as
-- GroupKey.
-- The target columns are listed explicitly so the insert does not depend on
-- the table's column order.
INSERT INTO _CombinedAttributes (GroupKey, AttributeID, Value)
SELECT RowNumber AS GroupKey, Attribute1 AS AttributeID, Value1 AS Value
FROM #Test
UNION ALL
SELECT RowNumber, Attribute2, Value2
FROM #Test
UNION ALL
SELECT RowNumber, Attribute3, Value3
FROM #Test
-- ORDER BY on a UNION refers to the output column names of the first SELECT.
ORDER BY GroupKey, AttributeID
The above two scripts work, but obviously there's some drawbacks. Namely I need to know how many attributes I'm dealing with and there's hard coding of IDs, so I can't generate this on the fly. The solution I came up with is I build the strings for Script 1 and Script 2 by looping through the attributes in the the _Combinations table and generate execution strings which is long and messy but I can post if needed. Can anyone see a way to pull off the format for the final insert without dynamic sql?
This routine wouldn't be run very much, but it's going to be run enough that I'd like to not be doing any execute string building and use straight SQL.
Thanks in advance.
UPDATE:
When I use a second dataset, Gordon's code is no longer returning correct results, it's creating groups with only 1 attribute near the end, however on this second dataset I get the correct rowcount with Nathan's routine (row count on final result should be 396). But as I stated on the comments, if I use the first dataset, I get the opposite result, Gordon's returns correctly, but Nathan's code has dups. I'm at a loss. Here is the second data set:
-- Second dataset: recreate _Combinations and load attribute 16 (2 values),
-- 28 (6 values) and 8 (11 values).
DROP TABLE [dbo].[_Combinations]
GO
CREATE TABLE [dbo].[_Combinations](
[AttributeID] [int] NULL,
-- The length must be explicit: a bare varchar column is varchar(1), which
-- cannot hold values such as '40-44' or 'AA'. varchar(50) matches the
-- original table definition and the target table's Value column.
[Value] [varchar](50) NULL
) ON [PRIMARY]
GO
INSERT [dbo].[_Combinations] ([AttributeID], [Value]) VALUES (16, N'1')
INSERT [dbo].[_Combinations] ([AttributeID], [Value]) VALUES (16, N'2')
INSERT [dbo].[_Combinations] ([AttributeID], [Value]) VALUES (28, N'<=39')
INSERT [dbo].[_Combinations] ([AttributeID], [Value]) VALUES (28, N'40-44')
INSERT [dbo].[_Combinations] ([AttributeID], [Value]) VALUES (28, N'45-49')
INSERT [dbo].[_Combinations] ([AttributeID], [Value]) VALUES (28, N'50-54')
INSERT [dbo].[_Combinations] ([AttributeID], [Value]) VALUES (28, N'55-64')
INSERT [dbo].[_Combinations] ([AttributeID], [Value]) VALUES (28, N'65+')
INSERT [dbo].[_Combinations] ([AttributeID], [Value]) VALUES (8, N'AA')
INSERT [dbo].[_Combinations] ([AttributeID], [Value]) VALUES (8, N'JJ')
INSERT [dbo].[_Combinations] ([AttributeID], [Value]) VALUES (8, N'CC')
INSERT [dbo].[_Combinations] ([AttributeID], [Value]) VALUES (8, N'DD')
INSERT [dbo].[_Combinations] ([AttributeID], [Value]) VALUES (8, N'EE')
INSERT [dbo].[_Combinations] ([AttributeID], [Value]) VALUES (8, N'KK')
INSERT [dbo].[_Combinations] ([AttributeID], [Value]) VALUES (8, N'BB')
INSERT [dbo].[_Combinations] ([AttributeID], [Value]) VALUES (8, N'FF')
INSERT [dbo].[_Combinations] ([AttributeID], [Value]) VALUES (8, N'GG')
INSERT [dbo].[_Combinations] ([AttributeID], [Value]) VALUES (8, N'HH')
INSERT [dbo].[_Combinations] ([AttributeID], [Value]) VALUES (8, N'II')
I think this solves your problem.
Here is the approach. First, observe that the final data has the product of the number of each attribute -- 2*7*11 = 154 rows. Then observe that each value occurs a fixed number of times. For AttributeId = 16, each value occurs 154 / 2, because there are two values.
So, the idea is to calculate the number of times that each value appears. Then, generate the list of all the values. The final challenge is to assign the group numbers to these. For this, I use row_number() partitioned by the attribute id. To be honest, I'm not 100% sure that the grouping assignment is correct (it makes sense and it passed the eyeball test), but I'm worried that I'm missing a subtlety.
Here is the query:
-- First approach: repeat every attribute value as many times as it must occur
-- in the full set of combinations, then number the repeated rows per attribute
-- to obtain a group number.
-- attributecount1: number of values per attribute.
with attributecount1 as (
select c.AttributeId, count(*) as cnt
from _Combinations c
group by c.AttributeId
),
-- const: tot = product of all per-attribute counts, computed as
-- exp(sum(log(cnt))); numattr = number of attributes.
const as (
select exp(sum(log(cnt))) as tot, count(*) as numattr
from attributecount1
),
-- attributecount: numtimes = how often each value of the attribute occurs
-- across all combinations (total / number of values of that attribute).
attributecount as (
select a.*,
(tot / a.cnt) as numtimes
from attributecount1 a cross join const
),
-- thevalues: recursive; emits every (attribute, value) row once per seqnum
-- in 1..numtimes.
thevalues as (
select c.AttributeId, c.Value, ac.numtimes, 1 as seqnum
from AttributeCount ac join
_Combinations c
on ac.AttributeId = c.AttributeId
union all
select v.AttributeId, v.Value, v.numtimes, v.seqnum + 1
from thevalues v
where v.seqnum + 1 <= v.numtimes
)
-- groupnum numbers the rows of each attribute by (seqnum, Value); rows of
-- different attributes sharing a groupnum form one group.
select row_number() over (partition by AttributeId order by seqnum, Value) as groupnum,
*
from thevalues
-- Positional ORDER BY: column 1 is groupnum, column 2 is AttributeId.
order by 1, 2
The SQL Fiddle is here.
EDIT:
Unfortunately, I don't have access to SQL Server today and SQL Fiddle is acting up.
The problem is solvable. The above solution works, but -- as stated in my comment -- only when the dimensions are pairwise mutually prime. The problem is the assignment of the group number to the values. It turns out that this is a problem in number theory.
Essentially, we want to enumerate the combinations. If there were 2 in two groups, then it would be:
group 0: 1 1
group 1: 1 2
group 2: 2 1
group 3: 2 2
You can see a relationship between the group number and which values are assigned -- based on the binary representation of the group number. If this were 2x3, then it would look like:
group 0: 1 1
group 1: 1 2
group 2: 1 3
group 3: 2 1
group 4: 2 2
group 5: 2 3
Same idea, but now there is no "binary" representation. Each position in the number would have a different base. No problem.
So, the challenge is mapping a number (such as the group number) to each digit. This requires appropriate division and modulo arithmetic.
The following implements this in Postgres:
-- Postgres prototype of the mixed-radix enumeration: each group number n is
-- decomposed into one "digit" per attribute, and the digit selects the value.
-- c: inline sample data, 3 attributes with 2 values each.
with c as (
select 1 as attrid, '1' as val union all
select 1 as attrid, '2' as val union all
select 2 as attrid, 'A' as val union all
select 2 as attrid, 'B' as val union all
select 3 as attrid, '10' as val union all
select 3 as attrid, '20' as val
),
-- c1: attrnum = rank of the attribute, valnum = 1-based rank of the value
-- within its attribute, cnt = number of values of the attribute.
c1 as (
select c.*, dense_rank() over (order by attrid) as attrnum,
dense_rank() over (partition by attrid order by val) as valnum,
count(*) over (partition by attrid) as cnt
from c
),
-- a1: per attribute, cnt and cum = (running product of counts up to and
-- including this attribute) / cnt, i.e. the product of the counts of the
-- attributes ordered before it.
a1 as (
select attrid, count(*) as cnt,
cast(round(exp(sum(ln(count(*))) over (order by attrid rows between unbounded preceding and current row))) as int)/count(*) as cum
from c
group by attrid
),
-- a2: the same cum computed with a correlated subquery; not referenced by any
-- later CTE or by the final select.
a2 as (
select a.*,
(select cast(round(exp(sum(ln(cnt)))) as int)
from a1
where a1.attrid <= a.attrid
) / cnt as cum
from a1 a
),
-- const: numrows = total number of combinations (product of all counts).
const as (
select cast(round(exp(sum(ln(cnt)))) as int) as numrows
from a1
),
-- nums: group numbers 1..8, hard-coded for the sample (2*2*2 combinations).
nums as (
select 1 as n union all select 2 union all select 3 union all select 4 union all
select 5 union all select 6 union all select 7 union all select 8
from const
),
-- ac: every (attribute, value) row with its cum and the total row count.
ac as (
select c1.*, a1.cum, const.numrows
from c1 join
a1 on c1.attrid = a1.attrid cross join
const
)
-- For group n, the digit for an attribute is (n / cum) % cnt; the value whose
-- zero-based rank (valnum - 1) equals that digit belongs to the group.
select *
from nums join
ac
on (nums.n/cum) % cnt = valnum - 1
order by 1, 2;
(Note: generate_series() was not working correctly for some reason with certain joins, which is why it manually generates the sequence of numbers.)
When SQL Fiddle gets working again, I should be able to translate this back to SQL Server.
EDIT II:
Here is the version that works in SQL Server:
-- SQL Server version of the mixed-radix enumeration: group number n selects,
-- for every attribute, the value whose zero-based rank is (n / cum) % cnt.
-- attributecount1: number of values per attribute.
with attributecount1 as (
select c.AttributeId, count(*) as cnt
from _Combinations c
group by c.AttributeId
),
-- const: tot = product of all counts (exp/sum/log, rounded and cast to int);
-- numattr = number of attributes.
const as (
select cast(round(exp(sum(log(cnt))), 1) as int) as tot, count(*) as numattr
from attributecount1
),
-- attributecount: cum = (product of counts of attributes with
-- AttributeId <= this one) / cnt, i.e. the product of the counts of the
-- attributes with a smaller AttributeId.
attributecount as (
select a.*,
(tot / a.cnt) as numtimes,
(select cast(round(exp(sum(log(ac1.cnt))), 1) as int)
from attributecount1 ac1
where ac1.AttributeId <= a.AttributeId
) / a.cnt as cum
from attributecount1 a cross join const
),
-- c: every (attribute, value) row with its counts and valnum, the 1-based
-- rank of the value within its attribute.
c as (
select c.*, ac.numtimes, ac.cum, ac.cnt,
dense_rank() over (order by c.AttributeId) as attrnum,
dense_rank() over (partition by c.AttributeId order by Value) as valnum
from _Combinations c join
AttributeCount ac
on ac.AttributeId = c.AttributeId
),
-- nums: recursive number generator 1..tot, one number per combination.
nums as (
select 1 as n union all
select 1 + n
from nums cross join const
where 1 + n <= const.tot
)
select *
from nums join
c
on (nums.n / c.cum)%c.cnt = c.valnum - 1
-- nums recurses once per combination, so tot must not exceed this limit.
option (MAXRECURSION 1000)
The SQL Fiddle is here.
Years ago I faced a similar problem with a fixed EAV schema not unlike yours. Peter Larsson came up with the below solution to address my "dynamic combinations" query.
I've adapted it to fit your schema. Hope this helps!
SqlFiddle Here
-- Enumerates all attribute-value combinations without recursion.
-- cteSource yields one row per (iteration, attribute):
--   Iteration = v.Number + 1 (the combination number)
--   recID     = zero-based running row number ordered by (Number, AttributeID)
--   Items     = number of values of the attribute
--   Unq       = number of distinct attributes
--   Perm      = product of the value counts of all attributes with
--               AttributeID >= this one, counting only attributes that have
--               more than one value (HAVING count(*) > 1)
-- NOTE(review): master..spt_values rows with Type = 'P' serve as the number
-- source; their range presumably caps the number of combinations - confirm.
;with cteSource (Iteration, AttributeID, recID, Items, Unq, Perm) as
(
select v.Number + 1,
s.AttributeId,
row_number() over (order by v.Number, s.AttributeID) - 1,
s.Items,
u.Unq,
f.Perm
from (select AttributeID, count(*) from _Combinations group by AttributeID) s(AttributeId, Items)
cross
join (select count(distinct AttributeID) from _Combinations) u (Unq)
join master..spt_values as v on v.Type = 'P'
outer
apply (
select top(1) cast(exp(sum(log(count(*))) over ()) as bigint)
from _Combinations as w
where w.AttributeID >= s.AttributeID
group
by w.AttributeID
having count(*) > 1
) as f(Perm)
-- Keep only as many iterations as there are combinations (product of all counts).
where v.Number < (select top(1) exp(sum(log(count(*))) over()) from _Combinations as x group by x.AttributeID)
)
select s.Iteration,
s.AttributeID,
w.Value
from cteSource as s
cross
apply (
-- Values of the current attribute with a zero-based rank by Value.
select Value,
row_number() over (order by Value) - 1
from _Combinations
where AttributeID = s.AttributeID
) w(Value, recID)
-- Pick the value whose rank equals the digit derived from the row's recID.
where coalesce(s.recID / (s.Perm * s.Unq / s.Items), 0) % s.Items = w.recID
order
by s.Iteration, s.AttributeId;
I've decided to post this, just for the sake of a procedural solution appearing in parallel with the CTE-based ones.
The following produces a zero-based GroupKey column. If you want it to start from 1, simply change #i to #i+1 in the last insert ... select.
-- Procedural solution: fills _CombinedAttributes with every combination of
-- attribute values, using a zero-based GroupKey.
-- Variables and table variables use the @ sigil and the cursor status
-- function is @@FETCH_STATUS; '#' is not valid in these positions.

-- Add a zero-based row number, partitioned by AttributeId
declare @Attrs table (AttributeId int,Value varchar(50),RowNum int)
insert into @Attrs (AttributeId,Value,RowNum)
select
AttributeId,Value,
ROW_NUMBER()over(partition by AttributeId order by AttributeId,Value)-1
from _Combinations
-- AttributeId value counts
declare @AttCount table (AttributeId int,n int)
insert into @AttCount (AttributeId,n)
select AttributeId,COUNT(*) n from @Attrs
group by AttributeID
-- Total number of combos -- Multiply all AttributeId counts
-- EXP(SUM(LOG(n))) didnt work as expected
-- so fall back to good old cursors...
declare @ncombos int,@num int
declare mulc cursor for select n from @AttCount
open mulc
set @ncombos=1
fetch next from mulc into @num
while @@FETCH_STATUS=0
begin
set @ncombos=@ncombos*@num
fetch next from mulc into @num
end
close mulc
deallocate mulc
-- Now let's get our hands dirty...
-- For each attribute (count @n), combination @i gets the value whose row
-- number is (@i / @m) % @n, where @m is the product of the counts of the
-- attributes processed before it (mixed-radix digit extraction).
declare @i int,@m int,@atid int,@n int,@r int
declare c cursor for select AttributeId,n from @AttCount
open c
fetch next from c into @atid,@n
set @m=1
while @@FETCH_STATUS=0
begin
set @i=0
while @i<@ncombos
begin
set @r=(@i/@m)%@n
insert into _CombinedAttributes (GroupKey,AttributeId,Value)
select @i,@atid,value from @Attrs where AttributeId=@atid and RowNum=@r
set @i=@i+1
end
set @m=@m*@n
fetch next from c into @atid,@n
end
close c
deallocate c
Hint: Here's why I didn't use exp(sum(log())) to emulate a mul() aggregate.
Recursive Solution
The following is a recursive solution, SQLFiddle is here:
-- Recursive CTE that builds all combinations of attribute values, adding one
-- attribute per recursion level, and returns one row per
-- (GroupKey, AttributeID, Value) for the complete combinations.
--   a : one row per AttributeID with its position (rowNo, ordered by
--       AttributeID) and its number of values (cnt)
--   r : the combinations built so far; "factor" is the product of the cnt
--       values of the attributes already included, "level" is the rowNo of
--       the last attribute added
with a as ( -- unique AttributeIDs
select AttributeID
,Row_Number() over(order by AttributeID) as rowNo
,count(*) as cnt
from [dbo].[_Combinations]
group by AttributeID
),
r as (
-- start recursion: list all values of the first attribute
-- (GroupKey is the zero-based rank of the value)
select Dense_Rank() over(order by c.[Value]) - 1 as GroupKey
,c.AttributeID
,c.[Value]
,a.cnt as factor
,1 as level
from a
join [dbo].[_Combinations] as c on a.AttributeID = c.AttributeID
where a.rowNo = 1
union all
-- recursion step: add the combinations with the values of the next attribute
-- The inner query pairs each existing row (prev) with each value of the next
-- attribute (curr); unpivot then turns every pair back into two rows, and the
-- case maps the unpivoted column name ('prev'/'curr') to the attribute id.
select GroupKey
,case when AttributeID = 'prev' then prevAttribID else currAttribID end as AttributeID
,[Value]
,factor
,level
from (select r.Value as prev
,c.Value as curr
-- new key = (rank of the new value) * (combinations so far) + old key
-- NOTE(review): this depends on how the window function is evaluated inside
-- the recursive member - confirm against the SQL Server documentation
,(Dense_Rank() over(order by c.[Value]) - 1) * r.factor + r.GroupKey as GroupKey
,r.level + 1 as level
,r.factor * a.cnt as factor
,r.AttributeID as prevAttribID
,a.AttributeID as currAttribID
from r
join a on r.level + 1 = a.rowNo
join [dbo].[_Combinations] as c on a.AttributeID = c.AttributeID
) as p
unpivot ( Value for AttributeID in (prev, curr)) as up
)
-- get result: this is the data from the deepest level
-- (distinct removes the repeated rows that the pairing/unpivot step produces)
select distinct
GroupKey + 1 as GroupKey -- start with one instead of zero
,AttributeID
,[Value]
from r
where level = (select count(*) from a)
order by GroupKey, AttributeID, [Value]
Dynamic Solution
And this is a slightly shorter version using a dynamic statement:
-- Builds and executes a dynamic statement that cross-joins
-- [dbo].[_Combinations] once per distinct AttributeID (alias C<ID>), numbers
-- the resulting combinations with Row_Number() as GroupKey, and unpivots the
-- per-attribute value columns (V<ID>) back into (GroupKey, AttributeID, Value)
-- rows. The attribute id is recovered from the unpivoted column name by
-- stripping its leading 'V'.
declare @stmt varchar(max);
with a as ( -- unique attribute keys, cast here to avoid casting when building the dynamic statement
    select distinct cast(AttributeID as varchar(10)) as ID
    from [dbo].[_Combinations]
)
select @stmt = 'select GroupKey, Cast(SubString(AttributeIDStr, 2, 100) as int) as AttributeID, Value
from
(
select '
    -- select list: " C<ID>.Value as V<ID>, " for every attribute
    + (select ' C' + ID + '.Value as V' + ID + ', ' from a for xml path(''))
    + ' Row_Number() over(order by '
    -- order by list: stuff() strips the leading ", "
    + stuff((select ', C' + ID + '.Value' from a for xml path('')), 1, 2, '')
    + ') AS GroupKey from '
    -- from list: stuff() strips the leading " cross join"
    + stuff((select ' cross join [dbo].[_Combinations] as C' + ID from a for xml path('')), 1, 11, '')
    + ' where '
    -- one filter per alias: stuff() strips the leading " and"
    + stuff((select ' and C' + ID + '.AttributeID = ' + ID from a for xml path('')), 1, 4, '')
    + ') as p unpivot (Value for AttributeIDStr in ('
    + stuff((select ', V' + ID from a for xml path('')), 1, 2, '')
    + ')) as up'
;
exec (@stmt)
As SQL Server does not have the nice list aggregate function that other databases have, one must use the ugly stuff((select ... for xml path(''))) expression.
The statement produced for the sample data is - apart from whitespace differences - the following:
-- Statement generated for the sample data (attributes 16, 28 and 8).
select
    GroupKey,
    Cast(SubString(AttributeIDStr, 2, 100) as int) as AttributeID,
    Value
from (
    -- one row per combination: one value from each attribute,
    -- numbered in order of the chosen values
    select
        C16.Value as V16,
        C28.Value as V28,
        C8.Value as V8,
        Row_Number() over (order by C16.Value, C28.Value, C8.Value) as GroupKey
    from [dbo].[_Combinations] as C16
    cross join [dbo].[_Combinations] as C28
    cross join [dbo].[_Combinations] as C8
    where C16.AttributeID = 16
      and C28.AttributeID = 28
      and C8.AttributeID = 8
) as p
-- turn the V<ID> columns back into one row per attribute
unpivot (Value for AttributeIDStr in (V16, V28, V8)) as up
Both solutions avoid the multiplication aggregation workaround using exp(log()) that is used in some other answers, which is very sensitive to rounding errors.
Regarding the issue with exp(sum(log(count(*))) over ()), the answer for me seemed to be to introduce the ROUND function to the mix. Thus, the following snippet seems to produce a reliable answer (so far at least):
ROUND(exp(sum(log(count(*))) over ()), 0)

How to get result from parent child table

Work on SQL-Server. My table structure is below
-- Agent hierarchy: each row holds an agent and the id of its parent agent.
CREATE TABLE [dbo].[AgentInfo]
(
    [AgentID]  [int] NOT NULL,
    [ParentID] [int] NULL,
    CONSTRAINT [PK_AgentInfo] PRIMARY KEY CLUSTERED ([AgentID] ASC)
        WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
              ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
GO
-- Sample rows: agents 1 and 2 have ParentID = -1; agent 3 is a child of 1
-- and agent 4 is a child of 2.
INSERT INTO [dbo].[AgentInfo] ([AgentID], [ParentID]) VALUES (1, -1);
INSERT INTO [dbo].[AgentInfo] ([AgentID], [ParentID]) VALUES (2, -1);
INSERT INTO [dbo].[AgentInfo] ([AgentID], [ParentID]) VALUES (3, 1);
INSERT INTO [dbo].[AgentInfo] ([AgentID], [ParentID]) VALUES (4, 2);
Required output
I use the syntax below to get the required output, but I am not satisfied with it. Is there a better way to get the required output?
-- Get the parent/child list.
-- Step 1: collect three sets of rows into #temp1
--   * every agent whose ParentID is -1, as is
--   * for every other agent: a row carrying its parent's id in the first
--     column and 0 in the second
--   * for every other agent: the agent row itself
-- BaseAgent is always the AgentID of the source row.
;WITH parent_child_rows (AgentID, ParentID, BaseAgent) AS
(
    SELECT root.AgentID, root.ParentID, root.AgentID
    FROM dbo.AgentInfo AS root
    WHERE root.ParentID = -1

    UNION ALL

    SELECT child.ParentID, 0, child.AgentID
    FROM dbo.AgentInfo AS child
    WHERE child.ParentID <> -1

    UNION ALL

    SELECT child.AgentID, child.ParentID, child.AgentID
    FROM dbo.AgentInfo AS child
    WHERE child.ParentID <> -1
)
SELECT AgentID, ParentID, BaseAgent
INTO #temp1
FROM parent_child_rows;

SELECT AgentID, ParentID, BaseAgent
FROM #temp1;

DROP TABLE #temp1;
Help me to improve my syntax. If you have any questions please ask.
You could use a recursive SELECT, see the examples in the documentation for WITH, starting with example D.
The general idea within the recursive WITH is: You have a first SELECT that is the starting point, then a UNION ALL and a second SELECT which describes the step from one level to the next, where the previous level can either be the result of the first SELECT or the result of the previous run of the second SELECT.
You can try this, to get a tree of the elements:
-- Walks the agent tree from the top down: starts with the agents whose
-- ParentID is -1 and repeatedly adds the agents whose parent is already in
-- the result. BaseAgent repeats each row's own AgentID.
WITH CTE_AgentInfo (AgentID, ParentID, BaseAgent) AS
(
    -- anchor: top-level agents
    SELECT top_level.AgentID,
           top_level.ParentID,
           top_level.AgentID
    FROM AgentInfo AS top_level
    WHERE top_level.ParentID = -1

    UNION ALL

    -- recursive step: children of agents found so far
    SELECT child.AgentID,
           child.ParentID,
           child.AgentID
    FROM AgentInfo AS child
    INNER JOIN CTE_AgentInfo AS parent
        ON child.ParentID = parent.AgentID
)
SELECT AgentID, ParentID, BaseAgent
FROM CTE_AgentInfo
And here is an SQLFiddle demo to see it.
Try something like this:
-- Lists the top-level agents (ParentId = -1) and, recursively, every agent
-- below them.
WITH Merged (AgentId, ParentId) AS (
    -- anchor: top-level agents
    SELECT AgentId, ParentId
    FROM AgentInfo
    WHERE ParentId = -1

    UNION ALL

    -- recursive step: agents whose parent is already in the result.
    -- The join must go from the found row's AgentId to the child's ParentId;
    -- joining on AgentInfo.AgentId = Merged.ParentId would look for an agent
    -- with id -1 and never return any children.
    SELECT child.AgentId, child.ParentId
    FROM AgentInfo AS child
    INNER JOIN Merged
        ON child.ParentId = Merged.AgentId
)
SELECT AgentId, ParentId
FROM Merged
You can use a Common Table Expression to do this.
The sql statement will then look like this:
-- Returns every agent once, together with the top-level agent (BaseAgent) of
-- the tree it belongs to. BaseAgent is set on the top-level rows
-- (ParentID = -1) and handed down unchanged to each descendant.
WITH Parents (AgentID, ParentID, BaseAgent) AS
(
    -- anchor: top-level agents are their own base agent
    SELECT root.AgentID,
           root.ParentID,
           root.AgentID
    FROM AgentInfo AS root
    WHERE root.ParentID = -1

    UNION ALL

    -- recursive step: a child inherits the base agent of its parent
    SELECT child.AgentID,
           child.ParentID,
           parent.BaseAgent
    FROM AgentInfo AS child
    INNER JOIN Parents AS parent
        ON child.ParentID = parent.AgentID
)
SELECT AgentID, ParentID, BaseAgent
FROM Parents
ORDER BY BaseAgent ASC, AgentID ASC
But, the results are different from your desired output, since every Agent is only listed once.
The output is:
AGENTID PARENTID BASEAGENT
1 -1 1
3 1 1
2 -1 2
4 2 2
The Fiddle is over here.
And here is a nice post on working with hierarchies: What are the options for storing hierarchical data in a relational database?

Counting ordered data

I have the following problem to solve and I can't seem to be able to come up with an algorithm yet, nevermind an actual solution.
I have a table of similar structure/data as the following, where IDs are not always in sequence for the same Ticker/QuoteType:
ID Ticker PriceDateTime QuoteType OpenPrice HighPrice LowPrice ClosePrice
------- ------ ---------------- --------- --------- --------- -------- ----------
2036430 ^COMP 2012-02-10 20:50 95/Minute 2901.57 2905.04 2895.37 2901.71
2036429 ^COMP 2012-02-10 19:15 95/Minute 2909.63 2910.98 2899.95 2901.67
2036428 ^COMP 2012-02-10 17:40 95/Minute 2905.9 2910.27 2904.29 2909.64
2036427 ^COMP 2012-02-10 16:05 95/Minute 2902 2908.29 2895.1 2905.89
2036426 ^COMP 2012-02-09 21:00 95/Minute 2926.12 2928.01 2925.53 2927.21
The information I need to extract from this data is the following:
How many consecutive rows are there? Counting downwards from the most recent (as recorded in PriceDateTime), looking at ClosePrice?
IE: For the current example the answer should be 2. ClosePrice (row 1) = 2901.71 which is greater than ClosePrice (row 2) = 2901.67 but lower than ClosePrice (row 3) = 2909.64. As such, looking back from the most recent price, we have 2 rows that "go in the same direction".
Of course I have to do this across a lot of other names, so speed is quite important.
PS: Thank you all for your help, I've drawn inspiration from all your answers when building the final procedure. You're all very kind!
Try this: (I have simplified the test data I'm using as it only requires 2 columns to demonstrate the logic).
-- Counts how many of the most recent rows move in the same direction
-- (by ClosePrice), looking back from the latest PriceDateTime.
CREATE TABLE #Test (PriceDateTime DATETIME, ClosePrice DECIMAL(6, 2));

INSERT INTO #Test (PriceDateTime, ClosePrice)
VALUES ('20120210 20:50:00.000', 2901.71),
       ('20120210 19:15:00.000', 2901.67),
       ('20120210 17:40:00.000', 2900.64),
       ('20120210 16:05:00.000', 2905.89),
       ('20120209 21:00:00.000', 2927.21);

WITH
-- Number the rows from newest (1) to oldest.
Ranked AS
(
    SELECT PriceDateTime,
           ClosePrice,
           ROW_NUMBER() OVER (ORDER BY PriceDateTime DESC) AS RowNumber
    FROM #Test
),
-- Movement is +1 / 0 / -1 depending on how a row's price compares with the
-- next older row; it is NULL for the oldest row, which has nothing to
-- compare with.
Moves AS
(
    SELECT newer.PriceDateTime,
           newer.ClosePrice,
           newer.RowNumber,
           SIGN(newer.ClosePrice - older.ClosePrice) AS Movement
    FROM Ranked AS newer
    LEFT JOIN Ranked AS older
        ON older.RowNumber = newer.RowNumber + 1
),
-- Start a chain at every row and extend it to the next newer row for as long
-- as that row has the same Movement; Recursion is the chain length.
Runs AS
(
    SELECT PriceDateTime, ClosePrice, RowNumber, Movement, 1 AS Recursion
    FROM Moves

    UNION ALL

    SELECT newer.PriceDateTime,
           newer.ClosePrice,
           newer.RowNumber,
           newer.Movement,
           chain.Recursion + 1
    FROM Moves AS newer
    INNER JOIN Runs AS chain
        ON newer.RowNumber = chain.RowNumber - 1
       AND newer.Movement = chain.Movement
)
-- The longest chain ending at the latest row gives the number of equal
-- movements; each movement compares two rows, so add 1 to get the row count.
SELECT MAX(Recursion) + 1
FROM Runs
WHERE RowNumber = 1;

DROP TABLE #Test;
I've tried to comment the answer to explain as I go. If anything is not clear from the comments let me know and I will try and explain further
The solution below should be efficient enough, but it will fail if there are gaps in the ID sequence.
Please update your question if that is the case.
-- Counts the run of rows, starting at the highest ID and stepping down one ID
-- at a time, in which each older row has a lower ClosePrice than the row
-- after it. Requires IDs without gaps (the join uses ID - 1) and only follows
-- runs in that one direction.
DECLARE @t TABLE (
    ID INT,
    ClosePrice DECIMAL(10, 5)
);

INSERT INTO @t (ID, ClosePrice)
VALUES (2036430, 2901.71),
       (2036429, 2901.67),
       (2036428, 2909.64),
       (2036427, 2905.89),
       (2036426, 2927.21);

WITH CTE AS (
    -- anchor: the latest row. TOP ... ORDER BY is wrapped in a derived table
    -- because ORDER BY may not directly precede UNION ALL.
    SELECT latest.ID, latest.ClosePrice, 1 AS lvl
    FROM (
        SELECT TOP (1) ID, ClosePrice
        FROM @t
        ORDER BY ID DESC
    ) AS latest

    UNION ALL

    -- recursive step: continue to the previous ID while its price is lower
    SELECT s.ID, s.ClosePrice, CTE.lvl + 1
    FROM @t AS s
    INNER JOIN CTE
        ON s.ID = CTE.ID - 1
       AND s.ClosePrice < CTE.ClosePrice
)
SELECT MAX(lvl) AS answer
FROM CTE;
I'd join your data on itself (with +1 on your primary key / ordering key), then use a simple CASE to track the change (assuming I've understood your question properly).
For example:
-- Demo: label each row as 'Up', 'Down' or 'No Change' relative to the row
-- with the preceding ID.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TABLE [dbo].[tbl_NumericSequence](
    [ID] [int] NULL,
    [Value] [int] NULL
) ON [PRIMARY]
GO
INSERT INTO [dbo].[tbl_NumericSequence] ([ID], [Value])
VALUES (1, 1),
       (2, 2),
       (3, 3),
       (4, 2),
       (5, 1),
       (6, 3),
       (7, 3),
       (8, 8),
       (9, 1)
GO
-- The table is read from the current database, the same place the script
-- created it, rather than from a hard-coded [Test] database.
WITH RawData ([ID], [Value]) AS
(
    SELECT [ID],
           [Value]
    FROM [dbo].[tbl_NumericSequence]
)
SELECT RawData.ID,
       RawData.Value,
       -- Compare with the row whose ID is one lower. When there is no such
       -- row (the first ID, or after a gap) none of the branches match and
       -- Change is NULL.
       CASE
           WHEN RawDataLag.Value = RawData.Value THEN 'No Change'
           WHEN RawDataLag.Value > RawData.Value THEN 'Down'
           WHEN RawDataLag.Value < RawData.Value THEN 'Up'
       END AS Change
FROM RawData
LEFT OUTER JOIN RawData AS RawDataLag
    ON RawData.ID = RawDataLag.ID + 1
ORDER BY RawData.ID ASC
I would approach it with recursive common table expressions:
-- Walks the rows from the highest ID downwards (IDs must be consecutive, the
-- join uses id - 1) and keeps a running counter: it grows by one while each
-- older row has a lower ClosePrice than the row after it and restarts at 1
-- otherwise.
CREATE TABLE #MyTable (ID INT, ClosePrice MONEY);

-- The statement before a WITH must be terminated with a semicolon.
INSERT INTO #MyTable (ID, ClosePrice)
VALUES (2036430, 2901.71),
       (2036429, 2901.67),
       (2036428, 2909.64),
       (2036427, 2905.89),
       (2036426, 2927.21);

WITH CTE AS (
    -- anchor: the latest row. TOP ... ORDER BY is wrapped in a derived table
    -- because ORDER BY may not directly precede UNION ALL.
    SELECT latest.id, latest.closeprice, 1 AS Consecutive
    FROM (
        SELECT TOP (1) id, closeprice
        FROM #MyTable
        ORDER BY id DESC
    ) AS latest

    UNION ALL

    -- recursive step: move to the previous ID and extend or reset the counter
    SELECT A.id,
           A.closeprice,
           CASE WHEN A.ClosePrice < B.ClosePrice THEN B.Consecutive + 1 ELSE 1 END
    FROM #MyTable AS A
    INNER JOIN CTE AS B
        ON A.ID = B.id - 1
)
SELECT id, closeprice, Consecutive
FROM CTE;
--OR to just get the max consecutive
--select max(Consecutive) from cte

DROP TABLE #MyTable;