Merging groups of interval data - SQL Server - sql

I have two sets of interval data I.E.
Start End Type1 Type2
0 2 L NULL
2 5 L NULL
5 7 L NULL
7 10 L NULL
2 3 NULL S
3 5 NULL S
5 8 NULL S
11 12 NULL S
What I'd like to do is merge these sets into one. This seems possible by utilising an islands and gaps solution but due to the non-continuous nature of the intervals I'm not sure how to go about applying it... The output I'm expecting would be:
Start End Type1 Type2
0 2 L NULL
2 3 L S
3 5 L S
5 7 L S
7 8 L S
8 10 L NULL
11 12 NULL S
Anyone out there done something like this before??? Thanks!
Create script below:
-- Question fixture: one table holding both interval sets.
-- A row belongs to the Type1 set when Type1 is filled, to the Type2 set when Type2 is filled.
CREATE TABLE Table1
(
    [Start] int,
    [End]   int,
    [Type1] varchar(4),
    [Type2] varchar(4)
);

INSERT INTO Table1 ([Start], [End], [Type1], [Type2])
VALUES
    ( 0,  2, 'L',  NULL),
    ( 2,  3, NULL, 'S'),
    ( 2,  5, 'L',  NULL),
    ( 3,  5, NULL, 'S'),
    ( 5,  7, 'L',  NULL),
    ( 5,  8, NULL, 'S'),
    ( 7, 10, 'L',  NULL),
    (11, 12, NULL, 'S');

I assume that Start is inclusive, End is exclusive and given intervals do not overlap.
CTE_Numbers is a table of numbers. Here it is generated on the fly; I have it as a permanent table in my database.
CTE_T1 and CTE_T2 expand each interval into the corresponding number of rows using a table of numbers. For example, interval [2,5) generates rows with Values
2
3
4
This is done twice: for Type1 and Type2.
Results for Type1 and Type2 are FULL JOINed together on Value.
Finally, a gaps-and-islands pass groups/collapses intervals back.
Run the query step-by-step, CTE-by-CTE and examine intermediate results to understand how it works.
Sample data
I added a few rows to illustrate the case where there is a gap between values.
-- Sample data for the merge query, kept in a session-local temporary table.
-- `DECLARE #Table1 TABLE` is not valid T-SQL (variable names must start with @),
-- so the data is created as the temp table #Table1, which is the name the
-- query that follows reads from.
IF OBJECT_ID('tempdb..#Table1') IS NOT NULL
    DROP TABLE #Table1;  -- allow the script to be re-run in the same session

CREATE TABLE #Table1
(
    [Start] int,
    [End]   int,
    [Type1] varchar(4),
    [Type2] varchar(4)
);

INSERT INTO #Table1 ([Start], [End], [Type1], [Type2]) VALUES
    ( 0,  2, 'L',  NULL),
    ( 2,  3, NULL, 'S'),
    ( 2,  5, 'L',  NULL),
    ( 3,  5, NULL, 'S'),
    ( 5,  7, 'L',  NULL),
    ( 5,  8, NULL, 'S'),
    ( 7, 10, 'L',  NULL),
    (11, 12, NULL, 'S'),
    -- Extra rows: both sets cover the same range after a gap in the values.
    (15, 20, 'L',  NULL),
    (15, 20, NULL, 'S');
Query
-- Merges the Type1 and Type2 interval sets stored in #Table1.
-- Intervals are treated as [Start, End): each one is expanded into one row per
-- integer value, the two expansions are aligned by value, and runs of
-- consecutive values sharing the same (Type1, Type2) pair are collapsed again.
WITH
-- e1, e2, e3: row generators producing 10, 100 and 1000 rows.
e1 (n) AS
(
    SELECT v.n
    FROM (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS v (n)
),
e2 (n) AS
(
    SELECT 1 FROM e1 CROSS JOIN e1 AS b
),
e3 (n) AS
(
    SELECT 1 FROM e1 CROSS JOIN e2
),
-- Sequential numbers 1..1000; this caps the interval length that can be expanded.
CTE_Numbers AS
(
    SELECT ROW_NUMBER() OVER (ORDER BY n) AS Number
    FROM e3
),
-- One row per integer value covered by a Type1 interval.
CTE_T1 AS
(
    SELECT
        src.[Start] + nums.Number - 1 AS Value,
        src.Type1
    FROM #Table1 AS src
    CROSS APPLY
    (
        -- Take exactly (End - Start) numbers: 1, 2, ..., interval length.
        SELECT TOP (src.[End] - src.[Start]) CTE_Numbers.Number
        FROM CTE_Numbers
        ORDER BY CTE_Numbers.Number
    ) AS nums
    WHERE src.Type1 IS NOT NULL
),
-- One row per integer value covered by a Type2 interval.
CTE_T2 AS
(
    SELECT
        src.[Start] + nums.Number - 1 AS Value,
        src.Type2
    FROM #Table1 AS src
    CROSS APPLY
    (
        SELECT TOP (src.[End] - src.[Start]) CTE_Numbers.Number
        FROM CTE_Numbers
        ORDER BY CTE_Numbers.Number
    ) AS nums
    WHERE src.Type2 IS NOT NULL
),
-- Align both expansions by value; a value covered by only one set keeps NULL
-- for the other type. rn numbers the values in ascending order.
CTE_Values AS
(
    SELECT
        ISNULL(CTE_T1.Value, CTE_T2.Value) AS Value,
        CTE_T1.Type1,
        CTE_T2.Type2,
        ROW_NUMBER() OVER (ORDER BY ISNULL(CTE_T1.Value, CTE_T2.Value)) AS rn
    FROM CTE_T1
    FULL OUTER JOIN CTE_T2
        ON CTE_T2.Value = CTE_T1.Value
),
-- rn - Value stays constant while values are consecutive, so it separates the
-- stretches divided by a gap; rn2 then numbers rows per stretch and type pair.
CTE_Groups AS
(
    SELECT
        Value,
        Type1,
        Type2,
        rn,
        ROW_NUMBER() OVER
        (
            PARTITION BY rn - Value, Type1, Type2
            ORDER BY Value
        ) AS rn2
    FROM CTE_Values
)
-- rn - rn2 is constant within each island; collapse every island into [Start, End).
SELECT
    MIN(Value)     AS [Start],
    MAX(Value) + 1 AS [End],
    Type1,
    Type2
FROM CTE_Groups
GROUP BY
    rn - rn2,
    Type1,
    Type2
ORDER BY [Start];
Result
+-------+-----+-------+-------+
| Start | End | Type1 | Type2 |
+-------+-----+-------+-------+
| 0 | 2 | L | NULL |
| 2 | 8 | L | S |
| 8 | 10 | L | NULL |
| 11 | 12 | NULL | S |
| 15 | 20 | L | S |
+-------+-----+-------+-------+

A step-by-step way is:
-- Step 1: every distinct Start or End value is a candidate break point.
;WITH breaks AS (
    SELECT Start
    FROM yourTable
    UNION
    SELECT [End]
    FROM yourTable
)
-- Step 2: for each break point collect two candidate ends:
--   End1 = smallest End among intervals starting exactly at the break point,
--   End2 = largest End among intervals starting before it.
, ends AS (
    SELECT
        b.Start,
        (SELECT MIN(t.[End]) FROM yourTable AS t WHERE t.Start = b.Start) AS End1,
        (SELECT MAX(t.[End]) FROM yourTable AS t WHERE t.Start < b.Start) AS End2
    FROM breaks AS b
)
-- Step 3: choose the end of the period that begins at each break point and
-- drop break points that only close an earlier interval.
, periods AS (
    SELECT
        e.Start,
        CASE
            WHEN e.End1 > e.End2 AND e.End2 > e.Start THEN e.End2
            WHEN e.End1 IS NULL THEN e.End2
            ELSE e.End1
        END AS [End]
    FROM ends AS e
    WHERE NOT (e.End1 IS NULL AND e.Start = e.End2)
)
-- Step 4: attach the types of every source interval that contains the period.
SELECT
    p.Start,
    p.[End],
    MAX(t.Type1) AS Type1,
    MAX(t.Type2) AS Type2
FROM periods AS p
INNER JOIN yourTable AS t
    ON  p.Start >= t.Start
    AND p.[End] <= t.[End]
GROUP BY
    p.Start,
    p.[End];
The above query may not handle every possible situation; analyze the cases that matter for your data and improve it as needed ;).

First getting all the numbers of start and end via a Union.
Then joining those numbers on both the 'L' and 'S' records.
Uses a table variable for the test.
-- Test data held in a table variable. The name must start with @:
-- `DECLARE #Table1 TABLE` is a syntax error in T-SQL.
DECLARE @Table1 TABLE (Start int, [End] int, Type1 varchar(4), Type2 varchar(4));

INSERT INTO @Table1 (Start, [End], Type1, Type2)
VALUES
    ( 0,  2, 'L',  NULL), ( 2,  3, NULL, 'S'), ( 2,  5, 'L',  NULL), ( 3,  5, NULL, 'S'),
    ( 5,  7, 'L',  NULL), ( 5,  8, NULL, 'S'), ( 7, 10, 'L',  NULL), (11, 12, NULL, 'S');

-- Every distinct Start/End value is a break point. For each break point, look
-- up the 'L' interval and the 'S' interval that contain it; the output row runs
-- from the break point to the nearer of the two interval ends.
SELECT
    n.Num AS Start,
    CASE
        WHEN s.[End] IS NULL OR l.[End] <= s.[End] THEN l.[End]
        ELSE s.[End]  -- also taken when only the 'S' interval matched (l.[End] is NULL)
    END AS [End],
    l.Type1,
    s.Type2
FROM
    (SELECT Start AS Num FROM @Table1 UNION SELECT [End] FROM @Table1) AS n
LEFT JOIN @Table1 AS l
    ON n.Num >= l.Start AND n.Num < l.[End] AND l.Type1 = 'L'
LEFT JOIN @Table1 AS s
    ON n.Num >= s.Start AND n.Num < s.[End] AND s.Type2 = 'S'
-- Break points covered by neither set (for example a trailing End) produce no row.
WHERE (l.Start IS NOT NULL OR s.Start IS NOT NULL)
ORDER BY Start, [End];
Output:
Start End Type1 Type2
0 2 L NULL
2 3 L S
3 5 L S
5 7 L S
7 8 L S
8 10 L NULL
11 12 NULL S

Related

change data format from rows to columns

I have a SQL table in the below format. Every ID has 2 entries in the table for a particular date.
Input:
ID date rownum subid value1 value2
A 200911 1 X 10 20
A 200911 2 Y 15 25
B 201001 2 S 60 35
B 201001 1 R 40 50
I want to write a SQL query to change this to the format below, so that every ID/date combination has only one entry. The rownum is already included, and the values should be arranged so that the row with rownum 1 is displayed first, followed by the row with rownum 2, as shown below.
Output:
ID date row1subid row1value1 row1value2 row2subid row2value1 row2value2
A 200911 X 10 20 Y 15 25
B 201001 R 40 50 S 60 35
Let me know if something is not clear.
Thanks for all your help!
Here is what you need to do as a SQL Fiddle.
And for reference:
-- Sample data: two rows (rownum 1 and rownum 2) per ID/date combination.
CREATE TABLE TestData
(
    [ID]     varchar(1),
    [date]   int,
    [rownum] int,
    [subid]  varchar(1),
    [value1] int,
    [value2] int
);

INSERT INTO TestData ([ID], [date], [rownum], [subid], [value1], [value2])
VALUES
    ('A', 200911, 1, 'X', 10, 20),
    ('A', 200911, 2, 'Y', 15, 25),
    ('B', 201001, 2, 'S', 60, 35),
    ('B', 201001, 1, 'R', 40, 50);

-- Pair each rownum = 1 row (A) with the rownum = 2 row (B) of the same ID and date.
-- The date must be part of the join: matching on ID alone would combine rows
-- from different dates as soon as an ID appears on more than one date.
SELECT
    A.ID,
    A.[date],
    A.rownum,
    A.subid,
    A.value1,
    A.value2,
    B.value1 AS r2value1,
    B.value2 AS r2value2,
    B.subid  AS r2subid   -- row-2 subid requested in the question; appended last so existing column positions stay the same
FROM TestData AS A
INNER JOIN TestData AS B
    ON  B.ID = A.ID
    AND B.[date] = A.[date]
    AND B.rownum = 2
WHERE A.rownum = 1;

Efficient way to sum overlapping date intervals with several groupings in PostgreSQL version 8.4

Hello I'm new to stackoverflow and relatively new to psql so please be lenient if I do things wrong.
I have a large data set showing animal movements that looks a little like this:
animalid | movementdate | offmovementdate | location | rsk
==========================================================
1 | 1998-01-01 | 1998-04-01 | 3 | Y
1 | 1998-04-01 | 1999-04-01 | 1 | Y
1 | 1999-04-01 | 1999-07-01 | 2 | N
2 | 1998-05-01 | 1999-04-01 | 3 | Y
3 | 1998-02-01 | 1999-01-01 | 2 | N
3 | 1999-01-01 | 1999-06-01 | 1 | Y
4 | 1997-12-01 | 1998-05-01 | 1 | Y
4 | 1998-05-01 | 1999-04-01 | 2 | N
I want to sum all the contact days of an animal (i.e. in a location shared by another animal) stratified by risk. The intervals are movementdate-offmovementdate. The variable lcd should sum the number of days spent on the same location with another individual. So if I spent 3 days in the same place as 2 people and 2 days in the same place as 1 person my lcd is 3+3+2=8.
So my output should look something like this:
animalid | rsk | lcd
=======================
1 | Y | 120
1 | N | 0
2 | Y | 0
3 | Y | 90
3 | N | 245
4 | Y | 30
4 | N | 245
Thus the value 120 in the first row is obtained by adding the overlapping intervals for locations 3 (0 days) and 1 (1999-01-01 to 1999-04-01 + 1998-04-01 to 1998-05-01).
I have tried the following queries:
-- Per animal, location and risk class: summed overlap between the animal's
-- stay and the stays of every other animal at the same location.
CREATE TABLE tmpcpy AS
SELECT
    ta.animalid,
    ta.location,
    ta.rsk,
    SUM(
        AGE(
            LEAST(ta.offmovementdate, tb.offmovementdate),
            GREATEST(ta.movementdate, tb.movementdate)
        )
    ) AS ctc_ds
FROM tmpd AS ta
INNER JOIN tmpd AS tb
    ON ta.location = tb.location
WHERE ta.animalid IS DISTINCT FROM tb.animalid
  -- keep only pairs of stays that really overlap
  AND LEAST(ta.offmovementdate, tb.offmovementdate)
      > GREATEST(ta.movementdate, tb.movementdate)
GROUP BY
    ta.animalid,
    ta.rsk,
    ta.location;

-- Roll the per-location totals up to one row per animal and risk class.
CREATE TABLE lcd_out AS
SELECT
    animalid,
    rsk,
    SUM(ctc_ds) AS lcd
FROM tmpcpy
GROUP BY
    animalid,
    rsk;
But I get the following error message.
ERROR: could not write block 24905954 of temporary file: No space left on device
Is there a more efficient way of getting the desired output?
The output of explain for the first query with my real dataset is the following:
GroupAggregate (cost=677015920636.46..691909507980.53 rows=3804913 width=42)
-> Sort (cost=677015920636.46..679994626690.54 rows=1191482421630 width=42)
Sort Key: ta.animalid, ta.rsk, ta.location
-> Merge Join (cost=18773271.33..71508531671.51 rows=1191482421630 width=42)
Merge Cond: (ta.location = tb.location)
Join Filter: ((ta.animalid IS DISTINCT FROM tb.animalid) AND (LEAST(ta.offmovementdate, tb.offmovementdate) > GREATEST(ta.movementdate, tb.movementdate)))
-> Sort (cost=9646734.67..9741857.48 rows=38049124 width=26)
Sort Key: ta.location
-> Seq Scan on moves ta (cost=0.00..1214663.24 rows=38049124 width=26)
-> Materialize (cost=9126536.67..9602150.72 rows=38049124 width=24)
-> Sort (cost=9126536.67..9221659.48 rows=38049124 width=24)
Sort Key: tb.location
-> Seq Scan on moves tb (cost=0.00..1214663.24 rows=38049124 width=24)
Without knowing how to treat those records with OFFMOVEMENTDATE and LOCATION null I can give you this query(it should be a little more efficient because it doesn't perform an expensive self join) that simply ignore those rows:
-- Contact days per animal and risk class, without a self join on the movements.
-- Rows whose offmovementdate/location are NULL never satisfy the join below and
-- are therefore ignored.
with act_data (animalid, movementdate, offmovementdate, move, location, death, rsk) as (
    -- inline copy of the sample data
    values
        (1, date '1998-01-01', date '1998-04-01', 1, 3, 'f', 'Y'),
        (1, date '1998-04-01', date '1999-04-01', 2, 1, 'f', 'Y'),
        (1, date '1999-04-01', date '1999-07-01', 3, 2, 'f', 'N'),
        (1, date '1999-07-01', cast(null as date), 4, cast(null as integer), 't', 'N'),
        (2, date '1998-05-01', date '1999-04-01', 1, 3, 'f', 'Y'),
        (2, date '1999-04-01', cast(null as date), 2, cast(null as integer), 't', 'N'),
        (3, date '1998-02-01', date '1999-01-01', 1, 2, 'f', 'N'),
        (3, date '1999-01-01', date '1999-06-01', 2, 1, 'f', 'Y'),
        (3, date '1999-06-01', cast(null as date), 3, cast(null as integer), 't', 'N'),
        (4, date '1997-12-01', date '1998-05-01', 1, 1, 'f', 'Y'),
        (4, date '1998-05-01', date '1999-04-01', 2, 2, 'f', 'N'),
        (4, date '1999-04-01', cast(null as date), 3, cast(null as integer), 't', 'N')
), my_data as (
    select row_number() over () as id, t.*
    from act_data t
), dates as (
    -- every date on which some movement starts or ends
    select movementdate as day
    from my_data
    union
    select offmovementdate
    from my_data
), my_intervals as (
    -- consecutive dates form elementary intervals; the last one has no end
    select day as start_int, lead(day) over (order by day nulls last) as end_int
    from dates
    where day is not null
), intervals as (
    select
        row_number() over (order by start_int nulls last) as interval_id,
        start_int,
        end_int,
        end_int - start_int as duration   -- date - date = number of days
    from my_intervals
), overlapping_intervals as (
    -- elementary intervals during which more than one stay with the same
    -- location and rsk is in progress
    select
        rsk,
        location,
        interval_id,
        start_int,
        end_int,
        duration,
        array_agg(animalid) as animals,
        -- each animal present is in contact with every OTHER animal present,
        -- so the interval counts (number present - 1) times for each of them
        cast(count(*) - 1 as integer) as factor
    from intervals i
    join my_data d
        on (i.start_int >= d.movementdate and i.end_int <= d.offmovementdate)
    group by rsk, location, interval_id, start_int, end_int, duration
    having count(*) > 1
)
select a as animalid, i.rsk, sum(i.duration * i.factor) as lcd
from overlapping_intervals i
cross join unnest(animals) a
group by a, i.rsk
order by animalid, i.rsk;
It returns your expected output
animalid | rsk | lcd
----------+-----+-----
1 | Y | 120
3 | N | 245
3 | Y | 90
4 | N | 245
4 | Y | 30
UPDATE
To perform the same extraction on 8.4 without using the cross join on the array column you can use the following script. Switch references to my_data with the name of your main table and if you already have locations table in your environment use it in place of the evaluated one. It re-executes over the distinct locations the same query to populate a temp table in several steps. You can also commit at the end of each loop to check if execution time is acceptable.
-- Sample movements table; replace references to my_data with your own table.
create table my_data (animalid, movementdate, offmovementdate, move, location, death, rsk) as (
    values
        (1, date '1998-01-01', date '1998-04-01', 1, 3, 'f', 'Y'),
        (1, date '1998-04-01', date '1999-04-01', 2, 1, 'f', 'Y'),
        (1, date '1999-04-01', date '1999-07-01', 3, 2, 'f', 'N'),
        (1, date '1999-07-01', cast(null as date), 4, cast(null as integer), 't', 'N'),
        (2, date '1998-05-01', date '1999-04-01', 1, 3, 'f', 'Y'),
        (2, date '1999-04-01', cast(null as date), 2, cast(null as integer), 't', 'N'),
        (3, date '1998-02-01', date '1999-01-01', 1, 2, 'f', 'N'),
        (3, date '1999-01-01', date '1999-06-01', 2, 1, 'f', 'Y'),
        (3, date '1999-06-01', cast(null as date), 3, cast(null as integer), 't', 'N'),
        (4, date '1997-12-01', date '1998-05-01', 1, 1, 'f', 'Y'),
        (4, date '1998-05-01', date '1999-04-01', 2, 2, 'f', 'N'),
        (4, date '1999-04-01', cast(null as date), 3, cast(null as integer), 't', 'N')
);

-- Distinct non-null locations; the processing loop visits them one by one.
create table locations as (
    select distinct location
    from my_data
    where location is not null
);

-- Accumulates the per-location results; rows survive commits.
create local temp table tmp_result_table (
    animalid bigint,
    location bigint,
    rsk      text,
    lcd      bigint
) on commit preserve rows;
-- Fill tmp_result_table one location at a time so each step only handles the
-- movements of a single location.
-- NOTE(review): the DO statement exists from PostgreSQL 9.0 onwards; on 8.4 the
-- same body has to live in a plpgsql function that is then called once.
DO $$
DECLARE
    r record;
BEGIN
    FOR r IN SELECT location FROM locations
    LOOP
        insert into tmp_result_table (animalid, rsk, lcd)
        with dates as (
            -- every date on which a movement at this location starts or ends
            select movementdate as day
            from my_data d
            where d.location = r.location
            union
            select offmovementdate
            from my_data d
            where d.location = r.location
        ), intervals as (
            -- consecutive dates form elementary intervals
            select start_int, end_int, end_int - start_int as duration
            from (
                select day as start_int, lead(day) over (order by day nulls last) as end_int
                from dates
                where day is not null
            ) a
        ), overlapping_intervals as (
            -- intervals shared by more than one stay; each animal present is in
            -- contact with (number present - 1) others
            select rsk, start_int, end_int, duration, array_agg(animalid) as animals,
                   count(*) - 1 as factor
            from intervals i
            join my_data d on (i.start_int >= d.movementdate and i.end_int <= d.offmovementdate)
            where d.location = r.location
            group by rsk, start_int, end_int, duration
            having count(*) > 1
        )
        select unnest(animals), rsk, lcd
        from (
            select rsk, animals, sum(duration * factor) as lcd
            from overlapping_intervals
            group by rsk, animals
        ) a;
    END LOOP;
    RETURN;
END;
$$;  -- the terminator is required: without it the following SELECT is parsed as part of the DO statement

-- Total contact days per animal and risk class over all locations.
select animalid, rsk, sum(lcd) as lcd
from tmp_result_table
group by animalid, rsk
order by animalid, rsk desc;

T-SQL results in to columns

I have a table (t1) like below
Id Name RelId
1 a 2
2 b 3
3 c 4
4 d 3
5 e 6
The other table (t2)
Id data FK Order
1 aa 2 2
2 bb 2 3
3 cc 2 1
4 dd 2 4
5 ee 2 5
6 ff 3 3
7 gg 3 2
8 hh 3 1
9 ii 4 7
10 jj 4 4
11 kk 4 1
12 ll 4 3
13 mm 6 1
14 nn 6 2
15 oo 6 3
16 pp 6 4
The output I am looking for is
+----+------+-------+-------+------+----------+
| id | name | RelId | Col 1 | Col2 | Col-Oth |
+----+------+-------+-------+------+----------+
| 1 | a | 2 | cc | aa | bb,dd,ee |
| 2 | b | 3 | hh | gg | ff |
| 3 | c | 4 | kk | ll | jj,ii |
| 4 | d | 3 | hh | gg | ff |
| 5 | e | 6 | mm | nn | oo,pp |
+----+------+-------+-------+------+----------+
based on the Relid in T1 table join with FK column in T2 and populate col1 with the least order data, col2 with the next higher order data and col-oth with remaining data comma separated ordered.
Need your help on same.
SELECT id,name,RelId, (select data,rownumber() (partition by data order by order asc) from t2 inner join t1 on t1.relid= t2.FK) from t1
Try following query:
-- Test data in table variables. Their names must start with @:
-- `DECLARE #TEMP TABLE` is a syntax error in T-SQL.
DECLARE @TEMP TABLE
(
    Id    INT,
    Name  VARCHAR(10),
    RelId INT
);
INSERT INTO @TEMP (Id, Name, RelId)
VALUES (1,'a',2),(2,'b',3),(3,'c',4),(4,'d',3),(5,'e',6);

DECLARE @TEMP1 TABLE
(
    Id      INT,
    Data    VARCHAR(10),
    FK      INT,
    [order] INT
);
INSERT INTO @TEMP1 (Id, Data, FK, [order])
VALUES
    (1 ,'aa',2,2),(2 ,'bb',2,3),(3 ,'cc',2,1),(4 ,'dd',2,4),(5 ,'ee',2,5),
    (6 ,'ff',3,3),(7 ,'gg',3,2),(8 ,'hh',3,1),(9 ,'ii',4,7),(10,'jj',4,4),
    (11,'kk',4,1),(12,'ll',4,3),(13,'mm',6,1),(14,'nn',6,2),(15,'oo',6,3),(16,'pp',6,4);

-- For every row of @TEMP, rank the matching @TEMP1 rows (FK = RelId) by [order]:
-- rank 1 goes to Col1, rank 2 to Col2, the rest into one comma-separated list.
SELECT
    t1.Id,
    t1.Name,
    t1.RelId,
    (SELECT t3.Data
     FROM (SELECT ROW_NUMBER() OVER (ORDER BY t2.[order]) AS RowNo, t2.Data
           FROM @TEMP1 t2
           WHERE t2.FK = t1.RelId) t3
     WHERE t3.RowNo = 1) AS Col1,
    (SELECT t3.Data
     FROM (SELECT ROW_NUMBER() OVER (ORDER BY t2.[order]) AS RowNo, t2.Data
           FROM @TEMP1 t2
           WHERE t2.FK = t1.RelId) t3
     WHERE t3.RowNo = 2) AS Col2,
    -- The list is ordered by rank. SELECT DISTINCT was removed because it sorts
    -- the values alphabetically (it returned 'ii,jj' where 'jj,ii' is required).
    STUFF((SELECT ',' + t3.Data
           FROM (SELECT ROW_NUMBER() OVER (ORDER BY t2.[order]) AS RowNo, t2.Data
                 FROM @TEMP1 t2
                 WHERE t2.FK = t1.RelId) t3
           WHERE t3.RowNo > 2
           ORDER BY t3.RowNo
           FOR XML PATH ('')), 1, 1, '') AS ColOth   -- STUFF strips the leading comma
FROM
    @TEMP t1;
Using PIVOT:
-- Test data in table variables. Their names must start with @:
-- `DECLARE #t1 TABLE` is a syntax error in T-SQL.
DECLARE @t1 TABLE
(
    ID    INT,
    Name  CHAR(1),
    RelID INT
);
DECLARE @t2 TABLE
(
    ID       INT,
    Data     CHAR(2),
    RelID    INT,
    Ordering INT
);

INSERT INTO @t1 (ID, Name, RelID)
VALUES
    (1, 'a', 2),
    (2, 'b', 3),
    (3, 'c', 4),
    (4, 'd', 3),
    (5, 'e', 6);

INSERT INTO @t2 (ID, Data, RelID, Ordering)
VALUES
    ( 1, 'aa', 2, 2), ( 2, 'bb', 2, 3), ( 3, 'cc', 2, 1), ( 4, 'dd', 2, 4),
    ( 5, 'ee', 2, 5), ( 6, 'ff', 3, 3), ( 7, 'gg', 3, 2), ( 8, 'hh', 3, 1),
    ( 9, 'ii', 4, 7), (10, 'jj', 4, 4), (11, 'kk', 4, 1), (12, 'll', 4, 3),
    (13, 'mm', 6, 1), (14, 'nn', 6, 2), (15, 'oo', 6, 3), (16, 'pp', 6, 4);

WITH
-- cte1: every @t1 row joined to its @t2 rows, ranked by Ordering within the @t1 row.
cte1 AS
(
    SELECT
        t1.ID,
        t1.Name,
        t1.RelID,
        t2.Data,
        ROW_NUMBER() OVER (PARTITION BY t1.ID ORDER BY t2.Ordering) AS rn
    FROM @t1 t1
    JOIN @t2 t2 ON t1.RelID = t2.RelID
),
-- cte2: adds Col3, the comma-separated list of all values ranked 3 and above.
cte2 AS
(
    SELECT
        co.ID,
        co.Name,
        co.RelID,
        co.Data,
        co.rn,
        STUFF((SELECT ',' + ci.Data
               FROM cte1 ci
               WHERE co.ID = ci.ID
                 AND ci.rn > 2
               ORDER BY ci.rn   -- without this the order inside the list is not guaranteed
               FOR XML PATH('')), 1, 1, '') AS Col3
    FROM cte1 co
)
-- Pivot ranks 1 and 2 into their own columns; ID, Name, RelID and Col3 form the
-- implicit grouping key, so each @t1 row yields exactly one output row.
SELECT
    ID,
    Name,
    RelID,
    [1] AS Col1,
    [2] AS Col2,
    Col3
FROM cte2 PIVOT (MAX(Data) FOR rn IN ([1], [2])) p
Output:
ID Name RelID Col1 Col2 Col3
1 a 2 cc aa bb,dd,ee
2 b 3 hh gg ff
3 c 4 kk ll jj,ii
4 d 3 hh gg ff
5 e 6 mm nn oo,pp
Execution plan of my statement
Execution plan of accepted statement:
Which is better? :)

Need a query to insert 'level' into an adjacent list

I have a table like so
ID Node ParentID
1 A 0
2 B 1
3 C 1
4 D 2
5 E 2
6 F 3
7 G 3
8 H 3
9 I 4
10 J 4
11 K 10
12 L 11
I need a query to generate a 'level' field that shows how many levels deep a particular node is. Example below
ID Node ParentID Level
1 A 0 1
2 B 1 2
3 C 1 2
4 D 2 3
5 E 2 3
6 F 3 4
7 G 3 4
8 H 3 4
9 I 4 5
10 J 4 5
11 K 10 6
12 L 11 7
-- Level = dense rank of the row's ParentID among all ParentID values.
SELECT
    Id,
    Node,
    ParentID,
    DENSE_RANK() OVER (ORDER BY ParentID) AS Level
FROM Table_Name
SQL Fiddle Demo
You can use DENSE_RANK function
-- Level = dense rank of the row's ParentID among all ParentID values.
-- Node is read through alias i: the query defines no alias p, so `p.Node`
-- could not be bound.
SELECT
    i.ID,
    i.Node,
    i.ParentID,
    DENSE_RANK() OVER (ORDER BY i.ParentID) AS Level
FROM TableName AS i;
for more detail visit: http://blog.sqlauthority.com/2007/10/09/sql-server-2005-sample-example-of-ranking-functions-row_number-rank-dense_rank-ntile/
I think the correct way to do it will be to get the parent level and increment it by 1 when inserting the data since all other ways are expensive performance wise.
Something like:
-- Recursive walk: roots (ParentID = 0) are level 1, every child is one level
-- below its parent.
;WITH tree (ID, ParentID, Level) AS
(
    SELECT ID, ParentID, 1
    FROM TableName
    WHERE ParentID = 0

    UNION ALL

    SELECT child.ID, child.ParentID, 1 + tree.Level
    FROM tree
    JOIN TableName AS child
        ON child.ParentID = tree.ID
)
SELECT ID, Level
FROM tree
Try this
-- Sample adjacency list in a temporary table.
CREATE TABLE #Table1
(
    [ID]       int,
    [Node]     varchar(1),
    [ParentID] int
);

INSERT INTO #Table1 ([ID], [Node], [ParentID])
VALUES
    ( 1, 'A',  0),
    ( 2, 'B',  1),
    ( 3, 'C',  1),
    ( 4, 'D',  2),
    ( 5, 'E',  2),
    ( 6, 'F',  3),
    ( 7, 'G',  3),
    ( 8, 'H',  3),
    ( 9, 'I',  4),
    (10, 'J',  4),
    (11, 'K', 10),
    (12, 'L', 11);

-- Start from the root rows (ParentID = 0) at level 1 and give every child its
-- parent's level plus one.
;WITH CTE ([ID], [ParentID], [Node], [Level]) AS
(
    SELECT [ID], [ParentID], [Node], 1
    FROM #Table1
    WHERE ParentID = 0

    UNION ALL

    SELECT child.[ID], child.[ParentID], child.[Node], 1 + parent.[Level]
    FROM CTE AS parent
    INNER JOIN #Table1 AS child
        ON child.[ParentID] = parent.[ID]
)
SELECT ID, [Node], [ParentID], [Level]
FROM CTE
ORDER BY [Node]

DROP TABLE #Table1
Here, you need to set level by grouping ParentID then join both tables by ParentID.
-- Number the distinct ParentID values in ascending order and use that number
-- as the level of every row having that ParentID.
WITH CTE (ParentID, Level) AS
(
    SELECT
        ParentID,
        ROW_NUMBER() OVER (ORDER BY ParentID) AS Level
    FROM Table1
    GROUP BY ParentID
)
SELECT
    t1.ID,
    t1.Node,
    t1.ParentID,
    CTE.Level
FROM Table1 AS t1
JOIN CTE
    ON t1.ParentID = CTE.ParentID;
See this SQLFiddle
Update: (for MySQL - just to help others)
To do the same in MySQL try to get row number like this:
-- MySQL emulation of ROW_NUMBER(): a user variable counts the distinct
-- ParentID values. User variables are written @name; in MySQL `#` starts a
-- comment, so `#Level` would cut the statement off.
SELECT t1.ID, t1.Node, t1.ParentID, Tbl.Level
FROM Table1 t1
JOIN
(
    SELECT @Level := @Level + 1 AS Level, ParentID
    FROM (SELECT DISTINCT ParentID FROM Table1) t
    CROSS JOIN (SELECT @Level := 0) r   -- initialises the counter
    ORDER BY ParentID
) Tbl
ON t1.ParentID = Tbl.ParentID;
See this SQLFiddle

Consolidating subsets in a table

I have a table in SqlServer 2008 with data of the form
UserID StartWeek EndWeek Type
1 1 3 A
1 4 5 A
1 6 10 A
1 11 13 B
1 14 16 A
2 1 5 A
2 6 9 A
2 10 16 B
I'd like to consolidate/condense the adjacent types so that the resulting table looks like this.
UserID StartWeek EndWeek Type
1 1 10 A
1 11 13 B
1 14 16 A
2 1 9 A
2 10 16 B
Does anyone have any suggestions as to the best way to accomplish this? I've been looking at using Row_number and Partition, but I can't get it to behave exactly as I'd like.
There's probably a neater way to do it, but this produces the correct result
-- Test data in a table variable. The name must start with @:
-- `DECLARE #t TABLE` is a syntax error in T-SQL.
DECLARE @t TABLE
(
    UserId    TINYINT,
    StartWeek TINYINT,
    EndWeek   TINYINT,
    TYPE      CHAR(1)
);

INSERT INTO @t (UserId, StartWeek, EndWeek, TYPE)
VALUES
    (1,  1,  3, 'A'),
    (1,  4,  5, 'A'),
    (1,  6, 10, 'A'),
    (1, 11, 13, 'B'),
    (1, 14, 16, 'A'),
    (2,  1,  5, 'A'),
    (2,  6,  9, 'A'),
    (2, 10, 16, 'B');

;WITH
-- srcCTE: number each user's rows of one type in EndWeek order.
srcCTE AS
(
    SELECT
        t1.UserId,
        t1.StartWeek,
        t1.EndWeek,
        t1.TYPE,
        ROW_NUMBER() OVER (PARTITION BY t1.UserId, t1.TYPE
                           ORDER BY t1.EndWeek) AS rn
    FROM @t AS t1
),
-- recCTE: walk those rows in rn order. A row stays in its predecessor's group
-- when it starts in the week right after the predecessor ends; otherwise it
-- opens a new group.
recCTE AS
(
    SELECT
        UserId,
        StartWeek,
        EndWeek,
        TYPE,
        rn,
        0 AS grp
    FROM srcCTE
    WHERE rn = 1

    UNION ALL

    SELECT
        s.UserId,
        s.StartWeek,
        s.EndWeek,
        s.TYPE,
        s.rn,
        CASE
            WHEN s.StartWeek - 1 = r.EndWeek THEN r.grp
            ELSE r.grp + 1
        END AS grp
    FROM srcCTE AS s
    JOIN recCTE AS r
        ON  r.UserId = s.UserId
        AND r.TYPE = s.TYPE
        AND r.rn = s.rn - 1
)
-- One output row per (user, type, group), spanning the whole group.
SELECT
    UserId,
    MIN(StartWeek) AS StartWeek,
    MAX(EndWeek)   AS EndWeek,
    TYPE
FROM recCTE AS s1
GROUP BY
    UserId,
    TYPE,
    grp
Also using a CTE, but in a slightly different way
-- Test data in a table variable. The name must start with @:
-- `DECLARE #Consolidate TABLE` is a syntax error in T-SQL.
DECLARE @Consolidate TABLE
(
    UserID    INTEGER,
    StartWeek INTEGER,
    EndWeek   INTEGER,
    Type      CHAR(1)
);

INSERT INTO @Consolidate (UserID, StartWeek, EndWeek, Type)
VALUES
    (1,  1,  3, 'A'),
    (1,  4,  5, 'A'),
    (1,  6, 10, 'A'),
    (1, 14, 16, 'A'),
    (1, 11, 13, 'B'),
    (2,  1,  5, 'A'),
    (2,  6,  9, 'A'),
    (2, 10, 16, 'B');

-- Every row is the start of a chain; the recursion extends a chain with the row
-- of the same user and type that begins in the week right after the chain ends.
;WITH ConsolidateCTE AS
(
    SELECT UserID, StartWeek, EndWeek, Type
    FROM @Consolidate

    UNION ALL

    SELECT cte.UserID, cte.StartWeek, c.EndWeek, c.Type
    FROM ConsolidateCTE cte
    INNER JOIN @Consolidate c
        ON  c.UserID = cte.UserID
        AND c.StartWeek = cte.EndWeek + 1
        AND c.Type = cte.Type
)
-- Inner query: furthest EndWeek reachable from each StartWeek.
-- Outer query: earliest StartWeek that reaches each EndWeek, which leaves one
-- row per consolidated range.
SELECT
    UserID,
    MIN(StartWeek) AS StartWeek,
    EndWeek,
    Type
FROM (
    SELECT UserID, StartWeek, MAX(EndWeek) AS EndWeek, Type
    FROM ConsolidateCTE
    GROUP BY UserID, StartWeek, Type
) c
GROUP BY UserID, EndWeek, Type
-- named sort keys instead of positions 1, 2, 3
ORDER BY UserID, StartWeek, EndWeek