Consolidating subsets in a table

Consolidating subsets in a table - sql

I have a table in SqlServer 2008 with data of the form
UserID StartWeek EndWeek Type
1 1 3 A
1 4 5 A
1 6 10 A
1 11 13 B
1 14 16 A
2 1 5 A
2 6 9 A
2 10 16 B
I'd like to consolidate/condense the adjacent types so that the resulting table looks like this.
UserID StartWeek EndWeek Type
1 1 10 A
1 11 13 B
1 14 16 A
2 1 9 A
2 10 16 B
Does anyone have any suggestions as to the best way to accomplish this? I've been looking at using Row_number and Partition, but I can't get it to behave exactly as I'd like.

There's probably a neater way to do it, but this produces the correct result
DECLARE #t TABLE
(UserId TINYINT
,StartWeek TINYINT
,EndWeek TINYINT
,TYPE CHAR(1)
)
INSERT #t
SELECT 1,1,3,'A'
UNION SELECT 1,4,5,'A'
UNION SELECT 1,6,10,'A'
UNION SELECT 1,11,13,'B'
UNION SELECT 1,14,16,'A'
UNION SELECT 2,1,5,'A'
UNION SELECT 2,6,9,'A'
UNION SELECT 2,10,16,'B'
;WITH srcCTE
AS
(
SELECT *
,ROW_NUMBER() OVER (PARTITION BY t1.UserID, t1.Type
ORDER BY t1.EndWeek
) AS rn
FROM #t AS t1
)
,recCTE
AS
(
SELECT *
,0 AS grp
FROM srcCTE
WHERE rn = 1
UNION ALL
SELECT s.UserId
,s.StartWeek
,s.EndWeek
,s.TYPE
,s.rn
,CASE WHEN s.StartWeek - 1 = r.EndWeek
THEN r.grp
ELSE r.grp+ 1
END AS GRP
FROM srcCTE AS s
JOIN recCTE AS r
ON r.UserId = s.UserId
AND r.TYPE = s.TYPE
AND r.rn = s.rn - 1
)
SELECT UserId
,MIN(StartWeek) AS StartWeek
,MAX(EndWeek) AS EndWeek
,TYPE
FROM recCTE AS s1
GROUP BY UserId
,TYPE
,grp

Also using a CTE, but in a slightly different way
DECLARE #Consolidate TABLE (
UserID INTEGER, StartWeek INTEGER,
EndWeek INTEGER, Type CHAR(1))
INSERT INTO #Consolidate VALUES (1, 1, 3, 'A')
INSERT INTO #Consolidate VALUES (1, 4, 5, 'A')
INSERT INTO #Consolidate VALUES (1, 6, 10, 'A')
INSERT INTO #Consolidate VALUES (1, 14, 16, 'A')
INSERT INTO #Consolidate VALUES (1, 11, 13, 'B')
INSERT INTO #Consolidate VALUES (2, 1, 5, 'A')
INSERT INTO #Consolidate VALUES (2, 6, 9, 'A')
INSERT INTO #Consolidate VALUES (2, 10, 16, 'B')
;WITH ConsolidateCTE AS
(
SELECT UserID, StartWeek, EndWeek, Type
FROM #Consolidate
UNION ALL
SELECT cte.UserID, cte.StartWeek, c.EndWeek, c.Type
FROM ConsolidateCTE cte
INNER JOIN #Consolidate c ON
c.UserID = cte.UserID
AND c.StartWeek = cte.EndWeek + 1
AND c.Type = cte.Type
)
SELECT UserID, [StartWeek] = MIN(Startweek), EndWeek, Type
FROM (
SELECT UserID, Startweek, [EndWeek] = MAX(EndWeek), Type
FROM ConsolidateCTE
GROUP BY UserID, StartWeek, Type
) c
GROUP BY UserID, EndWeek, Type
ORDER BY 1, 2, 3

Related

Find groups containing 6 consecutive 1s in one column

I have a table with 2 columns:
val with values: 0 or 1
id with unique identifiers
with cte(val, id) as (
select 0, 0 union all
select 1, 1 union all
select 1, 2 union all
select 0, 3 union all
select 1, 4 union all
select 1, 5 union all
select 1, 6 union all
select 1, 7 union all
select 1, 8 union all
select 1, 9 union all
select 1, 10
)
select *
into #tmp
from cte
How do I to find id with 6 values = 1 in a row.
In the example above: id = 9, id = 10.
It is desirable not to use loops (cursors or while), but something like sum(...) over(...).

Why not LAG() (but you need an order column):
SELECT id
FROM (
SELECT
id,
val,
val1 = LAG(val, 1) OVER (ORDER BY id),
val2 = LAG(val, 2) OVER (ORDER BY id),
val3 = LAG(val, 3) OVER (ORDER BY id),
val4 = LAG(val, 4) OVER (ORDER BY id),
val5 = LAG(val, 5) OVER (ORDER BY id)
FROM YourTable
) t
WHERE val = 1 AND val1 = 1 AND val2 = 1 AND val3 = 1 AND val4 = 1 AND val5 = 1

You can use running sum over a window frame that contains exactly 6 rows (5 prior plus current row):
with cte as (
select *, sum(val) over (
order by id
rows between 5 preceding and current row
) as rsum
from #tmp
)
select *
from cte
where rsum = 6
Adjust the size of the window and where clause to match the desired value.

Another approach is using ROW_NUMBER on the LAG values
declare #tmp table (val int, id int)
insert into #tmp values
(0, 0), (1, 1), (1, 2), (0, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8), (1, 9), (1, 10)
select 0, 0 union all
select 1, 1 union all
select 1, 2 union all
select 0, 3 union all
select 1, 4 union all
select 1, 5 union all
select 1, 6 union all
select 1, 7 union all
select 1, 8 union all
select 1, 9 union all
select 1, 10
select t2.id,
t2.islandcount
from ( select t.id,
t.val,
t.priorval,
row_number() over (partition by t.val, t.priorval order by t.id) as islandcount
from ( select id,
val,
lag(val, 1) over (order by id) priorval
from #tmp
) t
) t2
where t2.islandcount >= 6
the result is
id islandcount
9 6
10 7
Try it yourself in this DBFiddle
The advantage of this method is that you can easy set the value from 6 to any other value
EDIT
As #Zhorov mentioned in the comment, there is a flaw in my code, it returns the wrong results when you add certain rows
This solution will fix that, it is based on the solution of #SalmanA so the credit for accepted answer should go to him
declare #tmp table (val int, id int)
insert into #tmp values
(0, 0), (1, 1), (1, 2), (0, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8), (1, 9), (1, 10)
-- these are the certains rows added
,(0, 11), (1, 12), (1, 13)
select t.id,
t.val,
t.islandcount
from ( select id,
val,
sum(val) over (order by id rows between 5 preceding and current row) as islandcount
from #tmp
) t
where t.islandcount >= 6
order by t.id
And again a DBFiddle

Sql Server 2014 - Deep Recursive Parent-Child Self Join

I am trying to build a deep recursive self-join query. Having the table like:
Id | ParentId
1 | NULL
2 | 1
3 | 1
4 | 2
5 | 3
6 | 8
7 | 9
For Id 1 my query should be fetching 1,2,3,4,5 since they are either the children of 1 or children of the children of 1. In the given example 6 and 7 should not be included in the query result.
I tried using CTE but I am getting tons of duplicates:
WITH CTE AS (
SELECT Id, ParentId
FROM dbo.Table
WHERE ParentId IS NULL
UNION ALL
SELECT t.Id, t.ParentId
FROM dbo.Table t
INNER JOIN CTE c ON t.ParentId = c.Id
)
SELECT * FROM CTE
Ideas?

You can try to use DISTINCT to filter duplicate rows.
;WITH CTE AS (
SELECT Id, ParentId
FROM T
WHERE ParentId IS NULL
UNION ALL
SELECT t.Id, t.ParentId
FROM T
INNER JOIN CTE c ON t.ParentId = c.Id
)
SELECT DISTINCT Id, ParentId
FROM CTE

Try the following query with CTE where you can set parentId by #parentID:
DECLARE #parentID INT = 1
;WITH cte AS
(
SELECT
t.ID
, t.ParentId
FROM #table t
),
cteParent AS
(
SELECT
t.ID
, t.ParentId
FROM #table t
WHERE t.ParentId IN (SELECT t1.ID FROM #table t1 WHERE T1.ParentId = #parentID)
)
SELECT
DISTINCT c1.ID
, c1.ParentId
FROM cte c1
INNER JOIN cte c2 ON c2.ParentId = c1.ID
UNION ALL
SELECT *
FROM cteParent
And the sample data:
DECLARE #table TABLE
(
ID INT
, ParentId INT
)
INSERT INTO #table
(
ID,
ParentId
)
VALUES
(1, NULL )
, (2, 1 )
, (3, 1 )
, (4, 2 )
, (5, 3 )
, (6, 8 )
, (7, 9 )
OUTPUT:
ID ParentId
1 NULL
2 1
3 1
4 2
5 3

I don't see duplicates.
Your code returns the following on the data you provided:
Id ParentId
1
2 1
3 1
5 3
4 2
which is what you want.
Here is a db<>fiddle.
Here is the code:
WITH t as (
SELECT *
FROM (VALUES (1, NULL), (2, 1), (3, 1), (4, 2), (5, 3), (6, 8), (7, 9)
) v(id, parentId)
),
CTE AS (
SELECT Id, ParentId
FROM t
WHERE ParentId IS NULL
UNION ALL
SELECT t.Id, t.ParentId
FROM t
INNER JOIN CTE c ON t.ParentId = c.Id
)
SELECT *
FROM CTE;
If you are getting duplicates in your actual result set, then you presumably have duplicates in your original table. I would recommend removing them before doing the recursive logic:
with t as (
select distinct id, parentid
from <your query>
),
. . .
Then run the recursive logic.

Try this sql script which result Parent Child Hierarchy
;WITH CTE(Id , ParentId)
AS
(
SELECT 1 , NULL UNION ALL
SELECT 2 , 1 UNION ALL
SELECT 3 , 1 UNION ALL
SELECT 4 , 2 UNION ALL
SELECT 5 , 3 UNION ALL
SELECT 6 , 8 UNION ALL
SELECT 7 , 9
)
,Cte2
AS
(
SELECT Id ,
ParentId ,
CAST('\'+ CAST(Id AS VARCHAR(MAX))AS VARCHAR(MAX)) AS [Hierarchy]
FROM CTE
WHERE ParentId IS NULL
UNION ALL
SELECT c1.Id ,
c1.ParentId ,
[Hierarchy]+'\'+ CAST(c1.Id AS VARCHAR(MAX)) AS [Hierarchy]
FROM Cte2 c2
INNER JOIN CTE c1
ON c1.ParentId = c2.Id
)
SELECT Id,
RIGHT([Hierarchy],LEN([Hierarchy])-1) AS ParentChildHierarchy
FROM Cte2
GO
Result
Id ParentChildHierarchy
-------------------------
1 1
2 1\2
3 1\3
5 1\3\5
4 1\2\4

This query will help you
CREATE TABLE #table( ID INT, ParentId INT )
INSERT INTO #table(ID,ParentId)
VALUES (1, NULL ), (2, 1 ), (3, 1 ), (4, 2 ), (5, 3 ), (6, 8 ), (7, 9 )
;WITH CTE AS (
SELECT ID FROM #table WHERE PARENTID IS NULL
UNION ALL
SELECT T.ID FROM #table T
INNER JOIN #table T1 ON T.PARENTID =T1.ID
) SELECT * FROM CTE

SQL server, how to get a number of distinct items

I am using SQL server,
id 3 | 4 | 5 | 6
items 1 2 3 | 2 3 5| 6 | 1 2 5
-------------------------
# of items 3 | 4 | 5 | 5
so, each id has items (ex, 3 has 3 items - 1,2,3)
and for each item, I'd like to get the number of distinct items accrued.
so, 3 has 3 distinct items - 1, 2, 3
4 has 4 distinct items - 1, 2, 3, 5
5 has 5 distinc items - 1, 2, 3, 5, 6
6 has 5 distinct items - 1, 2, 3, 5, 6
I can do this by running, 1 through 2, 1 though 3, 1 through 5 and 1 through 6 by doing count(distinct items)
But I want to automate this process and get the same results in one run.
The idea is to create a temp table and put an item in it while checking if the item is already in the temp table and print number of distinct items for each id.

CREATE TABLE TEST
(
id int, items int
)
INSERT INTO TEST
VALUES
(3, 1),
(3, 2),
(3, 3),
(4, 2),
(4, 3),
(4, 5),
(5, 6),
(6, 1),
(6, 2),
(6, 5)
SELECT B.id, COUNT(DISTINCT(A.ITEMS)) AS itemCount
FROM TEST A
INNER JOIN TEST B ON A.id <= B.id
GROUP BY B.ID
DROP TABLE TEST
Output:
id itemCount
3 3
4 4
5 5
6 5

Assuming your data in below format:
Declare #table table
(
id int,
items varchar(10)
)
insert into #table values (3, '1 2 3');
insert into #table values (4, '2 3 5');
insert into #table values (5, '6');
insert into #table values (6, '1 2 5');
with cte as
(
Select id, b.Item
from #table a
cross apply [dbo].[Split] (items, ' ') b
)
Select y.id, count(distinct(x.Item)) AS [# of items]
from cte x
join cte y on x.id <= y.id
group by y.id
Use the table valued function [dbo].[Split] from LINK.

You can as the below:
DECLARE #Tbl TABLE (Id VARCHAR(10), Column3 VARCHAR(100), Column4 VARCHAR(100), Column5 VARCHAR(100), Column6 VARCHAR(100))
INSERT #Tbl
VALUES
('items', '1 2 3', '2 3 5', '6', '1 2 5')
;WITH CTE1
AS
(
SELECT T.Id, T.Column3 AS ColumnId, CAST('<X>' + REPLACE(T.Column3,' ','</X><X>') + '</X>' as XML) AS FilterColumn FROM #Tbl T UNION ALL
SELECT T.Id, T.Column4 AS ColumnId, CAST('<X>' + REPLACE(T.Column4,' ','</X><X>') + '</X>' as XML) AS FilterColumn FROM #Tbl T UNION ALL
SELECT T.Id, T.Column5 AS ColumnId, CAST('<X>' + REPLACE(T.Column5,' ','</X><X>') + '</X>' as XML) AS FilterColumn FROM #Tbl T UNION ALL
SELECT T.Id, T.Column6 AS ColumnId, CAST('<X>' + REPLACE(T.Column6,' ','</X><X>') + '</X>' as XML) AS FilterColumn FROM #Tbl T
), CTE2
AS
(
SELECT
A.*,
B.SplitData
FROM
CTE1 A CROSS APPLY
(SELECT fdata.D.value('.','varchar(50)') AS SplitData FROM A.FilterColumn.nodes('X') as fdata(D)) B
)
SELECT
T.Id ,
(SELECT COUNT(DISTINCT C.SplitData) FROM CTE2 C WHERE C.Id = T.Id AND C.ColumnId IN (T.Column3)) Column3OfDistinct,
(SELECT COUNT(DISTINCT C.SplitData) FROM CTE2 C WHERE C.Id = T.Id AND C.ColumnId IN (T.Column3, T.Column4)) Column4OfDistinct,
(SELECT COUNT(DISTINCT C.SplitData) FROM CTE2 C WHERE C.Id = T.Id AND C.ColumnId IN (T.Column3, T.Column4, T.Column5)) Column5OfDistinct,
(SELECT COUNT(DISTINCT C.SplitData) FROM CTE2 C WHERE C.Id = T.Id AND C.ColumnId IN (T.Column3, T.Column4, T.Column5, T.Column6)) Column6OfDistinct
FROM
#Tbl T
Result:
Id Column3OfDistinct Column4OfDistinct Column5OfDistinct Column6OfDistinct
---------- ----------------- ----------------- ----------------- -----------------
items 3 4 5 5

This should help you:
select
id,
count(items)
from table_name
group by id

Need a query to insert 'level' into an adjacent list

I have a table like so
ID Node ParentID
1 A 0
2 B 1
3 C 1
4 D 2
5 E 2
6 F 3
7 G 3
8 H 3
9 I 4
10 J 4
11 K 10
12 L 11
I need a query to generate a 'level' field that shows how many levels deep a particular node is. Example below
ID Node ParentID Level
1 A 0 1
2 B 1 2
3 C 1 2
4 D 2 3
5 E 2 3
6 F 3 4
7 G 3 4
8 H 3 4
9 I 4 5
10 J 4 5
11 K 10 6
12 L 11 7

Select Id,
Node,
ParentID,
Dense_Rank() Over(Order by ParentID) as Level
from Table_Name
SQL Fiddle Demo

You can use DENSE_RANK function
SELECT i.ID, p.Node, i.ParentID
,Dense_Rank() Over(Order by ParentID) as Level
FROM TableName AS i;
for more detail visit: http://blog.sqlauthority.com/2007/10/09/sql-server-2005-sample-example-of-ranking-functions-row_number-rank-dense_rank-ntile/

I think the correct way to do it will be to get the parent level and increment it by 1 when inserting the data since all other ways are expensive performance wise.

Something like:
;with tree (ID, ParentID, Level)
as (
select ID, ParentID, 1 from TableName where ParentID = 0
union all
select t.ID, t.ParentID, 1 + tree.Level
from Tree join TableName t on t.ParentID = Tree.ID
)
select ID, Level from Tree

Try this
CREATE TABLE #Table1
([ID] int, [Node] varchar(1), [ParentID] int)
;
INSERT INTO #Table1
([ID], [Node], [ParentID])
VALUES
(1, 'A', 0),
(2, 'B', 1),
(3, 'C', 1),
(4, 'D', 2),
(5, 'E', 2),
(6, 'F', 3),
(7, 'G', 3),
(8, 'H', 3),
(9, 'I', 4),
(10, 'J', 4),
(11, 'K', 10),
(12, 'L', 11)
;
;WITH CTE ([ID], [ParentID], [Node], [Level])
as (
SELECT [ID], [ParentID], [Node], 1 FROM #Table1 WHERE ParentID = 0
UNION all
select t.[ID], t.[ParentID], t.[Node], 1 + c.[Level]
from CTE c inner join #Table1 t ON t.[ParentID] = c.[ID]
)
select ID, [Node], [ParentID], [Level] from CTE
ORDER BY [Node]
DROP TABLE #Table1

Here, you need to set level by grouping ParentID then join both tables by ParentID.
WITH CTE (ParentID, Level)
AS (
SELECT ParentID
, Row_Number() OVER (ORDER BY ParentID) AS Level
FROM Table1
GROUP BY ParentID
)
SELECT t1.ID, t1.Node, t1.ParentID, CTE.Level
FROM Table1 t1
JOIN CTE ON t1.ParentID = CTE.ParentID;
See this SQLFiddle
Update: (for MySQL - just to help others)
To do the same in MySQL try to get row number like this:
SELECT t1.ID, t1.Node, t1.ParentID, Tbl.Level
FROM Table1 t1
JOIN
(
SELECT #Level:=#Level+1 AS Level , ParentID
FROM (SELECT DISTINCT ParentID FROM Table1) t
, (SELECT #Level:=0) r
ORDER BY ParentID
) Tbl
ON t1.ParentID = Tbl.ParentID;
See this SQLFiddle

Is it possible to write a sql query that is grouped based on a running total of a column?

It would be easier to explain with an example. Suppose I wanted to get at most 5 items per group.
My input would be a table looking like this:
Item Count
A 2
A 3
A 3
B 4
B 4
B 5
C 1
And my desired output would look like this:
Item Count
A 5
A>5 3
B 4
B>5 9
C 1
An alternative output that I could also work with would be
Item Count RunningTotal
A 2 2
A 3 5
A 3 8
B 4 4
B 4 8
B 5 13
C 1 1
I can use ROW_NUMBER() to get the top X records in each group, however my requirement is to get the top X items for each group, not X records. My mind is drawing a blank as to how to do this.

declare #yourTable table (item char(1), [count] int)
insert into #yourTable
select 'A', 2 union all
select 'A', 3 union all
select 'A', 3 union all
select 'B', 4 union all
select 'B', 4 union all
select 'B', 5 union all
select 'C', 1
;with cte(item, count, row) as (
select *, row_number() over ( partition by item order by item, [count])
from #yourTable
)
select t1.Item, t1.Count, sum(t2.count) as RunningTotal from cte t1
join cte t2 on t1.item = t2.item and t2.row <= t1.row
group by t1.item, t1.count, t1.row
Result:
Item Count RunningTotal
---- ----------- ------------
A 2 2
A 3 5
A 3 8
B 4 4
B 4 8
B 5 13
C 1 1

Considering the clarifications from your comment, you should be able to produce the second kid of output from your post by running this query:
select t.Item
, t.Count
, (select sum(tt.count)
from mytable tt
where t.item=tt.item and (tt.creating_user_priority < t.creating_user_priority or
( tt.creating_user_priority = t.creating_user_priority and tt.created_date < t.createdDate))
) as RunningTotal
from mytable t

declare #yourTable table (item char(1), [count] int)
insert into #yourTable
select 'A', 2 union all
select 'A', 3 union all
select 'A', 3 union all
select 'B', 4 union all
select 'B', 4 union all
select 'B', 5 union all
select 'C', 1
;with cte(item, count, row) as (
select *, row_number() over ( partition by item order by item, [count])
from #yourTable
)
select t1.row, t1.Item, t1.Count, sum(t2.count) as RunningTotal
into #RunTotal
from cte t1
join cte t2 on t1.item = t2.item and t2.row <= t1.row
group by t1.item, t1.count, t1.row
alter table #RunTotal
add GrandTotal int
update rt
set GrandTotal = gt.Total
from #RunTotal rt
left join (
select Item, sum(Count) Total
from #RunTotal rt
group by Item) gt
on rt.Item = gt.Item
select Item, max(RunningTotal)
from #RunTotal
where RunningTotal <= 5
group by Item
union
select a.Item + '>5', total - five
from (
select Item, max(GrandTotal) total
from #RunTotal
where GrandTotal > 5
group by Item
) a
left join (
select Item, max(RunningTotal) five
from #RunTotal
where RunningTotal <= 5
group by Item
) b
on a.Item = b.Item
I've updated the accepted answer and got your desired result.

SELECT Item, SUM(Count)
FROM mytable t
GROUP BY Item
HAVING SUM(Count) <=5
UNION
SELECT Item, 5
FROM mytable t
GROUP BY Item
HAVING SUM(Count) >5
UNION
SELECT t2.Item + '>5', Sum(t2.Count) - 5
FROM mytable t2
GOUP BY Item
HAVING SUM(Count) > 5
ORDER BY 1, 2

select 'A' as Name, 2 as Cnt
into #tmp
union all select 'A',3
union all select 'A',3
union all select 'B',4
union all select 'B',4
union all select 'B',5
union all select 'C',1
select Name, case when sum(cnt) > 5 then 5 else sum(cnt) end Cnt
from #tmp
group by Name
union
select Name+'>5', sum(cnt)-5 Cnt
from #tmp
group by Name
having sum(cnt) > 5
Here is what I have so far. I know it's not complete but... this should be a good starting point.

I can get your second output by using a temp table and an update pass:
DECLARE #Data TABLE
(
ID INT IDENTITY(1,1) PRIMARY KEY
,Value VARCHAR(5)
,Number INT
,Total INT
)
INSERT INTO #Data (Value, Number) VALUES ('A',2)
INSERT INTO #Data (Value, Number) VALUES ('A',3)
INSERT INTO #Data (Value, Number) VALUES ('A',3)
INSERT INTO #Data (Value, Number) VALUES ('B',4)
INSERT INTO #Data (Value, Number) VALUES ('B',4)
INSERT INTO #Data (Value, Number) VALUES ('B',5)
INSERT INTO #Data (Value, Number) VALUES ('C',1)
DECLARE
#Value VARCHAR(5)
,#Count INT
UPDATE #Data
SET
#Count = Total = CASE WHEN Value = #Value THEN Number + #Count ELSE Number END
,#Value = Value
FROM #Data AS D
SELECT
Value
,Number
,Total
FROM #Data
There may be better ways, but this should work.

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Consolidating subsets in a table - sql

Related

Find groups containing 6 consecutive 1s in one column

Sql Server 2014 - Deep Recursive Parent-Child Self Join

SQL server, how to get a number of distinct items

Need a query to insert 'level' into an adjacent list

Is it possible to write a sql query that is grouped based on a running total of a column?

Categories

Resources