I need to extract continuous ranges from a table, based on consecutive numbers (column N) that share the same "category" (column C below). Graphically it looks like this:
N  C  D
--------
1  x  a           C  N1  N2  D1  D2
2  x  b           ------------------
3  x  c           x  1   4   a   d    (continuous range with same C)
4  x  d    ==>    x  6   7   e   f    (new range because "5" is missing)
6  x  e           y  8   10  g   i    (new range because C changed to "y")
7  x  f
8  y  g
9  y  h
10 y  i
SQL Server is 2005. Thanks.
-- Sample data: N is the sequence number, C the category, D a payload value.
-- A table variable must be named with '@'; "DECLARE #name TABLE" does not parse.
DECLARE @myTable TABLE
(
    N INT,
    C CHAR(1),
    D CHAR(1)
);
INSERT INTO @myTable (N, C, D) VALUES (1, 'x', 'a');
INSERT INTO @myTable (N, C, D) VALUES (2, 'x', 'b');
INSERT INTO @myTable (N, C, D) VALUES (3, 'x', 'c');
INSERT INTO @myTable (N, C, D) VALUES (4, 'x', 'd');
INSERT INTO @myTable (N, C, D) VALUES (6, 'x', 'e');
INSERT INTO @myTable (N, C, D) VALUES (7, 'x', 'f');
INSERT INTO @myTable (N, C, D) VALUES (8, 'y', 'g');
INSERT INTO @myTable (N, C, D) VALUES (9, 'y', 'h');
INSERT INTO @myTable (N, C, D) VALUES (10, 'y', 'i');

-- Collapse runs of consecutive N within the same C into one row per run.
-- Output: C, N1/N2 (first/last N of the run), D1/D2 (D of the first/last row).
WITH StartingPoints AS (
    -- Rows with no predecessor (same C, N - 1) open a run.
    -- Numbering is per category so that the k-th start of a category is paired
    -- with the k-th end of the SAME category even when the N values of
    -- different categories interleave.
    SELECT A.N, A.C, A.D,
           ROW_NUMBER() OVER (PARTITION BY A.C ORDER BY A.N) AS rownum
    FROM @myTable AS A
    WHERE NOT EXISTS (
        SELECT *
        FROM @myTable AS B
        WHERE B.C = A.C
          AND B.N = A.N - 1
    )
),
EndingPoints AS (
    -- Rows with no successor (same C, N + 1) close a run.
    SELECT A.N, A.C, A.D,
           ROW_NUMBER() OVER (PARTITION BY A.C ORDER BY A.N) AS rownum
    FROM @myTable AS A
    WHERE NOT EXISTS (
        SELECT *
        FROM @myTable AS B
        WHERE B.C = A.C
          AND B.N = A.N + 1
    )
)
SELECT StartingPoints.C,
       StartingPoints.N AS [N1],
       EndingPoints.N   AS [N2],
       StartingPoints.D AS [D1],
       EndingPoints.D   AS [D2]
FROM StartingPoints
INNER JOIN EndingPoints
    ON  EndingPoints.C      = StartingPoints.C
    AND EndingPoints.rownum = StartingPoints.rownum
ORDER BY StartingPoints.C, StartingPoints.N;
Results:
C N1 N2 D1 D2
---- ----------- ----------- ---- ----
x 1 4 a d
x 6 7 e f
y 8 10 g i
The RANK function is a safer bet than ROW_NUMBER, in case any N values are duplicated, as in the following example:
-- Sample data with duplicated N values (4 and 10).
-- A table variable must be named with '@'; "declare #name table" does not parse.
declare @ncd table (N int, C char(1), D char(1));
insert into @ncd (N, C, D)
select 1,'x','a' union all
select 2,'x','b' union all
select 3,'x','c' union all
select 4,'x','d' union all
select 4,'x','e' union all
select 7,'x','f' union all
select 8,'y','g' union all
select 9,'y','h' union all
select 10,'y','i' union all
select 10,'y','j';

-- Gaps-and-islands: within one category, N minus its dense rank is constant
-- across a run of consecutive N values, so (C, r) identifies a run.
-- DENSE_RANK is required: RANK skips a value after a tie, which would change r
-- for the rows following a duplicate in the middle of a run (e.g. 3,4,4,5)
-- and split that run in two.
with a as (
    select N
         , C
         , D
         , r = N - dense_rank() over (partition by C order by N)
    from @ncd
)
select C
     , N1 = min(N)
     , N2 = max(N)
     -- NOTE(review): MIN/MAX(D) only equal the first/last D of the run when D
     -- sorts in the same order as N, as it does in this sample - confirm for real data.
     , D1 = min(D)
     , D2 = max(D)
from a
-- r alone is not unique across categories, so C must be part of the key.
group by C, r
order by C, N1;
Result, which correctly withstands the duplicated 4 and 10:
C N1 N2 D1 D2
---- ----------- ----------- ---- ----
x 1 4 a e
x 7 7 f f
y 8 10 g j
Using this answer as a starting point, I ended up with the following:
-- Range extraction over inline sample data.
-- "ranked" tags every row with a run identifier (Grp) and with flags telling
-- whether it is the first and/or last row of its run; the final query keeps
-- only those boundary rows and folds each run into a single output row.
;WITH data (N, C, D) AS (
              SELECT 1,  'x', 'a'
    UNION ALL SELECT 2,  'x', 'b'
    UNION ALL SELECT 3,  'x', 'c'
    UNION ALL SELECT 4,  'x', 'd'
    UNION ALL SELECT 6,  'x', 'e'
    UNION ALL SELECT 7,  'x', 'f'
    UNION ALL SELECT 8,  'y', 'g'
    UNION ALL SELECT 9,  'y', 'h'
    UNION ALL SELECT 10, 'y', 'i'
),
ranked AS (
    SELECT
        cur.N,
        cur.C,
        cur.D,
        -- Constant within a run of consecutive N for one C.
        cur.N - ROW_NUMBER() OVER (PARTITION BY cur.C ORDER BY cur.N) AS Grp,
        -- No row with N - 1 in the same category: this row opens a run.
        CASE WHEN prv.C IS NULL THEN 1 ELSE 0 END AS IsStart,
        -- No row with N + 1 in the same category: this row closes a run.
        CASE WHEN nxt.C IS NULL THEN 1 ELSE 0 END AS IsEnd
    FROM data AS cur
    LEFT JOIN data AS prv
        ON prv.C = cur.C AND prv.N + 1 = cur.N
    LEFT JOIN data AS nxt
        ON nxt.C = cur.C AND nxt.N - 1 = cur.N
)
SELECT
    C,
    MIN(N) AS N1,
    MAX(N) AS N2,
    MAX(CASE WHEN IsStart = 1 THEN D END) AS D1,
    MAX(CASE WHEN IsEnd = 1 THEN D END) AS D2
FROM ranked
WHERE IsStart = 1
   OR IsEnd = 1
GROUP BY C, Grp
Write a stored procedure. It will create and fill a temporary table which will contain the C, N1, N2, D1 and D2 columns.
Create the temporary table
Use a cursor to loop over the entries in the table containing N, C, D, ordered by N
Use variables to detect a new range (C changed, or N(i) > N(i-1) + 1) and to store N1, N2, D1 and D2
INSERT into the temporary table for each range detected (when a new range is detected, or at the end of the cursor)
Tell me if you need a code example.
Related
I would like to split comma-separated values based on another table.
I cannot normalize the data since the original table has over 8 million rows; it crashed my laptop when I tried.
How can I put the values into the relevant columns, and into a new column when a value is not found?
For example:
TableA,
Type1 Type2
---------------------
A F
B G
C H
D I
E NULL
TableB
ID Country AllTypes
---------------------------------
1 Italy A, B, C
2 USA D, E, A, F
4 Japan I, O, Z
5 UK NULL
By using these two tables, I would like to get the output such as
ID Country AllTypes Type1 Type2 UnCaptured
----------------------------------------------------------------------
1 Italy A, B, C A, B, C NULL NULL
2 USA D, E, G, F D, E G, F NULL
4 Japan I, O, Z NULL I O, Z
5 UK NULL NULL NULL NULL
This is what I have done so far:
-- Asker's attempt: split TableB.Alltypes on ',' and aggregate each value into
-- Type1 / Type2 (when it exists in the matching TableA column) or Uncaptured.
-- The statement is reproduced as posted and does not parse; the '**' / '***'
-- markers are not SQL (presumably emphasis markup from the original post).
with TableA as (
select 'A' as Type1, 'F' as Type2 union all
select 'B', 'G' union all
select 'C', 'H' union all
select 'D', 'I' union all
select 'E', NULL
),
TableB as (
select 1 as ID, 'Italy' as Country, 'A, B, C' as Alltypes union all
select 2, 'USA', 'D, E, A, F' union all
-- NOTE(review): this branch supplies five values while the others supply
-- three; presumably a single string 'I, O, Z' was intended - confirm.
select 4, 'Japan', 'I', 'O', 'Z' union all
select 5, 'UK', NULL
)
select b.Id, b.Country, b.Alltypes,
String_Agg(v.type1,',') Type1,
String_Agg(v.type2,',') Type2
-- The next select item is not separated from the previous one by a comma.
**String_Agg(v.Type3,',') Uncaptured*** ------- This query
from tableb b
outer apply (
select Trim(value) t,
-- type1 / Type2: the trimmed value when it exists in that TableA column, else NULL.
case when exists
(select * from tablea a where a.type1=Trim(value))
then Trim(value) end type1,
case when exists
(select * from tablea a where a.type2=Trim(value))
then Trim(value) end Type2,
-- Type3: NOT EXISTS is given two parenthesised subqueries combined with AND,
-- whereas EXISTS takes a single subquery.
Case when not exists ------------This query
( (select * from tablea a where a.type1=Trim(value)) -------
and ------
(select * from tablea a where a.type2=Trim(value))------
) then Trim(value) end Type3** -------------
from String_Split(alltypes, ',')
)v
group by Id, Country, AllTypes
Without the highlighted parts (marked with -----), which are meant to create the new column (Uncaptured), it works fine, as shown below.
Id Country Alltypes Type1 Type2
1 Italy A, B, C A,B,C NULL
2 USA D, E, A, F D,E,A F
4 Japan I, O, Z I NULL
5 UK NULL NULL NULL
But if I add those highlighted parts, it shows an error. I also tried using ELSE, but that did not work either.
Could someone help me please?
----------------------- DDL+DML: Should have been provided by the OP !
DROP TABLE IF EXISTS TableA, TableB
GO
CREATE TABLE TableA (Type1 CHAR(1), Type2 CHAR(1))
GO
INSERT TableA (Type1, Type2) VALUES
    ('A', 'F' ),
    ('B', 'G' ),
    ('C', 'H' ),
    ('D', 'I' ),
    ('E', NULL)
GO
CREATE TABLE TableB (ID INT, Country NVARCHAR(100), AllTypes NVARCHAR(100))
GO
-- The UK row carries a real NULL (not the four-character string 'NULL'), and
-- the country names carry no trailing padding.
INSERT TableB (ID, Country, AllTypes) VALUES
    (1, 'Italy', 'A, B, C'   ),
    (2, 'USA',   'D, E, G, F'),
    (4, 'Japan', 'I, O, Z'   ),
    (5, 'UK',    NULL        )
GO
----------------------- Solution
-- SplitTypes: one row per comma-separated element of AllTypes, trimmed.
-- OUTER APPLY (rather than CROSS APPLY) keeps rows whose AllTypes is NULL;
-- they come through with MyType = NULL.
;WITH SplitTypes AS (
    SELECT b.ID, b.Country, b.AllTypes, MyType = TRIM(s.[value])
    FROM TableB AS b
    OUTER APPLY STRING_SPLIT(b.AllTypes, ',') AS s
)
-- Classified: look the element up in each TableA column; an element found in
-- neither column is reported as UnCaptured.
, Classified AS (
    SELECT c.ID, c.Country, c.AllTypes, c.MyType, a1.Type1, a2.Type2,
           UnCaptured = CASE WHEN a1.Type1 IS NULL AND a2.Type2 IS NULL THEN c.MyType END
    FROM SplitTypes AS c
    LEFT JOIN TableA AS a1 ON c.MyType = a1.Type1
    LEFT JOIN TableA AS a2 ON c.MyType = a2.Type2
)
-- STRING_AGG skips NULLs, so each output column lists only the elements that
-- fell into that bucket (NULL when there are none).
SELECT ID, Country, AllTypes
     , Type1      = STRING_AGG(Type1, ',')
     , Type2      = STRING_AGG(Type2, ',')
     , UnCaptured = STRING_AGG(UnCaptured, ',')
FROM Classified
GROUP BY ID, Country, AllTypes
ORDER BY ID
GO
How about
-- Fragment meant to replace the OUTER APPLY of the asker's query: each split
-- element is looked up in both TableA columns; unCaptured is the element itself
-- when neither lookup matched. Join predicates belong in ON (a join cannot
-- take a WHERE clause of its own).
outer apply (
    select Trim(s.value) t, a1.type1, a2.type2,
           case when coalesce(a1.type1, a2.type2) is null then Trim(s.value) end unCaptured
    from String_Split(alltypes, ',') s
    left join tablea a1 on a1.type1 = Trim(s.value)
    left join tablea a2 on a2.type2 = Trim(s.value)
)v
I have this table
ID S E
1 a b
1 b c
2 a b
2 b d
2 b e
2 e d
3 a c
3 c f
I want to extract indirect relations from this table for example:
ID S E
1 a c
2 a d
2 a e
2 b d
3 a f
I could do it in C# or Java but I am wondering if there is a solution for doing it in SQL or not.
You can try this...
-- Sample edges: within one ID, S -> E is a direct relation.
CREATE TABLE #a (ID INT, S VARCHAR(1), E VARCHAR(1));
INSERT INTO #a (ID, S, E)
SELECT 1, 'a', 'b' UNION ALL
SELECT 1, 'b', 'c' UNION ALL
SELECT 2, 'a', 'b' UNION ALL
SELECT 2, 'b', 'd' UNION ALL
SELECT 2, 'b', 'e' UNION ALL
SELECT 2, 'e', 'd' UNION ALL
SELECT 3, 'a', 'c' UNION ALL
SELECT 3, 'c', 'f';

-- Show the input rows.
SELECT ID, S, E
FROM #a;

-- Two-step (indirect) relations: a.S -> a.E = b.S -> b.E within the same ID.
-- ID is part of the output because the same S/E pair can occur under several IDs.
SELECT a.ID, a.S, b.E
FROM #a AS a
INNER JOIN #a AS b
    ON  b.ID = a.ID
    AND b.S  = a.E
ORDER BY a.ID, a.S, b.E;

DROP TABLE #a;
I'm trying to work out an efficient way to identify common data points based on iterative multi-joins. For example:
INPUT
-----
ID1 ID2
X Y
Y Z
Z 1
A B
C D
1 A
B A
X joins to Y. Y joins to Z. Z joins to 1.
Hence, X and 1 are ("common") joined through Y and Z, and so forth, to create the following output:
OUTPUT
------
ID1 ID2 CommonKey
X Y 1
Y Z 1
Z 1 1
A B 1
C D 2
1 A 1
B A 1
If data points are interrelated in any way, current or future, they should be given the same "CommonKey"
I've looked into using CTEs for this solution but have been unsuccessful so far.
So, I can do it with a WHILE loop.
-- Assigns a CommonKey to every link reachable from a "root" link, i.e. a link
-- whose Id1 never appears as an Id2. Each root gets its own key (its RowId) and
-- the chain is followed breadth-first via Id2 -> Id1.
-- Variables and table variables are named with '@' ("DECLARE #x" does not parse).
DECLARE @Tbl      TABLE (Id1 NVARCHAR(10), Id2 NVARCHAR(10));
DECLARE @Result   TABLE (Id1 NVARCHAR(10), Id2 NVARCHAR(10), CommonKey INT);
DECLARE @Roots    TABLE (Id1 NVARCHAR(10), Id2 NVARCHAR(10), RowId INT);
DECLARE @Frontier TABLE (Id1 NVARCHAR(10), Id2 NVARCHAR(10));  -- links added in the previous step
DECLARE @Next     TABLE (Id1 NVARCHAR(10), Id2 NVARCHAR(10));  -- links found in the current step
DECLARE @RootCounter INT = 1;
DECLARE @RootCount   INT;

INSERT INTO @Tbl (Id1, Id2)
VALUES
    ('X', 'Y'),
    ('Y', 'Z'),
    ('Z', '1'),
    ('A', 'B'),
    ('C', 'D'),
    ('1', 'A'),
    ('B', '10'),
    ('D', '4'),
    ('8', '9'),
    ('9', 'J'),
    ('J', 'R');

-- Roots: NOT EXISTS instead of NOT IN, so a NULL Id2 cannot make the
-- predicate UNKNOWN for every row and silently return no roots.
-- The numbering order is unspecified, exactly as ORDER BY (SELECT NULL) states.
INSERT INTO @Roots (Id1, Id2, RowId)
SELECT T.Id1,
       T.Id2,
       ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM @Tbl AS T
WHERE NOT EXISTS (SELECT 1 FROM @Tbl AS I WHERE I.Id2 = T.Id1);

SELECT @RootCount = COUNT(*) FROM @Roots;

WHILE @RootCounter <= @RootCount
BEGIN
    -- Start from the current root link.
    DELETE FROM @Frontier;
    INSERT INTO @Frontier (Id1, Id2)
    SELECT Id1, Id2
    FROM @Roots
    WHERE RowId = @RootCounter;

    INSERT INTO @Result (Id1, Id2, CommonKey)
    SELECT Id1, Id2, @RootCounter
    FROM @Frontier;

    WHILE 1 = 1
    BEGIN
        -- Links that continue from any frontier link. The set-based EXISTS
        -- tolerates a frontier of several rows (a chain that branches), and
        -- skipping links already recorded under this key guarantees
        -- termination when the data contains a cycle.
        DELETE FROM @Next;
        INSERT INTO @Next (Id1, Id2)
        SELECT T.Id1, T.Id2
        FROM @Tbl AS T
        WHERE EXISTS (SELECT 1 FROM @Frontier AS F WHERE F.Id2 = T.Id1)
          AND NOT EXISTS (SELECT 1
                          FROM @Result AS R
                          WHERE R.CommonKey = @RootCounter
                            AND R.Id1 = T.Id1
                            AND R.Id2 = T.Id2);

        IF @@ROWCOUNT = 0 BREAK;

        INSERT INTO @Result (Id1, Id2, CommonKey)
        SELECT Id1, Id2, @RootCounter
        FROM @Next;

        -- The newly found links become the next frontier.
        DELETE FROM @Frontier;
        INSERT INTO @Frontier (Id1, Id2)
        SELECT Id1, Id2
        FROM @Next;
    END;

    SET @RootCounter += 1;
END;

SELECT Id1, Id2, CommonKey
FROM @Result;
Result:
Id1 Id2 CommonKey
X Y 1
Y Z 1
Z 1 1
1 A 1
A B 1
B 10 1
C D 2
D 4 2
8 9 3
9 J 3
J R 3
Using a CTE and adding another 'RowId' to get things in link order:
-- Builds a scratch table of links, walks it with a recursive CTE and reports,
-- for every walked link, the chain key it was reached under plus a merged key.
CREATE TABLE Tbl (Id1 NVARCHAR(10), Id2 NVARCHAR(10))
INSERT INTO Tbl
VALUES
    ('X', 'Y'),
    ('Y', 'Z'),
    ('Z', '1'),
    ('A', 'B'),
    ('C', 'D'),
    ('1', 'A'),
    ('B', '10'),
    ('D', '4'),
    ('8', '9'),
    ('9', 'J'),
    ('J', 'R'),
    ('D', 'B');
WITH LinkList (Id1, Id2, CommonKey, RowId) AS
(
    -- Anchor: links whose Id1 never occurs as an Id2; each gets its own key,
    -- numbered in Id1 order, and position 1 in its chain.
    SELECT Id1,
           Id2,
           ROW_NUMBER() OVER (ORDER BY Id1) AS CommonKey,
           1 AS RowId
    FROM Tbl
    WHERE Id1 NOT IN (SELECT Id2 FROM Tbl)
    UNION ALL
    -- Step: follow Id2 -> Id1, carrying the key and advancing the position.
    SELECT nxt.Id1,
           nxt.Id2,
           prev.CommonKey,
           prev.RowId + 1
    FROM Tbl AS nxt
    INNER JOIN LinkList AS prev
        ON prev.Id2 = nxt.Id1
),
-- Per chain key: the smallest Id2 among those that occur in more than one
-- LinkList row.
SharedNode (CommonId, CommonKey) AS
(
    SELECT MIN(Id2),
           CommonKey
    FROM LinkList
    WHERE Id2 IN (SELECT Id2
                  FROM LinkList
                  GROUP BY Id2
                  HAVING COUNT(CommonKey) > 1)
    GROUP BY CommonKey
),
-- Per chain key: the lowest chain key that also reaches that shared Id2.
MergedKey (CommonId, CommonKey, MinKey) AS
(
    SELECT s.CommonId,
           s.CommonKey,
           low.MinKey
    FROM SharedNode AS s
    CROSS APPLY (SELECT MIN(CommonKey) AS MinKey
                 FROM LinkList
                 WHERE Id2 = s.CommonId) AS low
)
SELECT l.Id1,
       l.Id2,
       l.CommonKey,
       m.MinKey,
       ISNULL(m.MinKey, l.CommonKey) AS NewCommonKey
FROM LinkList AS l
LEFT OUTER JOIN MergedKey AS m
    ON l.CommonKey = m.CommonKey
DROP TABLE Tbl
Result Is:
Id1 Id2 CommonKey MinKey NewCommonKey
8 9 1 NULL 1
C D 2 2 2
X Y 3 2 2
Y Z 3 2 2
Z 1 3 2 2
1 A 3 2 2
A B 3 2 2
B 10 3 2 2
D 4 2 2 2
D B 2 2 2
B 10 2 2 2
9 J 1 NULL 1
J R 1 NULL 1
It needs another select and group wrapped around it to get rid of the duplicates,
but you can see the list being merged this way, pardon my formatting skills.
Is that what you're after?
How do I remove duplicates from a table where all the columns are significant apart from the PK?
-- Sample data: pk is unique, (a, b, c) repeats.
-- "declare #dummy table" does not parse; a '#' name denotes a temporary table,
-- so it is created as one, which also keeps it visible to later batches of
-- the same session.
IF OBJECT_ID('tempdb..#dummy') IS NOT NULL DROP TABLE #dummy;
CREATE TABLE #dummy
(
    pk INT NOT NULL PRIMARY KEY,
    a  CHAR(1),
    b  CHAR(1),
    c  CHAR(1)
);
INSERT INTO #dummy (pk, a, b, c)
SELECT 1, 'A', 'B', 'B' UNION ALL
SELECT 2, 'A', 'B', 'B' UNION ALL
SELECT 3, 'P', 'Q', 'R' UNION ALL
SELECT 4, 'P', 'Q', 'R' UNION ALL
SELECT 5, 'X', 'Y', 'Z' UNION ALL
SELECT 6, 'X', 'Y', 'Z' UNION ALL
SELECT 7, 'A', 'B', 'Z';
What I get without removing duplicates:
-- Every row of #dummy, duplicates included.
SELECT pk, a, b, c
FROM #dummy
pk a b c
----------- ---- ---- ----
1 A B B
2 A B B
3 P Q R
4 P Q R
5 X Y Z
6 X Y Z
7 A B Z
What I'd like is:
select ... do magic ....
pk a b c
----------- ---- ---- ----
1 A B B
3 P Q R
5 X Y Z
7 A B Z
Found it:
-- One row per distinct (a, b, c), represented by its smallest pk.
-- The aggregate is aliased so the result column is named pk, as in the desired
-- output, and the rows are returned in pk order.
select min(pk) as pk, a, b, c
from #dummy
group by a, b, c
order by min(pk)
You want something like this, I think:
-- Physically remove duplicates from #dummy: a row is deleted when another row
-- with the same (a, b, c) has a smaller pk, so the lowest pk of each
-- combination survives.
DELETE FROM f
FROM #dummy AS f INNER JOIN #dummy AS g
ON g.a = f.a
AND g.b = f.b
AND g.c = f.c
AND f.pk > g.pk
Check out this article: http://www.simple-talk.com/sql/t-sql-programming/removing-duplicates-from-a-table-in-sql-server/
At first, I thought distinct would do it, but I'm fairly certain what you want is group by:
-- pk is not part of the GROUP BY, so it cannot be selected with '*';
-- aggregate it to pick one representative pk per (a, b, c) combination.
select min(pk) as pk, a, b, c from #dummy group by a, b, c
Since there's a unique primary key, all rows are distinct.
I have a table in SqlServer 2008 with data of the form
UserID StartWeek EndWeek Type
1 1 3 A
1 4 5 A
1 6 10 A
1 11 13 B
1 14 16 A
2 1 5 A
2 6 9 A
2 10 16 B
I'd like to consolidate/condense the adjacent types so that the resulting table looks like this.
UserID StartWeek EndWeek Type
1 1 10 A
1 11 13 B
1 14 16 A
2 1 9 A
2 10 16 B
Does anyone have any suggestions as to the best way to accomplish this? I've been looking at using Row_number and Partition, but I can't get it to behave exactly as I'd like.
There's probably a neater way to do it, but this produces the correct result
-- Sample data: one row per (user, week range, type).
-- A table variable must be named with '@'; "DECLARE #t TABLE" does not parse.
DECLARE @t TABLE
(UserId TINYINT
,StartWeek TINYINT
,EndWeek TINYINT
,TYPE CHAR(1)
);
-- UNION ALL: the rows are literal fixtures, no de-duplication is intended.
INSERT @t (UserId, StartWeek, EndWeek, TYPE)
SELECT 1,1,3,'A'
UNION ALL SELECT 1,4,5,'A'
UNION ALL SELECT 1,6,10,'A'
UNION ALL SELECT 1,11,13,'B'
UNION ALL SELECT 1,14,16,'A'
UNION ALL SELECT 2,1,5,'A'
UNION ALL SELECT 2,6,9,'A'
UNION ALL SELECT 2,10,16,'B';

-- srcCTE numbers the ranges of each (UserId, TYPE) in EndWeek order.
;WITH srcCTE
AS
(
SELECT t1.UserId
,t1.StartWeek
,t1.EndWeek
,t1.TYPE
,ROW_NUMBER() OVER (PARTITION BY t1.UserId, t1.TYPE
ORDER BY t1.EndWeek
) AS rn
FROM @t AS t1
)
-- recCTE walks those ranges in rn order and assigns a group number: the group
-- is kept while a range starts the week after the previous one ended, and is
-- incremented otherwise.
,recCTE
AS
(
SELECT UserId
,StartWeek
,EndWeek
,TYPE
,rn
,0 AS grp
FROM srcCTE
WHERE rn = 1
UNION ALL
SELECT s.UserId
,s.StartWeek
,s.EndWeek
,s.TYPE
,s.rn
,CASE WHEN s.StartWeek - 1 = r.EndWeek
THEN r.grp
ELSE r.grp + 1
END AS grp
FROM srcCTE AS s
JOIN recCTE AS r
ON r.UserId = s.UserId
AND r.TYPE = s.TYPE
AND r.rn = s.rn - 1
)
-- One output row per (UserId, TYPE, grp): the merged week range.
SELECT UserId
,MIN(StartWeek) AS StartWeek
,MAX(EndWeek) AS EndWeek
,TYPE
FROM recCTE AS s1
GROUP BY UserId
,TYPE
,grp
ORDER BY UserId
,StartWeek
-- The recursion advances rn by one per level, so it always terminates; lift
-- the default limit of 100 levels so more than 100 ranges per (UserId, TYPE)
-- do not raise an error.
OPTION (MAXRECURSION 0);
Also using a CTE, but in a slightly different way
-- Sample data: one row per (user, week range, type).
-- A table variable must be named with '@'; "DECLARE #Consolidate TABLE" does not parse.
DECLARE @Consolidate TABLE (
UserID INTEGER, StartWeek INTEGER,
EndWeek INTEGER, Type CHAR(1));
INSERT INTO @Consolidate (UserID, StartWeek, EndWeek, Type) VALUES (1, 1, 3, 'A');
INSERT INTO @Consolidate (UserID, StartWeek, EndWeek, Type) VALUES (1, 4, 5, 'A');
INSERT INTO @Consolidate (UserID, StartWeek, EndWeek, Type) VALUES (1, 6, 10, 'A');
INSERT INTO @Consolidate (UserID, StartWeek, EndWeek, Type) VALUES (1, 14, 16, 'A');
INSERT INTO @Consolidate (UserID, StartWeek, EndWeek, Type) VALUES (1, 11, 13, 'B');
INSERT INTO @Consolidate (UserID, StartWeek, EndWeek, Type) VALUES (2, 1, 5, 'A');
INSERT INTO @Consolidate (UserID, StartWeek, EndWeek, Type) VALUES (2, 6, 9, 'A');
INSERT INTO @Consolidate (UserID, StartWeek, EndWeek, Type) VALUES (2, 10, 16, 'B');

-- ConsolidateCTE: every original range, plus every extension of a range by a
-- same-user, same-type range that starts the week after it ends. Each row
-- keeps the StartWeek it began with and takes the EndWeek it was extended to.
;WITH ConsolidateCTE AS
(
SELECT UserID, StartWeek, EndWeek, Type
FROM @Consolidate
UNION ALL
SELECT cte.UserID, cte.StartWeek, c.EndWeek, c.Type
FROM ConsolidateCTE cte
INNER JOIN @Consolidate c ON
c.UserID = cte.UserID
AND c.StartWeek = cte.EndWeek + 1
AND c.Type = cte.Type
)
-- Inner query: the furthest EndWeek reachable from each StartWeek.
-- Outer query: of all StartWeeks reaching the same EndWeek, keep the earliest.
SELECT UserID, [StartWeek] = MIN(StartWeek), EndWeek, Type
FROM (
SELECT UserID, StartWeek, [EndWeek] = MAX(EndWeek), Type
FROM ConsolidateCTE
GROUP BY UserID, StartWeek, Type
) c
GROUP BY UserID, EndWeek, Type
-- Named sort keys instead of positions 1, 2, 3.
ORDER BY UserID, StartWeek, EndWeek