Get Starting and ending of a Dataset in SQL Server 2008 - sql

I need to extract the starting and ending points of a data set from a table. For Ex if data is like:
1
5
10
15
20
40
45
50
55
60
65
70
Now the 2 data sets are 1 - 20 and 40 - 70. So the Data will always be sequential and the difference between points in a single dataset will max be 7. So the resultant query should give me 3 columns:
1. 5 15
2. 45 65
i.e second and second last point in the dataset.
Is it possible to do this without using a cursor or a for loop? Please post a query if you can.
I tried doing is using over and partition by but no luck

If I understand you properly, this returns what you're asking for.
-- Sample data: two runs of values; neighbours inside a run differ by at most 7.
-- A table variable has to be named with the @ sigil (DECLARE #tmp TABLE does not parse).
DECLARE @tmp TABLE
(
    numVal INT PRIMARY KEY
);

INSERT INTO @tmp (numVal)
VALUES
    (1), (5), (10), (15), (20),
    (40), (45), (50), (55), (60), (65), (70);

-- breaks: the last value of every run, i.e. a value that has no successor within +7.
-- breakGroup numbers those run ends in ascending order.
;WITH breaks AS
(
    SELECT
        t.numVal AS breakMax
      , ROW_NUMBER() OVER (ORDER BY t.numVal) AS breakGroup
    FROM @tmp AS t
    WHERE NOT EXISTS
    (
        SELECT NULL
        FROM @tmp AS t1
        WHERE t1.numVal > t.numVal
          AND t1.numVal <= t.numVal + 7
    )
)
SELECT
    v.breakGroup
  , MIN(v.numVal) AS secondNum
  , MAX(v.numVal) AS secondLastNum
FROM
(
    SELECT
        t.numVal
      , br.breakGroup
        -- ar counts from the start of the run, dr from its end
      , ROW_NUMBER() OVER (PARTITION BY br.breakGroup ORDER BY t.numVal)      AS ar
      , ROW_NUMBER() OVER (PARTITION BY br.breakGroup ORDER BY t.numVal DESC) AS dr
    FROM @tmp AS t
    CROSS APPLY
    (
        -- a value belongs to the first run whose end is not below it
        SELECT TOP 1
            b.breakGroup
        FROM breaks AS b
        WHERE b.breakMax >= t.numVal
        ORDER BY b.breakGroup
    ) AS br
) AS v
-- keep only the second and the second-to-last value of each run
WHERE v.ar = 2
   OR v.dr = 2
GROUP BY v.breakGroup
ORDER BY v.breakGroup;

Related

Break up running sum into maximum group size / length

I am trying to break up a running (ordered) sum into groups of a max value. When I implement the following example logic...
-- Rebuild the scratch table: a random row count (0-999, capped by the size of sys.objects)
-- with descriptions of random length 0-29.
IF OBJECT_ID(N'tempdb..#t') IS NOT NULL DROP TABLE #t;

SELECT TOP (ABS(CHECKSUM(NEWID())) % 1000)
    ROW_NUMBER() OVER (ORDER BY name) AS ID,
    LEFT(CAST(NEWID() AS NVARCHAR(100)), ABS(CHECKSUM(NEWID())) % 30) AS Description
INTO #t
FROM sys.objects;

-- Scalar variables take the @ sigil; "#maxGroupSize" is not a valid variable name.
DECLARE @maxGroupSize INT;
SET @maxGroupSize = 100;

;WITH t AS (
    SELECT
        *,
        LEN(Description) AS DescriptionLength,
        SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID) AS [RunningLength],
        -- GroupID buckets rows by the overall running total (integer division), not by a
        -- per-group running total, so the sum inside one bucket can exceed @maxGroupSize.
        SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID) / @maxGroupSize AS GroupID
    FROM #t
)
SELECT *, SUM(DescriptionLength) OVER (PARTITION BY GroupID) AS SumOfGroup
FROM t
ORDER BY GroupID, ID;
I am getting groups that are larger than the maximum group size (length) of 100.
A recursive common table expression (rCTE) would be one way to resolve this.
Sample data
Limited set of fixed sample data.
-- Fixed sample set: ids 1..10, each with a short description string.
CREATE TABLE data
(
    id          INT,
    description NVARCHAR(20)
);

INSERT INTO data (id, description)
VALUES
      ( 1, 'qmlsdkjfqmsldk')
    , ( 2, 'mldskjf')
    , ( 3, 'qmsdlfkqjsdm')
    , ( 4, 'fmqlsdkfq')
    , ( 5, 'qdsfqsdfqq')
    , ( 6, 'mds')
    , ( 7, 'qmsldfkqsjdmfqlkj')
    , ( 8, 'qdmsl')
    , ( 9, 'mqlskfjqmlkd')
    , (10, 'qsdqfdddffd');
Solution
For every recursion step evaluate (r.group_running_length + len(d.description) <= #group_max_length) if the previous group must be extended or a new group must be started in a case expression.
Set group target size to 40 to better fit the sample data.
-- Upper bound for the summed description length of one group (40 fits the sample data).
-- Scalar variables take the @ sigil; "#group_max_length" is not a valid variable name.
declare @group_max_length int = 40;

-- rcte visits the rows one at a time in id order and carries the overall running length,
-- the current group number and the running length inside that group.
with rcte as
(
    -- anchor: the row with id = 1 opens group 1
    select d.id,
           d.description,
           len(d.description) as description_length,
           len(d.description) as running_length,
           1                  as group_id,
           len(d.description) as group_running_length
    from data d
    where d.id = 1
    union all
    -- step: pick up the row with the next id (relies on ids being consecutive)
    select d.id,
           d.description,
           len(d.description),
           r.running_length + len(d.description),
           -- stay in the current group while the row still fits, otherwise open the next one
           case
               when r.group_running_length + len(d.description) <= @group_max_length
                   then r.group_id
               else r.group_id + 1
           end,
           -- the in-group running length restarts with the row that opens a new group
           case
               when r.group_running_length + len(d.description) <= @group_max_length
                   then r.group_running_length + len(d.description)
               else len(d.description)
           end
    from rcte r
    join data d
        on d.id = r.id + 1
)
select r.id,
       r.description,
       r.description_length,
       r.running_length,
       r.group_id,
       r.group_running_length,
       gs.group_sum
from rcte r
-- the largest in-group running length is the total of the group
cross apply ( select max(r2.group_running_length) as group_sum
              from rcte r2
              where r2.group_id = r.group_id ) gs -- group sum
order by r.id
-- one recursion level per row: lift the default cap of 100 levels so larger tables do not abort
option (maxrecursion 0);
Result
Contains both the running group length as well as the group sum for every row.
id description description_length running_length group_id group_running_length group_sum
-- ---------------- ------------------ -------------- -------- -------------------- ---------
1 qmlsdkjfqmsldk 14 14 1 14 33
2 mldskjf 7 21 1 21 33
3 qmsdlfkqjsdm 12 33 1 33 33
4 fmqlsdkfq 9 42 2 9 39
5 qdsfqsdfqq 10 52 2 19 39
6 mds 3 55 2 22 39
7 qmsldfkqsjdmfqlkj 17 72 2 39 39
8 qdmsl 5 77 3 5 28
9 mqlskfjqmlkd 12 89 3 17 28
10 qsdqfdddffd 11 100 3 28 28
Fiddle to see things in action (includes random data version).

SQL Rank() function excluding rows

Consider I have the following table.
ID value
1 100
2 200
3 200
5 250
6 1
I have the following query which gives the result as follows. I want to exclude the value 200 from rank function, but still that row has to be returned.
-- Shows rank 0 for rows whose value is 200 and the dense rank (by VALUE descending) otherwise.
-- The DENSE_RANK window has no partition or filter, so the 200 rows still take part in the ranking.
SELECT
    CASE Value
        WHEN 200 THEN 0
        ELSE DENSE_RANK() OVER (ORDER BY VALUE DESC)
    END AS RANK
  , ID
  , VALUE
FROM #table
RANK ID VALUE
1 5 250
0 2 200
0 3 200
4 1 100
5 6 1
But I want the result as follows. How to achieve it?
RANK ID VALUE
1 5 250
0 2 200
0 3 200
2 1 100
3 6 1
If VAL column is not nullable, taking into account NULL is the last value in ORDER BY .. DESC
-- NULLIF turns 200 into NULL for the ordering; the CASE factor then zeroes the rank on those rows.
SELECT
    *
  , DENSE_RANK() OVER (ORDER BY NULLIF(val, 200) DESC)
      * CASE WHEN val = 200 THEN 0 ELSE 1 END
FROM myTable
ORDER BY val DESC;
There is currently no way to exclude a value inside DENSE_RANK itself unless you filter it out in the WHERE clause; that is why you get the result below:
RANK ID VALUE
1 5 250
0 2 200
0 3 200
4 1 100
5 6 1
You will need to filter once and then do a union all
-- Inline sample rows.
;WITH cte (id, val) AS
(
    SELECT 1, 100 UNION ALL
    SELECT 2, 200 UNION ALL
    SELECT 3, 200 UNION ALL
    SELECT 5, 250 UNION ALL
    SELECT 6, 1
)
-- Rank only the rows that are not 200 ...
SELECT id, val, DENSE_RANK() OVER (ORDER BY val DESC) AS [rank]
FROM cte
WHERE val <> 200
UNION ALL
-- ... and append the 200 rows with a fixed rank of 0. Both branches list the columns in the
-- same order (id, val, rank); UNION ALL matches columns by position, not by name.
SELECT id, val, 0
FROM cte
WHERE val = 200
ORDER BY val DESC, id;
You could split the ranking in to separate queries for the values you want to include/exclude from the ranking and UNION ALL the results like so:
Standalone executable example:
-- Standalone demo: build a temp table, rank with the 200 rows split off, then clean up.
CREATE TABLE #temp
(
    [ID]    INT,
    [value] INT
);

INSERT INTO #temp ([ID], [value])
VALUES (1, 100), (2, 200), (3, 200), (5, 250), (6, 1);

SELECT *
FROM (
        -- rows equal to 200 get the fixed rank 0
        SELECT 0 RANK, ID, value
        FROM #temp
        WHERE value = 200
        UNION ALL
        -- every other row is ranked among the non-200 rows only
        SELECT DENSE_RANK() OVER (ORDER BY value DESC) AS RANK, ID, value
        FROM #temp
        WHERE value <> 200
     ) t
ORDER BY value DESC, t.ID

DROP TABLE #temp
Produces:
RANK ID value
1 5 250
0 2 200
0 3 200
2 1 100
3 6 1
You can modify the ordering at the end of the statement if required, I set it to produce your desired results.
You can also try this, too:
-- Rank only the rows whose value is not 200, then join back to the full table;
-- rows without a match (the 200 rows) fall back to rank 0.
;WITH K AS
(
    SELECT id
         , DENSE_RANK() OVER (ORDER BY value DESC) AS R
    FROM dbo.tbl1
    WHERE value <> 200
)
SELECT ISNULL(K.R, 0) AS Rank
     , t.id
     , t.value
FROM tbl1 AS t
LEFT JOIN K
    ON K.id = t.id
ORDER BY t.value DESC
The solution in the original question was actually pretty close. Just adding a partition clause to the dense_rank can do the trick.
-- The PARTITION BY splits the rows into "is 200" and "is not 200", so the dense rank of the
-- remaining rows is computed without the 200 rows; the outer CASE shows 0 for the 200 rows.
SELECT
    CASE VALUE
        WHEN 200 THEN 0
        ELSE DENSE_RANK() OVER (
                 PARTITION BY CASE VALUE WHEN 200 THEN 0 ELSE 1 END
                 ORDER BY VALUE DESC
             )
    END AS RANK
  , ID
  , VALUE
FROM #table
ORDER BY VALUE DESC;
The 'partition by' creates separate groups for the dense_rank such that the order is performed on these groups individually. This essentially means you create two ranks at the same time, one for the group without the 200 value and one for the group with only the 200 value. The latter one to be set to 0 in the 'case when'.
Standalone executable example:
-- Standalone demo. A table variable has to be named with the @ sigil
-- (DECLARE #table TABLE does not parse).
DECLARE @table TABLE
(
    ID INT NOT NULL PRIMARY KEY
   ,VALUE INT NULL
);

-- The rows are distinct, so UNION ALL is enough (no de-duplication pass needed).
INSERT INTO @table (ID, VALUE)
SELECT 1, 100
UNION ALL SELECT 2, 200
UNION ALL SELECT 3, 200
UNION ALL SELECT 5, 250
UNION ALL SELECT 6, 1;

-- Rank the non-200 rows in their own partition; rows equal to 200 are shown with rank 0.
SELECT CASE
           WHEN VALUE = 200 THEN 0
           ELSE DENSE_RANK() OVER (
                    PARTITION BY CASE WHEN VALUE = 200 THEN 0 ELSE 1 END
                    ORDER BY VALUE DESC
                )
       END AS RANK
      ,ID
      ,VALUE
FROM @table
ORDER BY VALUE DESC;
RANK ID VALUE
1 5 250
0 2 200
0 3 200
2 1 100
3 6 1

sql : get consecutive group 'n' rows (could be inbetween)

Below is my theater table:
-- Eight seats; seat 4 (seatno 400) is the only unavailable one.
CREATE TABLE theater (
    srno      integer,
    seatno    integer,
    available boolean
);

INSERT INTO theater (srno, seatno, available)
VALUES
      (1, 100, true)
    , (2, 200, true)
    , (3, 300, true)
    , (4, 400, false)
    , (5, 500, true)
    , (6, 600, true)
    , (7, 700, true)
    , (8, 800, true);
I want a sql which should take input as 'n' and returns me the first 'n' consecutive available seats, like
if n = 2 output should be 100,200
if n = 4 output should be 500,600,700,800
NOTE: I am trying to build an query for postgres 9.3
In SQL-Server you can do It in following:
-- Number of consecutive free seats wanted. Scalar variables take the @ sigil.
DECLARE @num INT = 4;

;WITH free_seat AS
(
    -- cnt = how many unavailable seats precede this one; it is the same for every seat
    -- of one uninterrupted run of free seats, so it identifies the run.
    SELECT tt.srno
         , tt.seatno
         , tt.available
         , (SELECT COUNT(*)
            FROM theater AS t
            WHERE t.available <> 'true'
              AND t.srno < tt.srno) AS cnt
    FROM theater AS tt
    WHERE tt.available = 'true'
),
cte AS
(
    -- pt = size of the run the seat belongs to
    SELECT srno, seatno, available
         , COUNT(*) OVER (PARTITION BY cnt) AS pt
    FROM free_seat
)
-- TOP without ORDER BY returns arbitrary rows; ordering by srno makes it the first @num
-- seats of the first run that is long enough.
SELECT TOP (@num) srno, seatno, available
FROM cte
WHERE pt >= @num
ORDER BY srno;
OUTPUT
srno seatno available
5 500 true
6 600 true
7 700 true
8 800 true
This will find the available seats. written for sqlserver 2008+:
-- Number of consecutive free seats wanted. Scalar variables take the @ sigil.
DECLARE @num INT = 4;

;WITH CTE AS
(
    -- srno minus its position among seats of the same availability stays constant inside
    -- one uninterrupted run (assumes srno increases by 1 from seat to seat - TODO confirm).
    SELECT
        srno - ROW_NUMBER() OVER (PARTITION BY available ORDER BY srno) AS grp,
        srno, seatno, available
    FROM theater
),
CTE2 AS
(
    -- cnt = number of free seats in the run
    SELECT grp, COUNT(*) OVER (PARTITION BY grp) AS cnt,
           srno, seatno, available
    FROM CTE
    WHERE available = 'true'
)
SELECT TOP (@num)
    srno, seatno, available
FROM CTE2
WHERE cnt >= @num
ORDER BY srno;
Result:
srno seatno available
5 500 1
6 600 1
7 700 1
8 800 1
-- Island detection with plain NOT EXISTS / joins, no window functions.
-- Neighbouring seats are numbered 100 apart, hence the -100 / +100 offsets
-- and the division by 100 in the final count.
WITH island_start AS (
    -- an available seat with no available seat directly before it
    SELECT s.seatno
    FROM theater s
    WHERE s.available
      AND NOT EXISTS (
            SELECT *
            FROM theater p
            WHERE p.available
              AND p.seatno = s.seatno - 100
          )
)
, island_end AS (
    -- an available seat with no available seat directly after it
    SELECT s.seatno
    FROM theater s
    WHERE s.available
      AND NOT EXISTS (
            SELECT *
            FROM theater n
            WHERE n.available
              AND n.seatno = s.seatno + 100
          )
)
, island AS (
    -- pair a start with an end when no unavailable seat lies between them (bounds included)
    SELECT b.seatno AS bot
         , e.seatno AS top
    FROM island_start b
    JOIN island_end e
      ON e.seatno >= b.seatno
     AND NOT EXISTS (
            SELECT *
            FROM theater x
            WHERE NOT x.available
              AND x.seatno >= b.seatno
              AND x.seatno <= e.seatno
         )
)
-- every consecutive range with its seat count;
-- a caller looking for n seats would filter this on cnt >= n
SELECT bot
     , top
     , 1 + (top - bot) / 100 AS cnt
FROM island;
Result:
bot | top | cnt
-----+-----+-----
100 | 300 | 3
500 | 800 | 4
(2 rows)
thanks guys, but i have done achieved it like below,
-- First 2 consecutive available seats (the 2 appears twice: minimum run size and LIMIT).
with flagged as (
    -- group_flag is 1 on the first row and wherever availability differs from the previous row
    select srno, seatno, available,
           case
               when lag(available) over (order by srno) = available then null
               else 1
           end as group_flag
    from theater
),
grouped as (
    -- the running sum of the flags gives every uninterrupted run its own number
    select srno, seatno, available,
           sum(group_flag) over (order by srno) as grp
    from flagged
),
sized as (
    -- size of the run the seat belongs to; PARTITION BY counts per run, whereas
    -- count(..) over (order by grp) would be a cumulative count over all earlier runs too
    select srno, seatno, available,
           count(*) over (partition by grp) as grp1
    from grouped
)
select srno, seatno
from sized
where available = true
  and grp1 >= 2
order by srno   -- without this, LIMIT may return any 2 qualifying rows
limit 2;

SQL grouping intersecting/overlapping rows

I have the following table in Postgres that has overlapping data in the two columns a_sno and b_sno.
-- Pairs of connected values; every pair is stored in both directions.
CREATE TABLE data (
    a_sno integer NOT NULL,
    b_sno integer NOT NULL,
    PRIMARY KEY (a_sno, b_sno)
);

INSERT INTO data (a_sno, b_sno)
VALUES
    ( 4,  5), ( 5,  4),
    ( 5,  6), ( 6,  5),
    ( 6,  7), ( 7,  6),
    ( 9, 10), ( 9, 13),
    (10,  9), (13,  9),
    (10, 13), (13, 10),
    (10, 14), (14, 10),
    (13, 14), (14, 13),
    (11, 15), (15, 11);
As you can see from the first 6 rows, the data values 4, 5, 6 and 7 in the two columns intersect/overlap and need to be partitioned into one group. The same goes for rows 7-16 and rows 17-18, which will be labeled as groups 2 and 3 respectively.
The resulting output should look like this:
group | value
------+------
1 | 4
1 | 5
1 | 6
1 | 7
2 | 9
2 | 10
2 | 13
2 | 14
3 | 11
3 | 15
Assuming that all pairs exists in their mirrored combination as well (4,5) and (5,4). But the following solutions work without mirrored dupes just as well.
Simple case
All connections can be lined up in a single ascending sequence and complications like I added in the fiddle are not possible, we can use this solution without duplicates in the rCTE:
I start by getting minimum a_sno per group, with the minimum associated b_sno:
-- One row per group root: an a_sno that has a greater partner and is never itself the
-- greater end of a pair. Keeps the smallest partner and numbers the roots in ascending order.
SELECT row_number() OVER (ORDER BY d.a_sno) AS grp
     , d.a_sno
     , min(d.b_sno) AS b_sno
FROM data AS d
WHERE d.a_sno < d.b_sno
  AND NOT EXISTS (
        SELECT 1
        FROM data AS p
        WHERE p.b_sno = d.a_sno
          AND p.a_sno < p.b_sno
      )
GROUP BY d.a_sno;
This only needs a single query level since a window function can be built on an aggregate:
Get the distinct sum of a joined table column
Result:
grp a_sno b_sno
1 4 5
2 9 10
3 11 15
I avoid branches and duplicated (multiplicated) rows - potentially much more expensive with long chains. I use ORDER BY b_sno LIMIT 1 in a correlated subquery to make this fly in a recursive CTE.
Create a unique index on a non-unique column
Key to performance is a matching index, which is already present provided by the PK constraint PRIMARY KEY (a_sno,b_sno): not the other way round (b_sno, a_sno):
Is a composite index also good for queries on the first field?
-- Single-chain variant: t picks one root pair per group, cte then follows the chain one hop
-- at a time, always taking the smallest greater partner of the current value.
-- t: every a_sno that has a greater b_sno and is never the greater end (b_sno) of such a pair,
--    with its smallest b_sno; grp numbers these roots in ascending a_sno order.
WITH RECURSIVE t AS (
SELECT row_number() OVER (ORDER BY d.a_sno) AS grp
, a_sno, min(b_sno) AS b_sno -- the smallest one
FROM data d
WHERE a_sno < b_sno
AND NOT EXISTS (
SELECT 1 FROM data
WHERE b_sno = d.a_sno
AND a_sno < b_sno
)
GROUP BY a_sno
)
-- cte: starts at each root's first partner; every step looks up the next value in the chain.
, cte AS (
SELECT grp, b_sno AS sno FROM t
UNION ALL
SELECT c.grp
-- scalar subquery: yields NULL when the current value has no greater partner
, (SELECT b_sno -- correlated subquery
FROM data
WHERE a_sno = c.sno
AND a_sno < b_sno
ORDER BY b_sno
LIMIT 1)
FROM cte c
-- a NULL sno marks the end of a chain, so that row is not expanded further
WHERE c.sno IS NOT NULL
)
SELECT * FROM cte
WHERE sno IS NOT NULL -- eliminate row with NULL
UNION ALL -- no duplicates
-- add the root value of every group
SELECT grp, a_sno FROM t
ORDER BY grp, sno;
Less simple case
All nodes can be reached in ascending order with one or more branches from the root (smallest sno).
This time, get all greater sno and de-duplicate nodes that may be visited multiple times with UNION at the end:
-- Branching variant: start from every pair of each group root and follow all ascending
-- connections; values reached more than once are collapsed by the final UNION.
WITH RECURSIVE t AS (
    -- all rows of each root a_sno (a value that is never the greater end of a pair).
    -- dense_rank keeps the group numbers gap-free when a root has several rows;
    -- rank() would skip numbers after such a root (e.g. 1, 2, 2, 4).
    SELECT dense_rank() OVER (ORDER BY d.a_sno) AS grp
         , d.a_sno
         , d.b_sno
    FROM data d
    WHERE d.a_sno < d.b_sno
      AND NOT EXISTS (
            SELECT 1
            FROM data p
            WHERE p.b_sno = d.a_sno
              AND p.a_sno < p.b_sno
          )
)
, cte AS (
    SELECT grp, b_sno AS sno FROM t
    UNION ALL
    -- join to all greater values connected to the current one
    SELECT c.grp, d.b_sno
    FROM cte c
    JOIN data d
      ON d.a_sno = c.sno
     AND d.a_sno < d.b_sno
)
SELECT grp, sno FROM cte
UNION                        -- eliminate duplicates
SELECT grp, a_sno FROM t     -- add the root values
ORDER BY grp, sno;
Unlike the first solution, we don't get a last row with NULL here (caused by the correlated subquery).
Both should perform very well - especially with long chains / many branches. Result as desired:
SQL Fiddle (with added rows to demonstrate difficulty).
Undirected graph
If there are local minima that cannot be reached from the root with ascending traversal, the above solutions won't work. Consider Farhęg's solution in this case.
I want to say another way, it may be useful, you can do it in 2 steps:
1. take the max(sno) per each group:
-- Seed values: every a_sno that no row with a larger a_sno points to through b_sno,
-- numbered in ascending order.
SELECT q.sno
     , row_number() OVER (ORDER BY q.sno) AS gn
FROM (
        SELECT DISTINCT d.a_sno AS sno
        FROM data d
        WHERE NOT EXISTS (
                SELECT n.b_sno
                FROM data n
                WHERE n.b_sno = d.a_sno
                  AND n.a_sno > d.a_sno
              )
     ) q
result:
sno gn
7 1
14 2
15 3
2. use a recursive cte to find all related members in groups:
-- Expands every seed value into its whole group.
-- Columns: sno = current value, gn = group number of the seed, path = values visited so far,
-- cycle = true when the current value was already on the path.
with recursive cte(sno,gn,path,cycle)as(
-- seeds: every a_sno that no row with a larger a_sno points to through b_sno
select q.sno,
row_number() over(order by q.sno) gn,
array[q.sno],false
from(
select distinct d.a_sno sno
from data d
where not exists (
select b_sno
from data
where b_sno=d.a_sno
and a_sno>d.a_sno
)
)q
union all
-- step: move to every a_sno that lists the current value as its b_sno
select d.a_sno,c.gn,
d.a_sno || c.path,
d.a_sno=any(c.path)
from data d
join cte c on d.b_sno=c.sno
-- a row flagged as a revisit is kept in the result but not expanded any further
where not cycle
)
-- values can be reached along several paths; DISTINCT leaves one row per group and value
select distinct gn,sno from cte
order by gn,sno
Result:
gn sno
1 4
1 5
1 6
1 7
2 9
2 10
2 13
2 14
3 11
3 15
here is the demo of what I did.
Here is a start that may give some ideas on an approach. The recursive query starts with a_sno of each record and then tries to follow the path of b_sno until it reaches the end or forms a cycle. The path is represented by an array of sno integers.
The unnest function will break the array into rows, so a sno value mapped to the path array such as:
4, {6, 5, 4}
will be transformed to a row for each value in the array:
4, 6
4, 5
4, 4
The array_agg then reverses the operation by aggregating the values back into a path, but getting rid of the duplicates and ordering.
Now each a_sno is associated with a path and the path forms the grouping. dense_rank can be used to map the grouping (cluster) to a numeric.
-- x walks from every a_sno along a_sno -> b_sno, recording the visited values in path and
-- flagging a revisit in cycle; a flagged row is not expanded further.
WITH RECURSIVE x(sno, path, cycle) AS (
    SELECT a_sno, ARRAY[a_sno], false
    FROM data
    UNION ALL
    SELECT d.b_sno, x.path || d.b_sno, d.b_sno = ANY(x.path)
    FROM x
    JOIN data d ON d.a_sno = x.sno
    WHERE NOT x.cycle
)
-- one row per (sno, value on one of its paths); the de-duplicated, sorted values per sno
-- form the cluster array
SELECT array_agg(DISTINCT y.map ORDER BY y.map) AS cluster
     , y.sno
FROM (
        SELECT sno, unnest(path) AS map
        FROM x
        ORDER BY sno
     ) y
GROUP BY y.sno
ORDER BY cluster, sno
Output:
cluster | sno
--------------+-----
{4,5,6,7} | 4
{4,5,6,7} | 5
{4,5,6,7} | 6
{4,5,6,7} | 7
{9,10,13,14} | 9
{9,10,13,14} | 10
{9,10,13,14} | 13
{9,10,13,14} | 14
{11,15} | 11
{11,15} | 15
(10 rows)
Wrap it one more time for the ranking:
-- x walks from every a_sno along a_sno -> b_sno, recording the visited values in path and
-- flagging a revisit in cycle; a flagged row is not expanded further.
WITH RECURSIVE x(sno, path, cycle) AS (
    SELECT a_sno, ARRAY[a_sno], false
    FROM data
    UNION ALL
    SELECT d.b_sno, x.path || d.b_sno, d.b_sno = ANY(x.path)
    FROM x
    JOIN data d ON d.a_sno = x.sno
    WHERE NOT x.cycle
)
-- y: one row per (sno, value on one of its paths)
, y AS (
    SELECT sno, unnest(path) AS map
    FROM x
)
-- z: the de-duplicated, sorted values per sno form its cluster
, z AS (
    SELECT array_agg(DISTINCT map ORDER BY map) AS cluster
         , sno
    FROM y
    GROUP BY sno
)
-- identical cluster arrays share one rank
SELECT dense_rank() OVER (ORDER BY cluster) AS rank
     , sno
FROM z
-- an ORDER BY inside a derived table does not fix the order of the outer result;
-- sort explicitly so the rows come back grouped as shown
ORDER BY rank, sno
Output:
rank | sno
------+-----
1 | 4
1 | 5
1 | 6
1 | 7
2 | 9
2 | 10
2 | 13
2 | 14
3 | 11
3 | 15
(10 rows)

create range using with cte in sql

I have a table with only single column which have values like as follow
-- "declare #numberrange table" is not valid T-SQL: a table variable needs an @ name.
-- A local temporary table keeps the #numberrange name valid for statements that read from it.
create table #numberrange
(
    num int
);

insert into #numberrange (num)
values (24),
       (29),
       (34),
       (39),
       (44),
       (49),
       (54),
       (59),
       (64),
       (69),
       (74),
       (99);
Now i want to show the result like as follow
24 24-29
29 30-34
34 35-39
39 40-44
44 45-49
49 50-54
54 55-59
59 60-64
64 65-69
69 70-74
74 75-99
99
i have tried with sql cte function but not able to get the desired result.
You can use this CTE query. The query builds the range from the last record so that the last record has empty range and the first one has a range.
-- Pairs every number with its successor (ascending order) and prints the range between them.
-- A recursive CTE is not needed for this, and "select top 1 ... order by ..." cannot be
-- followed by UNION ALL inside a CTE, so the rows are numbered once and self-joined instead.
;with ordered as
(
    select num,
           row_number() over (order by num) as sortorder
    from #numberrange
)
select cur.num,
       -- the first range starts at the number itself, every later one just after it
       -- (24-29, 30-34, ...); the last number has no successor, so nxt.num is NULL and
       -- the concatenation yields a NULL range
       convert(varchar(10), case when cur.sortorder = 1 then cur.num else cur.num + 1 end)
           + '-'
           + convert(varchar(10), nxt.num) as rangevalue
from ordered as cur
left join ordered as nxt
    on nxt.sortorder = cur.sortorder + 1
order by cur.num